Port folder renaming changes from AOM
Manually cherry-picked commits:
ceef058 libvpx->libaom part2
3d26d91 libvpx -> libaom
cfea7dd vp10/ -> av1/
3a8eff7 Fix a build issue for a test
bf4202e Rename vpx to aom
Change-Id: I1b0eb5a40796e3aaf41c58984b4229a439a597dc
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
new file mode 100644
index 0000000..b6ff12a
--- /dev/null
+++ b/av1/common/alloccommon.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "aom_mem/vpx_mem.h"
+
+#include "av1/common/alloccommon.h"
+#include "av1/common/blockd.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/entropymv.h"
+#include "av1/common/onyxc_int.h"
+
+void vp10_set_mb_mi(VP10_COMMON *cm, int width, int height) {
+ const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
+ const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
+ // Derive cm's mi_* and mb_* frame dimensions from the width/height arguments.
+ cm->mi_cols = aligned_width >> MI_SIZE_LOG2;
+ cm->mi_rows = aligned_height >> MI_SIZE_LOG2;
+ cm->mi_stride = calc_mi_size(cm->mi_cols);
+ // The mb_* counts are the mi_* counts halved, rounding up.
+ cm->mb_cols = (cm->mi_cols + 1) >> 1;
+ cm->mb_rows = (cm->mi_rows + 1) >> 1;
+ cm->MBs = cm->mb_rows * cm->mb_cols;
+}
+
+static int alloc_seg_map(VP10_COMMON *cm, int seg_map_size) {
+ int i;
+ // Allocate NUM_PING_PONG_BUFFERS maps of seg_map_size bytes via vpx_calloc.
+ for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
+ cm->seg_map_array[i] = (uint8_t *)vpx_calloc(seg_map_size, 1);
+ if (cm->seg_map_array[i] == NULL) return 1; // earlier maps stay allocated
+ }
+ cm->seg_map_alloc_size = seg_map_size;
+
+ // Init the index.
+ cm->seg_map_idx = 0;
+ cm->prev_seg_map_idx = 1;
+ // Point the current/last aliases at their slots in seg_map_array.
+ cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
+ if (!cm->frame_parallel_decode)
+ cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
+ // Returns 0 on success; the early return above yields 1 on failure.
+ return 0;
+}
+
+static void free_seg_map(VP10_COMMON *cm) {
+  int i;
+  // Frees both ping-pong segmentation maps and clears the aliases of them.
+  for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) {
+    vpx_free(cm->seg_map_array[i]);
+    cm->seg_map_array[i] = NULL;
+  }
+  // Reset the capacity so a later alloc is not skipped with the maps NULL.
+  cm->seg_map_alloc_size = 0;
+  cm->current_frame_seg_map = NULL;
+  if (!cm->frame_parallel_decode) {
+    cm->last_frame_seg_map = NULL;
+  }
+}
+
+void vp10_free_ref_frame_buffers(BufferPool *pool) {
+ int i;
+ // Release the storage held by every entry of pool->frame_bufs.
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
+ if (pool->frame_bufs[i].ref_count > 0 &&
+ pool->frame_bufs[i].raw_frame_buffer.data != NULL) {
+ pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer);
+ pool->frame_bufs[i].ref_count = 0; // handed back via the pool's callback
+ }
+ vpx_free(pool->frame_bufs[i].mvs);
+ pool->frame_bufs[i].mvs = NULL;
+ vpx_free_frame_buffer(&pool->frame_bufs[i].buf);
+ }
+}
+
+#if CONFIG_LOOP_RESTORATION
+void vp10_free_restoration_buffers(VP10_COMMON *cm) { // frees cm->rst_info arrays
+ vpx_free(cm->rst_info.bilateral_level);
+ cm->rst_info.bilateral_level = NULL; // each pointer is NULLed after its free
+ vpx_free(cm->rst_info.vfilter);
+ cm->rst_info.vfilter = NULL;
+ vpx_free(cm->rst_info.hfilter);
+ cm->rst_info.hfilter = NULL;
+ vpx_free(cm->rst_info.wiener_level);
+ cm->rst_info.wiener_level = NULL;
+}
+#endif // CONFIG_LOOP_RESTORATION
+
+void vp10_free_context_buffers(VP10_COMMON *cm) { // frees mi, seg-map, above ctx
+ int i; // plane index
+ cm->free_mi(cm); // mi storage is released through the cm->free_mi hook
+ free_seg_map(cm);
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ vpx_free(cm->above_context[i]);
+ cm->above_context[i] = NULL;
+ }
+ vpx_free(cm->above_seg_context);
+ cm->above_seg_context = NULL;
+#if CONFIG_VAR_TX
+ vpx_free(cm->above_txfm_context);
+ cm->above_txfm_context = NULL;
+#endif
+} // NOTE(review): cm->above_context_alloc_cols is left unchanged by this function.
+
+int vp10_alloc_context_buffers(VP10_COMMON *cm, int width, int height) {
+  int new_mi_size;
+  // Grows cm's context buffers to fit a width x height frame. Returns 0 on
+  // success; on allocation failure frees them all and returns 1.
+  vp10_set_mb_mi(cm, width, height);
+  new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
+  if (cm->mi_alloc_size < new_mi_size) {
+    cm->free_mi(cm);
+    if (cm->alloc_mi(cm, new_mi_size)) goto fail;
+  }
+
+  if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) {
+    // Create the segmentation map structure and set to 0.
+    free_seg_map(cm);
+    if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) goto fail;
+  }
+
+  if (cm->above_context_alloc_cols < cm->mi_cols) {
+    // TODO(geza.lore): These are bigger than they need to be.
+    // cm->tile_width would be enough but it complicates indexing a
+    // little elsewhere.
+    const int aligned_mi_cols =
+        ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+    int i;
+    for (i = 0; i < MAX_MB_PLANE; i++) {
+      vpx_free(cm->above_context[i]);
+      cm->above_context[i] = (ENTROPY_CONTEXT *)vpx_calloc(
+          2 * aligned_mi_cols, sizeof(*cm->above_context[0]));
+      if (!cm->above_context[i]) goto fail;
+    }
+
+    vpx_free(cm->above_seg_context);
+    cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc(
+        aligned_mi_cols, sizeof(*cm->above_seg_context));
+    if (!cm->above_seg_context) goto fail;
+#if CONFIG_VAR_TX
+    vpx_free(cm->above_txfm_context);
+    cm->above_txfm_context = (TXFM_CONTEXT *)vpx_calloc(
+        aligned_mi_cols, sizeof(*cm->above_txfm_context));
+    if (!cm->above_txfm_context) goto fail;
+#endif
+    cm->above_context_alloc_cols = aligned_mi_cols;
+  }
+
+  return 0;
+fail:
+  // Clear the mi_* values and the recorded capacities so that the next call
+  // reallocates instead of trusting sizes of buffers that were just freed.
+  vp10_set_mb_mi(cm, 0, 0);
+  vp10_free_context_buffers(cm);
+  cm->seg_map_alloc_size = 0;
+  cm->above_context_alloc_cols = 0;
+  return 1;
+}
+
+void vp10_remove_common(VP10_COMMON *cm) {
+ vp10_free_context_buffers(cm);
+ // Then free the cm->fc and cm->frame_contexts allocations and NULL them.
+ vpx_free(cm->fc);
+ cm->fc = NULL;
+ vpx_free(cm->frame_contexts);
+ cm->frame_contexts = NULL;
+}
+
+void vp10_init_context_buffers(VP10_COMMON *cm) {
+ cm->setup_mi(cm); // mi setup is delegated to the cm->setup_mi hook
+ if (cm->last_frame_seg_map && !cm->frame_parallel_decode)
+ memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); // zero the map
+}
+
+void vp10_swap_current_and_last_seg_map(VP10_COMMON *cm) {
+ // Swap indices.
+ const int tmp = cm->seg_map_idx;
+ cm->seg_map_idx = cm->prev_seg_map_idx;
+ cm->prev_seg_map_idx = tmp;
+ // Re-derive both aliases; no frame_parallel_decode check is made here.
+ cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx];
+ cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx];
+}
diff --git a/av1/common/alloccommon.h b/av1/common/alloccommon.h
new file mode 100644
index 0000000..d2d2643
--- /dev/null
+++ b/av1/common/alloccommon.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ALLOCCOMMON_H_
+#define VP10_COMMON_ALLOCCOMMON_H_
+
+#define INVALID_IDX -1 // Invalid buffer index. NOTE(review): unparenthesized.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10Common;
+struct BufferPool;
+// Frees the context buffers plus cm->fc and cm->frame_contexts.
+void vp10_remove_common(struct VP10Common *cm);
+// The alloc call returns 0 on success and 1 on allocation failure.
+int vp10_alloc_context_buffers(struct VP10Common *cm, int width, int height);
+void vp10_init_context_buffers(struct VP10Common *cm);
+void vp10_free_context_buffers(struct VP10Common *cm);
+
+void vp10_free_ref_frame_buffers(struct BufferPool *pool);
+#if CONFIG_LOOP_RESTORATION
+void vp10_free_restoration_buffers(struct VP10Common *cm);
+#endif // CONFIG_LOOP_RESTORATION
+// NOTE(review): alloccommon.c in this patch defines neither function below.
+int vp10_alloc_state_buffers(struct VP10Common *cm, int width, int height);
+void vp10_free_state_buffers(struct VP10Common *cm);
+// Sets cm's mi_* and mb_* dimensions from width and height.
+void vp10_set_mb_mi(struct VP10Common *cm, int width, int height);
+
+void vp10_swap_current_and_last_seg_map(struct VP10Common *cm);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_ALLOCCOMMON_H_
diff --git a/av1/common/ans.h b/av1/common/ans.h
new file mode 100644
index 0000000..c974ada
--- /dev/null
+++ b/av1/common/ans.h
@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ANS_H_
+#define VP10_COMMON_ANS_H_
+// An implementation of Asymmetric Numeral Systems
+// http://arxiv.org/abs/1311.2540v2
+
+#include <assert.h>
+#include "./vpx_config.h"
+#include "aom/vpx_integer.h"
+#include "aom_dsp/prob.h"
+#include "aom_ports/mem_ops.h"
+
+#define ANS_DIVIDE_BY_MULTIPLY 1  // 1: divide via fastdiv(); 0: use / and %
+#if ANS_DIVIDE_BY_MULTIPLY  // NB: both ANS_DIVREM forms reuse their arguments
+#include "av1/common/divide.h"
+#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
+  do {                                                     \
+    (quotient) = fastdiv((dividend), (divisor));           \
+    (remainder) = (dividend) - (quotient) * (divisor);     \
+  } while (0)
+#define ANS_DIV(dividend, divisor) fastdiv((dividend), (divisor))
+#else
+#define ANS_DIVREM(quotient, remainder, dividend, divisor) \
+  do {                                                     \
+    (quotient) = (dividend) / (divisor);                   \
+    (remainder) = (dividend) % (divisor);                  \
+  } while (0)
+#define ANS_DIV(dividend, divisor) ((dividend) / (divisor))
+#endif  // ANS_DIVIDE_BY_MULTIPLY
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+struct AnsCoder {
+ uint8_t *buf; // output bytes, set by ans_write_init()
+ int buf_offset; // index in buf of the next byte to write
+ uint32_t state; // current ANS state; ans_write_init() starts it at l_base
+};
+
+struct AnsDecoder {
+ const uint8_t *buf; // input bytes, set by ans_read_init()
+ int buf_offset; // one past the next byte to read; readers pre-decrement it
+ uint32_t state; // current ANS state
+};
+
+typedef uint8_t AnsP8; // probability expressed out of ans_p8_precision
+#define ans_p8_precision 256u
+#define ans_p8_shift 8 // log2(ans_p8_precision)
+typedef uint16_t AnsP10; // probability expressed out of ans_p10_precision
+#define ans_p10_precision 1024u
+
+#define rans_precision ans_p10_precision
+
+#define l_base (ans_p10_precision * 4) // l_base % precision must be 0
+#define io_base 256 // the coders move the state in and out one byte at a time
+// Range I = { l_base, l_base + 1, ..., l_base * io_base - 1 }
+
+static INLINE void ans_write_init(struct AnsCoder *const ans,
+ uint8_t *const buf) {
+ ans->buf = buf; // output goes to the caller-supplied buffer
+ ans->buf_offset = 0; // writing starts at the beginning of buf
+ ans->state = l_base; // lowest value of the valid state range
+}
+
+static INLINE int ans_write_end(struct AnsCoder *const ans) {
+ uint32_t state; // final state with l_base removed; returns the end offset
+ assert(ans->state >= l_base);
+ assert(ans->state < l_base * io_base);
+ state = ans->state - l_base;
+ if (state < (1 << 6)) { // 1 byte: tag 0 in the top 2 bits + 6 state bits
+ ans->buf[ans->buf_offset] = (0x00 << 6) + state;
+ return ans->buf_offset + 1;
+ } else if (state < (1 << 14)) { // 2 bytes: tag 1 + 14 state bits
+ mem_put_le16(ans->buf + ans->buf_offset, (0x01 << 14) + state);
+ return ans->buf_offset + 2;
+ } else if (state < (1 << 22)) { // 3 bytes: tag 2 + 22 state bits
+ mem_put_le24(ans->buf + ans->buf_offset, (0x02 << 22) + state);
+ return ans->buf_offset + 3;
+ } else {
+ assert(0 && "State is too large to be serialized");
+ return ans->buf_offset; // the state is not written in this case
+ }
+}
+
+// rABS with descending spread: encodes one bit, val; p0 is the weight of 0
+// p or p0 takes the place of l_s from the paper
+// ans_p8_precision is m
+static INLINE void rabs_desc_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+ const AnsP8 p = ans_p8_precision - p0; // weight of a nonzero val
+ const unsigned l_s = val ? p : p0;
+ unsigned quot, rem;
+ if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+ ans->buf[ans->buf_offset++] = ans->state % io_base; // emit the low byte
+ ans->state /= io_base;
+ }
+ ANS_DIVREM(quot, rem, ans->state, l_s);
+ ans->state = quot * ans_p8_precision + rem + (val ? 0 : p); // zeros sit above p
+}
+
+#define ANS_IMPL1 0 // 1 selects the alternate rABS read paths below
+#define UNPREDICTABLE(x) x // identity wrapper around branch conditions
+static INLINE int rabs_desc_read(struct AnsDecoder *ans, AnsP8 p0) {
+  int val;  // decoded bit; inverse of rabs_desc_write() for the same p0
+#if ANS_IMPL1
+  unsigned l_s;
+#else
+  unsigned quot, rem, x, xn;
+#endif
+  const AnsP8 p = ans_p8_precision - p0;
+  if (ans->state < l_base && ans->buf_offset > 0) {  // never read before buf[0]
+    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+  }
+#if ANS_IMPL1
+  val = ans->state % ans_p8_precision < p;
+  l_s = val ? p : p0;
+  ans->state = (ans->state / ans_p8_precision) * l_s +
+               ans->state % ans_p8_precision - (!val * p);
+#else
+  x = ans->state;
+  quot = x / ans_p8_precision;
+  rem = x % ans_p8_precision;
+  xn = quot * p;
+  val = rem < p;  // ones were written with remainders below p
+  if (UNPREDICTABLE(val)) {
+    ans->state = xn + rem;
+  } else {
+    // ans->state = quot * p0 + rem - p;
+    ans->state = x - xn - p;
+  }
+#endif
+  return val;
+}
+
+// rABS with ascending spread: encodes one bit, val; p0 is the weight of 0
+// p or p0 takes the place of l_s from the paper
+// ans_p8_precision is m
+static INLINE void rabs_asc_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+ const AnsP8 p = ans_p8_precision - p0; // weight of a nonzero val
+ const unsigned l_s = val ? p : p0;
+ unsigned quot, rem;
+ if (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+ ans->buf[ans->buf_offset++] = ans->state % io_base; // emit the low byte
+ ans->state /= io_base;
+ }
+ ANS_DIVREM(quot, rem, ans->state, l_s);
+ ans->state = quot * ans_p8_precision + rem + (val ? p0 : 0); // ones sit above p0
+}
+
+static INLINE int rabs_asc_read(struct AnsDecoder *ans, AnsP8 p0) {
+  int val;  // decoded bit; inverse of rabs_asc_write() for the same p0
+#if ANS_IMPL1
+  unsigned l_s;
+#else
+  unsigned quot, rem, x, xn;
+#endif
+  const AnsP8 p = ans_p8_precision - p0;
+  if (ans->state < l_base && ans->buf_offset > 0) {  // never read before buf[0]
+    ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+  }
+#if ANS_IMPL1  // NOTE(review): same as the descending variant; verify before use
+  val = ans->state % ans_p8_precision < p;
+  l_s = val ? p : p0;
+  ans->state = (ans->state / ans_p8_precision) * l_s +
+               ans->state % ans_p8_precision - (!val * p);
+#else
+  x = ans->state;
+  quot = x / ans_p8_precision;
+  rem = x % ans_p8_precision;
+  xn = quot * p;
+  val = rem >= p0;  // ones were written with remainders in [p0, 256)
+  if (UNPREDICTABLE(val)) {
+    ans->state = xn + rem - p0;
+  } else {
+    // ans->state = quot * p0 + rem;
+    ans->state = x - xn;
+  }
+#endif
+  return val;
+}
+
+#define rabs_read rabs_desc_read // the unqualified names use the descending
+#define rabs_write rabs_desc_write // spread variants
+
+// uABS with normalization: encodes one bit, val; p0 is the weight of 0
+static INLINE void uabs_write(struct AnsCoder *ans, int val, AnsP8 p0) {
+ AnsP8 p = ans_p8_precision - p0;
+ const unsigned l_s = val ? p : p0;
+ while (ans->state >= l_base / ans_p8_precision * io_base * l_s) {
+ ans->buf[ans->buf_offset++] = ans->state % io_base; // flush the low byte
+ ans->state /= io_base;
+ }
+ if (!val)
+ ans->state = ANS_DIV(ans->state * ans_p8_precision, p0);
+ else // ceil((state + 1) * ans_p8_precision / p) - 1
+ ans->state = ANS_DIV((ans->state + 1) * ans_p8_precision + p - 1, p) - 1;
+}
+
+static INLINE int uabs_read(struct AnsDecoder *ans, AnsP8 p0) {
+ AnsP8 p = ans_p8_precision - p0;
+ int s; // decoded bit, returned to the caller
+ // unsigned int xp1;
+ unsigned xp, sp;
+ unsigned state = ans->state;
+ while (state < l_base && ans->buf_offset > 0) { // refill while bytes remain
+ state = state * io_base + ans->buf[--ans->buf_offset];
+ }
+ sp = state * p;
+ // xp1 = (sp + p) / ans_p8_precision;
+ xp = sp / ans_p8_precision;
+ // s = xp1 - xp;
+ s = (sp & 0xFF) >= p0; // the low 8 bits of state * p decide the bit
+ if (UNPREDICTABLE(s))
+ ans->state = xp;
+ else
+ ans->state = state - xp;
+ return s;
+}
+
+static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
+ int s; // the bit read: the low bit of the refilled state
+ unsigned state = ans->state;
+ while (state < l_base && ans->buf_offset > 0) { // refill while bytes remain
+ state = state * io_base + ans->buf[--ans->buf_offset];
+ }
+ s = (int)(state & 1);
+ ans->state = state >> 1; // the remaining bits become the new state
+ return s;
+}
+
+static INLINE int uabs_read_literal(struct AnsDecoder *ans, int bits) {
+ int literal = 0, bit;
+ assert(bits < 31);
+ // Reads `bits` bits with uabs_read_bit(), most significant bit first.
+ // TODO(aconverse): Investigate ways to read/write literals faster,
+ // e.g. 8-bit chunks.
+ for (bit = bits - 1; bit >= 0; bit--) literal |= uabs_read_bit(ans) << bit;
+ // The assembled value is returned as an int.
+ return literal;
+}
+
+// TODO(aconverse): Replace trees with tokensets.
+static INLINE int uabs_read_tree(struct AnsDecoder *ans,
+ const vpx_tree_index *tree,
+ const AnsP8 *probs) {
+ vpx_tree_index i = 0;
+ // Follow tree[i + bit] until a non-positive entry; node i uses probs[i >> 1].
+ while ((i = tree[i + uabs_read(ans, probs[i >> 1])]) > 0) continue;
+ // The terminating entry is returned negated.
+ return -i;
+}
+
+struct rans_sym { // what rans_write() needs to encode one symbol
+ AnsP10 prob; // used by rans_write() as the symbol's weight
+ AnsP10 cum_prob; // not-inclusive
+};
+
+struct rans_dec_sym { // filled in by fetch_sym()
+ uint8_t val; // decoded symbol index
+ AnsP10 prob; // cdf[i] - cdf[i - 1] for the matched entry
+ AnsP10 cum_prob; // not-inclusive
+};
+
+// This is now just a boring cdf. It starts with an explicit zero.
+// TODO(aconverse): Remove starting zero.
+typedef uint16_t rans_dec_lut[16]; // filled by rans_build_cdf_from_pdf()
+
+static INLINE void rans_build_cdf_from_pdf(const AnsP10 token_probs[],
+ rans_dec_lut cdf_tab) {
+ int i; // NOTE(review): the loop below does not bound i by the table size
+ cdf_tab[0] = 0;
+ for (i = 1; cdf_tab[i - 1] < rans_precision; ++i) { // running sum of the pdf
+ cdf_tab[i] = cdf_tab[i - 1] + token_probs[i - 1];
+ }
+ assert(cdf_tab[i - 1] == rans_precision); // the pdf must sum exactly to it
+}
+
+static INLINE int ans_find_largest(const AnsP10 *const pdf_tab, int num_syms) {
+ int largest_idx = -1; // returned unchanged when num_syms <= 0
+ int largest_p = -1;
+ int i;
+ for (i = 0; i < num_syms; ++i) {
+ int p = pdf_tab[i];
+ if (p > largest_p) { // strict compare: the first of equal maxima wins
+ largest_p = p;
+ largest_idx = i;
+ }
+ }
+ return largest_idx;
+}
+
+static INLINE void rans_merge_prob8_pdf(AnsP10 *const out_pdf,
+ const AnsP8 node_prob,
+ const AnsP10 *const src_pdf,
+ int in_syms) {
+ int i;
+ int adjustment = rans_precision; // what is still missing from the total
+ const int round_fact = ans_p8_precision >> 1;
+ const AnsP8 p1 = ans_p8_precision - node_prob;
+ const int out_syms = in_syms + 1;
+ assert(src_pdf != out_pdf);
+ // Entry 0 is node_prob rescaled from 8 to 10 bits; the rest is src_pdf * p1.
+ out_pdf[0] = node_prob << (10 - 8);
+ adjustment -= out_pdf[0];
+ for (i = 0; i < in_syms; ++i) {
+ int p = (p1 * src_pdf[i] + round_fact) >> ans_p8_shift;
+ p = VPXMIN(p, (int)rans_precision - in_syms);
+ p = VPXMAX(p, 1); // every output entry stays at least 1
+ out_pdf[i + 1] = p;
+ adjustment -= p;
+ }
+
+ // Adjust probabilities so they sum to the total probability
+ if (adjustment > 0) {
+ i = ans_find_largest(out_pdf, out_syms);
+ out_pdf[i] += adjustment;
+ } else {
+ while (adjustment < 0) { // take 1 at a time from the current largest
+ i = ans_find_largest(out_pdf, out_syms);
+ --out_pdf[i];
+ assert(out_pdf[i] > 0);
+ adjustment++;
+ }
+ }
+}
+
+// rANS with normalization
+// sym->prob takes the place of l_s from the paper
+// ans_p10_precision is m
+static INLINE void rans_write(struct AnsCoder *ans,
+ const struct rans_sym *const sym) {
+ const AnsP10 p = sym->prob;
+ while (ans->state >= l_base / rans_precision * io_base * p) {
+ ans->buf[ans->buf_offset++] = ans->state % io_base; // flush the low byte
+ ans->state /= io_base;
+ }
+ ans->state =
+ (ans->state / p) * rans_precision + ans->state % p + sym->cum_prob;
+}
+
+static INLINE void fetch_sym(struct rans_dec_sym *out, const rans_dec_lut cdf,
+ AnsP10 rem) {
+ int i = 0; // ends as the first index whose cdf entry exceeds rem
+ // TODO(skal): if critical, could be a binary search.
+ // Or, better, an O(1) alias-table.
+ while (rem >= cdf[i]) { // unbounded: assumes some entry > rem, TODO confirm
+ ++i;
+ }
+ out->val = i - 1;
+ out->prob = (AnsP10)(cdf[i] - cdf[i - 1]);
+ out->cum_prob = (AnsP10)cdf[i - 1];
+}
+
+static INLINE int rans_read(struct AnsDecoder *ans, const rans_dec_lut tab) {
+ unsigned rem;
+ unsigned quo;
+ struct rans_dec_sym sym; // filled by fetch_sym(); sym.val is returned
+ while (ans->state < l_base && ans->buf_offset > 0) { // refill while bytes remain
+ ans->state = ans->state * io_base + ans->buf[--ans->buf_offset];
+ }
+ quo = ans->state / rans_precision;
+ rem = ans->state % rans_precision; // selects the symbol in the cdf table
+ fetch_sym(&sym, tab, rem);
+ ans->state = quo * sym.prob + rem - sym.cum_prob;
+ return sym.val;
+}
+
+static INLINE int ans_read_init(struct AnsDecoder *const ans,
+ const uint8_t *const buf, int offset) {
+ unsigned x; // length tag: the top 2 bits of the last byte
+ if (offset < 1) return 1; // returns 1 on any rejected input, 0 on success
+ ans->buf = buf;
+ x = buf[offset - 1] >> 6;
+ if (x == 0) { // state stored in 1 byte
+ ans->buf_offset = offset - 1;
+ ans->state = buf[offset - 1] & 0x3F;
+ } else if (x == 1) { // state stored in 2 bytes
+ if (offset < 2) return 1;
+ ans->buf_offset = offset - 2;
+ ans->state = mem_get_le16(buf + offset - 2) & 0x3FFF;
+ } else if (x == 2) { // state stored in 3 bytes
+ if (offset < 3) return 1;
+ ans->buf_offset = offset - 3;
+ ans->state = mem_get_le24(buf + offset - 3) & 0x3FFFFF;
+ } else {
+ // x == 3 implies this byte is a superframe marker
+ return 1;
+ }
+ ans->state += l_base; // ans_write_end() stored the state minus l_base
+ if (ans->state >= l_base * io_base) return 1;
+ return 0;
+}
+
+static INLINE int ans_read_end(struct AnsDecoder *const ans) {
+ return ans->state == l_base; // l_base is the value ans_write_init() starts from
+}
+
+static INLINE int ans_reader_has_error(const struct AnsDecoder *const ans) {
+ return ans->state < l_base && ans->buf_offset == 0; // underfull with no input left
+}
+#undef ANS_DIVREM
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+#endif // VP10_COMMON_ANS_H_
diff --git a/av1/common/arm/neon/iht4x4_add_neon.c b/av1/common/arm/neon/iht4x4_add_neon.c
new file mode 100644
index 0000000..600e66b
--- /dev/null
+++ b/av1/common/arm/neon/iht4x4_add_neon.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "av1/common/common.h"
+
+static int16_t sinpi_1_9 = 0x14a3; // 5283; NOTE(review): none of these is const
+static int16_t sinpi_2_9 = 0x26c9; // 9929
+static int16_t sinpi_3_9 = 0x3441; // 13377
+static int16_t sinpi_4_9 = 0x3b6c; // 15212
+static int16_t cospi_8_64 = 0x3b21; // 15137
+static int16_t cospi_16_64 = 0x2d41; // 11585
+static int16_t cospi_24_64 = 0x187e; // 6270
+
+static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
+ int32x4_t q8s32, q9s32;
+ int16x4x2_t d0x2s16, d1x2s16;
+ int32x4x2_t q0x2s32;
+ // In-place transpose of the 4x4 int16 block held as two 8-lane vectors.
+ d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16));
+ d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16));
+ // Reinterpret as 32-bit lanes so that 16-bit pairs move together.
+ q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]));
+ q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]));
+ q0x2s32 = vtrnq_s32(q8s32, q9s32);
+ // Write the two result vectors back through the pointers.
+ *q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]);
+ *q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]);
+ return;
+}
+
+static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16,
+ int16x4_t *d2s16) {
+ *d0s16 = vdup_n_s16(cospi_8_64); // each output holds one constant in all lanes
+ *d1s16 = vdup_n_s16(cospi_16_64);
+ *d2s16 = vdup_n_s16(cospi_24_64);
+ return;
+}
+
+static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16,
+ int16x4_t *d5s16, int16x8_t *q3s16) {
+ *d3s16 = vdup_n_s16(sinpi_1_9); // each output holds one constant in all lanes
+ *d4s16 = vdup_n_s16(sinpi_2_9);
+ *q3s16 = vdupq_n_s16(sinpi_3_9); // the only 8-lane output
+ *d5s16 = vdup_n_s16(sinpi_4_9);
+ return;
+}
+
+static INLINE void IDCT4x4_1D(int16x4_t *d0s16, int16x4_t *d1s16,
+ int16x4_t *d2s16, int16x8_t *q8s16,
+ int16x8_t *q9s16) {
+ int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16;
+ int16x4_t d26s16, d27s16, d28s16, d29s16;
+ int32x4_t q10s32, q13s32, q14s32, q15s32;
+ int16x8_t q13s16, q14s16;
+ // One in-place pass over *q8s16 / *q9s16 using the constants in d0..d2.
+ d16s16 = vget_low_s16(*q8s16);
+ d17s16 = vget_high_s16(*q8s16);
+ d18s16 = vget_low_s16(*q9s16);
+ d19s16 = vget_high_s16(*q9s16);
+ // Sum and difference of the first and third input vectors.
+ d23s16 = vadd_s16(d16s16, d18s16);
+ d24s16 = vsub_s16(d16s16, d18s16);
+ // Widening multiplies (and multiply-accumulates) by the constants.
+ q15s32 = vmull_s16(d17s16, *d2s16);
+ q10s32 = vmull_s16(d17s16, *d0s16);
+ q13s32 = vmull_s16(d23s16, *d1s16);
+ q14s32 = vmull_s16(d24s16, *d1s16);
+ q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16);
+ q10s32 = vmlal_s16(q10s32, d19s16, *d2s16);
+ // Rounding shift right by 14, narrowing back to 16 bits with saturation.
+ d26s16 = vqrshrn_n_s32(q13s32, 14);
+ d27s16 = vqrshrn_n_s32(q14s32, 14);
+ d29s16 = vqrshrn_n_s32(q15s32, 14);
+ d28s16 = vqrshrn_n_s32(q10s32, 14);
+ // Final add/subtract; the halves of *q9s16 are then swapped.
+ q13s16 = vcombine_s16(d26s16, d27s16);
+ q14s16 = vcombine_s16(d28s16, d29s16);
+ *q8s16 = vaddq_s16(q13s16, q14s16);
+ *q9s16 = vsubq_s16(q13s16, q14s16);
+ *q9s16 = vcombine_s16(vget_high_s16(*q9s16), vget_low_s16(*q9s16)); // vswp
+ return;
+}
+
+static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16,
+ int16x4_t *d5s16, int16x8_t *q3s16,
+ int16x8_t *q8s16, int16x8_t *q9s16) {
+ int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16;
+ int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
+ // One in-place pass over *q8s16 / *q9s16 using the constants in d3..d5, q3.
+ d6s16 = vget_low_s16(*q3s16);
+ // Split the two 8-lane inputs into four 4-lane vectors.
+ d16s16 = vget_low_s16(*q8s16);
+ d17s16 = vget_high_s16(*q8s16);
+ d18s16 = vget_low_s16(*q9s16);
+ d19s16 = vget_high_s16(*q9s16);
+ // Widening products; q15s32 accumulates d16 + d19 - d18 in 32 bits.
+ q10s32 = vmull_s16(*d3s16, d16s16);
+ q11s32 = vmull_s16(*d4s16, d16s16);
+ q12s32 = vmull_s16(d6s16, d17s16);
+ q13s32 = vmull_s16(*d5s16, d18s16);
+ q14s32 = vmull_s16(*d3s16, d18s16);
+ q15s32 = vmovl_s16(d16s16);
+ q15s32 = vaddw_s16(q15s32, d19s16);
+ q8s32 = vmull_s16(*d4s16, d19s16);
+ q15s32 = vsubw_s16(q15s32, d18s16);
+ q9s32 = vmull_s16(*d5s16, d19s16);
+ // Combine the partial sums; q8s32 is reused for a 32-bit sinpi_3_9 vector.
+ q10s32 = vaddq_s32(q10s32, q13s32);
+ q10s32 = vaddq_s32(q10s32, q8s32);
+ q11s32 = vsubq_s32(q11s32, q14s32);
+ q8s32 = vdupq_n_s32(sinpi_3_9);
+ q11s32 = vsubq_s32(q11s32, q9s32);
+ q15s32 = vmulq_s32(q15s32, q8s32);
+ // Form the four 32-bit outputs (q13, q14, q15, q10).
+ q13s32 = vaddq_s32(q10s32, q12s32);
+ q10s32 = vaddq_s32(q10s32, q11s32);
+ q14s32 = vaddq_s32(q11s32, q12s32);
+ q10s32 = vsubq_s32(q10s32, q12s32);
+ // Rounding shift right by 14, narrowing back to 16 bits with saturation.
+ d16s16 = vqrshrn_n_s32(q13s32, 14);
+ d17s16 = vqrshrn_n_s32(q14s32, 14);
+ d18s16 = vqrshrn_n_s32(q15s32, 14);
+ d19s16 = vqrshrn_n_s32(q10s32, 14);
+ // Repack the four results into the two in/out vectors.
+ *q8s16 = vcombine_s16(d16s16, d17s16);
+ *q9s16 = vcombine_s16(d18s16, d19s16);
+ return;
+}
+
+void vp10_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
+ int dest_stride, int tx_type) {
+ uint8x8_t d26u8, d27u8;
+ int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16;
+ uint32x2_t d26u32, d27u32;
+ int16x8_t q3s16, q8s16, q9s16;
+ uint16x8_t q8u16, q9u16;
+ // Inverse 4x4 transform chosen by tx_type; the result is added into dest.
+ d26u32 = d27u32 = vdup_n_u32(0);
+ // Load the 16 coefficients as two 8-lane vectors.
+ q8s16 = vld1q_s16(input);
+ q9s16 = vld1q_s16(input + 8);
+
+ TRANSPOSE4X4(&q8s16, &q9s16);
+
+ switch (tx_type) {
+ case 0: // idct_idct is not supported. Fall back to C
+ vp10_iht4x4_16_add_c(input, dest, dest_stride, tx_type);
+ return;
+ break; // unreachable after the return above
+ case 1: // iadst_idct
+ // generate constants
+ GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
+ GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
+
+ // first transform rows
+ IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
+
+ // transpose the matrix
+ TRANSPOSE4X4(&q8s16, &q9s16);
+
+ // then transform columns
+ IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
+ break;
+ case 2: // idct_iadst
+ // generate constants
+ GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
+ GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
+
+ // first transform rows
+ IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
+
+ // transpose the matrix
+ TRANSPOSE4X4(&q8s16, &q9s16);
+
+ // then transform columns
+ IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
+ break;
+ case 3: // iadst_iadst
+ // generate constants
+ GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
+
+ // first transform rows
+ IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
+
+ // transpose the matrix
+ TRANSPOSE4X4(&q8s16, &q9s16);
+
+ // then transform columns
+ IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
+ break;
+ default: // unexpected tx_type
+ assert(0);
+ break;
+ }
+ // Rounding shift right by 4 on the transform output.
+ q8s16 = vrshrq_n_s16(q8s16, 4);
+ q9s16 = vrshrq_n_s16(q9s16, 4);
+ // Load four rows of dest, 4 bytes each, stepping down by dest_stride.
+ d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0);
+ dest += dest_stride;
+ d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1);
+ dest += dest_stride;
+ d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0);
+ dest += dest_stride;
+ d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1);
+ // Add the residual to the widened dest bytes.
+ q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32));
+ q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32));
+ // Narrow back to unsigned 8 bits with saturation.
+ d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
+ d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
+ // Store the rows in reverse order, walking dest back up to the first row.
+ vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1);
+ dest -= dest_stride;
+ vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0);
+ dest -= dest_stride;
+ vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1);
+ dest -= dest_stride;
+ vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0);
+ return;
+}
diff --git a/av1/common/arm/neon/iht8x8_add_neon.c b/av1/common/arm/neon/iht8x8_add_neon.c
new file mode 100644
index 0000000..ff5578d
--- /dev/null
+++ b/av1/common/arm/neon/iht8x8_add_neon.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "av1/common/common.h"
+
+static int16_t cospi_2_64 = 16305;  // cospi_N_64 == round(16384 * cos(N * pi / 64))
+static int16_t cospi_4_64 = 16069;  // products with these are narrowed back using a rounding >> 14
+static int16_t cospi_6_64 = 15679;
+static int16_t cospi_8_64 = 15137;
+static int16_t cospi_10_64 = 14449;
+static int16_t cospi_12_64 = 13623;
+static int16_t cospi_14_64 = 12665;
+static int16_t cospi_16_64 = 11585;
+static int16_t cospi_18_64 = 10394;
+static int16_t cospi_20_64 = 9102;
+static int16_t cospi_22_64 = 7723;
+static int16_t cospi_24_64 = 6270;
+static int16_t cospi_26_64 = 4756;
+static int16_t cospi_28_64 = 3196;
+static int16_t cospi_30_64 = 1606;
+
+static INLINE void TRANSPOSE8X8(int16x8_t *q8s16, int16x8_t *q9s16,
+ int16x8_t *q10s16, int16x8_t *q11s16,
+ int16x8_t *q12s16, int16x8_t *q13s16,
+ int16x8_t *q14s16, int16x8_t *q15s16) {  // transposes, in place, the 8x8 int16 matrix whose rows are *q8s16..*q15s16
+ int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
+ int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
+ int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32;
+ int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16;
+ // Split every row into its low (elements 0-3) and high (elements 4-7) half.
+ d16s16 = vget_low_s16(*q8s16);
+ d17s16 = vget_high_s16(*q8s16);
+ d18s16 = vget_low_s16(*q9s16);
+ d19s16 = vget_high_s16(*q9s16);
+ d20s16 = vget_low_s16(*q10s16);
+ d21s16 = vget_high_s16(*q10s16);
+ d22s16 = vget_low_s16(*q11s16);
+ d23s16 = vget_high_s16(*q11s16);
+ d24s16 = vget_low_s16(*q12s16);
+ d25s16 = vget_high_s16(*q12s16);
+ d26s16 = vget_low_s16(*q13s16);
+ d27s16 = vget_high_s16(*q13s16);
+ d28s16 = vget_low_s16(*q14s16);
+ d29s16 = vget_high_s16(*q14s16);
+ d30s16 = vget_low_s16(*q15s16);
+ d31s16 = vget_high_s16(*q15s16);
+ // Swap 4x4 quadrants: the high half of row k trades places with the low half of row k + 4.
+ *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24
+ *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26
+ *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28
+ *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30
+ *q12s16 = vcombine_s16(d17s16, d25s16);
+ *q13s16 = vcombine_s16(d19s16, d27s16);
+ *q14s16 = vcombine_s16(d21s16, d29s16);
+ *q15s16 = vcombine_s16(d23s16, d31s16);
+ // Transpose 32-bit element pairs between vectors that are two rows apart.
+ q0x2s32 =
+ vtrnq_s32(vreinterpretq_s32_s16(*q8s16), vreinterpretq_s32_s16(*q10s16));
+ q1x2s32 =
+ vtrnq_s32(vreinterpretq_s32_s16(*q9s16), vreinterpretq_s32_s16(*q11s16));
+ q2x2s32 =
+ vtrnq_s32(vreinterpretq_s32_s16(*q12s16), vreinterpretq_s32_s16(*q14s16));
+ q3x2s32 =
+ vtrnq_s32(vreinterpretq_s32_s16(*q13s16), vreinterpretq_s32_s16(*q15s16));
+ // Transpose 16-bit elements between adjacent vectors; this completes the 8x8 transpose.
+ q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8
+ vreinterpretq_s16_s32(q1x2s32.val[0])); // q9
+ q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10
+ vreinterpretq_s16_s32(q1x2s32.val[1])); // q11
+ q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12
+ vreinterpretq_s16_s32(q3x2s32.val[0])); // q13
+ q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14
+ vreinterpretq_s16_s32(q3x2s32.val[1])); // q15
+ // Store the transposed rows back through the caller's pointers.
+ *q8s16 = q0x2s16.val[0];
+ *q9s16 = q0x2s16.val[1];
+ *q10s16 = q1x2s16.val[0];
+ *q11s16 = q1x2s16.val[1];
+ *q12s16 = q2x2s16.val[0];
+ *q13s16 = q2x2s16.val[1];
+ *q14s16 = q3x2s16.val[0];
+ *q15s16 = q3x2s16.val[1];
+ return;
+}
+
+static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
+ int16x8_t *q10s16, int16x8_t *q11s16,
+ int16x8_t *q12s16, int16x8_t *q13s16,
+ int16x8_t *q14s16, int16x8_t *q15s16) {  // one 8-point 1-D inverse DCT pass, in place, applied lane-wise across the eight vectors
+ int16x4_t d0s16, d1s16, d2s16, d3s16;
+ int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
+ int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
+ int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
+ int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16;
+ int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
+ int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
+ // In the comments below "inK" is the vector passed as *q(8+K)s16 and "cNN" is cospi_NN_64.
+ d0s16 = vdup_n_s16(cospi_28_64);
+ d1s16 = vdup_n_s16(cospi_4_64);
+ d2s16 = vdup_n_s16(cospi_12_64);
+ d3s16 = vdup_n_s16(cospi_20_64);
+
+ d16s16 = vget_low_s16(*q8s16);
+ d17s16 = vget_high_s16(*q8s16);
+ d18s16 = vget_low_s16(*q9s16);
+ d19s16 = vget_high_s16(*q9s16);
+ d20s16 = vget_low_s16(*q10s16);
+ d21s16 = vget_high_s16(*q10s16);
+ d22s16 = vget_low_s16(*q11s16);
+ d23s16 = vget_high_s16(*q11s16);
+ d24s16 = vget_low_s16(*q12s16);
+ d25s16 = vget_high_s16(*q12s16);
+ d26s16 = vget_low_s16(*q13s16);
+ d27s16 = vget_high_s16(*q13s16);
+ d28s16 = vget_low_s16(*q14s16);
+ d29s16 = vget_high_s16(*q14s16);
+ d30s16 = vget_low_s16(*q15s16);
+ d31s16 = vget_high_s16(*q15s16);
+ // q4 = in1*c28 - in7*c4 and q5 = in5*c12 - in3*c20 (32-bit products, then rounding saturating >> 14).
+ q2s32 = vmull_s16(d18s16, d0s16);
+ q3s32 = vmull_s16(d19s16, d0s16);
+ q5s32 = vmull_s16(d26s16, d2s16);
+ q6s32 = vmull_s16(d27s16, d2s16);
+
+ q2s32 = vmlsl_s16(q2s32, d30s16, d1s16);
+ q3s32 = vmlsl_s16(q3s32, d31s16, d1s16);
+ q5s32 = vmlsl_s16(q5s32, d22s16, d3s16);
+ q6s32 = vmlsl_s16(q6s32, d23s16, d3s16);
+
+ d8s16 = vqrshrn_n_s32(q2s32, 14);
+ d9s16 = vqrshrn_n_s32(q3s32, 14);
+ d10s16 = vqrshrn_n_s32(q5s32, 14);
+ d11s16 = vqrshrn_n_s32(q6s32, 14);
+ q4s16 = vcombine_s16(d8s16, d9s16);
+ q5s16 = vcombine_s16(d10s16, d11s16);
+ // q7 = in1*c4 + in7*c28 and q6 = in5*c20 + in3*c12.
+ q2s32 = vmull_s16(d18s16, d1s16);
+ q3s32 = vmull_s16(d19s16, d1s16);
+ q9s32 = vmull_s16(d26s16, d3s16);
+ q13s32 = vmull_s16(d27s16, d3s16);
+
+ q2s32 = vmlal_s16(q2s32, d30s16, d0s16);
+ q3s32 = vmlal_s16(q3s32, d31s16, d0s16);
+ q9s32 = vmlal_s16(q9s32, d22s16, d2s16);
+ q13s32 = vmlal_s16(q13s32, d23s16, d2s16);
+
+ d14s16 = vqrshrn_n_s32(q2s32, 14);
+ d15s16 = vqrshrn_n_s32(q3s32, 14);
+ d12s16 = vqrshrn_n_s32(q9s32, 14);
+ d13s16 = vqrshrn_n_s32(q13s32, 14);
+ q6s16 = vcombine_s16(d12s16, d13s16);
+ q7s16 = vcombine_s16(d14s16, d15s16);
+ // Even inputs: *q9 = (in0 + in4)*c16 and *q11 = (in0 - in4)*c16.
+ d0s16 = vdup_n_s16(cospi_16_64);
+
+ q2s32 = vmull_s16(d16s16, d0s16);
+ q3s32 = vmull_s16(d17s16, d0s16);
+ q13s32 = vmull_s16(d16s16, d0s16);
+ q15s32 = vmull_s16(d17s16, d0s16);
+
+ q2s32 = vmlal_s16(q2s32, d24s16, d0s16);
+ q3s32 = vmlal_s16(q3s32, d25s16, d0s16);
+ q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
+ q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
+
+ d0s16 = vdup_n_s16(cospi_24_64);
+ d1s16 = vdup_n_s16(cospi_8_64);
+
+ d18s16 = vqrshrn_n_s32(q2s32, 14);
+ d19s16 = vqrshrn_n_s32(q3s32, 14);
+ d22s16 = vqrshrn_n_s32(q13s32, 14);
+ d23s16 = vqrshrn_n_s32(q15s32, 14);
+ *q9s16 = vcombine_s16(d18s16, d19s16);
+ *q11s16 = vcombine_s16(d22s16, d23s16);
+ // *q13 = in2*c24 - in6*c8 and *q15 = in2*c8 + in6*c24.
+ q2s32 = vmull_s16(d20s16, d0s16);
+ q3s32 = vmull_s16(d21s16, d0s16);
+ q8s32 = vmull_s16(d20s16, d1s16);
+ q12s32 = vmull_s16(d21s16, d1s16);
+
+ q2s32 = vmlsl_s16(q2s32, d28s16, d1s16);
+ q3s32 = vmlsl_s16(q3s32, d29s16, d1s16);
+ q8s32 = vmlal_s16(q8s32, d28s16, d0s16);
+ q12s32 = vmlal_s16(q12s32, d29s16, d0s16);
+
+ d26s16 = vqrshrn_n_s32(q2s32, 14);
+ d27s16 = vqrshrn_n_s32(q3s32, 14);
+ d30s16 = vqrshrn_n_s32(q8s32, 14);
+ d31s16 = vqrshrn_n_s32(q12s32, 14);
+ *q13s16 = vcombine_s16(d26s16, d27s16);
+ *q15s16 = vcombine_s16(d30s16, d31s16);
+ // Butterflies: q0..q3 combine the even-input terms; q4/q7 and *q13/*q14 combine the odd-input terms.
+ q0s16 = vaddq_s16(*q9s16, *q15s16);
+ q1s16 = vaddq_s16(*q11s16, *q13s16);
+ q2s16 = vsubq_s16(*q11s16, *q13s16);
+ q3s16 = vsubq_s16(*q9s16, *q15s16);
+
+ *q13s16 = vsubq_s16(q4s16, q5s16);
+ q4s16 = vaddq_s16(q4s16, q5s16);
+ *q14s16 = vsubq_s16(q7s16, q6s16);
+ q7s16 = vaddq_s16(q7s16, q6s16);
+ d26s16 = vget_low_s16(*q13s16);
+ d27s16 = vget_high_s16(*q13s16);
+ d28s16 = vget_low_s16(*q14s16);
+ d29s16 = vget_high_s16(*q14s16);
+ // q5 = (*q14 - *q13)*c16 and q6 = (*q14 + *q13)*c16.
+ d16s16 = vdup_n_s16(cospi_16_64);
+
+ q9s32 = vmull_s16(d28s16, d16s16);
+ q10s32 = vmull_s16(d29s16, d16s16);
+ q11s32 = vmull_s16(d28s16, d16s16);
+ q12s32 = vmull_s16(d29s16, d16s16);
+
+ q9s32 = vmlsl_s16(q9s32, d26s16, d16s16);
+ q10s32 = vmlsl_s16(q10s32, d27s16, d16s16);
+ q11s32 = vmlal_s16(q11s32, d26s16, d16s16);
+ q12s32 = vmlal_s16(q12s32, d27s16, d16s16);
+
+ d10s16 = vqrshrn_n_s32(q9s32, 14);
+ d11s16 = vqrshrn_n_s32(q10s32, 14);
+ d12s16 = vqrshrn_n_s32(q11s32, 14);
+ d13s16 = vqrshrn_n_s32(q12s32, 14);
+ q5s16 = vcombine_s16(d10s16, d11s16);
+ q6s16 = vcombine_s16(d12s16, d13s16);
+ // Final butterflies: outputs 0..3 are sums, outputs 7..4 the mirrored differences.
+ *q8s16 = vaddq_s16(q0s16, q7s16);
+ *q9s16 = vaddq_s16(q1s16, q6s16);
+ *q10s16 = vaddq_s16(q2s16, q5s16);
+ *q11s16 = vaddq_s16(q3s16, q4s16);
+ *q12s16 = vsubq_s16(q3s16, q4s16);
+ *q13s16 = vsubq_s16(q2s16, q5s16);
+ *q14s16 = vsubq_s16(q1s16, q6s16);
+ *q15s16 = vsubq_s16(q0s16, q7s16);
+ return;
+}
+
+static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
+ int16x8_t *q10s16, int16x8_t *q11s16,
+ int16x8_t *q12s16, int16x8_t *q13s16,
+ int16x8_t *q14s16, int16x8_t *q15s16) {  // one 8-point 1-D inverse ADST pass, in place, applied lane-wise across the eight vectors
+ int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16;
+ int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
+ int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
+ int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
+ int16x8_t q2s16, q4s16, q5s16, q6s16;
+ int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32;
+ int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
+ // Split each input vector into 4-lane halves so that products can widen to 32 bits ("inK" = *q(8+K)s16).
+ d16s16 = vget_low_s16(*q8s16);
+ d17s16 = vget_high_s16(*q8s16);
+ d18s16 = vget_low_s16(*q9s16);
+ d19s16 = vget_high_s16(*q9s16);
+ d20s16 = vget_low_s16(*q10s16);
+ d21s16 = vget_high_s16(*q10s16);
+ d22s16 = vget_low_s16(*q11s16);
+ d23s16 = vget_high_s16(*q11s16);
+ d24s16 = vget_low_s16(*q12s16);
+ d25s16 = vget_high_s16(*q12s16);
+ d26s16 = vget_low_s16(*q13s16);
+ d27s16 = vget_high_s16(*q13s16);
+ d28s16 = vget_low_s16(*q14s16);
+ d29s16 = vget_high_s16(*q14s16);
+ d30s16 = vget_low_s16(*q15s16);
+ d31s16 = vget_high_s16(*q15s16);
+ // First rotations: (in7, in0) with cospi_2/cospi_30 and (in3, in4) with cospi_18/cospi_14.
+ d14s16 = vdup_n_s16(cospi_2_64);
+ d15s16 = vdup_n_s16(cospi_30_64);
+
+ q1s32 = vmull_s16(d30s16, d14s16);
+ q2s32 = vmull_s16(d31s16, d14s16);
+ q3s32 = vmull_s16(d30s16, d15s16);
+ q4s32 = vmull_s16(d31s16, d15s16);
+
+ d30s16 = vdup_n_s16(cospi_18_64);
+ d31s16 = vdup_n_s16(cospi_14_64);
+
+ q1s32 = vmlal_s16(q1s32, d16s16, d15s16);
+ q2s32 = vmlal_s16(q2s32, d17s16, d15s16);
+ q3s32 = vmlsl_s16(q3s32, d16s16, d14s16);
+ q4s32 = vmlsl_s16(q4s32, d17s16, d14s16);
+
+ q5s32 = vmull_s16(d22s16, d30s16);
+ q6s32 = vmull_s16(d23s16, d30s16);
+ q7s32 = vmull_s16(d22s16, d31s16);
+ q8s32 = vmull_s16(d23s16, d31s16);
+
+ q5s32 = vmlal_s16(q5s32, d24s16, d31s16);
+ q6s32 = vmlal_s16(q6s32, d25s16, d31s16);
+ q7s32 = vmlsl_s16(q7s32, d24s16, d30s16);
+ q8s32 = vmlsl_s16(q8s32, d25s16, d30s16);
+ // Sums and differences are formed on the 32-bit products; the rounding >> 14 is applied once afterwards.
+ q11s32 = vaddq_s32(q1s32, q5s32);
+ q12s32 = vaddq_s32(q2s32, q6s32);
+ q1s32 = vsubq_s32(q1s32, q5s32);
+ q2s32 = vsubq_s32(q2s32, q6s32);
+
+ d22s16 = vqrshrn_n_s32(q11s32, 14);
+ d23s16 = vqrshrn_n_s32(q12s32, 14);
+ *q11s16 = vcombine_s16(d22s16, d23s16);
+
+ q12s32 = vaddq_s32(q3s32, q7s32);
+ q15s32 = vaddq_s32(q4s32, q8s32);
+ q3s32 = vsubq_s32(q3s32, q7s32);
+ q4s32 = vsubq_s32(q4s32, q8s32);
+
+ d2s16 = vqrshrn_n_s32(q1s32, 14);
+ d3s16 = vqrshrn_n_s32(q2s32, 14);
+ d24s16 = vqrshrn_n_s32(q12s32, 14);
+ d25s16 = vqrshrn_n_s32(q15s32, 14);
+ d6s16 = vqrshrn_n_s32(q3s32, 14);
+ d7s16 = vqrshrn_n_s32(q4s32, 14);
+ *q12s16 = vcombine_s16(d24s16, d25s16);
+ // Remaining first rotations: (in5, in2) with cospi_10/cospi_22 and (in1, in6) with cospi_26/cospi_6.
+ d0s16 = vdup_n_s16(cospi_10_64);
+ d1s16 = vdup_n_s16(cospi_22_64);
+ q4s32 = vmull_s16(d26s16, d0s16);
+ q5s32 = vmull_s16(d27s16, d0s16);
+ q2s32 = vmull_s16(d26s16, d1s16);
+ q6s32 = vmull_s16(d27s16, d1s16);
+
+ d30s16 = vdup_n_s16(cospi_26_64);
+ d31s16 = vdup_n_s16(cospi_6_64);
+
+ q4s32 = vmlal_s16(q4s32, d20s16, d1s16);
+ q5s32 = vmlal_s16(q5s32, d21s16, d1s16);
+ q2s32 = vmlsl_s16(q2s32, d20s16, d0s16);
+ q6s32 = vmlsl_s16(q6s32, d21s16, d0s16);
+
+ q0s32 = vmull_s16(d18s16, d30s16);
+ q13s32 = vmull_s16(d19s16, d30s16);
+
+ q0s32 = vmlal_s16(q0s32, d28s16, d31s16);
+ q13s32 = vmlal_s16(q13s32, d29s16, d31s16);
+
+ q10s32 = vmull_s16(d18s16, d31s16);
+ q9s32 = vmull_s16(d19s16, d31s16);
+
+ q10s32 = vmlsl_s16(q10s32, d28s16, d30s16);
+ q9s32 = vmlsl_s16(q9s32, d29s16, d30s16);
+
+ q14s32 = vaddq_s32(q2s32, q10s32);
+ q15s32 = vaddq_s32(q6s32, q9s32);
+ q2s32 = vsubq_s32(q2s32, q10s32);
+ q6s32 = vsubq_s32(q6s32, q9s32);
+
+ d28s16 = vqrshrn_n_s32(q14s32, 14);
+ d29s16 = vqrshrn_n_s32(q15s32, 14);
+ d4s16 = vqrshrn_n_s32(q2s32, 14);
+ d5s16 = vqrshrn_n_s32(q6s32, 14);
+ *q14s16 = vcombine_s16(d28s16, d29s16);
+
+ q9s32 = vaddq_s32(q4s32, q0s32);
+ q10s32 = vaddq_s32(q5s32, q13s32);
+ q4s32 = vsubq_s32(q4s32, q0s32);
+ q5s32 = vsubq_s32(q5s32, q13s32);
+ // The narrowed differences from above (d2..d9) are rotated next by cospi_8/cospi_24.
+ d30s16 = vdup_n_s16(cospi_8_64);
+ d31s16 = vdup_n_s16(cospi_24_64);
+
+ d18s16 = vqrshrn_n_s32(q9s32, 14);
+ d19s16 = vqrshrn_n_s32(q10s32, 14);
+ d8s16 = vqrshrn_n_s32(q4s32, 14);
+ d9s16 = vqrshrn_n_s32(q5s32, 14);
+ *q9s16 = vcombine_s16(d18s16, d19s16);
+
+ q5s32 = vmull_s16(d2s16, d30s16);
+ q6s32 = vmull_s16(d3s16, d30s16);
+ q7s32 = vmull_s16(d2s16, d31s16);
+ q0s32 = vmull_s16(d3s16, d31s16);
+
+ q5s32 = vmlal_s16(q5s32, d6s16, d31s16);
+ q6s32 = vmlal_s16(q6s32, d7s16, d31s16);
+ q7s32 = vmlsl_s16(q7s32, d6s16, d30s16);
+ q0s32 = vmlsl_s16(q0s32, d7s16, d30s16);
+
+ q1s32 = vmull_s16(d4s16, d30s16);
+ q3s32 = vmull_s16(d5s16, d30s16);
+ q10s32 = vmull_s16(d4s16, d31s16);
+ q2s32 = vmull_s16(d5s16, d31s16);
+
+ q1s32 = vmlsl_s16(q1s32, d8s16, d31s16);
+ q3s32 = vmlsl_s16(q3s32, d9s16, d31s16);
+ q10s32 = vmlal_s16(q10s32, d8s16, d30s16);
+ q2s32 = vmlal_s16(q2s32, d9s16, d30s16);
+ // *q8 is the first finished output; q4 is kept aside and negated into *q15 at the end.
+ *q8s16 = vaddq_s16(*q11s16, *q9s16);
+ *q11s16 = vsubq_s16(*q11s16, *q9s16);
+ q4s16 = vaddq_s16(*q12s16, *q14s16);
+ *q12s16 = vsubq_s16(*q12s16, *q14s16);
+
+ q14s32 = vaddq_s32(q5s32, q1s32);
+ q15s32 = vaddq_s32(q6s32, q3s32);
+ q5s32 = vsubq_s32(q5s32, q1s32);
+ q6s32 = vsubq_s32(q6s32, q3s32);
+
+ d18s16 = vqrshrn_n_s32(q14s32, 14);
+ d19s16 = vqrshrn_n_s32(q15s32, 14);
+ d10s16 = vqrshrn_n_s32(q5s32, 14);
+ d11s16 = vqrshrn_n_s32(q6s32, 14);
+ *q9s16 = vcombine_s16(d18s16, d19s16);
+
+ q1s32 = vaddq_s32(q7s32, q10s32);
+ q3s32 = vaddq_s32(q0s32, q2s32);
+ q7s32 = vsubq_s32(q7s32, q10s32);
+ q0s32 = vsubq_s32(q0s32, q2s32);
+
+ d28s16 = vqrshrn_n_s32(q1s32, 14);
+ d29s16 = vqrshrn_n_s32(q3s32, 14);
+ d14s16 = vqrshrn_n_s32(q7s32, 14);
+ d15s16 = vqrshrn_n_s32(q0s32, 14);
+ *q14s16 = vcombine_s16(d28s16, d29s16);
+ // Last multiplications: sum and difference of the remaining pairs, scaled by cospi_16.
+ d30s16 = vdup_n_s16(cospi_16_64);
+
+ d22s16 = vget_low_s16(*q11s16);
+ d23s16 = vget_high_s16(*q11s16);
+ q2s32 = vmull_s16(d22s16, d30s16);
+ q3s32 = vmull_s16(d23s16, d30s16);
+ q13s32 = vmull_s16(d22s16, d30s16);
+ q1s32 = vmull_s16(d23s16, d30s16);
+
+ d24s16 = vget_low_s16(*q12s16);
+ d25s16 = vget_high_s16(*q12s16);
+ q2s32 = vmlal_s16(q2s32, d24s16, d30s16);
+ q3s32 = vmlal_s16(q3s32, d25s16, d30s16);
+ q13s32 = vmlsl_s16(q13s32, d24s16, d30s16);
+ q1s32 = vmlsl_s16(q1s32, d25s16, d30s16);
+
+ d4s16 = vqrshrn_n_s32(q2s32, 14);
+ d5s16 = vqrshrn_n_s32(q3s32, 14);
+ d24s16 = vqrshrn_n_s32(q13s32, 14);
+ d25s16 = vqrshrn_n_s32(q1s32, 14);
+ q2s16 = vcombine_s16(d4s16, d5s16);
+ *q12s16 = vcombine_s16(d24s16, d25s16);
+
+ q13s32 = vmull_s16(d10s16, d30s16);
+ q1s32 = vmull_s16(d11s16, d30s16);
+ q11s32 = vmull_s16(d10s16, d30s16);
+ q0s32 = vmull_s16(d11s16, d30s16);
+
+ q13s32 = vmlal_s16(q13s32, d14s16, d30s16);
+ q1s32 = vmlal_s16(q1s32, d15s16, d30s16);
+ q11s32 = vmlsl_s16(q11s32, d14s16, d30s16);
+ q0s32 = vmlsl_s16(q0s32, d15s16, d30s16);
+
+ d20s16 = vqrshrn_n_s32(q13s32, 14);
+ d21s16 = vqrshrn_n_s32(q1s32, 14);
+ d12s16 = vqrshrn_n_s32(q11s32, 14);
+ d13s16 = vqrshrn_n_s32(q0s32, 14);
+ *q10s16 = vcombine_s16(d20s16, d21s16);
+ q6s16 = vcombine_s16(d12s16, d13s16);
+ // The odd-numbered outputs (*q9, *q11, *q13, *q15) are stored negated, computed as 0 - x.
+ q5s16 = vdupq_n_s16(0);
+
+ *q9s16 = vsubq_s16(q5s16, *q9s16);
+ *q11s16 = vsubq_s16(q5s16, q2s16);
+ *q13s16 = vsubq_s16(q5s16, q6s16);
+ *q15s16 = vsubq_s16(q5s16, q4s16);
+ return;
+}
+
+void vp10_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest,
+ int dest_stride, int tx_type) {  // inverse 8x8 hybrid transform of the 64 coefficients in input, added to dest with saturation to uint8
+ int i;
+ uint8_t *d1, *d2;
+ uint8x8_t d0u8, d1u8, d2u8, d3u8;
+ uint64x1_t d0u64, d1u64, d2u64, d3u64;
+ int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
+ uint16x8_t q8u16, q9u16, q10u16, q11u16;
+ // NOTE(review): vld1q_s16 on a tran_low_t pointer assumes tran_low_t is int16_t - confirm for all build configs.
+ q8s16 = vld1q_s16(input);
+ q9s16 = vld1q_s16(input + 8);
+ q10s16 = vld1q_s16(input + 8 * 2);
+ q11s16 = vld1q_s16(input + 8 * 3);
+ q12s16 = vld1q_s16(input + 8 * 4);
+ q13s16 = vld1q_s16(input + 8 * 5);
+ q14s16 = vld1q_s16(input + 8 * 6);
+ q15s16 = vld1q_s16(input + 8 * 7);
+
+ TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+ &q15s16);
+
+ switch (tx_type) {
+ case 0: // idct_idct is not supported. Fall back to C
+ vp10_iht8x8_64_add_c(input, dest, dest_stride, tx_type);
+ return;
+ break;
+ case 1: // iadst_idct
+ // generate IDCT constants
+ // GENERATE_IDCT_CONSTANTS
+
+ // first transform rows
+ IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+ &q15s16);
+
+ // transpose the matrix
+ TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+ &q15s16);
+
+ // generate IADST constants
+ // GENERATE_IADST_CONSTANTS
+
+ // then transform columns
+ IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+ &q15s16);
+ break;
+ case 2: // idct_iadst
+ // generate IADST constants
+ // GENERATE_IADST_CONSTANTS
+
+ // first transform rows
+ IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+ &q15s16);
+
+ // transpose the matrix
+ TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+ &q15s16);
+
+ // generate IDCT constants
+ // GENERATE_IDCT_CONSTANTS
+
+ // then transform columns
+ IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+ &q15s16);
+ break;
+ case 3: // iadst_iadst
+ // generate IADST constants
+ // GENERATE_IADST_CONSTANTS
+
+ // first transform rows
+ IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+ &q15s16);
+
+ // transpose the matrix
+ TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+ &q15s16);
+
+ // then transform columns
+ IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
+ &q15s16);
+ break;
+ default: // any other tx_type is invalid here
+ assert(0);
+ break;
+ }
+ // Final rounding shift by 5 on all eight result rows before they are added to dest.
+ q8s16 = vrshrq_n_s16(q8s16, 5);
+ q9s16 = vrshrq_n_s16(q9s16, 5);
+ q10s16 = vrshrq_n_s16(q10s16, 5);
+ q11s16 = vrshrq_n_s16(q11s16, 5);
+ q12s16 = vrshrq_n_s16(q12s16, 5);
+ q13s16 = vrshrq_n_s16(q13s16, 5);
+ q14s16 = vrshrq_n_s16(q14s16, 5);
+ q15s16 = vrshrq_n_s16(q15s16, 5);
+
+ for (d1 = d2 = dest, i = 0; i < 2; i++) {  // pass 0 reconstructs rows 0-3, pass 1 rows 4-7; d1 reads, d2 writes
+ if (i != 0) {
+ q8s16 = q12s16;
+ q9s16 = q13s16;
+ q10s16 = q14s16;
+ q11s16 = q15s16;
+ }
+
+ d0u64 = vld1_u64((uint64_t *)d1);
+ d1 += dest_stride;
+ d1u64 = vld1_u64((uint64_t *)d1);
+ d1 += dest_stride;
+ d2u64 = vld1_u64((uint64_t *)d1);
+ d1 += dest_stride;
+ d3u64 = vld1_u64((uint64_t *)d1);
+ d1 += dest_stride;
+
+ q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64));
+ q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64));
+ q10u16 =
+ vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64));
+ q11u16 =
+ vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64));
+
+ d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));  // saturate the signed 16-bit sums to 0..255
+ d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
+ d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
+ d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
+
+ vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8));
+ d2 += dest_stride;
+ vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8));
+ d2 += dest_stride;
+ vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
+ d2 += dest_stride;
+ vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
+ d2 += dest_stride;
+ }
+ return;
+}
diff --git a/av1/common/blockd.c b/av1/common/blockd.c
new file mode 100644
index 0000000..ee95271
--- /dev/null
+++ b/av1/common/blockd.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "aom_ports/system_state.h"
+
+#include "av1/common/blockd.h"
+
+PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi,
+ const MODE_INFO *left_mi, int b) {  // Y mode of the sub-block to the left of sub-block b (0..3) of cur_mi
+ if (b == 0 || b == 2) {  // b is in the left column, so its neighbour lives in left_mi
+ if (!left_mi || is_inter_block(&left_mi->mbmi)) return DC_PRED;  // no usable intra neighbour
+
+ return get_y_mode(left_mi, b + 1);
+ } else {
+ assert(b == 1 || b == 3);
+ return cur_mi->bmi[b - 1].as_mode;  // neighbour is sub-block b - 1 of the same MODE_INFO
+ }
+}
+
+PREDICTION_MODE vp10_above_block_mode(const MODE_INFO *cur_mi,
+ const MODE_INFO *above_mi, int b) {  // Y mode of the sub-block above sub-block b (0..3) of cur_mi
+ if (b == 0 || b == 1) {  // b is in the top row, so its neighbour lives in above_mi
+ if (!above_mi || is_inter_block(&above_mi->mbmi)) return DC_PRED;  // no usable intra neighbour
+
+ return get_y_mode(above_mi, b + 2);
+ } else {
+ assert(b == 2 || b == 3);
+ return cur_mi->bmi[b - 2].as_mode;  // neighbour is sub-block b - 2 of the same MODE_INFO
+ }
+}
+
+void vp10_foreach_transformed_block_in_plane(
+ const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
+ foreach_transformed_block_visitor visit, void *arg) {  // calls visit() for each transform block of one plane, row by row
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ // block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
+ // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
+ // transform size varies per plane, look it up in a common way.
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+ const uint8_t num_4x4_tw = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const uint8_t num_4x4_th = num_4x4_blocks_high_txsize_lookup[tx_size];
+ const int step = num_4x4_tw * num_4x4_th;  // 4x4 units per transform block; the block index i advances by this
+ int i = 0, r, c;
+
+ // If mb_to_right_edge is < 0 we are in a situation in which
+ // the current block size extends into the UMV and we won't
+ // visit the sub blocks that are wholly within the UMV.
+ const int max_blocks_wide =
+ num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >>
+ (5 + pd->subsampling_x));
+ const int max_blocks_high =
+ num_4x4_h + (xd->mb_to_bottom_edge >= 0
+ ? 0
+ : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+ const int extra_step = ((num_4x4_w - max_blocks_wide) >>
+ num_4x4_blocks_wide_txsize_log2_lookup[tx_size]) *
+ step;
+
+ // Keep track of the row and column of the blocks we use so that we know
+ // if we are in the unrestricted motion border.
+ for (r = 0; r < max_blocks_high; r += num_4x4_th) {
+ // Skip visiting the sub blocks that are wholly within the UMV.
+ for (c = 0; c < max_blocks_wide; c += num_4x4_tw) {
+ visit(plane, i, r, c, plane_bsize, tx_size, arg);
+ i += step;
+ }
+ i += extra_step;  // account for the transform blocks of this row that were skipped past max_blocks_wide
+ }
+}
+
+void vp10_foreach_transformed_block(const MACROBLOCKD *const xd,
+ BLOCK_SIZE bsize,
+ foreach_transformed_block_visitor visit,
+ void *arg) {  // runs the per-plane visitor loop for every plane index 0..MAX_MB_PLANE-1
+ int plane;
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+ vp10_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg);
+}
+
+void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
+ int aoff, int loff) {  // writes has_eob into the above/left entropy contexts spanned by one transform block
+ ENTROPY_CONTEXT *const a = pd->above_context + aoff;
+ ENTROPY_CONTEXT *const l = pd->left_context + loff;
+ const int tx_w_in_blocks = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int tx_h_in_blocks = num_4x4_blocks_high_txsize_lookup[tx_size];
+
+ // above: when the block crosses the right edge, only entries left of the edge get has_eob
+ if (has_eob && xd->mb_to_right_edge < 0) {
+ int i;
+ const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize] +
+ (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ int above_contexts = tx_w_in_blocks;
+ if (above_contexts + aoff > blocks_wide)
+ above_contexts = blocks_wide - aoff;
+
+ for (i = 0; i < above_contexts; ++i) a[i] = has_eob;
+ for (i = above_contexts; i < tx_w_in_blocks; ++i) a[i] = 0;  // entries at or past blocks_wide are cleared
+ } else {
+ memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_w_in_blocks);
+ }
+
+ // left: same handling against the bottom edge
+ if (has_eob && xd->mb_to_bottom_edge < 0) {
+ int i;
+ const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize] +
+ (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+ int left_contexts = tx_h_in_blocks;
+ if (left_contexts + loff > blocks_high) left_contexts = blocks_high - loff;
+
+ for (i = 0; i < left_contexts; ++i) l[i] = has_eob;
+ for (i = left_contexts; i < tx_h_in_blocks; ++i) l[i] = 0;  // entries at or past blocks_high are cleared
+ } else {
+ memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_h_in_blocks);
+ }
+}
+
+void vp10_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) {  // sets plane type and subsampling for every entry of xd->plane
+ int i;
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y;  // plane 0 is Y, all others are UV
+ xd->plane[i].subsampling_x = i ? ss_x : 0;  // plane 0 is never subsampled
+ xd->plane[i].subsampling_y = i ? ss_y : 0;
+ }
+}
+
+#if CONFIG_EXT_INTRA
+const int16_t dr_intra_derivative[90] = {  // indexed by angle in degrees; entries match floor(256 / tan(angle)), entry 0 looks like a placeholder - TODO confirm
+ 1, 14666, 7330, 4884, 3660, 2926, 2435, 2084, 1821, 1616, 1451, 1317, 1204,
+ 1108, 1026, 955, 892, 837, 787, 743, 703, 666, 633, 603, 574, 548,
+ 524, 502, 481, 461, 443, 426, 409, 394, 379, 365, 352, 339, 327,
+ 316, 305, 294, 284, 274, 265, 256, 247, 238, 230, 222, 214, 207,
+ 200, 192, 185, 179, 172, 166, 159, 153, 147, 141, 136, 130, 124,
+ 119, 113, 108, 103, 98, 93, 88, 83, 78, 73, 68, 63, 59,
+ 54, 49, 45, 40, 35, 31, 26, 22, 17, 13, 8, 4,
+};
+
+// Returns nonzero when filter selection is needed for the given intra prediction
+// angle (degrees, 0 < angle < 270): never for multiples of 45, always for 90 < angle < 180.
+int vp10_is_intra_filter_switchable(int angle) {
+ assert(angle > 0 && angle < 270);
+ if (angle % 45 == 0) return 0;
+ if (angle > 90 && angle < 180) {
+ return 1;
+ } else {  // otherwise decided by the low 8 bits of the table entry for angle (or 270 - angle)
+ return ((angle < 90 ? dr_intra_derivative[angle]
+ : dr_intra_derivative[270 - angle]) &
+ 0xFF) > 0;
+ }
+}
+#endif // CONFIG_EXT_INTRA
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
new file mode 100644
index 0000000..4dcc1f0
--- /dev/null
+++ b/av1/common/blockd.h
@@ -0,0 +1,833 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_BLOCKD_H_
+#define VP10_COMMON_BLOCKD_H_
+
+#include "./vpx_config.h"
+
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_ports/mem.h"
+#include "aom_scale/yv12config.h"
+
+#include "av1/common/common_data.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/entropy.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/mv.h"
+#include "av1/common/scale.h"
+#include "av1/common/seg_common.h"
+#include "av1/common/tile_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_MB_PLANE 3  // number of planes looped over per block (plane 0 is Y, the others UV)
+
+typedef enum {
+ KEY_FRAME = 0,
+ INTER_FRAME = 1,
+ FRAME_TYPES,  // evaluates to 2, the number of frame types listed above
+} FRAME_TYPE;
+
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+#define IsInterpolatingFilter(filter) (vp10_is_interpolating_filter(filter))
+#else
+#define IsInterpolatingFilter(filter) (1)  // in this configuration every filter counts as interpolating
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+
+static INLINE int is_inter_mode(PREDICTION_MODE mode) {  // nonzero when mode lies in the inter-mode range of the enum
+#if CONFIG_EXT_INTER
+ return mode >= NEARESTMV && mode <= NEW_NEWMV;  // range also covers the compound modes
+#else
+ return mode >= NEARESTMV && mode <= NEWMV;
+#endif // CONFIG_EXT_INTER
+}
+
+#if CONFIG_EXT_INTER
+static INLINE int is_inter_singleref_mode(PREDICTION_MODE mode) {  // nonzero for modes in NEARESTMV..NEWFROMNEARMV
+ return mode >= NEARESTMV && mode <= NEWFROMNEARMV;
+}
+
+static INLINE int is_inter_compound_mode(PREDICTION_MODE mode) {  // nonzero for modes in NEAREST_NEARESTMV..NEW_NEWMV
+ return mode >= NEAREST_NEARESTMV && mode <= NEW_NEWMV;
+}
+
+static INLINE PREDICTION_MODE compound_ref0_mode(PREDICTION_MODE mode) {  // single-reference mode for the first half of a compound mode
+ static PREDICTION_MODE lut[MB_MODE_COUNT] = {  // non-compound entries hold MB_MODE_COUNT as a filler value
+ MB_MODE_COUNT, // DC_PRED 0
+ MB_MODE_COUNT, // V_PRED 1
+ MB_MODE_COUNT, // H_PRED 2
+ MB_MODE_COUNT, // D45_PRED 3
+ MB_MODE_COUNT, // D135_PRED 4
+ MB_MODE_COUNT, // D117_PRED 5
+ MB_MODE_COUNT, // D153_PRED 6
+ MB_MODE_COUNT, // D207_PRED 7
+ MB_MODE_COUNT, // D63_PRED 8
+ MB_MODE_COUNT, // TM_PRED 9
+ MB_MODE_COUNT, // NEARESTMV 10
+ MB_MODE_COUNT, // NEARMV 11
+ MB_MODE_COUNT, // ZEROMV 12
+ MB_MODE_COUNT, // NEWMV 13
+ MB_MODE_COUNT, // NEWFROMNEARMV 14
+ NEARESTMV, // NEAREST_NEARESTMV 15
+ NEARESTMV, // NEAREST_NEARMV 16
+ NEARMV, // NEAR_NEARESTMV 17
+ NEARMV, // NEAR_NEARMV 18
+ NEARESTMV, // NEAREST_NEWMV 19
+ NEWMV, // NEW_NEARESTMV 20
+ NEARMV, // NEAR_NEWMV 21
+ NEWMV, // NEW_NEARMV 22
+ ZEROMV, // ZERO_ZEROMV 23
+ NEWMV, // NEW_NEWMV 24
+ };
+ assert(is_inter_compound_mode(mode));  // only compound modes have a meaningful entry
+ return lut[mode];
+}
+
+static INLINE PREDICTION_MODE compound_ref1_mode(PREDICTION_MODE mode) {  // single-reference mode for the second half of a compound mode
+ static PREDICTION_MODE lut[MB_MODE_COUNT] = {  // non-compound entries hold MB_MODE_COUNT as a filler value
+ MB_MODE_COUNT, // DC_PRED 0
+ MB_MODE_COUNT, // V_PRED 1
+ MB_MODE_COUNT, // H_PRED 2
+ MB_MODE_COUNT, // D45_PRED 3
+ MB_MODE_COUNT, // D135_PRED 4
+ MB_MODE_COUNT, // D117_PRED 5
+ MB_MODE_COUNT, // D153_PRED 6
+ MB_MODE_COUNT, // D207_PRED 7
+ MB_MODE_COUNT, // D63_PRED 8
+ MB_MODE_COUNT, // TM_PRED 9
+ MB_MODE_COUNT, // NEARESTMV 10
+ MB_MODE_COUNT, // NEARMV 11
+ MB_MODE_COUNT, // ZEROMV 12
+ MB_MODE_COUNT, // NEWMV 13
+ MB_MODE_COUNT, // NEWFROMNEARMV 14
+ NEARESTMV, // NEAREST_NEARESTMV 15
+ NEARMV, // NEAREST_NEARMV 16
+ NEARESTMV, // NEAR_NEARESTMV 17
+ NEARMV, // NEAR_NEARMV 18
+ NEWMV, // NEAREST_NEWMV 19
+ NEARESTMV, // NEW_NEARESTMV 20
+ NEWMV, // NEAR_NEWMV 21
+ NEARMV, // NEW_NEARMV 22
+ ZEROMV, // ZERO_ZEROMV 23
+ NEWMV, // NEW_NEWMV 24
+ };
+ assert(is_inter_compound_mode(mode));  // only compound modes have a meaningful entry
+ return lut[mode];
+}
+
+static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) {  // nonzero for each single or compound mode listed below
+ return (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV ||
+ mode == NEAREST_NEWMV || mode == NEW_NEARESTMV ||
+ mode == NEAR_NEWMV || mode == NEW_NEARMV);
+}
+#else
+
+static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) {  // without CONFIG_EXT_INTER only NEWMV qualifies
+ return (mode == NEWMV);
+}
+#endif // CONFIG_EXT_INTER
+
+/* For keyframes, intra block modes are predicted by the (already decoded)
+ modes for the Y blocks to the left and above us; for interframes, there
+ is a single probability table. */
+
+typedef struct {  // mode and motion vectors for one sub-block (MODE_INFO keeps four of these)
+ PREDICTION_MODE as_mode;
+ int_mv as_mv[2]; // first, second inter predictor motion vectors
+#if CONFIG_REF_MV
+ int_mv pred_mv_s8[2];
+#endif
+#if CONFIG_EXT_INTER
+ int_mv ref_mv[2];
+#endif // CONFIG_EXT_INTER
+} b_mode_info;
+
+typedef int8_t MV_REFERENCE_FRAME;  // reference frame id; values above INTRA_FRAME denote inter references
+
+typedef struct {
+ // Number of base colors for Y (0) and UV (1)
+ uint8_t palette_size[2];
+// Value of base colors for Y, U, and V (room for PALETTE_MAX_SIZE colors per channel)
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint16_t palette_colors[3 * PALETTE_MAX_SIZE];
+#else
+ uint8_t palette_colors[3 * PALETTE_MAX_SIZE];
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // Only used by encoder to store the color index of the top left pixel.
+ // TODO(huisu): move this to encoder
+ uint8_t palette_first_color_idx[2];
+} PALETTE_MODE_INFO;
+
+#if CONFIG_EXT_INTRA
+typedef struct {
+ // 1: an ext intra mode is used; 0: otherwise.
+ uint8_t use_ext_intra_mode[PLANE_TYPES];
+ EXT_INTRA_MODE ext_intra_mode[PLANE_TYPES];  // one entry per plane type, like the flag above
+} EXT_INTRA_MODE_INFO;
+#endif // CONFIG_EXT_INTRA
+
+// Mode information for one block. This structure now relates to 8x8 block regions.
+typedef struct {
+ // Common for both INTER and INTRA blocks
+ BLOCK_SIZE sb_type;
+ PREDICTION_MODE mode;
+ TX_SIZE tx_size;
+#if CONFIG_VAR_TX
+ // TODO(jingning): This effectively assigned a separate entry for each
+ // 8x8 block. Apparently it takes much more space than needed.
+ TX_SIZE inter_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
+#endif
+ int8_t skip;
+ int8_t has_no_coeffs;
+ int8_t segment_id;
+#if CONFIG_SUPERTX
+ // Minimum of all segment IDs under the current supertx block.
+ int8_t segment_id_supertx;
+#endif // CONFIG_SUPERTX
+ int8_t seg_id_predicted; // valid only when temporal_update is enabled
+
+ // Only for INTRA blocks
+ PREDICTION_MODE uv_mode;
+ PALETTE_MODE_INFO palette_mode_info;
+
+// Only for INTER blocks
+#if CONFIG_DUAL_FILTER
+ INTERP_FILTER interp_filter[4];
+#else
+ INTERP_FILTER interp_filter;
+#endif
+ MV_REFERENCE_FRAME ref_frame[2];  // [0] > INTRA_FRAME marks an inter block, [1] > INTRA_FRAME a second reference
+ TX_TYPE tx_type;
+
+#if CONFIG_EXT_INTRA
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ int8_t angle_delta[2];
+ // TODO(huisu): this may be replaced by interp_filter
+ INTRA_FILTER intra_filter;
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTER
+ INTERINTRA_MODE interintra_mode;
+ // TODO(debargha): Consolidate these flags
+ int use_wedge_interintra;
+ int interintra_wedge_index;
+ int interintra_wedge_sign;
+ int use_wedge_interinter;
+ int interinter_wedge_index;
+ int interinter_wedge_sign;
+#endif // CONFIG_EXT_INTER
+ MOTION_VARIATION motion_variation;
+ int_mv mv[2];
+ int_mv pred_mv[2];
+#if CONFIG_REF_MV
+ uint8_t ref_mv_idx;
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition;
+#endif
+#if CONFIG_NEW_QUANT
+ int dq_off_index;
+ int send_dq_bit;
+#endif // CONFIG_NEW_QUANT
+ /* deringing gain *per-superblock* */
+ int8_t dering_gain;
+} MB_MODE_INFO;
+
+typedef struct MODE_INFO {
+ MB_MODE_INFO mbmi; // block-level mode info
+ b_mode_info bmi[4]; // sub-block info; get_y_mode() reads it below BLOCK_8X8
+} MODE_INFO;
+// Luma mode: bmi[block].as_mode when sb_type < BLOCK_8X8, otherwise mbmi.mode.
+static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) {
+ return mi->mbmi.sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode : mi->mbmi.mode;
+}
+// Nonzero when ref_frame[0] is above INTRA_FRAME, i.e. the block is not intra.
+static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) {
+ return mbmi->ref_frame[0] > INTRA_FRAME;
+}
+// Nonzero when ref_frame[1] is above INTRA_FRAME (a second inter reference).
+static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) {
+ return mbmi->ref_frame[1] > INTRA_FRAME;
+}
+
+PREDICTION_MODE vp10_left_block_mode(const MODE_INFO *cur_mi,
+ const MODE_INFO *left_mi, int b);
+
+PREDICTION_MODE vp10_above_block_mode(const MODE_INFO *cur_mi,
+ const MODE_INFO *above_mi, int b);
+
+enum mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 };
+
+struct buf_2d {
+ uint8_t *buf;
+ uint8_t *buf0;
+ int width;
+ int height;
+ int stride; // row pitch; vp10_clpf_sb passes it as clpf_block's row stride
+};
+
+typedef struct macroblockd_plane {
+ tran_low_t *dqcoeff;
+ PLANE_TYPE plane_type;
+ int subsampling_x; // right shift from luma width to this plane's width
+ int subsampling_y; // right shift from luma height to this plane's height
+ struct buf_2d dst;
+ struct buf_2d pre[2];
+ ENTROPY_CONTEXT *above_context;
+ ENTROPY_CONTEXT *left_context;
+ int16_t seg_dequant[MAX_SEGMENTS][2];
+#if CONFIG_NEW_QUANT
+ dequant_val_type_nuq
+ seg_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS];
+#endif // CONFIG_NEW_QUANT
+ uint8_t *color_index_map;
+
+ // number of 4x4s in current block
+ uint16_t n4_w, n4_h;
+ // log2 of n4_w, n4_h
+ uint8_t n4_wl, n4_hl;
+
+#if CONFIG_AOM_QM
+ const qm_val_t *seg_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
+#endif // CONFIG_AOM_QM
+ // encoder
+ const int16_t *dequant;
+#if CONFIG_NEW_QUANT
+ const dequant_val_type_nuq *dequant_val_nuq[QUANT_PROFILES];
+#endif // CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+ const qm_val_t *seg_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
+#endif // CONFIG_AOM_QM
+} MACROBLOCKD_PLANE;
+
+#define BLOCK_OFFSET(x, i) ((x) + (i)*16) // advances x by 16 elements per i
+
+typedef struct RefBuffer {
+ // TODO(dkovalev): idx is not really required and should be removed, now it
+ // is used in vp10_onyxd_if.c
+ int idx;
+ YV12_BUFFER_CONFIG *buf;
+ struct scale_factors sf;
+} RefBuffer;
+
+typedef struct macroblockd {
+ struct macroblockd_plane plane[MAX_MB_PLANE];
+ uint8_t bmode_blocks_wl;
+ uint8_t bmode_blocks_hl;
+
+ FRAME_COUNTS *counts;
+ TileInfo tile;
+
+ int mi_stride;
+
+ MODE_INFO **mi; // mi[0] is the mode info read by get_tx_type() and friends
+ MODE_INFO *left_mi;
+ MODE_INFO *above_mi;
+ MB_MODE_INFO *left_mbmi;
+ MB_MODE_INFO *above_mbmi;
+
+ int up_available;
+ int left_available;
+
+ const vpx_prob (*partition_probs)[PARTITION_TYPES - 1];
+
+ /* Distance of MB away from frame edges */
+ int mb_to_left_edge;
+ int mb_to_right_edge;
+ int mb_to_top_edge;
+ int mb_to_bottom_edge;
+
+ FRAME_CONTEXT *fc;
+
+ /* pointers to reference frames */
+ RefBuffer *block_refs[2];
+
+ /* pointer to current frame */
+ const YV12_BUFFER_CONFIG *cur_buf;
+
+ ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
+ ENTROPY_CONTEXT left_context[MAX_MB_PLANE][2 * MAX_MIB_SIZE];
+
+ PARTITION_CONTEXT *above_seg_context;
+ PARTITION_CONTEXT left_seg_context[MAX_MIB_SIZE];
+
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT *above_txfm_context;
+ TXFM_CONTEXT *left_txfm_context;
+ TXFM_CONTEXT left_txfm_context_buffer[MAX_MIB_SIZE];
+
+ TX_SIZE max_tx_size;
+#if CONFIG_SUPERTX
+ TX_SIZE supertx_size;
+#endif // CONFIG_SUPERTX
+#endif // CONFIG_VAR_TX
+
+ // dimension in the unit of 8x8 block of the current block
+ uint8_t n8_w, n8_h;
+
+#if CONFIG_REF_MV
+ uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
+ CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
+ uint8_t is_sec_rect;
+#endif // CONFIG_REF_MV
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ /* Bit depth: 8, 10, 12 */
+ int bd;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ int lossless[MAX_SEGMENTS]; // indexed by segment_id
+ int corrupted;
+
+ struct vpx_internal_error_info *error_info;
+#if CONFIG_GLOBAL_MOTION
+ Global_Motion_Params *global_motion;
+#endif // CONFIG_GLOBAL_MOTION
+} MACROBLOCKD;
+// Sub-block size for splitting `bsize` by `partition`; BLOCK_INVALID if none.
+static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize,
+                                     PARTITION_TYPE partition) {
+  // PARTITION_INVALID is a PARTITION_TYPE constant, not a BLOCK_SIZE; report
+  // failure with BLOCK_INVALID, the same marker subsize_lookup[][] uses.
+  if (partition == PARTITION_INVALID) return BLOCK_INVALID;
+  return subsize_lookup[partition][bsize];
+}
+// Transform type per intra prediction mode, indexed by PREDICTION_MODE.
+static const TX_TYPE intra_mode_to_tx_type_context[INTRA_MODES] = {
+ DCT_DCT, // DC
+ ADST_DCT, // V
+ DCT_ADST, // H
+ DCT_DCT, // D45
+ ADST_ADST, // D135
+ ADST_DCT, // D117
+ DCT_ADST, // D153
+ DCT_ADST, // D207
+ ADST_DCT, // D63
+ ADST_ADST, // TM
+};
+// supertx_enabled(): tx size (square-mapped) exceeds min(log2 width, height).
+#if CONFIG_SUPERTX
+static INLINE int supertx_enabled(const MB_MODE_INFO *mbmi) {
+ return (int)txsize_sqr_map[mbmi->tx_size] >
+ VPXMIN(b_width_log2_lookup[mbmi->sb_type],
+ b_height_log2_lookup[mbmi->sb_type]);
+}
+#endif // CONFIG_SUPERTX
+// Transform width: the number of 4x4 columns in tx_size, times 4.
+static INLINE int get_tx1d_width(TX_SIZE tx_size) {
+ return num_4x4_blocks_wide_txsize_lookup[tx_size] << 2;
+}
+// Transform height: the number of 4x4 rows in tx_size, times 4.
+static INLINE int get_tx1d_height(TX_SIZE tx_size) {
+ return num_4x4_blocks_high_txsize_lookup[tx_size] << 2;
+}
+// Transform area: the number of 4x4 blocks in tx_size, times 16.
+static INLINE int get_tx2d_size(TX_SIZE tx_size) {
+ return num_4x4_blocks_txsize_lookup[tx_size] << 4;
+}
+
+#if CONFIG_EXT_TX
+#define ALLOW_INTRA_EXT_TX 1
+// whether masked transforms are used for 32X32
+#define USE_MSKTX_FOR_32X32 0
+#define USE_REDUCED_TXSET_FOR_16X16 1
+// Number of transform types in each set (the count of 1s per ext_tx_used_* row)
+static const int num_ext_tx_set_inter[EXT_TX_SETS_INTER] = { 1, 16, 12, 2 };
+static const int num_ext_tx_set_intra[EXT_TX_SETS_INTRA] = { 1, 7, 5 };
+
+#if EXT_TX_SIZES == 4
+static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, int is_inter) {
+ tx_size = txsize_sqr_map[tx_size]; // decide on the mapped size from here on
+ if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0; // set 0: one tx type
+#if USE_REDUCED_TXSET_FOR_16X16
+ if (tx_size == TX_32X32) return is_inter ? 3 - USE_MSKTX_FOR_32X32 : 0;
+ return (tx_size == TX_16X16 ? 2 : 1);
+#else
+ if (tx_size == TX_32X32) return is_inter ? 3 - 2 * USE_MSKTX_FOR_32X32 : 0;
+ return (tx_size == TX_16X16 && !is_inter ? 2 : 1);
+#endif // USE_REDUCED_TXSET_FOR_16X16
+}
+// [set][tx size] flags: 1 where that set is the one used at that tx size.
+static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][TX_SIZES] = {
+ { 0, 0, 0, 0 }, // unused
+ { 1, 1, 0, 0 },
+ { 0, 0, 1, 0 },
+};
+
+static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][TX_SIZES] = {
+ { 0, 0, 0, 0 }, // unused
+ { 1, 1, (!USE_REDUCED_TXSET_FOR_16X16), USE_MSKTX_FOR_32X32 },
+ { 0, 0, USE_REDUCED_TXSET_FOR_16X16, 0 },
+ { 0, 0, 0, (!USE_MSKTX_FOR_32X32) },
+};
+
+#else // EXT_TX_SIZES == 4
+
+static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, int is_inter) {
+ (void)is_inter; // only read when USE_REDUCED_TXSET_FOR_16X16 is 0
+ tx_size = txsize_sqr_map[tx_size];
+ if (tx_size > TX_32X32 || bs < BLOCK_8X8) return 0;
+ if (tx_size == TX_32X32) return 0; // this variant never selects set 3
+#if USE_REDUCED_TXSET_FOR_16X16
+ return (tx_size == TX_16X16 ? 2 : 1);
+#else
+ return (tx_size == TX_16X16 && !is_inter ? 2 : 1);
+#endif // USE_REDUCED_TXSET_FOR_16X16
+}
+// [set][tx size] flags for the variant of get_ext_tx_set() directly above.
+static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][TX_SIZES] = {
+ { 0, 0, 0, 0 }, // unused
+ { 1, 1, 0, 0 },
+ { 0, 0, 1, 0 },
+};
+
+static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][TX_SIZES] = {
+ { 0, 0, 0, 0 }, // unused
+ { 1, 1, (!USE_REDUCED_TXSET_FOR_16X16), 0 },
+ { 0, 0, USE_REDUCED_TXSET_FOR_16X16, 0 },
+ { 0, 0, 0, 1 },
+};
+#endif // EXT_TX_SIZES == 4
+
+// Transform types used in each intra set (1 = the TX_TYPE is in the set)
+static const int ext_tx_used_intra[EXT_TX_SETS_INTRA][TX_TYPES] = {
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0 },
+ { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
+};
+
+// Transform types used in each inter set (1 = the TX_TYPE is in the set)
+static const int ext_tx_used_inter[EXT_TX_SETS_INTER][TX_TYPES] = {
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
+};
+
+// 1D Transforms used in inter set, this needs to be changed if
+// ext_tx_used_inter is changed
+static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = {
+ { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 1 },
+};
+// Number of transform types in the set chosen by get_ext_tx_set().
+static INLINE int get_ext_tx_types(TX_SIZE tx_size, BLOCK_SIZE bs,
+ int is_inter) {
+ const int set = get_ext_tx_set(tx_size, bs, is_inter);
+ return is_inter ? num_ext_tx_set_inter[set] : num_ext_tx_set_intra[set];
+}
+
+#if CONFIG_RECT_TX
+static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
+ static const char LUT[BLOCK_SIZES] = { // 1 only for non-square, up to 32X16
+ 0, // BLOCK_4X4
+ 1, // BLOCK_4X8
+ 1, // BLOCK_8X4
+ 0, // BLOCK_8X8
+ 1, // BLOCK_8X16
+ 1, // BLOCK_16X8
+ 0, // BLOCK_16X16
+ 1, // BLOCK_16X32
+ 1, // BLOCK_32X16
+ 0, // BLOCK_32X32
+ 0, // BLOCK_32X64
+ 0, // BLOCK_64X32
+ 0, // BLOCK_64X64
+#if CONFIG_EXT_PARTITION
+ 0, // BLOCK_64X128
+ 0, // BLOCK_128X64
+ 0, // BLOCK_128X128
+#endif // CONFIG_EXT_PARTITION
+ };
+
+ return LUT[bsize];
+}
+// Rectangular transforms need an inter block of an allowed block size.
+static INLINE int is_rect_tx_allowed(const MB_MODE_INFO *mbmi) {
+ return is_inter_block(mbmi) && is_rect_tx_allowed_bsize(mbmi->sb_type);
+}
+// Rectangular tx sizes are the TX_SIZE values at or above TX_SIZES.
+static INLINE int is_rect_tx(TX_SIZE tx_size) { return tx_size >= TX_SIZES; }
+#endif // CONFIG_RECT_TX
+#endif // CONFIG_EXT_TX
+// Transform size for a block, limited by both its block size and tx_mode.
+static INLINE TX_SIZE tx_size_from_tx_mode(BLOCK_SIZE bsize, TX_MODE tx_mode,
+ int is_inter) {
+ const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ if (!is_inter) {
+ return VPXMIN(max_tx_size, largest_tx_size);
+ } else {
+ const TX_SIZE max_rect_tx_size = max_txsize_rect_lookup[bsize];
+ if (txsize_sqr_up_map[max_rect_tx_size] <= largest_tx_size) {
+ return max_rect_tx_size;
+ } else {
+ return largest_tx_size;
+ }
+ }
+#else
+ (void)is_inter; // only the rectangular-transform build distinguishes inter
+ return VPXMIN(max_tx_size, largest_tx_size);
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+}
+
+#if CONFIG_EXT_INTRA
+#define ALLOW_FILTER_INTRA_MODES 1
+#define ANGLE_STEP 3
+#define MAX_ANGLE_DELTAS 3
+
+extern const int16_t dr_intra_derivative[90];
+// Prediction angle in degrees per intra mode; the DC and TM entries are 0.
+static const uint8_t mode_to_angle_map[INTRA_MODES] = {
+ 0, 90, 180, 45, 135, 111, 157, 203, 67, 0,
+};
+// Transform type per filter-intra mode, indexed by EXT_INTRA_MODE.
+static const TX_TYPE filter_intra_mode_to_tx_type_lookup[FILTER_INTRA_MODES] = {
+ DCT_DCT, // FILTER_DC
+ ADST_DCT, // FILTER_V
+ DCT_ADST, // FILTER_H
+ DCT_DCT, // FILTER_D45
+ ADST_ADST, // FILTER_D135
+ ADST_DCT, // FILTER_D117
+ DCT_ADST, // FILTER_D153
+ DCT_ADST, // FILTER_D207
+ ADST_DCT, // FILTER_D63
+ ADST_ADST, // FILTER_TM
+};
+
+int vp10_is_intra_filter_switchable(int angle);
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_TILE
+#define FIXED_TX_TYPE 1 // get_tx_type() then defers to get_default_tx_type()
+#else
+#define FIXED_TX_TYPE 0
+#endif // CONFIG_EXT_TILE
+// Tx type derived from the intra mode alone; DCT_DCT outside intra luma.
+static INLINE TX_TYPE get_default_tx_type(PLANE_TYPE plane_type,
+ const MACROBLOCKD *xd, int block_idx,
+ TX_SIZE tx_size) {
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+
+ if (is_inter_block(mbmi) || plane_type != PLANE_TYPE_Y ||
+ xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32)
+ return DCT_DCT;
+ // plane_type is PLANE_TYPE_Y past the guard, so uv_mode is never picked.
+ return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y
+ ? get_y_mode(xd->mi[0], block_idx)
+ : mbmi->uv_mode];
+}
+// Selects the transform type for a transform block in the given plane.
+static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd,
+ int block_idx, TX_SIZE tx_size) {
+ const MODE_INFO *const mi = xd->mi[0];
+ const MB_MODE_INFO *const mbmi = &mi->mbmi;
+ // With FIXED_TX_TYPE set, the stored mbmi->tx_type is never consulted.
+ if (FIXED_TX_TYPE)
+ return get_default_tx_type(plane_type, xd, block_idx, tx_size);
+
+#if CONFIG_EXT_INTRA
+ if (!is_inter_block(mbmi)) {
+ const int use_ext_intra_mode_info =
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[plane_type];
+ const EXT_INTRA_MODE ext_intra_mode =
+ mbmi->ext_intra_mode_info.ext_intra_mode[plane_type];
+ const PREDICTION_MODE mode = (plane_type == PLANE_TYPE_Y)
+ ? get_y_mode(mi, block_idx)
+ : mbmi->uv_mode;
+
+ if (xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32) return DCT_DCT;
+
+#if CONFIG_EXT_TX
+#if ALLOW_INTRA_EXT_TX
+ if (mbmi->sb_type >= BLOCK_8X8 && plane_type == PLANE_TYPE_Y)
+ return mbmi->tx_type;
+#endif // ALLOW_INTRA_EXT_TX
+#endif // CONFIG_EXT_TX
+
+ if (use_ext_intra_mode_info)
+ return filter_intra_mode_to_tx_type_lookup[ext_intra_mode];
+
+ if (mode == DC_PRED) {
+ return DCT_DCT;
+ } else if (mode == TM_PRED) {
+ return ADST_ADST;
+ } else {
+ int angle = mode_to_angle_map[mode];
+ if (mbmi->sb_type >= BLOCK_8X8)
+ angle += mbmi->angle_delta[plane_type] * ANGLE_STEP;
+ assert(angle > 0 && angle < 270);
+ if (angle == 135)
+ return ADST_ADST;
+ else if (angle < 45 || angle > 225)
+ return DCT_DCT;
+ else if (angle < 135)
+ return ADST_DCT;
+ else
+ return DCT_ADST;
+ }
+ }
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_TX
+#if EXT_TX_SIZES == 4
+ if (xd->lossless[mbmi->segment_id] || txsize_sqr_map[tx_size] > TX_32X32 ||
+ (txsize_sqr_map[tx_size] >= TX_32X32 && !is_inter_block(mbmi)))
+#else
+ if (xd->lossless[mbmi->segment_id] || txsize_sqr_map[tx_size] >= TX_32X32)
+#endif // EXT_TX_SIZES == 4
+ return DCT_DCT;
+ if (mbmi->sb_type >= BLOCK_8X8) {
+ if (plane_type == PLANE_TYPE_Y) {
+#if !ALLOW_INTRA_EXT_TX
+ if (is_inter_block(mbmi))
+#endif // ALLOW_INTRA_EXT_TX
+ return mbmi->tx_type;
+ }
+ if (is_inter_block(mbmi))
+ // UV Inter only
+ return (mbmi->tx_type == IDTX && txsize_sqr_map[tx_size] == TX_32X32)
+ ? DCT_DCT
+ : mbmi->tx_type;
+ }
+
+ // Sub8x8-Inter/Intra OR UV-Intra
+ if (is_inter_block(mbmi)) // Sub8x8-Inter
+ return DCT_DCT;
+ else // Sub8x8 Intra OR UV-Intra
+ return intra_mode_to_tx_type_context[plane_type == PLANE_TYPE_Y
+ ? get_y_mode(mi, block_idx)
+ : mbmi->uv_mode];
+#else // CONFIG_EXT_TX
+ (void)block_idx;
+ if (plane_type != PLANE_TYPE_Y || xd->lossless[mbmi->segment_id] ||
+ txsize_sqr_map[tx_size] >= TX_32X32)
+ return DCT_DCT;
+ return mbmi->tx_type;
+#endif // CONFIG_EXT_TX
+}
+
+void vp10_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y);
+// UV tx size: TX_4X4 below BLOCK_8X8, else luma size capped by the plane size.
+static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize,
+ int xss, int yss) {
+ if (bsize < BLOCK_8X8) {
+ return TX_4X4;
+ } else {
+ const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss];
+ return VPXMIN(txsize_sqr_map[y_tx_size], max_txsize_lookup[plane_bsize]);
+ }
+}
+// UV tx size for a block in plane `pd`; supertx blocks use their own table.
+static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi,
+ const struct macroblockd_plane *pd) {
+#if CONFIG_SUPERTX
+ if (supertx_enabled(mbmi))
+ return uvsupertx_size_lookup[txsize_sqr_map[mbmi->tx_size]]
+ [pd->subsampling_x][pd->subsampling_y];
+#endif // CONFIG_SUPERTX
+ return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x,
+ pd->subsampling_y);
+}
+// Block size of `bsize` in plane `pd`, looked up by its x/y subsampling.
+static INLINE BLOCK_SIZE
+get_plane_block_size(BLOCK_SIZE bsize, const struct macroblockd_plane *pd) {
+ return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
+}
+// Zeroes the above/left entropy contexts spanned by bsize in every plane.
+static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ struct macroblockd_plane *const pd = &xd->plane[i];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ memset(pd->above_context, 0,
+ sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]);
+ memset(pd->left_context, 0,
+ sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]);
+ }
+}
+
+typedef void (*foreach_transformed_block_visitor)(int plane, int block,
+ int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, void *arg);
+
+void vp10_foreach_transformed_block_in_plane(
+ const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
+ foreach_transformed_block_visitor visit, void *arg);
+
+void vp10_foreach_transformed_block(const MACROBLOCKD *const xd,
+ BLOCK_SIZE bsize,
+ foreach_transformed_block_visitor visit,
+ void *arg);
+
+void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob,
+ int aoff, int loff);
+
+#if CONFIG_EXT_INTER
+static INLINE int is_interintra_allowed_bsize(const BLOCK_SIZE bsize) {
+ // TODO(debargha): Should this be bsize < BLOCK_LARGEST?
+ return (bsize >= BLOCK_8X8) && (bsize < BLOCK_64X64); // [8X8, 64X64)
+}
+// Modes from NEARESTMV through NEWMV (inclusive) qualify.
+static INLINE int is_interintra_allowed_mode(const PREDICTION_MODE mode) {
+ return (mode >= NEARESTMV) && (mode <= NEWMV);
+}
+// Needs rf[0] above INTRA_FRAME and rf[1] at or below INTRA_FRAME.
+static INLINE int is_interintra_allowed_ref(const MV_REFERENCE_FRAME rf[2]) {
+ return (rf[0] > INTRA_FRAME) && (rf[1] <= INTRA_FRAME);
+}
+// All three checks (block size, mode, reference pair) must pass.
+static INLINE int is_interintra_allowed(const MB_MODE_INFO *mbmi) {
+ return is_interintra_allowed_bsize(mbmi->sb_type) &&
+ is_interintra_allowed_mode(mbmi->mode) &&
+ is_interintra_allowed_ref(mbmi->ref_frame);
+}
+// 1 if any block size in size group `group` permits inter-intra, else 0.
+static INLINE int is_interintra_allowed_bsize_group(const int group) {
+ int i;
+ for (i = 0; i < BLOCK_SIZES; i++) {
+ if (size_group_lookup[i] == group && is_interintra_allowed_bsize(i))
+ return 1;
+ }
+ return 0;
+}
+// True when ref_frame[1] is INTRA_FRAME and the block permits inter-intra.
+static INLINE int is_interintra_pred(const MB_MODE_INFO *mbmi) {
+ return (mbmi->ref_frame[1] == INTRA_FRAME) && is_interintra_allowed(mbmi);
+}
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+static INLINE int is_motvar_allowed(const MB_MODE_INFO *mbmi) {
+#if CONFIG_EXT_INTER // additionally rejects a second ref of INTRA_FRAME
+ return (mbmi->sb_type >= BLOCK_8X8 && mbmi->ref_frame[1] != INTRA_FRAME);
+#else
+ return (mbmi->sb_type >= BLOCK_8X8);
+#endif // CONFIG_EXT_INTER
+}
+
+#if CONFIG_OBMC
+static INLINE int is_neighbor_overlappable(const MB_MODE_INFO *mbmi) {
+ return (is_inter_block(mbmi)); // the only criterion is is_inter_block()
+}
+#endif // CONFIG_OBMC
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_BLOCKD_H_
diff --git a/av1/common/clpf.c b/av1/common/clpf.c
new file mode 100644
index 0000000..bba40cb
--- /dev/null
+++ b/av1/common/clpf.c
@@ -0,0 +1,99 @@
+/*
+Copyright (c) 2016 Cisco Systems
+(Replace with proper AOM header)
+*/
+
+#include "av1/common/clpf.h"
+
+// Apply the filter on a single block (each output differs from X by at most 1)
+static void clpf_block(const uint8_t *src, uint8_t *dst, int sstride,
+ int dstride, int has_top, int has_left, int has_bottom,
+ int has_right, int width, int height) {
+ int x, y;
+ // A/B/C/D: above/left/right/below neighbours; X stands in when unavailable.
+ for (y = 0; y < height; y++) {
+ for (x = 0; x < width; x++) {
+ int X = src[(y + 0) * sstride + x + 0];
+ int A = has_top ? src[(y - 1) * sstride + x + 0] : X;
+ int B = has_left ? src[(y + 0) * sstride + x - 1] : X;
+ int C = has_right ? src[(y + 0) * sstride + x + 1] : X;
+ int D = has_bottom ? src[(y + 1) * sstride + x + 0] : X;
+ int delta = ((A > X) + (B > X) + (C > X) + (D > X) > 2) -
+ ((A < X) + (B < X) + (C < X) + (D < X) > 2); // 3+ agreeing neighbours
+ dst[y * dstride + x] = X + delta;
+ }
+ }
+}
+
+#define BS (MI_SIZE * MAX_MIB_SIZE) // side length of vp10_clpf_sb's temp buffer
+
+// Iterate over blocks within a superblock
+static void vp10_clpf_sb(const YV12_BUFFER_CONFIG *frame_buffer,
+                         const VP10_COMMON *cm, MACROBLOCKD *xd,
+                         MODE_INFO *const *mi_8x8, int xpos, int ypos) {
+  // Temporary buffer (to allow SIMD parallelism), aligned up to 16 bytes
+  uint8_t buf_unaligned[BS * BS + 15];
+  uint8_t *buf = (uint8_t *)(((uintptr_t)buf_unaligned + 15) & ~(uintptr_t)15);
+  int x, y, p;
+
+  for (p = 0; p < (CLPF_FILTER_ALL_PLANES ? MAX_MB_PLANE : 1); p++) {
+    for (y = 0; y < MAX_MIB_SIZE && ypos + y < cm->mi_rows; y++) {
+      for (x = 0; x < MAX_MIB_SIZE && xpos + x < cm->mi_cols; x++) {
+        const MB_MODE_INFO *mbmi =
+            &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
+
+        // Do not filter if there is no residual
+        if (!mbmi->skip) {
+          // Do not filter frame edges
+          int has_top = ypos + y > 0;
+          int has_left = xpos + x > 0;
+          int has_bottom = ypos + y < cm->mi_rows - 1;
+          int has_right = xpos + x < cm->mi_cols - 1;
+#if CLPF_ALLOW_BLOCK_PARALLELISM
+          // Do not filter superblock edges
+          has_top &= !!y;
+          has_left &= !!x;
+          has_bottom &= y != MAX_MIB_SIZE - 1;
+          has_right &= x != MAX_MIB_SIZE - 1;
+#endif
+          vp10_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
+          clpf_block(
+              xd->plane[p].dst.buf, CLPF_ALLOW_PIXEL_PARALLELISM
+                                        ? buf + y * MI_SIZE * BS + x * MI_SIZE
+                                        : xd->plane[p].dst.buf,
+              xd->plane[p].dst.stride,
+              CLPF_ALLOW_PIXEL_PARALLELISM ? BS : xd->plane[p].dst.stride,
+              has_top, has_left, has_bottom, has_right,
+              MI_SIZE >> xd->plane[p].subsampling_x,
+              MI_SIZE >> xd->plane[p].subsampling_y);
+        }
+      }
+    }
+#if CLPF_ALLOW_PIXEL_PARALLELISM
+    for (y = 0; y < MAX_MIB_SIZE && ypos + y < cm->mi_rows; y++) {
+      for (x = 0; x < MAX_MIB_SIZE && xpos + x < cm->mi_cols; x++) {
+        const MB_MODE_INFO *mbmi =
+            &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
+        vp10_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
+        if (!mbmi->skip) {
+          int i;
+          for (i = 0; i < (MI_SIZE >> xd->plane[p].subsampling_y); i++)
+            memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
+                   buf + (y * MI_SIZE + i) * BS + x * MI_SIZE,
+                   MI_SIZE >> xd->plane[p].subsampling_x);
+        }
+      }
+    }
+#endif
+  }
+}
+
+// Iterate over the superblocks of an entire frame (MAX_MIB_SIZE mi units each)
+void vp10_clpf_frame(const YV12_BUFFER_CONFIG *frame, const VP10_COMMON *cm,
+ MACROBLOCKD *xd) {
+ int x, y;
+ // x and y are in mode-info units, matching cm->mi_cols / cm->mi_rows.
+ for (y = 0; y < cm->mi_rows; y += MAX_MIB_SIZE)
+ for (x = 0; x < cm->mi_cols; x += MAX_MIB_SIZE)
+ vp10_clpf_sb(frame, cm, xd, cm->mi_grid_visible, x, y);
+}
diff --git a/av1/common/clpf.h b/av1/common/clpf.h
new file mode 100644
index 0000000..5b9d55b
--- /dev/null
+++ b/av1/common/clpf.h
@@ -0,0 +1,22 @@
+/*
+Copyright (c) 2016, Cisco Systems
+(Replace with proper AOM header)
+*/
+
+#ifndef VP10_COMMON_CLPF_H_
+#define VP10_COMMON_CLPF_H_
+
+#include "av1/common/reconinter.h"
+
+// Configuration (compile-time switches read by clpf.c)
+#define CLPF_ALLOW_PIXEL_PARALLELISM \
+ 1 // 1 = SIMD friendly (adds a buffer requirement)
+#define CLPF_ALLOW_BLOCK_PARALLELISM \
+ 0 // 1 = MT friendly (degrades quality slightly)
+#define CLPF_FILTER_ALL_PLANES \
+ 0 // 1 = filter both luma and chroma, 0 = filter only luma
+
+void vp10_clpf_frame(const YV12_BUFFER_CONFIG *frame, const VP10_COMMON *cm,
+ MACROBLOCKD *xd);
+
+#endif
diff --git a/av1/common/common.h b/av1/common/common.h
new file mode 100644
index 0000000..4e30034
--- /dev/null
+++ b/av1/common/common.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_COMMON_H_
+#define VP10_COMMON_COMMON_H_
+
+/* Interface header for common constant data structures and lookup tables */
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom/vpx_integer.h"
+#include "aom_ports/bitops.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PI 3.141592653589793238462643383279502884
+
+// For fixed-size arrays only (sizeof is taken of both); structs: just assign.
+#define vp10_copy(dest, src) \
+ { \
+ assert(sizeof(dest) == sizeof(src)); \
+ memcpy(dest, src, sizeof(src)); \
+ }
+
+// Use this for variably-sized arrays: copies n elements; n may be an expression.
+#define vp10_copy_array(dest, src, n)          \
+  {                                            \
+    assert(sizeof(*(dest)) == sizeof(*(src))); \
+    memcpy(dest, src, (n) * sizeof(*(src)));   \
+  }
+// Zero one object (vp10_zero) or the first n elements of an array (_array).
+#define vp10_zero(dest) memset(&(dest), 0, sizeof(dest))
+#define vp10_zero_array(dest, n) memset(dest, 0, (n) * sizeof(*(dest)))
+// Bit length of num_values (get_msb(num_values) + 1); 0 when num_values is 0.
+static INLINE int get_unsigned_bits(unsigned int num_values) {
+ return num_values > 0 ? get_msb(num_values) + 1 : 0;
+}
+// Assigns expr to lval; if the result is null, raises VPX_CODEC_MEM_ERROR.
+#if CONFIG_DEBUG
+#define CHECK_MEM_ERROR(cm, lval, expr)                                     \
+  do {                                                                      \
+    lval = (expr);                                                          \
+    if (!lval)                                                              \
+      vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR,                 \
+                         "Failed to allocate " #lval " at %s:%d", __FILE__, \
+                         __LINE__);                                         \
+  } while (0)
+#else
+#define CHECK_MEM_ERROR(cm, lval, expr)                     \
+  do {                                                      \
+    lval = (expr);                                          \
+    if (!lval)                                              \
+      vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR, \
+                         "Failed to allocate " #lval);      \
+  } while (0)
+#endif
+// TODO(yaowu): validate the usage of these codes or develop new ones.
+#define VP10_SYNC_CODE_0 0x49
+#define VP10_SYNC_CODE_1 0x83
+#define VP10_SYNC_CODE_2 0x43
+
+#define VPX_FRAME_MARKER 0x2
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_COMMON_H_
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
new file mode 100644
index 0000000..4348f08
--- /dev/null
+++ b/av1/common/common_data.h
@@ -0,0 +1,586 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_COMMON_DATA_H_
+#define VP10_COMMON_COMMON_DATA_H_
+
+#include "av1/common/enums.h"
+#include "aom/vpx_integer.h"
+#include "aom_dsp/vpx_dsp_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Expands to its arguments only when CONFIG_EXT_PARTITION is enabled.
+#if CONFIG_EXT_PARTITION
+#define IF_EXT_PARTITION(...) __VA_ARGS__
+#else
+#define IF_EXT_PARTITION(...)
+#endif // CONFIG_EXT_PARTITION
+
+// Log 2 conversion lookup tables for block width and height (4-pel units)
+static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = {
+ 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5)
+};
+static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = {
+ 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5)
+};
+// Log 2 conversion lookup tables for modeinfo width and height
+static const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = {
+ 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, IF_EXT_PARTITION(3, 4, 4)
+};
+static const uint8_t mi_height_log2_lookup[BLOCK_SIZES] = {
+ 0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, IF_EXT_PARTITION(4, 3, 4)
+};
+
+// Width/height lookup tables in units of various block sizes
+static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = {
+ 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32)
+};
+static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = {
+ 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, IF_EXT_PARTITION(32, 16, 32)
+};
+static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = {
+ 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16)
+};
+static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = {
+ 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16)
+};
+static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, IF_EXT_PARTITION(4, 8, 8)
+};
+static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] = {
+ 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)
+};
+// Per-tx-size tables; the six extra entries exist only with CONFIG_EXT_TX.
+static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = {
+ 1, 4, 16, 64,
+#if CONFIG_EXT_TX
+ 2, 2, 8, 8, 32, 32
+#endif // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = {
+ 1, 2, 4, 8,
+#if CONFIG_EXT_TX
+ 1, 2, 2, 4, 4, 8
+#endif // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = {
+ 1, 2, 4, 8,
+#if CONFIG_EXT_TX
+ 2, 1, 4, 2, 8, 4
+#endif // CONFIG_EXT_TX
+};
+// log2 of the corresponding entries in the three tables above.
+static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = {
+ 0, 2, 4, 6,
+#if CONFIG_EXT_TX
+ 1, 1, 3, 3, 5, 5
+#endif // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_wide_txsize_log2_lookup[TX_SIZES_ALL] = {
+ 0, 1, 2, 3,
+#if CONFIG_EXT_TX
+ 0, 1, 1, 2, 2, 3
+#endif // CONFIG_EXT_TX
+};
+static const uint8_t num_4x4_blocks_high_txsize_log2_lookup[TX_SIZES_ALL] = {
+ 0, 1, 2, 3,
+#if CONFIG_EXT_TX
+ 1, 0, 2, 1, 3, 2
+#endif // CONFIG_EXT_TX
+};
+
+// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize)))
+static const uint8_t size_group_lookup[BLOCK_SIZES] = {
+ 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, IF_EXT_PARTITION(3, 3, 3)
+};
+// log2 of the number of pixels in each block size (4X4 -> 4, 64X64 -> 12)
+static const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = {
+ 4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, IF_EXT_PARTITION(13, 13, 14)
+};
+
+/* clang-format off */
+static const PARTITION_TYPE
+ partition_lookup[MAX_SB_SIZE_LOG2 - 1][BLOCK_SIZES] = { // [parent][subsize]
+ { // 4X4 ->
+ // 4X4
+ PARTITION_NONE,
+ // 4X8, 8X4, 8X8
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ // 8X16, 16X8, 16X16
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ // 16X32, 32X16, 32X32
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ // 32X64, 64X32, 64X64
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#if CONFIG_EXT_PARTITION // 64x128, 128x64, 128x128
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#endif // CONFIG_EXT_PARTITION
+ }, { // 8X8 ->
+ // 4X4
+ PARTITION_SPLIT,
+ // 4X8, 8X4, 8X8
+ PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
+ // 8X16, 16X8, 16X16
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ // 16X32, 32X16, 32X32
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ // 32X64, 64X32, 64X64
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#endif // CONFIG_EXT_PARTITION
+ }, { // 16X16 ->
+ // 4X4
+ PARTITION_SPLIT,
+ // 4X8, 8X4, 8X8
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ // 8X16, 16X8, 16X16
+ PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
+ // 16X32, 32X16, 32X32
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ // 32X64, 64X32, 64X64
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#endif // CONFIG_EXT_PARTITION
+ }, { // 32X32 ->
+ // 4X4
+ PARTITION_SPLIT,
+ // 4X8, 8X4, 8X8
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ // 8X16, 16X8, 16X16
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ // 16X32, 32X16, 32X32
+ PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
+ // 32X64, 64X32, 64X64
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+#endif // CONFIG_EXT_PARTITION
+ }, { // 64X64 ->
+ // 4X4
+ PARTITION_SPLIT,
+ // 4X8, 8X4, 8X8
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ // 8X16, 16X8, 16X16
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ // 16X32, 32X16, 32X32
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ // 32X64, 64X32, 64X64
+ PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID,
+ }, { // 128x128 ->
+ // 4X4
+ PARTITION_SPLIT,
+ // 4X8, 8X4, 8X8
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ // 8X16, 16X8, 16X16
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ // 16X32, 32X16, 32X32
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ // 32X64, 64X32, 64X64
+ PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT,
+ // 64x128, 128x64, 128x128
+ PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE,
+#endif // CONFIG_EXT_PARTITION
+ }
+};
+
+#if CONFIG_EXT_PARTITION_TYPES
+static const BLOCK_SIZE subsize_lookup[EXT_PARTITION_TYPES][BLOCK_SIZES] =
+#else
+static const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] =
+#endif // CONFIG_EXT_PARTITION_TYPES
+{
+ { // PARTITION_NONE
+ // 4X4
+ BLOCK_4X4,
+ // 4X8, 8X4, 8X8
+ BLOCK_4X8, BLOCK_8X4, BLOCK_8X8,
+ // 8X16, 16X8, 16X16
+ BLOCK_8X16, BLOCK_16X8, BLOCK_16X16,
+ // 16X32, 32X16, 32X32
+ BLOCK_16X32, BLOCK_32X16, BLOCK_32X32,
+ // 32X64, 64X32, 64X64
+ BLOCK_32X64, BLOCK_64X32, BLOCK_64X64,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ BLOCK_64X128, BLOCK_128X64, BLOCK_128X128,
+#endif // CONFIG_EXT_PARTITION
+ }, { // PARTITION_HORZ
+ // 4X4
+ BLOCK_INVALID,
+ // 4X8, 8X4, 8X8
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
+ // 8X16, 16X8, 16X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
+ // 16X32, 32X16, 32X32
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
+ // 32X64, 64X32, 64X64
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
+#endif // CONFIG_EXT_PARTITION
+ }, { // PARTITION_VERT
+ // 4X4
+ BLOCK_INVALID,
+ // 4X8, 8X4, 8X8
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
+ // 8X16, 16X8, 16X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
+ // 16X32, 32X16, 32X32
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
+ // 32X64, 64X32, 64X64
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
+#endif // CONFIG_EXT_PARTITION
+ }, { // PARTITION_SPLIT
+ // 4X4
+ BLOCK_INVALID,
+ // 4X8, 8X4, 8X8
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4,
+ // 8X16, 16X8, 16X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X8,
+ // 16X32, 32X16, 32X32
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16,
+ // 32X64, 64X32, 64X64
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X64,
+#endif // CONFIG_EXT_PARTITION
+#if CONFIG_EXT_PARTITION_TYPES
+ }, { // PARTITION_HORZ_A
+ // 4X4
+ BLOCK_INVALID,
+ // 4X8, 8X4, 8X8
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
+ // 8X16, 16X8, 16X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
+ // 16X32, 32X16, 32X32
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
+ // 32X64, 64X32, 64X64
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
+#endif // CONFIG_EXT_PARTITION
+ }, { // PARTITION_HORZ_B
+ // 4X4
+ BLOCK_INVALID,
+ // 4X8, 8X4, 8X8
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4,
+ // 8X16, 16X8, 16X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8,
+ // 16X32, 32X16, 32X32
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16,
+ // 32X64, 64X32, 64X64
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_128X64,
+#endif // CONFIG_EXT_PARTITION
+ }, { // PARTITION_VERT_A
+ // 4X4
+ BLOCK_INVALID,
+ // 4X8, 8X4, 8X8
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
+ // 8X16, 16X8, 16X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
+ // 16X32, 32X16, 32X32
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
+ // 32X64, 64X32, 64X64
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
+#endif // CONFIG_EXT_PARTITION
+ }, { // PARTITION_VERT_B
+ // 4X4
+ BLOCK_INVALID,
+ // 4X8, 8X4, 8X8
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8,
+ // 8X16, 16X8, 16X16
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16,
+ // 16X32, 32X16, 32X32
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32,
+ // 32X64, 64X32, 64X64
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X128,
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
+ }
+};
+
+static const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = {
+ // 4X4
+ TX_4X4,
+ // 4X8, 8X4, 8X8
+ TX_4X4, TX_4X4, TX_8X8,
+ // 8X16, 16X8, 16X16
+ TX_8X8, TX_8X8, TX_16X16,
+ // 16X32, 32X16, 32X32
+ TX_16X16, TX_16X16, TX_32X32,
+ // 32X64, 64X32, 64X64
+ TX_32X32, TX_32X32, TX_32X32,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ TX_32X32, TX_32X32, TX_32X32,
+#endif // CONFIG_EXT_PARTITION
+};
+
+#if CONFIG_EXT_TX
+static const TX_SIZE max_txsize_rect_lookup[BLOCK_SIZES] = {
+ // 4X4
+ TX_4X4,
+ // 4X8, 8X4, 8X8
+ TX_4X8, TX_8X4, TX_8X8,
+ // 8X16, 16X8, 16X16
+ TX_8X16, TX_16X8, TX_16X16,
+ // 16X32, 32X16, 32X32
+ TX_16X32, TX_32X16, TX_32X32,
+ // 32X64, 64X32, 64X64
+ TX_32X32, TX_32X32, TX_32X32,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ TX_32X32, TX_32X32, TX_32X32,
+#endif // CONFIG_EXT_PARTITION
+};
+#endif // CONFIG_EXT_TX
+
+// Same as "max_txsize_lookup[bsize] - TX_8X8", invalid for bsize < 8X8
+static const int32_t intra_tx_size_cat_lookup[BLOCK_SIZES] = {
+ // 4X4
+ INT32_MIN,
+ // 4X8, 8X4, 8X8
+ INT32_MIN, INT32_MIN, TX_8X8 - TX_8X8,
+ // 8X16, 16X8, 16X16
+ TX_8X8 - TX_8X8, TX_8X8 - TX_8X8, TX_16X16 - TX_8X8,
+ // 16X32, 32X16, 32X32
+ TX_16X16 - TX_8X8, TX_16X16 - TX_8X8, TX_32X32 - TX_8X8,
+ // 32X64, 64X32, 64X64
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
+#endif // CONFIG_EXT_PARTITION
+};
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+// Same as "max_txsize_lookup[bsize] - TX_8X8", except for the rectangular
+// blocks 8X16 through 32X16, which may use a rectangular transform and so map
+// to "(max_txsize_lookup[bsize] + 1) - TX_8X8"; invalid for bsize < 8X8
+static const int32_t inter_tx_size_cat_lookup[BLOCK_SIZES] = {
+ // 4X4
+ INT32_MIN,
+ // 4X8, 8X4, 8X8
+ INT32_MIN, INT32_MIN, TX_8X8 - TX_8X8,
+ // 8X16, 16X8, 16X16
+ TX_16X16 - TX_8X8, TX_16X16 - TX_8X8, TX_16X16 - TX_8X8,
+ // 16X32, 32X16, 32X32
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
+ // 32X64, 64X32, 64X64
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
+#if CONFIG_EXT_PARTITION
+ // 64x128, 128x64, 128x128
+ TX_32X32 - TX_8X8, TX_32X32 - TX_8X8, TX_32X32 - TX_8X8,
+#endif // CONFIG_EXT_PARTITION
+};
+#else
+#define inter_tx_size_cat_lookup intra_tx_size_cat_lookup
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+
+/* clang-format on */
+
+static const TX_SIZE txsize_horz_map[TX_SIZES_ALL] = {
+ TX_4X4, // TX_4X4
+ TX_8X8, // TX_8X8
+ TX_16X16, // TX_16X16
+ TX_32X32, // TX_32X32
+#if CONFIG_EXT_TX
+ TX_4X4, // TX_4X8
+ TX_8X8, // TX_8X4
+ TX_8X8, // TX_8X16
+ TX_16X16, // TX_16X8
+ TX_16X16, // TX_16X32
+ TX_32X32 // TX_32X16
+#endif // CONFIG_EXT_TX
+};
+
+static const TX_SIZE txsize_vert_map[TX_SIZES_ALL] = {
+ TX_4X4, // TX_4X4
+ TX_8X8, // TX_8X8
+ TX_16X16, // TX_16X16
+ TX_32X32, // TX_32X32
+#if CONFIG_EXT_TX
+ TX_8X8, // TX_4X8
+ TX_4X4, // TX_8X4
+ TX_16X16, // TX_8X16
+ TX_8X8, // TX_16X8
+ TX_32X32, // TX_16X32
+ TX_16X16 // TX_32X16
+#endif // CONFIG_EXT_TX
+};
+
+static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
+ BLOCK_4X4, // TX_4X4
+ BLOCK_8X8, // TX_8X8
+ BLOCK_16X16, // TX_16X16
+ BLOCK_32X32, // TX_32X32
+#if CONFIG_EXT_TX
+ BLOCK_4X8, // TX_4X8
+ BLOCK_8X4, // TX_8X4
+ BLOCK_8X16, // TX_8X16
+ BLOCK_16X8, // TX_16X8
+ BLOCK_16X32, // TX_16X32
+ BLOCK_32X16, // TX_32X16
+#endif // CONFIG_EXT_TX
+};
+
+static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
+ TX_4X4, // TX_4X4
+ TX_8X8, // TX_8X8
+ TX_16X16, // TX_16X16
+ TX_32X32, // TX_32X32
+#if CONFIG_EXT_TX
+ TX_4X4, // TX_4X8
+ TX_4X4, // TX_8X4
+ TX_8X8, // TX_8X16
+ TX_8X8, // TX_16X8
+ TX_16X16, // TX_16X32
+ TX_16X16, // TX_32X16
+#endif // CONFIG_EXT_TX
+};
+
+static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
+ TX_4X4, // TX_4X4
+ TX_8X8, // TX_8X8
+ TX_16X16, // TX_16X16
+ TX_32X32, // TX_32X32
+#if CONFIG_EXT_TX
+ TX_8X8, // TX_4X8
+ TX_8X8, // TX_8X4
+ TX_16X16, // TX_8X16
+ TX_16X16, // TX_16X8
+ TX_32X32, // TX_16X32
+ TX_32X32, // TX_32X16
+#endif // CONFIG_EXT_TX
+};
+
+static const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
+ TX_4X4, // ONLY_4X4
+ TX_8X8, // ALLOW_8X8
+ TX_16X16, // ALLOW_16X16
+ TX_32X32, // ALLOW_32X32
+ TX_32X32, // TX_MODE_SELECT
+};
+
+static const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = {
+ // Indexed [bsize][ss_x][ss_y]; the four entries of each row are, in order,
+ // (ss_x, ss_y) = (0, 0), (0, 1), (1, 0), (1, 1).
+ { { BLOCK_4X4, BLOCK_INVALID }, { BLOCK_INVALID, BLOCK_INVALID } },
+ { { BLOCK_4X8, BLOCK_4X4 }, { BLOCK_INVALID, BLOCK_INVALID } },
+ { { BLOCK_8X4, BLOCK_INVALID }, { BLOCK_4X4, BLOCK_INVALID } },
+ { { BLOCK_8X8, BLOCK_8X4 }, { BLOCK_4X8, BLOCK_4X4 } },
+ { { BLOCK_8X16, BLOCK_8X8 }, { BLOCK_INVALID, BLOCK_4X8 } },
+ { { BLOCK_16X8, BLOCK_INVALID }, { BLOCK_8X8, BLOCK_8X4 } },
+ { { BLOCK_16X16, BLOCK_16X8 }, { BLOCK_8X16, BLOCK_8X8 } },
+ { { BLOCK_16X32, BLOCK_16X16 }, { BLOCK_INVALID, BLOCK_8X16 } },
+ { { BLOCK_32X16, BLOCK_INVALID }, { BLOCK_16X16, BLOCK_16X8 } },
+ { { BLOCK_32X32, BLOCK_32X16 }, { BLOCK_16X32, BLOCK_16X16 } },
+ { { BLOCK_32X64, BLOCK_32X32 }, { BLOCK_INVALID, BLOCK_16X32 } },
+ { { BLOCK_64X32, BLOCK_INVALID }, { BLOCK_32X32, BLOCK_32X16 } },
+ { { BLOCK_64X64, BLOCK_64X32 }, { BLOCK_32X64, BLOCK_32X32 } },
+#if CONFIG_EXT_PARTITION
+ { { BLOCK_64X128, BLOCK_64X64 }, { BLOCK_INVALID, BLOCK_32X64 } },
+ { { BLOCK_128X64, BLOCK_INVALID }, { BLOCK_64X64, BLOCK_64X32 } },
+ { { BLOCK_128X128, BLOCK_128X64 }, { BLOCK_64X128, BLOCK_64X64 } },
+#endif // CONFIG_EXT_PARTITION
+};
+
+// Bit field (4 bits; 5 with CONFIG_EXT_PARTITION) where each bit set to 1 marks
+// a split block size. In the 4-bit case, 1111 means 64x64, 32x32, 16x16 and
+// 8x8 are all split; 1000 means only the 64x64 is split, into 32x32.
+static const struct {
+ PARTITION_CONTEXT above;
+ PARTITION_CONTEXT left;
+} partition_context_lookup[BLOCK_SIZES] = {
+#if CONFIG_EXT_PARTITION
+ { 31, 31 }, // 4X4 - {0b11111, 0b11111}
+ { 31, 30 }, // 4X8 - {0b11111, 0b11110}
+ { 30, 31 }, // 8X4 - {0b11110, 0b11111}
+ { 30, 30 }, // 8X8 - {0b11110, 0b11110}
+ { 30, 28 }, // 8X16 - {0b11110, 0b11100}
+ { 28, 30 }, // 16X8 - {0b11100, 0b11110}
+ { 28, 28 }, // 16X16 - {0b11100, 0b11100}
+ { 28, 24 }, // 16X32 - {0b11100, 0b11000}
+ { 24, 28 }, // 32X16 - {0b11000, 0b11100}
+ { 24, 24 }, // 32X32 - {0b11000, 0b11000}
+ { 24, 16 }, // 32X64 - {0b11000, 0b10000}
+ { 16, 24 }, // 64X32 - {0b10000, 0b11000}
+ { 16, 16 }, // 64X64 - {0b10000, 0b10000}
+ { 16, 0 }, // 64X128- {0b10000, 0b00000}
+ { 0, 16 }, // 128X64- {0b00000, 0b10000}
+ { 0, 0 }, // 128X128-{0b00000, 0b00000}
+#else
+ { 15, 15 }, // 4X4 - {0b1111, 0b1111}
+ { 15, 14 }, // 4X8 - {0b1111, 0b1110}
+ { 14, 15 }, // 8X4 - {0b1110, 0b1111}
+ { 14, 14 }, // 8X8 - {0b1110, 0b1110}
+ { 14, 12 }, // 8X16 - {0b1110, 0b1100}
+ { 12, 14 }, // 16X8 - {0b1100, 0b1110}
+ { 12, 12 }, // 16X16 - {0b1100, 0b1100}
+ { 12, 8 }, // 16X32 - {0b1100, 0b1000}
+ { 8, 12 }, // 32X16 - {0b1000, 0b1100}
+ { 8, 8 }, // 32X32 - {0b1000, 0b1000}
+ { 8, 0 }, // 32X64 - {0b1000, 0b0000}
+ { 0, 8 }, // 64X32 - {0b0000, 0b1000}
+ { 0, 0 }, // 64X64 - {0b0000, 0b0000}
+#endif // CONFIG_EXT_PARTITION
+};
+
+#if CONFIG_SUPERTX
+static const TX_SIZE uvsupertx_size_lookup[TX_SIZES][2][2] = {
+ // Indexed [tx_size][ss_x][ss_y]; the four entries of each row are, in order,
+ // (ss_x, ss_y) = (0, 0), (0, 1), (1, 0), (1, 1).
+ { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
+ { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
+ { { TX_32X32, TX_16X16 }, { TX_16X16, TX_16X16 } },
+};
+
+#if CONFIG_EXT_PARTITION_TYPES
+static const int partition_supertx_context_lookup[EXT_PARTITION_TYPES] = {
+ -1, 0, 0, 1, 0, 0, 0, 0
+};
+
+#else
+static const int partition_supertx_context_lookup[PARTITION_TYPES] = { -1, 0, 0,
+ 1 };
+#endif // CONFIG_EXT_PARTITION_TYPES
+#endif // CONFIG_SUPERTX
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_COMMON_DATA_H_
diff --git a/av1/common/debugmodes.c b/av1/common/debugmodes.c
new file mode 100644
index 0000000..6c958a8
--- /dev/null
+++ b/av1/common/debugmodes.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+
+#include "av1/common/blockd.h"
+#include "av1/common/onyxc_int.h"
+
+static void log_frame_info(VP10_COMMON *cm, const char *str, FILE *f) {
+ fprintf(f, "%s", str);  // caller-supplied label, written verbatim
+ fprintf(f, "(Frame %d, Show:%d, Q:%d): \n", cm->current_video_frame,
+ cm->show_frame, cm->base_qindex);  // frame number, show flag, base q index
+}
+/* Prints one value per mode-info unit of the visible grid, row by row. Each
+ * value is read as an int located member_offset bytes into the unit's mbmi;
+ * every row is prefixed with the first character of descriptor.
+ * NOTE(review): the int read presumes the member is int-sized -- confirm. */
+static void print_mi_data(VP10_COMMON *cm, FILE *file, const char *descriptor,
+                          size_t member_offset) {
+  int mi_row, mi_col;
+  MODE_INFO **mi = cm->mi_grid_visible;
+  int rows = cm->mi_rows;
+  int cols = cm->mi_cols;
+  char prefix = descriptor[0];
+
+  log_frame_info(cm, descriptor, file);
+  for (mi_row = 0; mi_row < rows; mi_row++) {
+    fprintf(file, "%c ", prefix);
+    for (mi_col = 0; mi_col < cols; mi_col++) {
+      fprintf(file, "%2d ", *((int *)((char *)(&mi[0]->mbmi) + member_offset)));
+      mi++;
+    }
+    fprintf(file, "\n");
+    mi += cm->mi_stride - cols;  // advance to the next row (pitch: mi_stride)
+  }
+  fprintf(file, "\n");
+}
+
+void vp10_print_modes_and_motion_vectors(VP10_COMMON *cm, const char *file) {
+  int mi_row, mi_col;
+  FILE *mvs = fopen(file, "a");  // opened in append mode
+  MODE_INFO **mi = cm->mi_grid_visible;
+  int rows = cm->mi_rows;
+  int cols = cm->mi_cols;
+
+  if (mvs == NULL) return;  // log file could not be opened; nothing to dump
+  print_mi_data(cm, mvs, "Partitions:", offsetof(MB_MODE_INFO, sb_type));
+  print_mi_data(cm, mvs, "Modes:", offsetof(MB_MODE_INFO, mode));
+  print_mi_data(cm, mvs, "Ref frame:", offsetof(MB_MODE_INFO, ref_frame[0]));
+  print_mi_data(cm, mvs, "Transform:", offsetof(MB_MODE_INFO, tx_size));
+  print_mi_data(cm, mvs, "UV Modes:", offsetof(MB_MODE_INFO, uv_mode));
+
+  // output skip information.
+  log_frame_info(cm, "Skips:", mvs);
+  for (mi_row = 0; mi_row < rows; mi_row++) {
+    fprintf(mvs, "S ");
+    for (mi_col = 0; mi_col < cols; mi_col++) {
+      fprintf(mvs, "%2d ", mi[0]->mbmi.skip);
+      mi++;
+    }
+    fprintf(mvs, "\n");
+    mi += cm->mi_stride - cols;  // advance to the next row (pitch: mi_stride)
+  }
+  fprintf(mvs, "\n");
+
+  // output motion vectors.
+  log_frame_info(cm, "Vectors ", mvs);
+  mi = cm->mi_grid_visible;
+  for (mi_row = 0; mi_row < rows; mi_row++) {
+    fprintf(mvs, "V ");
+    for (mi_col = 0; mi_col < cols; mi_col++) {
+      fprintf(mvs, "%4d:%4d ", mi[0]->mbmi.mv[0].as_mv.row,
+              mi[0]->mbmi.mv[0].as_mv.col);
+      mi++;
+    }
+    fprintf(mvs, "\n");
+    mi += cm->mi_stride - cols;  // advance to the next row (pitch: mi_stride)
+  }
+  fprintf(mvs, "\n");
+
+  fclose(mvs);
+}
diff --git a/av1/common/dering.c b/av1/common/dering.c
new file mode 100644
index 0000000..7c116a2
--- /dev/null
+++ b/av1/common/dering.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string.h>
+#include <math.h>
+
+#include "./vpx_scale_rtcd.h"
+#include "aom/vpx_integer.h"
+#include "av1/common/dering.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/od_dering.h"
+
+int compute_level_from_index(int global_level, int gi) {
+  // Gains are in sixteenths: the product below is rounded and shifted by 4.
+  static const int dering_gains[DERING_REFINEMENT_LEVELS] = { 0, 11, 16, 22 };
+  if (global_level == 0) return 0;
+  return clamp((global_level * dering_gains[gi] + 8) >> 4, gi,
+               MAX_DERING_LEVEL - 1);
+}
+
+int sb_all_skip(const VP10_COMMON *const cm, int mi_row, int mi_col) {
+  // Clip the MAX_MIB_SIZE x MAX_MIB_SIZE window to what lies inside the frame.
+  const int rows_left = cm->mi_rows - mi_row;
+  const int cols_left = cm->mi_cols - mi_col;
+  const int nrows = rows_left > MAX_MIB_SIZE ? MAX_MIB_SIZE : rows_left;
+  const int ncols = cols_left > MAX_MIB_SIZE ? MAX_MIB_SIZE : cols_left;
+  int row, col;
+
+  for (row = 0; row < nrows; row++) {
+    const int line = (mi_row + row) * cm->mi_stride + mi_col;
+    for (col = 0; col < ncols; col++) {
+      // One block without the skip flag is enough to answer "no".
+      if (!cm->mi_grid_visible[line + col]->mbmi.skip) return 0;
+    }
+  }
+  return 1;
+}
+
+// Derings all three planes addressed by xd->plane[].dst (set up from |frame|):
+// each plane is copied out, filtered one superblock at a time, and written
+// back. NOTE(review): the vpx_malloc() results below are used unchecked.
+void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                       MACROBLOCKD *xd, int global_level) {
+  int r, c, sbr, sbc, nhsb, nvsb;
+  int stride, pli, bsize[3], dec[3];
+  od_dering_in *src[3];
+  unsigned char *bskip;
+  int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
+  int coeff_shift = VPXMAX(cm->bit_depth - 8, 0);
+  nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
+  nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
+  bskip = vpx_malloc(sizeof(*bskip) * cm->mi_rows * cm->mi_cols);
+  vp10_setup_dst_planes(xd->plane, frame, 0, 0);
+  for (pli = 0; pli < 3; pli++) {
+    // Only the horizontal subsampling is consulted for the block size.
+    dec[pli] = xd->plane[pli].subsampling_x;
+    bsize[pli] = 8 >> dec[pli];
+  }
+  stride = bsize[0] * cm->mi_cols;
+  for (pli = 0; pli < 3; pli++) {
+    // Size by the element type (*src[pli]), not by the pointer type (*src).
+    src[pli] = vpx_malloc(sizeof(*src[pli]) * cm->mi_rows * cm->mi_cols * 64);
+    for (r = 0; r < bsize[pli] * cm->mi_rows; ++r) {
+      for (c = 0; c < bsize[pli] * cm->mi_cols; ++c) {
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (cm->use_highbitdepth) {
+          src[pli][r * stride + c] = CONVERT_TO_SHORTPTR(
+              xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
+        } else {
+#endif
+          src[pli][r * stride + c] =
+              xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
+#if CONFIG_VP9_HIGHBITDEPTH
+        }
+#endif
+      }
+    }
+  }
+  for (r = 0; r < cm->mi_rows; ++r) {
+    for (c = 0; c < cm->mi_cols; ++c) {
+      const MB_MODE_INFO *mbmi =
+          &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
+      bskip[r * cm->mi_cols + c] = mbmi->skip;
+    }
+  }
+  for (sbr = 0; sbr < nvsb; sbr++) {
+    for (sbc = 0; sbc < nhsb; sbc++) {
+      int level;
+      int nhb, nvb;
+      nhb = VPXMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
+      nvb = VPXMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
+      for (pli = 0; pli < 3; pli++) {
+        int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
+        int threshold;
+#if DERING_REFINEMENT
+        level = compute_level_from_index(
+            global_level,
+            cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
+                                MAX_MIB_SIZE * sbc]
+                ->mbmi.dering_gain);
+#else
+        level = global_level;
+#endif
+        /* FIXME: This is a temporary hack that uses more conservative
+           deringing for chroma. */
+        if (pli) level = (level * 5 + 4) >> 3;
+        if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) level = 0;
+        threshold = level << coeff_shift;
+        od_dering(&OD_DERING_VTBL_C, dst, MAX_MIB_SIZE * bsize[pli],
+                  &src[pli][sbr * stride * bsize[pli] * MAX_MIB_SIZE +
+                            sbc * bsize[pli] * MAX_MIB_SIZE],
+                  stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
+                  &bskip[MAX_MIB_SIZE * sbr * cm->mi_cols + MAX_MIB_SIZE * sbc],
+                  cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP,
+                  coeff_shift);
+        for (r = 0; r < bsize[pli] * nvb; ++r) {
+          for (c = 0; c < bsize[pli] * nhb; ++c) {
+#if CONFIG_VP9_HIGHBITDEPTH
+            if (cm->use_highbitdepth) {
+              CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
+              [xd->plane[pli].dst.stride *
+                   (bsize[pli] * MAX_MIB_SIZE * sbr + r) +
+               sbc * bsize[pli] * MAX_MIB_SIZE + c] =
+                  dst[r * MAX_MIB_SIZE * bsize[pli] + c];
+            } else {
+#endif
+              xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
+                                         (bsize[pli] * MAX_MIB_SIZE * sbr + r) +
+                                     sbc * bsize[pli] * MAX_MIB_SIZE + c] =
+                  dst[r * MAX_MIB_SIZE * bsize[pli] + c];
+#if CONFIG_VP9_HIGHBITDEPTH
+            }
+#endif
+          }
+        }
+      }
+    }
+  }
+  for (pli = 0; pli < 3; pli++) {
+    vpx_free(src[pli]);
+  }
+  vpx_free(bskip);
+}
diff --git a/av1/common/dering.h b/av1/common/dering.h
new file mode 100644
index 0000000..de59c86
--- /dev/null
+++ b/av1/common/dering.h
@@ -0,0 +1,32 @@
+#ifndef VP10_COMMON_DERING_H_
+#define VP10_COMMON_DERING_H_
+
+#include "av1/common/od_dering.h"
+#include "av1/common/onyxc_int.h"
+#include "aom/vpx_integer.h"
+#include "./vpx_config.h"
+#include "aom_ports/mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DERING_LEVEL_BITS 6
+#define MAX_DERING_LEVEL (1 << DERING_LEVEL_BITS)  // levels clamp to this - 1
+
+#define DERING_REFINEMENT 1  // nonzero: level comes from mbmi.dering_gain
+#define DERING_REFINEMENT_BITS 2
+#define DERING_REFINEMENT_LEVELS 4  // number of entries in the gain table
+
+int compute_level_from_index(int global_level, int gi);
+int sb_all_skip(const VP10_COMMON *const cm, int mi_row, int mi_col);
+void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+ MACROBLOCKD *xd, int global_level);
+
+int vp10_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
+ VP10_COMMON *cm, MACROBLOCKD *xd);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif // VP10_COMMON_DERING_H_
diff --git a/av1/common/divide.c b/av1/common/divide.c
new file mode 100644
index 0000000..f0c6730
--- /dev/null
+++ b/av1/common/divide.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/divide.h"
+
+/* Constants for divide by multiply for small divisors generated with:
+void init_fastdiv() {
+ int i;
+ for (i = 3; i < 256; ++i) {
+ const int s = 31 ^ __builtin_clz(2 * i + 1);
+ const unsigned long long base = (1ull << (sizeof(unsigned) * 8 + s)) - 1;
+ fastdiv_tab[i].mult = (base / i + 1) & 0xFFFFFFFF;
+ fastdiv_tab[i].shift = s;
+ }
+ for (i = 0; i < 8; ++i) {
+ fastdiv_tab[1 << i].mult = 0;
+ fastdiv_tab[1 << i].shift = i;
+ }
+}
+*/
+const struct fastdiv_elem vp10_fastdiv_tab[256] = {
+ { 0, 0 }, { 0, 0 }, { 0, 1 },
+ { 1431655766, 2 }, { 0, 2 }, { 2576980378u, 3 },
+ { 1431655766, 3 }, { 613566757, 3 }, { 0, 3 },
+ { 3340530120u, 4 }, { 2576980378u, 4 }, { 1952257862, 4 },
+ { 1431655766, 4 }, { 991146300, 4 }, { 613566757, 4 },
+ { 286331154u, 4 }, { 0, 4 }, { 3789677026u, 5 },
+ { 3340530120u, 5 }, { 2938661835u, 5 }, { 2576980378u, 5 },
+ { 2249744775u, 5 }, { 1952257862, 5 }, { 1680639377, 5 },
+ { 1431655766, 5 }, { 1202590843, 5 }, { 991146300, 5 },
+ { 795364315, 5 }, { 613566757, 5 }, { 444306962, 5 },
+ { 286331154, 5 }, { 138547333, 5 }, { 0, 5 },
+ { 4034666248u, 6 }, { 3789677026u, 6 }, { 3558687189u, 6 },
+ { 3340530120u, 6 }, { 3134165325u, 6 }, { 2938661835u, 6 },
+ { 2753184165u, 6 }, { 2576980378u, 6 }, { 2409371898u, 6 },
+ { 2249744775u, 6 }, { 2097542168u, 6 }, { 1952257862, 6 },
+ { 1813430637, 6 }, { 1680639377, 6 }, { 1553498810, 6 },
+ { 1431655766, 6 }, { 1314785907, 6 }, { 1202590843, 6 },
+ { 1094795586, 6 }, { 991146300, 6 }, { 891408307, 6 },
+ { 795364315, 6 }, { 702812831, 6 }, { 613566757, 6 },
+ { 527452125, 6 }, { 444306962, 6 }, { 363980280, 6 },
+ { 286331154, 6 }, { 211227900, 6 }, { 138547333, 6 },
+ { 68174085, 6 }, { 0, 6 }, { 4162814457u, 7 },
+ { 4034666248u, 7 }, { 3910343360u, 7 }, { 3789677026u, 7 },
+ { 3672508268u, 7 }, { 3558687189u, 7 }, { 3448072337u, 7 },
+ { 3340530120u, 7 }, { 3235934265u, 7 }, { 3134165325u, 7 },
+ { 3035110223u, 7 }, { 2938661835u, 7 }, { 2844718599u, 7 },
+ { 2753184165u, 7 }, { 2663967058u, 7 }, { 2576980378u, 7 },
+ { 2492141518u, 7 }, { 2409371898u, 7 }, { 2328596727u, 7 },
+ { 2249744775u, 7 }, { 2172748162u, 7 }, { 2097542168, 7 },
+ { 2024065048, 7 }, { 1952257862, 7 }, { 1882064321, 7 },
+ { 1813430637, 7 }, { 1746305385, 7 }, { 1680639377, 7 },
+ { 1616385542, 7 }, { 1553498810, 7 }, { 1491936009, 7 },
+ { 1431655766, 7 }, { 1372618415, 7 }, { 1314785907, 7 },
+ { 1258121734, 7 }, { 1202590843, 7 }, { 1148159575, 7 },
+ { 1094795586, 7 }, { 1042467791, 7 }, { 991146300, 7 },
+ { 940802361, 7 }, { 891408307, 7 }, { 842937507, 7 },
+ { 795364315, 7 }, { 748664025, 7 }, { 702812831, 7 },
+ { 657787785, 7 }, { 613566757, 7 }, { 570128403, 7 },
+ { 527452125, 7 }, { 485518043, 7 }, { 444306962, 7 },
+ { 403800345, 7 }, { 363980280, 7 }, { 324829460, 7 },
+ { 286331154, 7 }, { 248469183, 7 }, { 211227900, 7 },
+ { 174592167, 7 }, { 138547333, 7 }, { 103079216, 7 },
+ { 68174085, 7 }, { 33818641, 7 }, { 0, 7 },
+ { 4228378656u, 8 }, { 4162814457u, 8 }, { 4098251237u, 8 },
+ { 4034666248u, 8 }, { 3972037425u, 8 }, { 3910343360u, 8 },
+ { 3849563281u, 8 }, { 3789677026u, 8 }, { 3730665024u, 8 },
+ { 3672508268u, 8 }, { 3615188300u, 8 }, { 3558687189u, 8 },
+ { 3502987511u, 8 }, { 3448072337u, 8 }, { 3393925206u, 8 },
+ { 3340530120u, 8 }, { 3287871517u, 8 }, { 3235934265u, 8 },
+ { 3184703642u, 8 }, { 3134165325u, 8 }, { 3084305374u, 8 },
+ { 3035110223u, 8 }, { 2986566663u, 8 }, { 2938661835u, 8 },
+ { 2891383213u, 8 }, { 2844718599u, 8 }, { 2798656110u, 8 },
+ { 2753184165u, 8 }, { 2708291480u, 8 }, { 2663967058u, 8 },
+ { 2620200175u, 8 }, { 2576980378u, 8 }, { 2534297473u, 8 },
+ { 2492141518u, 8 }, { 2450502814u, 8 }, { 2409371898u, 8 },
+ { 2368739540u, 8 }, { 2328596727u, 8 }, { 2288934667u, 8 },
+ { 2249744775u, 8 }, { 2211018668u, 8 }, { 2172748162u, 8 },
+ { 2134925265u, 8 }, { 2097542168, 8 }, { 2060591247, 8 },
+ { 2024065048, 8 }, { 1987956292, 8 }, { 1952257862, 8 },
+ { 1916962805, 8 }, { 1882064321, 8 }, { 1847555765, 8 },
+ { 1813430637, 8 }, { 1779682582, 8 }, { 1746305385, 8 },
+ { 1713292966, 8 }, { 1680639377, 8 }, { 1648338801, 8 },
+ { 1616385542, 8 }, { 1584774030, 8 }, { 1553498810, 8 },
+ { 1522554545, 8 }, { 1491936009, 8 }, { 1461638086, 8 },
+ { 1431655766, 8 }, { 1401984144, 8 }, { 1372618415, 8 },
+ { 1343553873, 8 }, { 1314785907, 8 }, { 1286310003, 8 },
+ { 1258121734, 8 }, { 1230216764, 8 }, { 1202590843, 8 },
+ { 1175239808, 8 }, { 1148159575, 8 }, { 1121346142, 8 },
+ { 1094795586, 8 }, { 1068504060, 8 }, { 1042467791, 8 },
+ { 1016683080, 8 }, { 991146300, 8 }, { 965853890, 8 },
+ { 940802361, 8 }, { 915988286, 8 }, { 891408307, 8 },
+ { 867059126, 8 }, { 842937507, 8 }, { 819040276, 8 },
+ { 795364315, 8 }, { 771906565, 8 }, { 748664025, 8 },
+ { 725633745, 8 }, { 702812831, 8 }, { 680198441, 8 },
+ { 657787785, 8 }, { 635578121, 8 }, { 613566757, 8 },
+ { 591751050, 8 }, { 570128403, 8 }, { 548696263, 8 },
+ { 527452125, 8 }, { 506393524, 8 }, { 485518043, 8 },
+ { 464823301, 8 }, { 444306962, 8 }, { 423966729, 8 },
+ { 403800345, 8 }, { 383805589, 8 }, { 363980280, 8 },
+ { 344322273, 8 }, { 324829460, 8 }, { 305499766, 8 },
+ { 286331154, 8 }, { 267321616, 8 }, { 248469183, 8 },
+ { 229771913, 8 }, { 211227900, 8 }, { 192835267, 8 },
+ { 174592167, 8 }, { 156496785, 8 }, { 138547333, 8 },
+ { 120742053, 8 }, { 103079216, 8 }, { 85557118, 8 },
+ { 68174085, 8 }, { 50928466, 8 }, { 33818641, 8 },
+ { 16843010, 8 },
+};
diff --git a/av1/common/divide.h b/av1/common/divide.h
new file mode 100644
index 0000000..7de6c91
--- /dev/null
+++ b/av1/common/divide.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_DIVIDE_H_
+#define VP10_COMMON_DIVIDE_H_
+// An implementation of the divide by multiply algorithm
+// https://gmplib.org/~tege/divcnst-pldi94.pdf
+
+#include <limits.h>
+
+#include "./vpx_config.h"
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+struct fastdiv_elem {
+ unsigned mult;  // fastdiv() takes the high word of x * mult
+ unsigned shift;  // right shift fastdiv() applies to (high word + x)
+};
+
+extern const struct fastdiv_elem vp10_fastdiv_tab[256];
+
+static INLINE unsigned fastdiv(unsigned x, int y) {
+  const uint64_t t =  // high word of x * mult; y must be in [0, 255]
+      ((uint64_t)x * vp10_fastdiv_tab[y].mult) >> (sizeof(x) * CHAR_BIT);
+  return (unsigned)((t + x) >> vp10_fastdiv_tab[y].shift);  // 64-bit: no wrap
+}
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+#endif // VP10_COMMON_DIVIDE_H_
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
new file mode 100644
index 0000000..be96c42
--- /dev/null
+++ b/av1/common/entropy.c
@@ -0,0 +1,2922 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/entropy.h"
+#include "av1/common/blockd.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/entropymode.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom/vpx_integer.h"
+
+// Unconstrained Node Tree
+/* clang-format off */
+const vpx_tree_index vp10_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
+ 2, 6, // 0 = LOW_VAL
+ -TWO_TOKEN, 4, // 1 = TWO
+ -THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE
+ 8, 10, // 3 = HIGH_LOW
+ -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE
+ 12, 14, // 5 = CAT_THREEFOUR
+ -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE
+ -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE
+};
+/* clang-format on */
+
+const vpx_prob vp10_cat1_prob[] = { 159 };
+const vpx_prob vp10_cat2_prob[] = { 165, 145 };
+const vpx_prob vp10_cat3_prob[] = { 173, 148, 140 };
+const vpx_prob vp10_cat4_prob[] = { 176, 155, 140, 135 };
+const vpx_prob vp10_cat5_prob[] = { 180, 157, 141, 134, 130 };
+const vpx_prob vp10_cat6_prob[] = { 254, 254, 254, 252, 249, 243, 230,
+ 196, 177, 153, 140, 133, 130, 129 };
+#if CONFIG_VP9_HIGHBITDEPTH
+const vpx_prob vp10_cat1_prob_high10[] = { 159 };
+const vpx_prob vp10_cat2_prob_high10[] = { 165, 145 };
+const vpx_prob vp10_cat3_prob_high10[] = { 173, 148, 140 };
+const vpx_prob vp10_cat4_prob_high10[] = { 176, 155, 140, 135 };
+const vpx_prob vp10_cat5_prob_high10[] = { 180, 157, 141, 134, 130 };
+const vpx_prob vp10_cat6_prob_high10[] = {
+ 255, 255, 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129
+};
+const vpx_prob vp10_cat1_prob_high12[] = { 159 };
+const vpx_prob vp10_cat2_prob_high12[] = { 165, 145 };
+const vpx_prob vp10_cat3_prob_high12[] = { 173, 148, 140 };
+const vpx_prob vp10_cat4_prob_high12[] = { 176, 155, 140, 135 };
+const vpx_prob vp10_cat5_prob_high12[] = { 180, 157, 141, 134, 130 };
+const vpx_prob vp10_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254,
+ 254, 252, 249, 243, 230, 196,
+ 177, 153, 140, 133, 130, 129 };
+#endif
+
+const uint16_t band_count_table[TX_SIZES_ALL][8] = {
+ { 1, 2, 3, 4, 3, 16 - 13, 0 }, { 1, 2, 3, 4, 11, 64 - 21, 0 },
+ { 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 },
+#if CONFIG_EXT_TX
+ { 1, 2, 3, 4, 8, 32 - 18, 0 }, { 1, 2, 3, 4, 8, 32 - 18, 0 },
+ { 1, 2, 3, 4, 11, 128 - 21, 0 }, { 1, 2, 3, 4, 11, 128 - 21, 0 },
+ { 1, 2, 3, 4, 11, 512 - 21, 0 }, { 1, 2, 3, 4, 11, 512 - 21, 0 },
+#endif // CONFIG_EXT_TX
+};
+
+const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
+ { 0, 1, 3, 6, 10, 13, 16, 0 }, { 0, 1, 3, 6, 10, 21, 64, 0 },
+ { 0, 1, 3, 6, 10, 21, 256, 0 }, { 0, 1, 3, 6, 10, 21, 1024, 0 },
+#if CONFIG_EXT_TX
+ { 0, 1, 3, 6, 10, 18, 32, 0 }, { 0, 1, 3, 6, 10, 18, 32, 0 },
+ { 0, 1, 3, 6, 10, 21, 128, 0 }, { 0, 1, 3, 6, 10, 21, 128, 0 },
+ { 0, 1, 3, 6, 10, 21, 512, 0 }, { 0, 1, 3, 6, 10, 21, 512, 0 },
+#endif // CONFIG_EXT_TX
+};
+
+const uint8_t vp10_coefband_trans_8x8plus[1024] = {
+ 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
+ // beyond MAXBAND_INDEX+1 all values are filled as 5
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+};
+
+#if CONFIG_EXT_TX
+const uint8_t vp10_coefband_trans_4x8_8x4[32] = {
+ 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
+ 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+};
+#endif // CONFIG_EXT_TX
+
+const uint8_t vp10_coefband_trans_4x4[16] = {
+ 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,
+};
+
+const uint8_t vp10_pt_energy_class[ENTROPY_TOKENS] = { 0, 1, 2, 3, 3, 4,
+ 4, 5, 5, 5, 5, 5 };
+
+// Model obtained from a 2-sided zero-centered distribution derived
+// from a Pareto distribution. The cdf of the distribution is:
+// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta]
+//
+// For a given beta and a given probability of the 1-node, the alpha
+// is first solved, and then the {alpha, beta} pair is used to generate
+// the probabilities for the rest of the nodes.
+
+// beta = 8
+
+// Every odd line in this table can be generated from the even lines
+// by averaging :
+// vp10_pareto8_full[l][node] = (vp10_pareto8_full[l-1][node] +
+// vp10_pareto8_full[l+1][node] ) >> 1;
+const vpx_prob vp10_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = {
+ { 3, 86, 128, 6, 86, 23, 88, 29 },
+ { 6, 86, 128, 11, 87, 42, 91, 52 },
+ { 9, 86, 129, 17, 88, 61, 94, 76 },
+ { 12, 86, 129, 22, 88, 77, 97, 93 },
+ { 15, 87, 129, 28, 89, 93, 100, 110 },
+ { 17, 87, 129, 33, 90, 105, 103, 123 },
+ { 20, 88, 130, 38, 91, 118, 106, 136 },
+ { 23, 88, 130, 43, 91, 128, 108, 146 },
+ { 26, 89, 131, 48, 92, 139, 111, 156 },
+ { 28, 89, 131, 53, 93, 147, 114, 163 },
+ { 31, 90, 131, 58, 94, 156, 117, 171 },
+ { 34, 90, 131, 62, 94, 163, 119, 177 },
+ { 37, 90, 132, 66, 95, 171, 122, 184 },
+ { 39, 90, 132, 70, 96, 177, 124, 189 },
+ { 42, 91, 132, 75, 97, 183, 127, 194 },
+ { 44, 91, 132, 79, 97, 188, 129, 198 },
+ { 47, 92, 133, 83, 98, 193, 132, 202 },
+ { 49, 92, 133, 86, 99, 197, 134, 205 },
+ { 52, 93, 133, 90, 100, 201, 137, 208 },
+ { 54, 93, 133, 94, 100, 204, 139, 211 },
+ { 57, 94, 134, 98, 101, 208, 142, 214 },
+ { 59, 94, 134, 101, 102, 211, 144, 216 },
+ { 62, 94, 135, 105, 103, 214, 146, 218 },
+ { 64, 94, 135, 108, 103, 216, 148, 220 },
+ { 66, 95, 135, 111, 104, 219, 151, 222 },
+ { 68, 95, 135, 114, 105, 221, 153, 223 },
+ { 71, 96, 136, 117, 106, 224, 155, 225 },
+ { 73, 96, 136, 120, 106, 225, 157, 226 },
+ { 76, 97, 136, 123, 107, 227, 159, 228 },
+ { 78, 97, 136, 126, 108, 229, 160, 229 },
+ { 80, 98, 137, 129, 109, 231, 162, 231 },
+ { 82, 98, 137, 131, 109, 232, 164, 232 },
+ { 84, 98, 138, 134, 110, 234, 166, 233 },
+ { 86, 98, 138, 137, 111, 235, 168, 234 },
+ { 89, 99, 138, 140, 112, 236, 170, 235 },
+ { 91, 99, 138, 142, 112, 237, 171, 235 },
+ { 93, 100, 139, 145, 113, 238, 173, 236 },
+ { 95, 100, 139, 147, 114, 239, 174, 237 },
+ { 97, 101, 140, 149, 115, 240, 176, 238 },
+ { 99, 101, 140, 151, 115, 241, 177, 238 },
+ { 101, 102, 140, 154, 116, 242, 179, 239 },
+ { 103, 102, 140, 156, 117, 242, 180, 239 },
+ { 105, 103, 141, 158, 118, 243, 182, 240 },
+ { 107, 103, 141, 160, 118, 243, 183, 240 },
+ { 109, 104, 141, 162, 119, 244, 185, 241 },
+ { 111, 104, 141, 164, 119, 244, 186, 241 },
+ { 113, 104, 142, 166, 120, 245, 187, 242 },
+ { 114, 104, 142, 168, 121, 245, 188, 242 },
+ { 116, 105, 143, 170, 122, 246, 190, 243 },
+ { 118, 105, 143, 171, 122, 246, 191, 243 },
+ { 120, 106, 143, 173, 123, 247, 192, 244 },
+ { 121, 106, 143, 175, 124, 247, 193, 244 },
+ { 123, 107, 144, 177, 125, 248, 195, 244 },
+ { 125, 107, 144, 178, 125, 248, 196, 244 },
+ { 127, 108, 145, 180, 126, 249, 197, 245 },
+ { 128, 108, 145, 181, 127, 249, 198, 245 },
+ { 130, 109, 145, 183, 128, 249, 199, 245 },
+ { 132, 109, 145, 184, 128, 249, 200, 245 },
+ { 134, 110, 146, 186, 129, 250, 201, 246 },
+ { 135, 110, 146, 187, 130, 250, 202, 246 },
+ { 137, 111, 147, 189, 131, 251, 203, 246 },
+ { 138, 111, 147, 190, 131, 251, 204, 246 },
+ { 140, 112, 147, 192, 132, 251, 205, 247 },
+ { 141, 112, 147, 193, 132, 251, 206, 247 },
+ { 143, 113, 148, 194, 133, 251, 207, 247 },
+ { 144, 113, 148, 195, 134, 251, 207, 247 },
+ { 146, 114, 149, 197, 135, 252, 208, 248 },
+ { 147, 114, 149, 198, 135, 252, 209, 248 },
+ { 149, 115, 149, 199, 136, 252, 210, 248 },
+ { 150, 115, 149, 200, 137, 252, 210, 248 },
+ { 152, 115, 150, 201, 138, 252, 211, 248 },
+ { 153, 115, 150, 202, 138, 252, 212, 248 },
+ { 155, 116, 151, 204, 139, 253, 213, 249 },
+ { 156, 116, 151, 205, 139, 253, 213, 249 },
+ { 158, 117, 151, 206, 140, 253, 214, 249 },
+ { 159, 117, 151, 207, 141, 253, 215, 249 },
+ { 161, 118, 152, 208, 142, 253, 216, 249 },
+ { 162, 118, 152, 209, 142, 253, 216, 249 },
+ { 163, 119, 153, 210, 143, 253, 217, 249 },
+ { 164, 119, 153, 211, 143, 253, 217, 249 },
+ { 166, 120, 153, 212, 144, 254, 218, 250 },
+ { 167, 120, 153, 212, 145, 254, 219, 250 },
+ { 168, 121, 154, 213, 146, 254, 220, 250 },
+ { 169, 121, 154, 214, 146, 254, 220, 250 },
+ { 171, 122, 155, 215, 147, 254, 221, 250 },
+ { 172, 122, 155, 216, 147, 254, 221, 250 },
+ { 173, 123, 155, 217, 148, 254, 222, 250 },
+ { 174, 123, 155, 217, 149, 254, 222, 250 },
+ { 176, 124, 156, 218, 150, 254, 223, 250 },
+ { 177, 124, 156, 219, 150, 254, 223, 250 },
+ { 178, 125, 157, 220, 151, 254, 224, 251 },
+ { 179, 125, 157, 220, 151, 254, 224, 251 },
+ { 180, 126, 157, 221, 152, 254, 225, 251 },
+ { 181, 126, 157, 221, 152, 254, 225, 251 },
+ { 183, 127, 158, 222, 153, 254, 226, 251 },
+ { 184, 127, 158, 223, 154, 254, 226, 251 },
+ { 185, 128, 159, 224, 155, 255, 227, 251 },
+ { 186, 128, 159, 224, 155, 255, 227, 251 },
+ { 187, 129, 160, 225, 156, 255, 228, 251 },
+ { 188, 130, 160, 225, 156, 255, 228, 251 },
+ { 189, 131, 160, 226, 157, 255, 228, 251 },
+ { 190, 131, 160, 226, 158, 255, 228, 251 },
+ { 191, 132, 161, 227, 159, 255, 229, 251 },
+ { 192, 132, 161, 227, 159, 255, 229, 251 },
+ { 193, 133, 162, 228, 160, 255, 230, 252 },
+ { 194, 133, 162, 229, 160, 255, 230, 252 },
+ { 195, 134, 163, 230, 161, 255, 231, 252 },
+ { 196, 134, 163, 230, 161, 255, 231, 252 },
+ { 197, 135, 163, 231, 162, 255, 231, 252 },
+ { 198, 135, 163, 231, 162, 255, 231, 252 },
+ { 199, 136, 164, 232, 163, 255, 232, 252 },
+ { 200, 136, 164, 232, 164, 255, 232, 252 },
+ { 201, 137, 165, 233, 165, 255, 233, 252 },
+ { 201, 137, 165, 233, 165, 255, 233, 252 },
+ { 202, 138, 166, 233, 166, 255, 233, 252 },
+ { 203, 138, 166, 233, 166, 255, 233, 252 },
+ { 204, 139, 166, 234, 167, 255, 234, 252 },
+ { 205, 139, 166, 234, 167, 255, 234, 252 },
+ { 206, 140, 167, 235, 168, 255, 235, 252 },
+ { 206, 140, 167, 235, 168, 255, 235, 252 },
+ { 207, 141, 168, 236, 169, 255, 235, 252 },
+ { 208, 141, 168, 236, 170, 255, 235, 252 },
+ { 209, 142, 169, 237, 171, 255, 236, 252 },
+ { 209, 143, 169, 237, 171, 255, 236, 252 },
+ { 210, 144, 169, 237, 172, 255, 236, 252 },
+ { 211, 144, 169, 237, 172, 255, 236, 252 },
+ { 212, 145, 170, 238, 173, 255, 237, 252 },
+ { 213, 145, 170, 238, 173, 255, 237, 252 },
+ { 214, 146, 171, 239, 174, 255, 237, 253 },
+ { 214, 146, 171, 239, 174, 255, 237, 253 },
+ { 215, 147, 172, 240, 175, 255, 238, 253 },
+ { 215, 147, 172, 240, 175, 255, 238, 253 },
+ { 216, 148, 173, 240, 176, 255, 238, 253 },
+ { 217, 148, 173, 240, 176, 255, 238, 253 },
+ { 218, 149, 173, 241, 177, 255, 239, 253 },
+ { 218, 149, 173, 241, 178, 255, 239, 253 },
+ { 219, 150, 174, 241, 179, 255, 239, 253 },
+ { 219, 151, 174, 241, 179, 255, 239, 253 },
+ { 220, 152, 175, 242, 180, 255, 240, 253 },
+ { 221, 152, 175, 242, 180, 255, 240, 253 },
+ { 222, 153, 176, 242, 181, 255, 240, 253 },
+ { 222, 153, 176, 242, 181, 255, 240, 253 },
+ { 223, 154, 177, 243, 182, 255, 240, 253 },
+ { 223, 154, 177, 243, 182, 255, 240, 253 },
+ { 224, 155, 178, 244, 183, 255, 241, 253 },
+ { 224, 155, 178, 244, 183, 255, 241, 253 },
+ { 225, 156, 178, 244, 184, 255, 241, 253 },
+ { 225, 157, 178, 244, 184, 255, 241, 253 },
+ { 226, 158, 179, 244, 185, 255, 242, 253 },
+ { 227, 158, 179, 244, 185, 255, 242, 253 },
+ { 228, 159, 180, 245, 186, 255, 242, 253 },
+ { 228, 159, 180, 245, 186, 255, 242, 253 },
+ { 229, 160, 181, 245, 187, 255, 242, 253 },
+ { 229, 160, 181, 245, 187, 255, 242, 253 },
+ { 230, 161, 182, 246, 188, 255, 243, 253 },
+ { 230, 162, 182, 246, 188, 255, 243, 253 },
+ { 231, 163, 183, 246, 189, 255, 243, 253 },
+ { 231, 163, 183, 246, 189, 255, 243, 253 },
+ { 232, 164, 184, 247, 190, 255, 243, 253 },
+ { 232, 164, 184, 247, 190, 255, 243, 253 },
+ { 233, 165, 185, 247, 191, 255, 244, 253 },
+ { 233, 165, 185, 247, 191, 255, 244, 253 },
+ { 234, 166, 185, 247, 192, 255, 244, 253 },
+ { 234, 167, 185, 247, 192, 255, 244, 253 },
+ { 235, 168, 186, 248, 193, 255, 244, 253 },
+ { 235, 168, 186, 248, 193, 255, 244, 253 },
+ { 236, 169, 187, 248, 194, 255, 244, 253 },
+ { 236, 169, 187, 248, 194, 255, 244, 253 },
+ { 236, 170, 188, 248, 195, 255, 245, 253 },
+ { 236, 170, 188, 248, 195, 255, 245, 253 },
+ { 237, 171, 189, 249, 196, 255, 245, 254 },
+ { 237, 172, 189, 249, 196, 255, 245, 254 },
+ { 238, 173, 190, 249, 197, 255, 245, 254 },
+ { 238, 173, 190, 249, 197, 255, 245, 254 },
+ { 239, 174, 191, 249, 198, 255, 245, 254 },
+ { 239, 174, 191, 249, 198, 255, 245, 254 },
+ { 240, 175, 192, 249, 199, 255, 246, 254 },
+ { 240, 176, 192, 249, 199, 255, 246, 254 },
+ { 240, 177, 193, 250, 200, 255, 246, 254 },
+ { 240, 177, 193, 250, 200, 255, 246, 254 },
+ { 241, 178, 194, 250, 201, 255, 246, 254 },
+ { 241, 178, 194, 250, 201, 255, 246, 254 },
+ { 242, 179, 195, 250, 202, 255, 246, 254 },
+ { 242, 180, 195, 250, 202, 255, 246, 254 },
+ { 242, 181, 196, 250, 203, 255, 247, 254 },
+ { 242, 181, 196, 250, 203, 255, 247, 254 },
+ { 243, 182, 197, 251, 204, 255, 247, 254 },
+ { 243, 183, 197, 251, 204, 255, 247, 254 },
+ { 244, 184, 198, 251, 205, 255, 247, 254 },
+ { 244, 184, 198, 251, 205, 255, 247, 254 },
+ { 244, 185, 199, 251, 206, 255, 247, 254 },
+ { 244, 185, 199, 251, 206, 255, 247, 254 },
+ { 245, 186, 200, 251, 207, 255, 247, 254 },
+ { 245, 187, 200, 251, 207, 255, 247, 254 },
+ { 246, 188, 201, 252, 207, 255, 248, 254 },
+ { 246, 188, 201, 252, 207, 255, 248, 254 },
+ { 246, 189, 202, 252, 208, 255, 248, 254 },
+ { 246, 190, 202, 252, 208, 255, 248, 254 },
+ { 247, 191, 203, 252, 209, 255, 248, 254 },
+ { 247, 191, 203, 252, 209, 255, 248, 254 },
+ { 247, 192, 204, 252, 210, 255, 248, 254 },
+ { 247, 193, 204, 252, 210, 255, 248, 254 },
+ { 248, 194, 205, 252, 211, 255, 248, 254 },
+ { 248, 194, 205, 252, 211, 255, 248, 254 },
+ { 248, 195, 206, 252, 212, 255, 249, 254 },
+ { 248, 196, 206, 252, 212, 255, 249, 254 },
+ { 249, 197, 207, 253, 213, 255, 249, 254 },
+ { 249, 197, 207, 253, 213, 255, 249, 254 },
+ { 249, 198, 208, 253, 214, 255, 249, 254 },
+ { 249, 199, 209, 253, 214, 255, 249, 254 },
+ { 250, 200, 210, 253, 215, 255, 249, 254 },
+ { 250, 200, 210, 253, 215, 255, 249, 254 },
+ { 250, 201, 211, 253, 215, 255, 249, 254 },
+ { 250, 202, 211, 253, 215, 255, 249, 254 },
+ { 250, 203, 212, 253, 216, 255, 249, 254 },
+ { 250, 203, 212, 253, 216, 255, 249, 254 },
+ { 251, 204, 213, 253, 217, 255, 250, 254 },
+ { 251, 205, 213, 253, 217, 255, 250, 254 },
+ { 251, 206, 214, 254, 218, 255, 250, 254 },
+ { 251, 206, 215, 254, 218, 255, 250, 254 },
+ { 252, 207, 216, 254, 219, 255, 250, 254 },
+ { 252, 208, 216, 254, 219, 255, 250, 254 },
+ { 252, 209, 217, 254, 220, 255, 250, 254 },
+ { 252, 210, 217, 254, 220, 255, 250, 254 },
+ { 252, 211, 218, 254, 221, 255, 250, 254 },
+ { 252, 212, 218, 254, 221, 255, 250, 254 },
+ { 253, 213, 219, 254, 222, 255, 250, 254 },
+ { 253, 213, 220, 254, 222, 255, 250, 254 },
+ { 253, 214, 221, 254, 223, 255, 250, 254 },
+ { 253, 215, 221, 254, 223, 255, 250, 254 },
+ { 253, 216, 222, 254, 224, 255, 251, 254 },
+ { 253, 217, 223, 254, 224, 255, 251, 254 },
+ { 253, 218, 224, 254, 225, 255, 251, 254 },
+ { 253, 219, 224, 254, 225, 255, 251, 254 },
+ { 254, 220, 225, 254, 225, 255, 251, 254 },
+ { 254, 221, 226, 254, 225, 255, 251, 254 },
+ { 254, 222, 227, 255, 226, 255, 251, 254 },
+ { 254, 223, 227, 255, 226, 255, 251, 254 },
+ { 254, 224, 228, 255, 227, 255, 251, 254 },
+ { 254, 225, 229, 255, 227, 255, 251, 254 },
+ { 254, 226, 230, 255, 228, 255, 251, 254 },
+ { 254, 227, 230, 255, 229, 255, 251, 254 },
+ { 255, 228, 231, 255, 230, 255, 251, 254 },
+ { 255, 229, 232, 255, 230, 255, 251, 254 },
+ { 255, 230, 233, 255, 231, 255, 252, 254 },
+ { 255, 231, 234, 255, 231, 255, 252, 254 },
+ { 255, 232, 235, 255, 232, 255, 252, 254 },
+ { 255, 233, 236, 255, 232, 255, 252, 254 },
+ { 255, 235, 237, 255, 233, 255, 252, 254 },
+ { 255, 236, 238, 255, 234, 255, 252, 254 },
+ { 255, 238, 240, 255, 235, 255, 252, 255 },
+ { 255, 239, 241, 255, 235, 255, 252, 254 },
+ { 255, 241, 243, 255, 236, 255, 252, 254 },
+ { 255, 243, 245, 255, 237, 255, 252, 254 },
+ { 255, 246, 247, 255, 239, 255, 253, 255 },
+};
+
+#if CONFIG_ANS
+// Model obtained from a 2-sided zero-centered distribution derived
+// from a Pareto distribution. The cdf of the distribution is:
+// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta]
+//
+// For a given beta and a given probability of the 1-node, the alpha
+// is first solved, and then the {alpha, beta} pair is used to generate
+// the probabilities for the rest of the nodes.
+//
+// beta = 8
+// Values for tokens ONE_TOKEN through CATEGORY6_TOKEN included here.
+// ZERO_TOKEN and EOB_TOKEN are coded as flags outside this coder.
+const AnsP10 vp10_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2] = {
+ { 4, 4, 4, 4, 8, 15, 30, 57, 103, 795 },
+ { 8, 8, 8, 8, 15, 30, 57, 103, 168, 619 },
+ { 12, 12, 12, 12, 23, 43, 80, 138, 205, 487 },
+ { 16, 16, 15, 15, 30, 56, 101, 165, 225, 385 },
+ { 20, 20, 19, 19, 36, 68, 119, 186, 231, 306 },
+ { 24, 23, 23, 22, 43, 79, 135, 201, 230, 244 },
+ { 28, 27, 26, 26, 49, 89, 149, 211, 223, 196 },
+ { 32, 31, 30, 29, 55, 98, 160, 218, 212, 159 },
+ { 36, 35, 33, 32, 60, 107, 171, 221, 200, 129 },
+ { 40, 38, 37, 35, 66, 115, 179, 222, 187, 105 },
+ { 44, 42, 40, 38, 71, 122, 186, 221, 174, 86 },
+ { 48, 45, 43, 41, 76, 129, 192, 219, 160, 71 },
+ { 52, 49, 46, 44, 80, 136, 196, 215, 148, 58 },
+ { 56, 53, 49, 46, 85, 142, 200, 210, 135, 48 },
+ { 60, 56, 52, 49, 89, 147, 203, 204, 124, 40 },
+ { 64, 60, 55, 52, 93, 151, 205, 198, 113, 33 },
+ { 68, 63, 58, 54, 97, 156, 205, 192, 103, 28 },
+ { 72, 66, 61, 57, 100, 160, 206, 185, 94, 23 },
+ { 76, 70, 64, 59, 104, 163, 205, 178, 85, 20 },
+ { 80, 73, 67, 61, 107, 166, 205, 171, 77, 17 },
+ { 84, 76, 69, 63, 110, 169, 204, 164, 71, 14 },
+ { 88, 80, 72, 65, 113, 171, 202, 157, 64, 12 },
+ { 92, 83, 75, 67, 116, 173, 200, 150, 58, 10 },
+ { 96, 86, 77, 69, 118, 175, 198, 143, 53, 9 },
+ { 100, 89, 80, 71, 121, 176, 195, 137, 48, 7 },
+ { 104, 92, 82, 73, 123, 178, 192, 130, 44, 6 },
+ { 108, 96, 84, 75, 125, 178, 189, 124, 40, 5 },
+ { 112, 98, 87, 76, 127, 179, 186, 118, 36, 5 },
+ { 116, 101, 89, 78, 129, 179, 183, 112, 33, 4 },
+ { 120, 104, 91, 80, 131, 180, 179, 106, 30, 3 },
+ { 124, 107, 93, 81, 132, 180, 176, 101, 27, 3 },
+ { 128, 110, 95, 82, 134, 179, 172, 96, 25, 3 },
+ { 132, 113, 97, 84, 135, 179, 168, 91, 23, 2 },
+ { 136, 116, 99, 85, 136, 179, 164, 86, 21, 2 },
+ { 140, 119, 101, 86, 137, 178, 160, 82, 19, 2 },
+ { 144, 122, 103, 88, 138, 177, 157, 77, 17, 1 },
+ { 148, 124, 105, 89, 139, 176, 153, 73, 16, 1 },
+ { 152, 127, 107, 90, 140, 175, 149, 69, 14, 1 },
+ { 156, 130, 108, 91, 141, 173, 145, 66, 13, 1 },
+ { 160, 133, 110, 92, 141, 172, 141, 62, 12, 1 },
+ { 164, 135, 111, 93, 142, 171, 137, 59, 11, 1 },
+ { 168, 138, 113, 94, 142, 169, 133, 56, 10, 1 },
+ { 172, 140, 115, 94, 142, 168, 130, 53, 9, 1 },
+ { 176, 143, 116, 95, 143, 166, 126, 50, 8, 1 },
+ { 180, 145, 118, 96, 143, 164, 122, 47, 8, 1 },
+ { 184, 147, 119, 96, 143, 163, 119, 45, 7, 1 },
+ { 188, 150, 120, 97, 143, 161, 116, 42, 6, 1 },
+ { 192, 152, 121, 98, 143, 159, 112, 40, 6, 1 },
+ { 196, 155, 123, 98, 142, 157, 109, 38, 5, 1 },
+ { 200, 157, 124, 99, 142, 155, 105, 36, 5, 1 },
+ { 204, 159, 125, 99, 142, 153, 102, 34, 5, 1 },
+ { 208, 161, 126, 100, 142, 151, 99, 32, 4, 1 },
+ { 212, 164, 127, 100, 141, 149, 96, 30, 4, 1 },
+ { 216, 166, 129, 100, 141, 147, 93, 28, 3, 1 },
+ { 220, 168, 130, 101, 140, 144, 90, 27, 3, 1 },
+ { 224, 170, 131, 101, 140, 142, 87, 25, 3, 1 },
+ { 228, 172, 132, 101, 139, 140, 84, 24, 3, 1 },
+ { 232, 174, 132, 101, 139, 138, 81, 23, 3, 1 },
+ { 236, 176, 133, 101, 138, 136, 79, 22, 2, 1 },
+ { 240, 178, 134, 102, 137, 134, 76, 20, 2, 1 },
+ { 244, 180, 135, 102, 136, 131, 74, 19, 2, 1 },
+ { 248, 182, 135, 102, 136, 129, 71, 18, 2, 1 },
+ { 252, 184, 136, 101, 135, 127, 69, 17, 2, 1 },
+ { 256, 186, 137, 102, 134, 124, 66, 16, 2, 1 },
+ { 260, 188, 138, 102, 133, 122, 64, 15, 1, 1 },
+ { 264, 190, 138, 101, 132, 120, 62, 15, 1, 1 },
+ { 268, 191, 139, 101, 131, 118, 60, 14, 1, 1 },
+ { 272, 193, 139, 101, 130, 116, 58, 13, 1, 1 },
+ { 276, 195, 139, 101, 129, 114, 56, 12, 1, 1 },
+ { 280, 196, 140, 101, 128, 111, 54, 12, 1, 1 },
+ { 284, 198, 140, 101, 127, 109, 52, 11, 1, 1 },
+ { 288, 200, 141, 100, 126, 107, 50, 10, 1, 1 },
+ { 292, 201, 141, 100, 125, 105, 48, 10, 1, 1 },
+ { 296, 203, 141, 100, 123, 103, 47, 9, 1, 1 },
+ { 300, 204, 142, 99, 122, 101, 45, 9, 1, 1 },
+ { 304, 206, 142, 99, 121, 99, 43, 8, 1, 1 },
+ { 308, 207, 142, 99, 119, 97, 42, 8, 1, 1 },
+ { 312, 209, 142, 99, 118, 95, 40, 7, 1, 1 },
+ { 316, 210, 142, 98, 117, 93, 39, 7, 1, 1 },
+ { 320, 211, 142, 98, 116, 91, 37, 7, 1, 1 },
+ { 324, 213, 142, 97, 115, 89, 36, 6, 1, 1 },
+ { 328, 214, 142, 97, 113, 87, 35, 6, 1, 1 },
+ { 332, 215, 143, 96, 112, 85, 33, 6, 1, 1 },
+ { 336, 216, 143, 96, 111, 83, 32, 5, 1, 1 },
+ { 340, 218, 143, 95, 109, 81, 31, 5, 1, 1 },
+ { 344, 219, 142, 95, 108, 79, 30, 5, 1, 1 },
+ { 348, 220, 142, 94, 107, 78, 29, 4, 1, 1 },
+ { 352, 221, 142, 94, 105, 76, 28, 4, 1, 1 },
+ { 356, 222, 142, 93, 104, 74, 27, 4, 1, 1 },
+ { 360, 223, 142, 92, 103, 72, 26, 4, 1, 1 },
+ { 364, 224, 142, 92, 101, 70, 25, 4, 1, 1 },
+ { 368, 225, 142, 91, 100, 69, 24, 3, 1, 1 },
+ { 372, 226, 141, 91, 99, 67, 23, 3, 1, 1 },
+ { 376, 227, 141, 90, 97, 66, 22, 3, 1, 1 },
+ { 380, 228, 141, 89, 96, 64, 21, 3, 1, 1 },
+ { 384, 229, 140, 89, 95, 62, 20, 3, 1, 1 },
+ { 388, 229, 140, 88, 93, 61, 20, 3, 1, 1 },
+ { 392, 230, 140, 87, 92, 60, 19, 2, 1, 1 },
+ { 396, 231, 140, 86, 91, 58, 18, 2, 1, 1 },
+ { 400, 232, 139, 86, 89, 57, 17, 2, 1, 1 },
+ { 404, 232, 139, 85, 88, 55, 17, 2, 1, 1 },
+ { 408, 233, 138, 84, 87, 54, 16, 2, 1, 1 },
+ { 412, 234, 138, 84, 85, 52, 15, 2, 1, 1 },
+ { 416, 234, 137, 83, 84, 51, 15, 2, 1, 1 },
+ { 420, 235, 137, 82, 82, 50, 14, 2, 1, 1 },
+ { 424, 236, 136, 81, 81, 48, 14, 2, 1, 1 },
+ { 428, 236, 136, 81, 80, 47, 13, 1, 1, 1 },
+ { 432, 236, 135, 80, 79, 46, 13, 1, 1, 1 },
+ { 436, 237, 135, 79, 77, 45, 12, 1, 1, 1 },
+ { 440, 238, 134, 78, 76, 43, 12, 1, 1, 1 },
+ { 444, 238, 134, 77, 75, 42, 11, 1, 1, 1 },
+ { 448, 238, 133, 77, 73, 41, 11, 1, 1, 1 },
+ { 452, 239, 132, 76, 72, 40, 10, 1, 1, 1 },
+ { 456, 239, 131, 75, 71, 39, 10, 1, 1, 1 },
+ { 460, 239, 131, 74, 70, 38, 9, 1, 1, 1 },
+ { 464, 240, 130, 73, 68, 37, 9, 1, 1, 1 },
+ { 468, 240, 129, 72, 67, 36, 9, 1, 1, 1 },
+ { 472, 240, 128, 72, 66, 35, 8, 1, 1, 1 },
+ { 476, 240, 127, 71, 65, 34, 8, 1, 1, 1 },
+ { 480, 240, 127, 70, 63, 33, 8, 1, 1, 1 },
+ { 484, 241, 126, 69, 62, 32, 7, 1, 1, 1 },
+ { 488, 241, 125, 68, 61, 31, 7, 1, 1, 1 },
+ { 492, 241, 124, 67, 60, 30, 7, 1, 1, 1 },
+ { 496, 241, 124, 66, 59, 29, 6, 1, 1, 1 },
+ { 500, 240, 123, 66, 58, 28, 6, 1, 1, 1 },
+ { 504, 240, 122, 65, 57, 27, 6, 1, 1, 1 },
+ { 508, 240, 121, 64, 55, 27, 6, 1, 1, 1 },
+ { 512, 241, 120, 63, 54, 26, 5, 1, 1, 1 },
+ { 516, 241, 119, 62, 53, 25, 5, 1, 1, 1 },
+ { 520, 240, 118, 62, 52, 24, 5, 1, 1, 1 },
+ { 524, 240, 117, 60, 51, 24, 5, 1, 1, 1 },
+ { 528, 239, 116, 60, 50, 23, 5, 1, 1, 1 },
+ { 532, 239, 116, 59, 49, 22, 4, 1, 1, 1 },
+ { 536, 239, 115, 58, 48, 21, 4, 1, 1, 1 },
+ { 540, 239, 113, 57, 47, 21, 4, 1, 1, 1 },
+ { 544, 238, 113, 56, 46, 20, 4, 1, 1, 1 },
+ { 548, 238, 112, 55, 45, 19, 4, 1, 1, 1 },
+ { 552, 238, 110, 55, 44, 19, 3, 1, 1, 1 },
+ { 556, 237, 110, 54, 43, 18, 3, 1, 1, 1 },
+ { 560, 237, 108, 53, 42, 18, 3, 1, 1, 1 },
+ { 564, 236, 108, 52, 41, 17, 3, 1, 1, 1 },
+ { 568, 236, 106, 51, 40, 17, 3, 1, 1, 1 },
+ { 572, 235, 105, 51, 39, 16, 3, 1, 1, 1 },
+ { 576, 235, 104, 50, 38, 15, 3, 1, 1, 1 },
+ { 580, 234, 103, 49, 37, 15, 3, 1, 1, 1 },
+ { 584, 234, 102, 48, 37, 14, 2, 1, 1, 1 },
+ { 588, 233, 101, 47, 36, 14, 2, 1, 1, 1 },
+ { 592, 233, 100, 46, 35, 13, 2, 1, 1, 1 },
+ { 596, 231, 99, 46, 34, 13, 2, 1, 1, 1 },
+ { 600, 230, 98, 45, 33, 13, 2, 1, 1, 1 },
+ { 604, 230, 97, 44, 32, 12, 2, 1, 1, 1 },
+ { 608, 229, 96, 43, 31, 12, 2, 1, 1, 1 },
+ { 612, 228, 95, 42, 31, 11, 2, 1, 1, 1 },
+ { 616, 227, 93, 42, 30, 11, 2, 1, 1, 1 },
+ { 620, 227, 92, 41, 29, 10, 2, 1, 1, 1 },
+ { 624, 226, 92, 40, 28, 10, 1, 1, 1, 1 },
+ { 628, 225, 90, 39, 28, 10, 1, 1, 1, 1 },
+ { 632, 224, 89, 39, 27, 9, 1, 1, 1, 1 },
+ { 636, 223, 88, 38, 26, 9, 1, 1, 1, 1 },
+ { 640, 222, 87, 37, 25, 9, 1, 1, 1, 1 },
+ { 644, 221, 86, 36, 25, 8, 1, 1, 1, 1 },
+ { 648, 220, 84, 36, 24, 8, 1, 1, 1, 1 },
+ { 652, 219, 83, 35, 23, 8, 1, 1, 1, 1 },
+ { 656, 218, 82, 34, 23, 7, 1, 1, 1, 1 },
+ { 660, 217, 81, 33, 22, 7, 1, 1, 1, 1 },
+ { 664, 215, 80, 33, 21, 7, 1, 1, 1, 1 },
+ { 668, 214, 78, 32, 21, 7, 1, 1, 1, 1 },
+ { 672, 213, 78, 31, 20, 6, 1, 1, 1, 1 },
+ { 676, 211, 76, 31, 20, 6, 1, 1, 1, 1 },
+ { 680, 210, 75, 30, 19, 6, 1, 1, 1, 1 },
+ { 684, 209, 74, 29, 18, 6, 1, 1, 1, 1 },
+ { 688, 208, 73, 28, 18, 5, 1, 1, 1, 1 },
+ { 692, 206, 72, 28, 17, 5, 1, 1, 1, 1 },
+ { 696, 205, 70, 27, 17, 5, 1, 1, 1, 1 },
+ { 700, 203, 69, 27, 16, 5, 1, 1, 1, 1 },
+ { 704, 201, 68, 26, 16, 5, 1, 1, 1, 1 },
+ { 708, 201, 67, 25, 15, 4, 1, 1, 1, 1 },
+ { 712, 198, 66, 25, 15, 4, 1, 1, 1, 1 },
+ { 716, 197, 65, 24, 14, 4, 1, 1, 1, 1 },
+ { 720, 196, 63, 23, 14, 4, 1, 1, 1, 1 },
+ { 724, 194, 62, 23, 13, 4, 1, 1, 1, 1 },
+ { 728, 193, 61, 22, 13, 3, 1, 1, 1, 1 },
+ { 732, 191, 60, 22, 12, 3, 1, 1, 1, 1 },
+ { 736, 189, 59, 21, 12, 3, 1, 1, 1, 1 },
+ { 740, 188, 58, 20, 11, 3, 1, 1, 1, 1 },
+ { 744, 186, 56, 20, 11, 3, 1, 1, 1, 1 },
+ { 748, 184, 55, 19, 11, 3, 1, 1, 1, 1 },
+ { 752, 182, 54, 19, 10, 3, 1, 1, 1, 1 },
+ { 756, 181, 53, 18, 10, 2, 1, 1, 1, 1 },
+ { 760, 179, 52, 18, 9, 2, 1, 1, 1, 1 },
+ { 764, 177, 51, 17, 9, 2, 1, 1, 1, 1 },
+ { 768, 174, 50, 17, 9, 2, 1, 1, 1, 1 },
+ { 772, 173, 49, 16, 8, 2, 1, 1, 1, 1 },
+ { 776, 171, 47, 16, 8, 2, 1, 1, 1, 1 },
+ { 780, 169, 46, 15, 8, 2, 1, 1, 1, 1 },
+ { 784, 167, 45, 15, 7, 2, 1, 1, 1, 1 },
+ { 788, 165, 44, 14, 7, 2, 1, 1, 1, 1 },
+ { 792, 162, 43, 14, 7, 2, 1, 1, 1, 1 },
+ { 796, 161, 42, 13, 7, 1, 1, 1, 1, 1 },
+ { 800, 159, 41, 13, 6, 1, 1, 1, 1, 1 },
+ { 804, 157, 40, 12, 6, 1, 1, 1, 1, 1 },
+ { 808, 154, 39, 12, 6, 1, 1, 1, 1, 1 },
+ { 812, 153, 38, 11, 5, 1, 1, 1, 1, 1 },
+ { 816, 150, 37, 11, 5, 1, 1, 1, 1, 1 },
+ { 820, 148, 36, 10, 5, 1, 1, 1, 1, 1 },
+ { 824, 145, 35, 10, 5, 1, 1, 1, 1, 1 },
+ { 828, 143, 34, 10, 4, 1, 1, 1, 1, 1 },
+ { 832, 141, 33, 9, 4, 1, 1, 1, 1, 1 },
+ { 836, 138, 32, 9, 4, 1, 1, 1, 1, 1 },
+ { 840, 136, 30, 9, 4, 1, 1, 1, 1, 1 },
+ { 844, 133, 30, 8, 4, 1, 1, 1, 1, 1 },
+ { 848, 131, 29, 8, 3, 1, 1, 1, 1, 1 },
+ { 852, 129, 28, 7, 3, 1, 1, 1, 1, 1 },
+ { 856, 126, 27, 7, 3, 1, 1, 1, 1, 1 },
+ { 860, 123, 26, 7, 3, 1, 1, 1, 1, 1 },
+ { 864, 121, 25, 6, 3, 1, 1, 1, 1, 1 },
+ { 868, 118, 24, 6, 3, 1, 1, 1, 1, 1 },
+ { 872, 116, 23, 6, 2, 1, 1, 1, 1, 1 },
+ { 876, 113, 22, 6, 2, 1, 1, 1, 1, 1 },
+ { 880, 111, 21, 5, 2, 1, 1, 1, 1, 1 },
+ { 884, 108, 20, 5, 2, 1, 1, 1, 1, 1 },
+ { 888, 105, 19, 5, 2, 1, 1, 1, 1, 1 },
+ { 892, 102, 19, 4, 2, 1, 1, 1, 1, 1 },
+ { 896, 99, 18, 4, 2, 1, 1, 1, 1, 1 },
+ { 900, 97, 17, 4, 1, 1, 1, 1, 1, 1 },
+ { 904, 94, 16, 4, 1, 1, 1, 1, 1, 1 },
+ { 908, 92, 15, 3, 1, 1, 1, 1, 1, 1 },
+ { 912, 89, 14, 3, 1, 1, 1, 1, 1, 1 },
+ { 916, 85, 14, 3, 1, 1, 1, 1, 1, 1 },
+ { 920, 82, 13, 3, 1, 1, 1, 1, 1, 1 },
+ { 924, 79, 12, 3, 1, 1, 1, 1, 1, 1 },
+ { 928, 77, 11, 2, 1, 1, 1, 1, 1, 1 },
+ { 932, 73, 11, 2, 1, 1, 1, 1, 1, 1 },
+ { 936, 70, 10, 2, 1, 1, 1, 1, 1, 1 },
+ { 940, 67, 9, 2, 1, 1, 1, 1, 1, 1 },
+ { 944, 64, 8, 2, 1, 1, 1, 1, 1, 1 },
+ { 948, 60, 8, 2, 1, 1, 1, 1, 1, 1 },
+ { 952, 58, 7, 1, 1, 1, 1, 1, 1, 1 },
+ { 956, 54, 7, 1, 1, 1, 1, 1, 1, 1 },
+ { 960, 51, 6, 1, 1, 1, 1, 1, 1, 1 },
+ { 964, 48, 5, 1, 1, 1, 1, 1, 1, 1 },
+ { 968, 44, 5, 1, 1, 1, 1, 1, 1, 1 },
+ { 972, 41, 4, 1, 1, 1, 1, 1, 1, 1 },
+ { 976, 37, 4, 1, 1, 1, 1, 1, 1, 1 },
+ { 980, 34, 3, 1, 1, 1, 1, 1, 1, 1 },
+ { 984, 30, 3, 1, 1, 1, 1, 1, 1, 1 },
+ { 988, 27, 2, 1, 1, 1, 1, 1, 1, 1 },
+ { 992, 23, 2, 1, 1, 1, 1, 1, 1, 1 },
+ { 996, 19, 2, 1, 1, 1, 1, 1, 1, 1 },
+ { 1000, 16, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1004, 12, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1008, 8, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1012, 4, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1015, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+};
+#endif // CONFIG_ANS
+
+/* clang-format off */
+#if CONFIG_ENTROPY
+const vp10_coeff_probs_model
+default_qctx_coef_probs[QCTX_BINS][TX_SIZES][PLANE_TYPES] = {
+ { // Q_Index 0
+ { // TX_SIZE 0
+ { // Y plane
+ { // Intra
+ { // band 0
+ {182, 34, 137}, { 79, 39, 103}, { 10, 28, 51},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 45, 88, 147}, { 46, 80, 140}, { 25, 69, 119},
+ { 12, 57, 96}, { 4, 41, 65}, { 1, 20, 31},
+ },
+ { // band 2
+ { 58, 124, 190}, { 39, 106, 178}, { 16, 86, 147},
+ { 7, 69, 114}, { 3, 50, 80}, { 1, 25, 42},
+ },
+ { // band 3
+ { 90, 138, 215}, { 54, 116, 198}, { 18, 86, 155},
+ { 5, 62, 112}, { 1, 38, 68}, { 1, 17, 30},
+ },
+ { // band 4
+ {126, 149, 231}, { 82, 114, 211}, { 21, 80, 157},
+ { 6, 56, 105}, { 1, 36, 64}, { 1, 17, 31},
+ },
+ { // band 5
+ {171, 56, 236}, {140, 54, 219}, { 57, 45, 167},
+ { 26, 36, 113}, { 11, 29, 72}, { 3, 18, 39},
+ },
+ },
+ { // Inter
+ { // band 0
+ {153, 122, 186}, {106, 109, 171}, { 36, 84, 128},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 27, 151, 201}, { 34, 131, 199}, { 23, 102, 161},
+ { 10, 80, 120}, { 4, 52, 78}, { 1, 24, 37},
+ },
+ { // band 2
+ { 43, 158, 213}, { 35, 133, 203}, { 8, 92, 151},
+ { 2, 64, 106}, { 1, 36, 60}, { 1, 13, 24},
+ },
+ { // band 3
+ { 68, 167, 223}, { 36, 135, 211}, { 9, 94, 157},
+ { 2, 67, 112}, { 1, 40, 68}, { 1, 17, 31},
+ },
+ { // band 4
+ {131, 146, 237}, { 72, 119, 223}, { 17, 82, 164},
+ { 4, 55, 107}, { 1, 34, 63}, { 1, 16, 29},
+ },
+ { // band 5
+ {184, 68, 244}, {153, 59, 232}, { 68, 51, 179},
+ { 31, 40, 123}, { 13, 29, 77}, { 4, 17, 37},
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // band 0
+ {203, 41, 203}, {127, 56, 174}, { 49, 56, 127},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {110, 121, 217}, {119, 113, 213}, { 64, 95, 185},
+ { 30, 72, 144}, { 8, 42, 76}, { 2, 17, 25},
+ },
+ { // band 2
+ {127, 159, 229}, {115, 134, 223}, { 36, 100, 189},
+ { 11, 75, 142}, { 3, 48, 83}, { 1, 19, 33},
+ },
+ { // band 3
+ {150, 172, 241}, { 90, 133, 231}, { 28, 102, 192},
+ { 7, 81, 147}, { 1, 53, 91}, { 1, 25, 42},
+ },
+ { // band 4
+ {184, 144, 248}, {114, 117, 237}, { 37, 89, 192},
+ { 10, 63, 130}, { 4, 42, 76}, { 1, 19, 38},
+ },
+ { // band 5
+ {207, 79, 250}, {179, 74, 241}, { 83, 67, 199},
+ { 38, 51, 142}, { 17, 37, 97}, { 10, 14, 55},
+ },
+ },
+ { // Inter
+ { // band 0
+ {220, 82, 232}, {150, 93, 214}, { 66, 95, 177},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {116, 160, 227}, {136, 141, 227}, { 67, 114, 190},
+ { 40, 94, 148}, { 21, 70, 107}, { 10, 43, 63},
+ },
+ { // band 2
+ {124, 173, 235}, {105, 147, 226}, { 27, 107, 184},
+ { 10, 80, 142}, { 3, 50, 86}, { 1, 16, 32},
+ },
+ { // band 3
+ {149, 179, 243}, { 89, 147, 234}, { 29, 112, 193},
+ { 9, 94, 157}, { 1, 64, 111}, { 1, 25, 43},
+ },
+ { // band 4
+ {187, 153, 248}, {127, 130, 241}, { 52, 99, 202},
+ { 20, 79, 152}, { 4, 50, 93}, { 1, 19, 32},
+ },
+ { // band 5
+ {215, 82, 251}, {195, 80, 246}, { 93, 70, 204},
+ { 39, 54, 147}, { 14, 33, 88}, { 6, 14, 39},
+ },
+ },
+ },
+ },
+ { // TX_SIZE 1
+ { // Y plane
+ { // Intra
+ { // band 0
+ {116, 43, 131}, { 39, 41, 94}, { 4, 28, 47},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 28, 101, 141}, { 27, 95, 140}, { 18, 80, 121},
+ { 10, 61, 95}, { 4, 39, 60}, { 1, 19, 26},
+ },
+ { // band 2
+ { 29, 150, 183}, { 19, 127, 175}, { 8, 98, 147},
+ { 3, 76, 115}, { 1, 55, 84}, { 1, 29, 43},
+ },
+ { // band 3
+ { 26, 168, 202}, { 12, 138, 188}, { 2, 98, 149},
+ { 1, 69, 110}, { 1, 40, 65}, { 1, 17, 25},
+ },
+ { // band 4
+ { 33, 188, 225}, { 12, 155, 207}, { 2, 101, 155},
+ { 1, 65, 106}, { 1, 36, 60}, { 1, 18, 26},
+ },
+ { // band 5
+ { 79, 205, 242}, { 30, 168, 224}, { 5, 106, 164},
+ { 1, 68, 110}, { 1, 39, 65}, { 1, 18, 28},
+ },
+ },
+ { // Inter
+ { // band 0
+ { 96, 80, 201}, { 51, 88, 168}, { 14, 78, 116},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 6, 167, 216}, { 32, 152, 211}, { 24, 121, 182},
+ { 13, 98, 149}, { 12, 76, 108}, { 8, 48, 62},
+ },
+ { // band 2
+ { 17, 176, 225}, { 13, 147, 209}, { 3, 96, 155},
+ { 1, 65, 108}, { 2, 43, 63}, { 2, 23, 25},
+ },
+ { // band 3
+ { 18, 183, 232}, { 10, 153, 214}, { 1, 96, 154},
+ { 1, 63, 105}, { 1, 39, 59}, { 1, 21, 24},
+ },
+ { // band 4
+ { 23, 191, 239}, { 8, 159, 221}, { 1, 97, 158},
+ { 1, 61, 105}, { 1, 37, 60}, { 1, 20, 26},
+ },
+ { // band 5
+ { 70, 201, 243}, { 29, 163, 228}, { 4, 102, 169},
+ { 1, 67, 114}, { 1, 39, 66}, { 1, 17, 29},
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // band 0
+ {181, 38, 192}, { 95, 47, 151}, { 29, 49, 102},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 72, 131, 202}, { 93, 120, 205}, { 50, 103, 179},
+ { 24, 79, 143}, { 11, 47, 78}, { 7, 19, 25},
+ },
+ { // band 2
+ { 84, 176, 221}, { 56, 144, 214}, { 21, 108, 182},
+ { 8, 83, 139}, { 3, 55, 90}, { 2, 27, 41},
+ },
+ { // band 3
+ { 84, 195, 234}, { 42, 156, 222}, { 10, 109, 180},
+ { 4, 77, 133}, { 1, 48, 80}, { 1, 23, 35},
+ },
+ { // band 4
+ { 89, 210, 238}, { 35, 165, 221}, { 6, 106, 172},
+ { 2, 70, 123}, { 1, 44, 74}, { 1, 21, 30},
+ },
+ { // band 5
+ {114, 221, 247}, { 49, 170, 234}, { 7, 113, 184},
+ { 2, 77, 132}, { 1, 48, 79}, { 1, 25, 33},
+ },
+ },
+ { // Inter
+ { // band 0
+ {192, 66, 237}, {113, 84, 211}, { 35, 84, 154},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 81, 180, 234}, {127, 165, 229}, { 58, 137, 204},
+ { 41, 114, 174}, { 44, 94, 136}, { 29, 66, 86},
+ },
+ { // band 2
+ { 82, 193, 240}, { 39, 162, 223}, { 8, 113, 179},
+ { 3, 83, 136}, { 6, 62, 84}, { 5, 45, 45},
+ },
+ { // band 3
+ { 78, 203, 242}, { 31, 170, 227}, { 4, 115, 181},
+ { 1, 82, 135}, { 2, 59, 82}, { 1, 45, 47},
+ },
+ { // band 4
+ { 76, 210, 239}, { 25, 170, 213}, { 2, 99, 152},
+ { 1, 69, 115}, { 1, 49, 80}, { 1, 47, 57},
+ },
+ { // band 5
+ {103, 217, 250}, { 42, 180, 237}, { 3, 124, 191},
+ { 1, 90, 150}, { 1, 69, 116}, { 1, 52, 46},
+ },
+ },
+ },
+ },
+ { // TX_SIZE 2
+ { // Y plane
+ { // Intra
+ { // band 0
+ { 58, 38, 99}, { 9, 26, 51}, { 1, 14, 22},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 14, 78, 109}, { 16, 73, 105}, { 11, 62, 92},
+ { 6, 47, 72}, { 2, 29, 45}, { 1, 12, 18},
+ },
+ { // band 2
+ { 17, 131, 148}, { 11, 112, 140}, { 5, 87, 118},
+ { 2, 63, 90}, { 1, 42, 63}, { 1, 19, 31},
+ },
+ { // band 3
+ { 12, 151, 168}, { 6, 116, 152}, { 1, 76, 115},
+ { 1, 50, 81}, { 1, 32, 52}, { 1, 14, 23},
+ },
+ { // band 4
+ { 10, 174, 191}, { 3, 130, 172}, { 1, 80, 126},
+ { 1, 53, 88}, { 1, 32, 55}, { 1, 14, 24},
+ },
+ { // band 5
+ { 19, 219, 237}, { 3, 168, 211}, { 1, 90, 142},
+ { 1, 53, 91}, { 1, 29, 51}, { 1, 12, 21},
+ },
+ },
+ { // Inter
+ { // band 0
+ { 21, 46, 184}, { 10, 53, 130}, { 2, 49, 78},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 3, 169, 198}, { 37, 165, 196}, { 26, 134, 176},
+ { 11, 108, 149}, { 5, 81, 112}, { 3, 47, 64},
+ },
+ { // band 2
+ { 11, 183, 215}, { 8, 142, 192}, { 2, 91, 141},
+ { 1, 62, 100}, { 1, 38, 62}, { 1, 17, 28},
+ },
+ { // band 3
+ { 12, 190, 223}, { 6, 149, 199}, { 1, 88, 139},
+ { 1, 56, 93}, { 1, 31, 54}, { 1, 13, 21},
+ },
+ { // band 4
+ { 11, 197, 230}, { 3, 154, 204}, { 1, 83, 134},
+ { 1, 50, 86}, { 1, 28, 49}, { 1, 12, 21},
+ },
+ { // band 5
+ { 17, 211, 240}, { 2, 167, 217}, { 1, 88, 143},
+ { 1, 53, 91}, { 1, 30, 53}, { 1, 14, 24},
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // band 0
+ {151, 30, 151}, { 50, 36, 105}, { 8, 34, 66},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 39, 111, 160}, { 62, 111, 165}, { 37, 99, 147},
+ { 15, 77, 118}, { 3, 47, 73}, { 1, 17, 27},
+ },
+ { // band 2
+ { 48, 170, 190}, { 32, 135, 180}, { 11, 100, 149},
+ { 4, 76, 116}, { 1, 51, 80}, { 1, 22, 36},
+ },
+ { // band 3
+ { 39, 191, 208}, { 18, 141, 191}, { 3, 96, 150},
+ { 1, 66, 110}, { 1, 41, 69}, { 1, 17, 28},
+ },
+ { // band 4
+ { 32, 209, 219}, { 8, 152, 201}, { 1, 96, 153},
+ { 1, 63, 106}, { 1, 38, 66}, { 1, 17, 29},
+ },
+ { // band 5
+ { 33, 230, 237}, { 5, 173, 214}, { 1, 100, 155},
+ { 1, 62, 105}, { 1, 38, 66}, { 1, 18, 32},
+ },
+ },
+ { // Inter
+ { // band 0
+ {149, 38, 231}, { 59, 51, 186}, { 12, 54, 117},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 53, 179, 226}, {126, 176, 223}, { 58, 147, 202},
+ { 28, 118, 174}, { 15, 94, 138}, { 14, 63, 87},
+ },
+ { // band 2
+ { 58, 196, 232}, { 26, 158, 213}, { 5, 106, 166},
+ { 1, 75, 124}, { 1, 46, 79}, { 1, 23, 39},
+ },
+ { // band 3
+ { 46, 203, 235}, { 17, 162, 213}, { 2, 104, 165},
+ { 1, 72, 120}, { 1, 44, 74}, { 1, 22, 33},
+ },
+ { // band 4
+ { 37, 213, 238}, { 8, 167, 216}, { 1, 104, 168},
+ { 1, 68, 119}, { 1, 40, 67}, { 1, 17, 29},
+ },
+ { // band 5
+ { 30, 228, 239}, { 4, 181, 213}, { 1, 103, 153},
+ { 1, 65, 110}, { 1, 43, 79}, { 1, 27, 56},
+ },
+ },
+ },
+ },
+ { // TX_SIZE 3
+ { // Y plane
+ { // Intra
+ { // band 0
+ { 76, 25, 53}, { 9, 18, 32}, { 1, 12, 18},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 29, 55, 91}, { 19, 58, 95}, { 15, 57, 89},
+ { 12, 49, 77}, { 3, 29, 44}, { 1, 8, 12},
+ },
+ { // band 2
+ { 32, 160, 148}, { 33, 143, 146}, { 19, 122, 132},
+ { 6, 90, 102}, { 1, 58, 70}, { 1, 17, 24},
+ },
+ { // band 3
+ { 16, 181, 181}, { 6, 142, 165}, { 1, 90, 120},
+ { 1, 50, 71}, { 1, 25, 38}, { 1, 9, 14},
+ },
+ { // band 4
+ { 13, 203, 203}, { 3, 154, 176}, { 1, 80, 108},
+ { 1, 41, 61}, { 1, 24, 37}, { 1, 11, 17},
+ },
+ { // band 5
+ { 6, 234, 240}, { 1, 178, 204}, { 1, 80, 119},
+ { 1, 45, 71}, { 1, 26, 42}, { 1, 12, 19},
+ },
+ },
+ { // Inter
+ { // band 0
+ { 78, 20, 135}, { 25, 18, 101}, { 5, 19, 57},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 7, 144, 183}, {117, 151, 195}, {109, 151, 187},
+ { 39, 130, 168}, { 11, 100, 125}, { 4, 59, 64},
+ },
+ { // band 2
+ { 20, 184, 212}, { 12, 148, 191}, { 2, 98, 141},
+ { 1, 65, 100}, { 1, 39, 61}, { 1, 14, 22},
+ },
+ { // band 3
+ { 15, 194, 222}, { 6, 153, 198}, { 1, 92, 138},
+ { 1, 58, 91}, { 1, 32, 52}, { 1, 12, 18},
+ },
+ { // band 4
+ { 14, 206, 232}, { 3, 162, 206}, { 1, 89, 134},
+ { 1, 52, 83}, { 1, 28, 46}, { 1, 11, 17},
+ },
+ { // band 5
+ { 6, 225, 241}, { 1, 175, 210}, { 1, 81, 125},
+ { 1, 48, 78}, { 1, 28, 46}, { 1, 13, 21},
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // band 0
+ {124, 23, 93}, { 31, 24, 63}, { 6, 24, 46},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 23, 86, 126}, { 45, 90, 145}, { 31, 91, 133},
+ { 19, 80, 114}, { 7, 53, 72}, { 1, 20, 27},
+ },
+ { // band 2
+ { 51, 186, 189}, { 48, 159, 182}, { 33, 128, 156},
+ { 15, 92, 124}, { 2, 62, 83}, { 1, 29, 43},
+ },
+ { // band 3
+ { 36, 198, 211}, { 15, 156, 187}, { 3, 97, 137},
+ { 1, 61, 93}, { 1, 35, 57}, { 1, 15, 23},
+ },
+ { // band 4
+ { 34, 219, 223}, { 9, 162, 193}, { 1, 91, 136},
+ { 1, 58, 92}, { 1, 35, 54}, { 1, 14, 23},
+ },
+ { // band 5
+ { 19, 243, 243}, { 3, 191, 208}, { 1, 91, 137},
+ { 1, 56, 90}, { 1, 34, 55}, { 1, 16, 24},
+ },
+ },
+ { // Inter
+ { // band 0
+ {119, 20, 197}, { 19, 29, 156}, { 3, 30, 107},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 24, 192, 226}, {161, 193, 227}, { 97, 185, 222},
+ { 31, 158, 204}, { 16, 122, 165}, { 17, 84, 112},
+ },
+ { // band 2
+ { 26, 202, 229}, { 11, 165, 210}, { 2, 103, 152},
+ { 1, 68, 104}, { 1, 42, 70}, { 1, 16, 36},
+ },
+ { // band 3
+ { 24, 209, 237}, { 6, 169, 214}, { 1, 102, 154},
+ { 1, 65, 107}, { 1, 45, 68}, { 1, 17, 24},
+ },
+ { // band 4
+ { 19, 219, 243}, { 4, 183, 226}, { 1, 115, 172},
+ { 1, 73, 119}, { 1, 43, 77}, { 1, 15, 37},
+ },
+ { // band 5
+ { 11, 237, 241}, { 2, 190, 216}, { 1, 108, 146},
+ { 1, 59, 94}, { 1, 40, 67}, { 1, 30, 53},
+ },
+ },
+ },
+ },
+ },
+ { // Q_Index 1
+ { // TX_SIZE 0
+ { // Y plane
+ { // Intra
+ { // band 0
+ {174, 30, 159}, { 76, 38, 115}, { 15, 33, 65},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 60, 80, 153}, { 72, 75, 147}, { 36, 68, 129},
+ { 15, 59, 104}, { 4, 45, 74}, { 1, 28, 45},
+ },
+ { // band 2
+ { 70, 122, 186}, { 55, 104, 175}, { 21, 83, 144},
+ { 8, 67, 112}, { 2, 51, 82}, { 1, 34, 57},
+ },
+ { // band 3
+ { 97, 144, 207}, { 52, 109, 195}, { 16, 77, 153},
+ { 4, 58, 113}, { 1, 43, 77}, { 1, 27, 48},
+ },
+ { // band 4
+ {128, 148, 229}, { 76, 104, 210}, { 18, 77, 159},
+ { 4, 65, 110}, { 1, 52, 82}, { 1, 31, 55},
+ },
+ { // band 5
+ {165, 51, 238}, {128, 50, 230}, { 57, 49, 185},
+ { 28, 47, 130}, { 12, 44, 96}, { 3, 36, 60},
+ },
+ },
+ { // Inter
+ { // band 0
+ {169, 103, 203}, {117, 96, 176}, { 56, 81, 137},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 31, 150, 224}, { 49, 128, 212}, { 19, 92, 165},
+ { 6, 67, 116}, { 2, 43, 71}, { 1, 21, 36},
+ },
+ { // band 2
+ { 58, 156, 230}, { 47, 130, 215}, { 7, 87, 158},
+ { 2, 63, 114}, { 1, 39, 71}, { 1, 18, 36},
+ },
+ { // band 3
+ { 85, 176, 240}, { 43, 138, 226}, { 8, 93, 172},
+ { 2, 70, 127}, { 1, 46, 81}, { 1, 26, 47},
+ },
+ { // band 4
+ {155, 144, 248}, { 93, 116, 235}, { 21, 83, 180},
+ { 4, 59, 119}, { 1, 43, 80}, { 1, 25, 50},
+ },
+ { // band 5
+ {203, 61, 250}, {171, 57, 243}, { 71, 57, 199},
+ { 31, 49, 144}, { 13, 42, 96}, { 7, 30, 52},
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // band 0
+ {204, 44, 204}, {137, 57, 184}, { 72, 62, 152},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {145, 117, 236}, {151, 112, 231}, { 87, 95, 208},
+ { 31, 77, 165}, { 5, 49, 98}, { 1, 24, 39},
+ },
+ { // band 2
+ {146, 152, 241}, {140, 132, 236}, { 41, 103, 209},
+ { 10, 86, 165}, { 2, 55, 106}, { 1, 25, 58},
+ },
+ { // band 3
+ {154, 181, 249}, { 84, 143, 240}, { 23, 114, 210},
+ { 6, 102, 182}, { 2, 71, 137}, { 1, 35, 90},
+ },
+ { // band 4
+ {184, 150, 251}, {115, 130, 244}, { 34, 105, 215},
+ { 15, 89, 173}, { 1, 51, 141}, {128, 128, 128},
+ },
+ { // band 5
+ {211, 71, 253}, {193, 78, 249}, {106, 91, 232},
+ { 61, 87, 198}, { 85, 153, 254}, {128, 128, 128},
+ },
+ },
+ { // Inter
+ { // band 0
+ {232, 104, 242}, {165, 114, 227}, { 96, 120, 206},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {137, 178, 250}, {146, 153, 245}, { 74, 108, 205},
+ { 41, 81, 149}, { 24, 55, 104}, { 13, 36, 68},
+ },
+ { // band 2
+ {147, 185, 252}, {127, 161, 246}, { 30, 104, 208},
+ { 11, 74, 154}, { 6, 54, 100}, { 2, 29, 63},
+ },
+ { // band 3
+ {163, 191, 254}, {101, 161, 249}, { 22, 114, 215},
+ { 6, 89, 173}, { 1, 65, 120}, { 1, 1, 170},
+ },
+ { // band 4
+ {197, 160, 254}, {142, 141, 251}, { 39, 102, 218},
+ { 10, 76, 158}, { 1, 56, 122}, {128, 128, 128},
+ },
+ { // band 5
+ {224, 76, 254}, {215, 84, 253}, {107, 85, 232},
+ { 43, 71, 177}, { 1, 1, 254}, {128, 128, 128},
+ },
+ },
+ },
+ },
+ { // TX_SIZE 1
+ { // Y plane
+ { // Intra
+ { // band 0
+ { 68, 37, 120}, { 21, 34, 82}, { 5, 26, 49},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 41, 89, 138}, { 56, 83, 132}, { 31, 73, 115},
+ { 16, 62, 92}, { 5, 45, 62}, { 1, 24, 32},
+ },
+ { // band 2
+ { 48, 139, 165}, { 30, 114, 160}, { 13, 92, 132},
+ { 6, 72, 103}, { 3, 49, 72}, { 1, 26, 41},
+ },
+ { // band 3
+ { 44, 162, 191}, { 20, 127, 175}, { 5, 90, 137},
+ { 1, 62, 100}, { 1, 38, 63}, { 1, 20, 32},
+ },
+ { // band 4
+ { 51, 184, 213}, { 16, 137, 193}, { 2, 89, 143},
+ { 1, 60, 102}, { 1, 39, 66}, { 1, 23, 37},
+ },
+ { // band 5
+ { 76, 200, 235}, { 27, 150, 216}, { 3, 99, 164},
+ { 1, 70, 119}, { 1, 45, 77}, { 1, 22, 38},
+ },
+ },
+ { // Inter
+ { // band 0
+ { 81, 112, 199}, { 49, 101, 164}, { 19, 80, 119},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 12, 181, 217}, { 48, 151, 212}, { 38, 118, 180},
+ { 22, 95, 140}, { 11, 67, 92}, { 13, 46, 44},
+ },
+ { // band 2
+ { 29, 188, 226}, { 19, 147, 210}, { 5, 95, 154},
+ { 4, 68, 106}, { 3, 44, 60}, { 1, 24, 27},
+ },
+ { // band 3
+ { 30, 195, 234}, { 15, 153, 216}, { 3, 95, 156},
+ { 2, 66, 108}, { 2, 44, 62}, { 1, 24, 29},
+ },
+ { // band 4
+ { 36, 203, 243}, { 12, 162, 225}, { 2, 98, 163},
+ { 2, 67, 113}, { 2, 45, 68}, { 1, 24, 34},
+ },
+ { // band 5
+ { 86, 207, 248}, { 35, 165, 236}, { 3, 107, 180},
+ { 1, 73, 128}, { 1, 45, 78}, { 1, 20, 34},
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // band 0
+ {188, 37, 205}, {118, 51, 172}, { 56, 57, 135},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {116, 135, 225}, {144, 123, 221}, { 72, 103, 197},
+ { 35, 77, 153}, { 15, 47, 82}, { 6, 25, 34},
+ },
+ { // band 2
+ {128, 171, 233}, { 82, 142, 226}, { 31, 106, 191},
+ { 16, 82, 146}, { 9, 59, 98}, { 4, 33, 54},
+ },
+ { // band 3
+ {126, 197, 241}, { 66, 155, 230}, { 18, 108, 190},
+ { 7, 82, 148}, { 3, 58, 98}, { 1, 25, 50},
+ },
+ { // band 4
+ {117, 207, 244}, { 44, 163, 233}, { 9, 112, 191},
+ { 5, 84, 148}, { 3, 61, 87}, { 1, 28, 38},
+ },
+ { // band 5
+ {112, 214, 249}, { 39, 174, 240}, { 6, 125, 205},
+ { 4, 96, 163}, { 5, 66, 100}, { 1, 128, 254},
+ },
+ },
+ { // Inter
+ { // band 0
+ {227, 70, 234}, {145, 91, 213}, { 61, 100, 173},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {108, 198, 243}, {171, 172, 240}, {118, 130, 210},
+ {104, 107, 165}, { 64, 85, 114}, { 55, 64, 60},
+ },
+ { // band 2
+ {110, 208, 247}, { 64, 175, 237}, { 24, 112, 187},
+ { 24, 81, 133}, { 24, 63, 83}, { 21, 47, 53},
+ },
+ { // band 3
+ { 91, 218, 249}, { 46, 188, 238}, { 8, 113, 184},
+ { 5, 83, 137}, { 6, 62, 95}, { 17, 44, 94},
+ },
+ { // band 4
+ { 84, 216, 248}, { 30, 187, 237}, { 2, 117, 188},
+ { 1, 88, 141}, { 3, 63, 98}, { 1, 1, 1},
+ },
+ { // band 5
+ {116, 218, 252}, { 47, 186, 242}, { 2, 132, 204},
+ { 1, 106, 175}, { 1, 88, 104}, { 1, 254, 128},
+ },
+ },
+ },
+ },
+ { // TX_SIZE 2
+ { // Y plane
+ { // Intra
+ { // band 0
+ { 35, 41, 129}, { 12, 30, 70}, { 2, 19, 32},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 30, 77, 116}, { 39, 70, 110}, { 20, 58, 96},
+ { 8, 47, 77}, { 2, 33, 52}, { 1, 17, 26},
+ },
+ { // band 2
+ { 31, 123, 146}, { 18, 103, 140}, { 7, 81, 119},
+ { 2, 62, 95}, { 1, 44, 70}, { 1, 26, 42},
+ },
+ { // band 3
+ { 21, 149, 170}, { 9, 114, 158}, { 2, 80, 126},
+ { 1, 57, 94}, { 1, 36, 61}, { 1, 18, 31},
+ },
+ { // band 4
+ { 20, 178, 199}, { 6, 134, 183}, { 1, 87, 139},
+ { 1, 60, 100}, { 1, 37, 64}, { 1, 18, 31},
+ },
+ { // band 5
+ { 36, 218, 233}, { 6, 160, 207}, { 1, 92, 147},
+ { 1, 59, 101}, { 1, 35, 62}, { 1, 18, 31},
+ },
+ },
+ { // Inter
+ { // band 0
+ { 17, 62, 211}, { 14, 62, 153}, { 5, 50, 84},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 11, 180, 205}, { 87, 160, 205}, { 53, 128, 184},
+ { 27, 106, 156}, { 13, 79, 115}, { 6, 46, 67},
+ },
+ { // band 2
+ { 32, 194, 220}, { 20, 145, 202}, { 4, 96, 152},
+ { 1, 67, 111}, { 1, 42, 70}, { 1, 21, 37},
+ },
+ { // band 3
+ { 30, 204, 228}, { 14, 152, 207}, { 1, 92, 149},
+ { 1, 61, 103}, { 1, 34, 59}, { 1, 16, 28},
+ },
+ { // band 4
+ { 27, 213, 235}, { 7, 159, 210}, { 1, 88, 143},
+ { 1, 55, 94}, { 1, 31, 53}, { 1, 16, 27},
+ },
+ { // band 5
+ { 28, 223, 243}, { 4, 173, 217}, { 1, 91, 146},
+ { 1, 58, 98}, { 1, 35, 60}, { 1, 19, 33},
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // band 0
+ {172, 37, 202}, { 83, 51, 156}, { 24, 53, 110},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 76, 134, 206}, {110, 124, 200}, { 47, 106, 180},
+ { 15, 82, 145}, { 3, 48, 83}, { 1, 19, 32},
+ },
+ { // band 2
+ { 80, 176, 220}, { 49, 145, 212}, { 17, 112, 180},
+ { 7, 84, 140}, { 1, 53, 89}, { 1, 27, 43},
+ },
+ { // band 3
+ { 74, 201, 232}, { 38, 158, 221}, { 8, 112, 179},
+ { 2, 79, 132}, { 1, 47, 82}, { 1, 26, 42},
+ },
+ { // band 4
+ { 73, 215, 239}, { 28, 169, 227}, { 3, 112, 176},
+ { 1, 74, 126}, { 1, 48, 79}, { 1, 27, 44},
+ },
+ { // band 5
+ { 71, 233, 244}, { 18, 180, 230}, { 1, 114, 180},
+ { 1, 80, 134}, { 1, 51, 85}, { 1, 26, 36},
+ },
+ },
+ { // Inter
+ { // band 0
+ {213, 34, 244}, {126, 57, 212}, { 46, 67, 151},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {120, 202, 245}, {198, 173, 241}, {119, 146, 224},
+ { 76, 126, 195}, { 44, 102, 159}, { 40, 76, 115},
+ },
+ { // band 2
+ {120, 215, 248}, { 69, 171, 237}, { 23, 119, 194},
+ { 10, 86, 147}, { 2, 56, 94}, { 1, 25, 44},
+ },
+ { // band 3
+ {102, 226, 250}, { 53, 183, 239}, { 9, 118, 188},
+ { 2, 78, 131}, { 1, 48, 89}, { 1, 17, 36},
+ },
+ { // band 4
+ { 86, 235, 252}, { 34, 194, 240}, { 2, 109, 173},
+ { 1, 68, 118}, { 1, 44, 79}, { 1, 1, 38},
+ },
+ { // band 5
+ { 59, 236, 243}, { 11, 189, 228}, { 1, 112, 187},
+ { 1, 88, 145}, { 1, 55, 92}, { 1, 1, 128},
+ },
+ },
+ },
+ },
+ { // TX_SIZE 3
+ { // Y plane
+ { // Intra
+ { // band 0
+ { 41, 40, 104}, { 12, 31, 64}, { 2, 16, 28},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 65, 58, 132}, { 50, 61, 130}, { 40, 57, 116},
+ { 22, 46, 87}, { 2, 28, 44}, { 1, 11, 17},
+ },
+ { // band 2
+ { 55, 139, 135}, { 46, 122, 132}, { 21, 89, 110},
+ { 6, 60, 78}, { 1, 38, 54}, { 1, 17, 27},
+ },
+ { // band 3
+ { 29, 167, 161}, { 10, 120, 141}, { 1, 69, 98},
+ { 1, 42, 66}, { 1, 28, 44}, { 1, 15, 24},
+ },
+ { // band 4
+ { 19, 191, 180}, { 4, 125, 154}, { 1, 70, 107},
+ { 1, 48, 77}, { 1, 33, 53}, { 1, 17, 28},
+ },
+ { // band 5
+ { 16, 238, 231}, { 2, 163, 198}, { 1, 85, 134},
+ { 1, 54, 90}, { 1, 34, 57}, { 1, 17, 29},
+ },
+ },
+ { // Inter
+ { // band 0
+ { 70, 15, 216}, { 40, 18, 164}, { 14, 17, 83},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 25, 150, 200}, {185, 154, 211}, {123, 137, 199},
+ { 67, 119, 177}, { 31, 96, 137}, { 18, 63, 86},
+ },
+ { // band 2
+ { 57, 187, 223}, { 35, 148, 207}, { 7, 104, 159},
+ { 2, 72, 113}, { 1, 44, 71}, { 1, 20, 34},
+ },
+ { // band 3
+ { 44, 203, 233}, { 18, 157, 212}, { 1, 98, 150},
+ { 1, 61, 102}, { 1, 38, 62}, { 1, 19, 31},
+ },
+ { // band 4
+ { 41, 215, 238}, { 11, 166, 215}, { 1, 94, 146},
+ { 1, 60, 101}, { 1, 37, 63}, { 1, 17, 28},
+ },
+ { // band 5
+ { 19, 236, 246}, { 3, 188, 223}, { 1, 95, 146},
+ { 1, 58, 95}, { 1, 34, 56}, { 1, 17, 27},
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // band 0
+ {146, 27, 156}, { 49, 32, 116}, { 10, 39, 77},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 47, 101, 172}, { 93, 100, 178}, { 58, 91, 165},
+ { 26, 75, 134}, { 4, 49, 82}, { 2, 22, 33},
+ },
+ { // band 2
+ { 60, 158, 196}, { 44, 135, 186}, { 25, 106, 157},
+ { 8, 81, 124}, { 2, 56, 86}, { 1, 28, 45},
+ },
+ { // band 3
+ { 44, 169, 212}, { 15, 138, 196}, { 2, 100, 157},
+ { 1, 74, 119}, { 1, 49, 76}, { 1, 20, 34},
+ },
+ { // band 4
+ { 38, 199, 231}, { 11, 158, 214}, { 1, 111, 167},
+ { 1, 76, 122}, { 1, 44, 76}, { 1, 17, 39},
+ },
+ { // band 5
+ { 40, 236, 246}, { 10, 187, 230}, { 1, 115, 175},
+ { 1, 74, 122}, { 1, 42, 71}, { 1, 14, 59},
+ },
+ },
+ { // Inter
+ { // band 0
+ {161, 26, 237}, { 65, 46, 209}, { 21, 46, 161},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 87, 229, 245}, {206, 214, 244}, {148, 186, 236},
+ { 89, 165, 221}, { 41, 132, 186}, { 37, 93, 141},
+ },
+ { // band 2
+ { 93, 231, 246}, { 47, 181, 231}, { 8, 117, 188},
+ { 2, 84, 138}, { 1, 43, 87}, { 1, 27, 41},
+ },
+ { // band 3
+ { 80, 239, 250}, { 28, 190, 236}, { 1, 119, 183},
+ { 1, 84, 135}, { 1, 81, 69}, { 1, 102, 1},
+ },
+ { // band 4
+ { 67, 245, 252}, { 22, 206, 242}, { 1, 130, 195},
+ { 1, 77, 136}, { 1, 35, 88}, {128, 128, 128},
+ },
+ { // band 5
+ { 43, 250, 228}, { 31, 185, 204}, { 6, 101, 183},
+ { 1, 92, 151}, { 1, 84, 137}, {128, 128, 128},
+ },
+ },
+ },
+ },
+ },
+ { // Q_Index 2
+ { // TX_SIZE 0
+ { // Y plane
+ { // Intra
+ { // band 0
+ {181, 22, 175}, { 96, 37, 147}, { 35, 41, 105},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 80, 95, 197}, {111, 92, 193}, { 59, 87, 175},
+ { 29, 79, 150}, { 10, 65, 118}, { 2, 47, 82},
+ },
+ { // band 2
+ { 90, 141, 216}, { 77, 120, 210}, { 23, 95, 184},
+ { 11, 81, 151}, { 6, 75, 130}, { 2, 58, 113},
+ },
+ { // band 3
+ {122, 167, 231}, { 66, 119, 225}, { 26, 87, 189},
+ { 7, 76, 151}, { 2, 63, 125}, { 1, 59, 77},
+ },
+ { // band 4
+ {162, 147, 244}, {110, 97, 236}, { 32, 88, 204},
+ { 11, 89, 174}, { 5, 78, 151}, {128, 128, 128},
+ },
+ { // band 5
+ {205, 59, 251}, {176, 68, 248}, { 90, 71, 223},
+ { 49, 72, 188}, { 17, 74, 203}, {128, 128, 128},
+ },
+ },
+ { // Inter
+ { // band 0
+ {188, 70, 207}, {140, 73, 189}, { 85, 73, 163},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 59, 144, 239}, { 79, 126, 237}, { 31, 102, 202},
+ { 10, 81, 153}, { 3, 56, 102}, { 2, 33, 59},
+ },
+ { // band 2
+ {100, 152, 243}, { 80, 129, 236}, { 14, 94, 194},
+ { 4, 72, 150}, { 1, 50, 103}, { 1, 35, 60},
+ },
+ { // band 3
+ {130, 183, 247}, { 70, 139, 242}, { 19, 100, 203},
+ { 4, 83, 159}, { 1, 59, 119}, { 1, 44, 72},
+ },
+ { // band 4
+ {197, 138, 252}, {135, 107, 247}, { 31, 86, 210},
+ { 7, 74, 160}, { 1, 53, 107}, {128, 128, 128},
+ },
+ { // band 5
+ {229, 54, 254}, {200, 51, 251}, { 83, 61, 226},
+ { 33, 55, 177}, { 12, 74, 145}, {128, 128, 128},
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // band 0
+ {229, 20, 235}, {183, 37, 221}, {127, 47, 198},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {188, 115, 251}, {208, 110, 250}, {101, 99, 235},
+ { 38, 81, 197}, { 9, 56, 132}, { 9, 52, 63},
+ },
+ { // band 2
+ {189, 150, 252}, {186, 137, 251}, { 54, 107, 236},
+ { 14, 90, 195}, { 1, 89, 104}, {128, 128, 128},
+ },
+ { // band 3
+ {209, 180, 254}, {142, 145, 253}, { 51, 130, 236},
+ { 6, 128, 214}, { 1, 128, 254}, {128, 128, 128},
+ },
+ { // band 4
+ {231, 140, 254}, {194, 128, 254}, { 75, 119, 233},
+ {128, 23, 230}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 5
+ {244, 59, 254}, {239, 81, 254}, {128, 85, 254},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ { // Inter
+ { // band 0
+ {246, 55, 247}, {197, 64, 235}, {141, 74, 218},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {178, 163, 254}, {192, 138, 252}, { 85, 103, 231},
+ { 49, 81, 179}, { 32, 54, 133}, { 12, 26, 98},
+ },
+ { // band 2
+ {189, 173, 254}, {179, 150, 253}, { 60, 94, 237},
+ { 34, 81, 198}, { 20, 53, 187}, {128, 128, 128},
+ },
+ { // band 3
+ {202, 191, 254}, {157, 160, 254}, { 57, 117, 240},
+ { 28, 105, 211}, { 1, 128, 1}, {128, 128, 128},
+ },
+ { // band 4
+ {231, 146, 254}, {208, 133, 254}, { 66, 78, 233},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 5
+ {246, 49, 254}, {246, 63, 254}, { 85, 142, 254},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ },
+ },
+ { // TX_SIZE 1
+ { // Y plane
+ { // Intra
+ { // band 0
+ { 45, 28, 124}, { 23, 35, 107}, { 10, 34, 78},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 53, 99, 177}, { 82, 96, 174}, { 46, 89, 158},
+ { 21, 76, 133}, { 6, 56, 94}, { 1, 33, 54},
+ },
+ { // band 2
+ { 68, 147, 201}, { 42, 124, 195}, { 17, 98, 166},
+ { 7, 75, 131}, { 2, 53, 93}, { 1, 33, 59},
+ },
+ { // band 3
+ { 65, 176, 217}, { 30, 137, 206}, { 6, 97, 167},
+ { 2, 70, 128}, { 1, 47, 88}, { 1, 29, 46},
+ },
+ { // band 4
+ { 69, 195, 232}, { 24, 146, 218}, { 4, 100, 175},
+ { 2, 72, 134}, { 1, 51, 93}, { 1, 29, 52},
+ },
+ { // band 5
+ { 96, 212, 246}, { 39, 158, 234}, { 6, 109, 192},
+ { 2, 77, 144}, { 1, 50, 95}, { 1, 20, 45},
+ },
+ },
+ { // Inter
+ { // band 0
+ { 71, 80, 213}, { 53, 73, 181}, { 25, 66, 141},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 35, 168, 231}, { 91, 150, 229}, { 49, 122, 202},
+ { 22, 97, 162}, { 10, 68, 108}, { 9, 48, 57},
+ },
+ { // band 2
+ { 56, 178, 236}, { 32, 148, 225}, { 9, 99, 176},
+ { 4, 69, 127}, { 2, 44, 78}, { 1, 25, 41},
+ },
+ { // band 3
+ { 57, 191, 242}, { 27, 155, 230}, { 5, 102, 180},
+ { 2, 71, 133}, { 1, 44, 78}, { 1, 27, 41},
+ },
+ { // band 4
+ { 67, 201, 247}, { 24, 162, 237}, { 3, 106, 188},
+ { 3, 74, 137}, { 1, 46, 85}, { 1, 34, 48},
+ },
+ { // band 5
+ {111, 210, 251}, { 47, 166, 244}, { 3, 113, 199},
+ { 2, 77, 146}, { 1, 48, 93}, { 1, 38, 22},
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // band 0
+ {206, 21, 221}, {150, 36, 195}, { 94, 44, 164},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {147, 128, 239}, {194, 122, 238}, { 95, 104, 220},
+ { 39, 81, 183}, { 13, 53, 111}, { 3, 24, 49},
+ },
+ { // band 2
+ {164, 163, 244}, {106, 142, 239}, { 50, 112, 215},
+ { 26, 90, 177}, { 12, 67, 130}, { 1, 1, 64},
+ },
+ { // band 3
+ {155, 193, 249}, { 88, 158, 244}, { 26, 124, 220},
+ { 10, 98, 173}, { 1, 77, 126}, {128, 128, 128},
+ },
+ { // band 4
+ {141, 205, 252}, { 64, 174, 248}, { 17, 124, 221},
+ { 12, 92, 176}, { 1, 29, 148}, {128, 128, 128},
+ },
+ { // band 5
+ {150, 217, 254}, { 74, 191, 252}, { 30, 144, 215},
+ { 1, 106, 137}, {128, 1, 128}, {128, 128, 128},
+ },
+ },
+ { // Inter
+ { // band 0
+ {241, 37, 242}, {175, 48, 223}, { 99, 53, 189},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {153, 183, 248}, {212, 156, 247}, {134, 124, 221},
+ { 88, 103, 184}, { 59, 86, 132}, { 29, 61, 67},
+ },
+ { // band 2
+ {162, 199, 250}, {106, 167, 247}, { 56, 110, 207},
+ { 32, 85, 165}, { 16, 71, 130}, { 1, 93, 254},
+ },
+ { // band 3
+ {143, 213, 252}, { 86, 187, 250}, { 23, 124, 220},
+ { 7, 95, 176}, { 1, 109, 102}, {128, 128, 128},
+ },
+ { // band 4
+ {130, 219, 254}, { 70, 201, 253}, { 15, 128, 215},
+ { 1, 101, 201}, { 1, 64, 170}, {128, 128, 128},
+ },
+ { // band 5
+ {155, 219, 254}, {105, 207, 254}, { 28, 155, 229},
+ { 1, 153, 191}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ },
+ },
+ { // TX_SIZE 2
+ { // Y plane
+ { // Intra
+ { // band 0
+ { 18, 26, 117}, { 10, 29, 82}, { 3, 25, 52},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 35, 88, 152}, { 62, 85, 150}, { 36, 77, 137},
+ { 16, 66, 116}, { 4, 47, 81}, { 1, 26, 44},
+ },
+ { // band 2
+ { 55, 141, 182}, { 32, 119, 177}, { 12, 93, 154},
+ { 4, 71, 123}, { 1, 51, 89}, { 1, 32, 56},
+ },
+ { // band 3
+ { 46, 171, 202}, { 21, 130, 191}, { 5, 91, 154},
+ { 1, 64, 115}, { 1, 42, 77}, { 1, 25, 41},
+ },
+ { // band 4
+ { 43, 195, 219}, { 12, 142, 203}, { 1, 91, 156},
+ { 1, 63, 115}, { 1, 41, 77}, { 1, 22, 43},
+ },
+ { // band 5
+ { 42, 221, 238}, { 8, 162, 219}, { 1, 98, 167},
+ { 1, 67, 123}, { 1, 43, 83}, { 1, 25, 38},
+ },
+ },
+ { // Inter
+ { // band 0
+ { 16, 51, 216}, { 20, 48, 168}, { 9, 44, 109},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 34, 164, 226}, {124, 148, 226}, { 72, 127, 207},
+ { 36, 107, 175}, { 15, 81, 129}, { 6, 51, 79},
+ },
+ { // band 2
+ { 61, 182, 234}, { 35, 148, 220}, { 9, 101, 178},
+ { 4, 71, 134}, { 1, 46, 90}, { 1, 24, 51},
+ },
+ { // band 3
+ { 54, 198, 239}, { 25, 156, 224}, { 3, 98, 173},
+ { 1, 66, 124}, { 1, 41, 78}, { 1, 15, 37},
+ },
+ { // band 4
+ { 48, 209, 242}, { 12, 162, 226}, { 1, 96, 169},
+ { 1, 63, 119}, { 1, 40, 78}, { 1, 18, 45},
+ },
+ { // band 5
+ { 44, 223, 247}, { 6, 173, 232}, { 1, 105, 178},
+ { 1, 71, 131}, { 1, 44, 84}, { 1, 13, 46},
+ },
+ },
+ },
+ { // UV plane
+ { // Intra
+ { // band 0
+ {188, 26, 214}, {121, 42, 181}, { 66, 49, 149},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {136, 128, 233}, {172, 124, 230}, { 80, 106, 211},
+ { 27, 81, 174}, { 6, 49, 98}, { 8, 28, 49},
+ },
+ { // band 2
+ {145, 166, 239}, { 92, 141, 229}, { 28, 108, 196},
+ { 8, 87, 154}, { 1, 58, 105}, { 1, 27, 59},
+ },
+ { // band 3
+ {131, 193, 242}, { 66, 151, 231}, { 13, 112, 192},
+ { 2, 81, 152}, { 1, 66, 121}, { 1, 23, 64},
+ },
+ { // band 4
+ {112, 211, 246}, { 41, 164, 235}, { 5, 117, 202},
+ { 1, 83, 162}, { 1, 64, 111}, {128, 128, 128},
+ },
+ { // band 5
+ { 96, 230, 250}, { 28, 185, 243}, { 2, 132, 204},
+ { 1, 91, 166}, { 1, 85, 46}, {128, 128, 128},
+ },
+ },
+ { // Inter
+ { // band 0
+ {238, 23, 242}, {157, 29, 215}, { 73, 27, 162},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {165, 173, 250}, {222, 151, 247}, {152, 134, 235},
+ {114, 120, 210}, { 86, 109, 176}, { 53, 88, 145},
+ },
+ { // band 2
+ {164, 194, 249}, {100, 158, 241}, { 35, 111, 212},
+ { 17, 85, 167}, { 1, 52, 112}, { 1, 73, 1},
+ },
+ { // band 3
+ {151, 215, 252}, { 83, 172, 245}, { 16, 122, 208},
+ { 6, 101, 165}, { 1, 74, 113}, { 1, 1, 1},
+ },
+ { // band 4
+ {138, 230, 253}, { 65, 184, 248}, { 8, 128, 212},
+ { 1, 111, 182}, {128, 1, 1}, {128, 128, 128},
+ },
+ { // band 5
+ {123, 240, 253}, { 36, 201, 250}, { 3, 127, 211},
+ { 1, 68, 204}, {128, 1, 1}, {128, 128, 128},
+ },
+ },
+ },
+ },
+ { // TX_SIZE 3
+ { // Y plane
+ { // Intra
+ { // band 0
+ { 51, 21, 156}, { 30, 23, 86}, { 4, 18, 37},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 38, 77, 129}, { 79, 76, 129}, { 40, 66, 117},
+ { 12, 54, 95}, { 1, 36, 60}, { 1, 17, 29},
+ },
+ { // band 2
+ { 44, 133, 149}, { 24, 107, 143}, { 8, 78, 121},
+ { 3, 59, 97}, { 1, 42, 71}, { 1, 22, 37},
+ },
+ { // band 3
+ { 29, 160, 171}, { 9, 114, 158}, { 1, 76, 125},
+ { 1, 54, 93}, { 1, 36, 63}, { 1, 20, 35},
+ },
+ { // band 4
+ { 22, 188, 205}, { 6, 132, 186}, { 1, 87, 144},
+ { 1, 62, 107}, { 1, 41, 72}, { 1, 23, 41},
+ },
+ { // band 5
+ { 25, 233, 236}, { 5, 165, 214}, { 1, 96, 158},
+ { 1, 63, 112}, { 1, 40, 73}, { 1, 23, 40},
+ },
+ },
+ { // Intra
+ { // band 0
+ { 48, 20, 231}, { 37, 21, 179}, { 15, 18, 109},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 41, 154, 216}, {196, 142, 221}, {131, 125, 207},
+ { 84, 111, 181}, { 45, 91, 142}, { 27, 62, 89},
+ },
+ { // band 2
+ { 72, 181, 230}, { 41, 147, 215}, { 10, 102, 173},
+ { 3, 73, 132}, { 1, 47, 89}, { 1, 23, 50},
+ },
+ { // band 3
+ { 60, 201, 236}, { 23, 157, 219}, { 2, 99, 167},
+ { 1, 69, 124}, { 1, 43, 80}, { 1, 22, 39},
+ },
+ { // band 4
+ { 53, 214, 242}, { 15, 165, 224}, { 1, 101, 173},
+ { 1, 70, 131}, { 1, 44, 83}, { 1, 23, 49},
+ },
+ { // band 5
+ { 39, 239, 248}, { 7, 186, 233}, { 1, 108, 174},
+ { 1, 70, 123}, { 1, 43, 77}, { 1, 16, 42},
+ },
+ },
+ },
+ { // UV plane
+ { // Inter
+ { // band 0
+ {161, 26, 204}, { 77, 40, 160}, { 26, 50, 117},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 80, 140, 218}, {136, 133, 215}, { 63, 117, 197},
+ { 20, 93, 170}, { 7, 55, 102}, { 13, 32, 52},
+ },
+ { // band 2
+ { 86, 173, 231}, { 46, 150, 220}, { 18, 118, 190},
+ { 8, 90, 150}, { 2, 60, 95}, { 1, 39, 41},
+ },
+ { // band 3
+ { 80, 183, 242}, { 37, 160, 231}, { 6, 120, 182},
+ { 1, 86, 137}, { 1, 46, 78}, { 1, 15, 24},
+ },
+ { // band 4
+ { 88, 215, 247}, { 42, 179, 235}, { 4, 116, 182},
+ { 2, 80, 133}, { 1, 46, 85}, { 1, 64, 43},
+ },
+ { // band 5
+ {100, 236, 250}, { 31, 186, 234}, { 1, 114, 181},
+ { 1, 85, 135}, { 1, 78, 64}, {128, 128, 128},
+ },
+ },
+ { // Inter
+ { // band 0
+ {213, 13, 245}, {106, 16, 211}, { 32, 11, 156},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {140, 214, 247}, {241, 186, 243}, {177, 172, 235},
+ {128, 156, 219}, {106, 130, 191}, { 99, 105, 152},
+ },
+ { // band 2
+ {125, 218, 248}, { 75, 167, 239}, { 29, 111, 212},
+ { 6, 66, 152}, { 1, 42, 96}, { 1, 85, 128},
+ },
+ { // band 3
+ {120, 232, 252}, { 60, 189, 247}, { 8, 141, 200},
+ { 1, 89, 134}, { 1, 32, 128}, {128, 128, 128},
+ },
+ { // band 4
+ {111, 238, 253}, { 56, 198, 245}, { 1, 123, 208},
+ { 1, 93, 176}, { 1, 1, 73}, {128, 128, 128},
+ },
+ { // band 5
+ { 98, 251, 249}, { 56, 189, 244}, { 17, 113, 220},
+ { 1, 109, 179}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ },
+ },
+ },
+ { // Q_Index 3
+ { // TX_SIZE 0
+ { // Y plane
+ { // Intra
+ { // band 0
+ {186, 16, 200}, {122, 31, 187}, { 78, 40, 161},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {146, 119, 245}, {182, 115, 244}, {130, 113, 238},
+ { 88, 110, 225}, { 47, 103, 208}, { 5, 102, 188},
+ },
+ { // band 2
+ {164, 157, 248}, {155, 141, 250}, { 71, 116, 243},
+ { 88, 129, 233}, { 50, 99, 228}, { 26, 148, 191},
+ },
+ { // band 3
+ {200, 158, 253}, {177, 118, 252}, { 99, 113, 245},
+ { 77, 120, 210}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 4
+ {231, 104, 254}, {209, 82, 254}, {143, 112, 252},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 5
+ {250, 36, 254}, {243, 55, 254}, {223, 170, 254},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ { // Intra
+ { // band 0
+ {207, 37, 226}, {164, 46, 218}, {122, 58, 201},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {149, 154, 253}, {170, 137, 253}, { 94, 123, 247},
+ { 42, 113, 222}, { 16, 97, 174}, { 49, 98, 159},
+ },
+ { // band 2
+ {177, 162, 253}, {165, 142, 252}, { 51, 108, 243},
+ { 18, 108, 213}, { 1, 98, 254}, {128, 128, 128},
+ },
+ { // band 3
+ {211, 152, 254}, {184, 116, 254}, { 70, 110, 244},
+ { 8, 108, 237}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 4
+ {236, 89, 254}, {210, 67, 254}, {112, 111, 248},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 5
+ {246, 26, 254}, {233, 35, 254}, {128, 1, 254},
+ {254, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ },
+ { // UV plane
+ { // Inter
+ { // band 0
+ {247, 2, 247}, {226, 8, 242}, {191, 14, 235},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {231, 94, 254}, {248, 91, 254}, {186, 89, 252},
+ {128, 92, 244}, { 79, 112, 254}, {128, 128, 128},
+ },
+ { // band 2
+ {228, 145, 253}, {240, 130, 254}, {223, 105, 254},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 3
+ {245, 153, 253}, {240, 120, 254}, {128, 128, 128},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 4
+ {254, 128, 254}, {204, 128, 254}, {128, 128, 128},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 5
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ { // Inter
+ { // band 0
+ {253, 7, 249}, {224, 9, 244}, {182, 13, 231},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {234, 109, 254}, {242, 104, 254}, {160, 98, 254},
+ {123, 85, 243}, { 82, 43, 217}, {128, 128, 128},
+ },
+ { // band 2
+ {243, 137, 254}, {240, 118, 254}, {136, 53, 254},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 3
+ {251, 173, 254}, {229, 129, 250}, {128, 128, 128},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 4
+ {254, 119, 254}, {254, 128, 128}, {128, 128, 128},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 5
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ },
+ },
+ { // TX_SIZE 1
+ { // Y plane
+ { // Intra
+ { // band 0
+ { 49, 26, 159}, { 36, 34, 150}, { 26, 38, 124},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 99, 122, 226}, {143, 119, 225}, { 90, 113, 213},
+ { 46, 102, 193}, { 14, 84, 157}, { 3, 59, 107},
+ },
+ { // band 2
+ {109, 164, 237}, { 74, 142, 233}, { 29, 112, 216},
+ { 14, 92, 184}, { 10, 80, 156}, { 1, 52, 137},
+ },
+ { // band 3
+ {110, 191, 245}, { 59, 156, 240}, { 18, 121, 220},
+ { 8, 97, 184}, { 3, 84, 150}, {128, 128, 128},
+ },
+ { // band 4
+ {115, 203, 250}, { 59, 167, 246}, { 16, 130, 226},
+ { 7, 97, 192}, { 1, 71, 99}, {128, 128, 128},
+ },
+ { // band 5
+ {149, 218, 253}, { 93, 171, 251}, { 28, 125, 233},
+ { 28, 99, 192}, {128, 85, 85}, {128, 128, 128},
+ },
+ },
+ { // Intra
+ { // band 0
+ { 97, 45, 229}, { 79, 52, 205}, { 46, 58, 171},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 99, 180, 249}, {156, 165, 249}, { 73, 141, 237},
+ { 31, 116, 208}, { 13, 81, 153}, { 5, 42, 86},
+ },
+ { // band 2
+ {113, 188, 251}, { 68, 161, 244}, { 16, 108, 216},
+ { 6, 81, 168}, { 2, 65, 118}, {128, 1, 1},
+ },
+ { // band 3
+ {117, 201, 252}, { 62, 171, 248}, { 12, 119, 221},
+ { 5, 90, 182}, { 4, 66, 116}, {128, 128, 128},
+ },
+ { // band 4
+ {128, 207, 253}, { 70, 176, 251}, { 11, 126, 228},
+ { 6, 89, 189}, { 1, 44, 148}, {128, 128, 128},
+ },
+ { // band 5
+ {162, 218, 254}, {107, 170, 253}, { 22, 131, 238},
+ { 1, 77, 182}, { 1, 254, 128}, {128, 128, 128},
+ },
+ },
+ },
+ { // UV plane
+ { // Inter
+ { // band 0
+ {235, 5, 238}, {194, 14, 223}, {152, 22, 205},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {200, 121, 251}, {241, 115, 252}, {167, 108, 248},
+ { 93, 93, 233}, { 36, 66, 189}, {128, 128, 128},
+ },
+ { // band 2
+ {220, 151, 253}, {176, 135, 252}, { 95, 124, 254},
+ { 64, 105, 217}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 3
+ {225, 189, 254}, {175, 155, 254}, {102, 119, 254},
+ { 1, 1, 1}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 4
+ {218, 195, 254}, {125, 157, 253}, {128, 128, 254},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 5
+ {221, 197, 254}, { 85, 210, 254}, {128, 128, 128},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ { // Inter
+ { // band 0
+ {250, 9, 246}, {204, 13, 234}, {144, 18, 211},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {213, 157, 253}, {243, 138, 253}, {170, 117, 250},
+ {109, 91, 233}, { 66, 77, 163}, { 64, 85, 254},
+ },
+ { // band 2
+ {221, 169, 254}, {182, 141, 253}, {112, 120, 239},
+ { 85, 165, 254}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 3
+ {226, 192, 254}, {189, 174, 251}, {153, 128, 254},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 4
+ {232, 192, 254}, {195, 187, 247}, { 1, 191, 254},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 5
+ {247, 185, 254}, {254, 93, 254}, {128, 128, 128},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ },
+ },
+ { // TX_SIZE 2
+ { // Y plane
+ { // Intra
+ { // band 0
+ { 14, 30, 136}, { 15, 33, 120}, { 10, 33, 90},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 92, 109, 209}, {113, 108, 207}, { 77, 102, 193},
+ { 39, 91, 171}, { 11, 70, 129}, { 2, 44, 77},
+ },
+ { // band 2
+ { 99, 158, 223}, { 66, 135, 217}, { 23, 109, 194},
+ { 9, 85, 160}, { 3, 66, 124}, { 1, 51, 100},
+ },
+ { // band 3
+ { 89, 189, 234}, { 46, 149, 225}, { 10, 110, 194},
+ { 2, 83, 156}, { 1, 57, 113}, { 1, 47, 73},
+ },
+ { // band 4
+ { 78, 206, 242}, { 28, 161, 232}, { 3, 114, 200},
+ { 1, 86, 161}, { 1, 62, 118}, { 1, 1, 1},
+ },
+ { // band 5
+ { 72, 227, 250}, { 20, 182, 242}, { 3, 126, 210},
+ { 2, 91, 166}, { 1, 64, 126}, {128, 128, 128},
+ },
+ },
+ { // Intra
+ { // band 0
+ { 23, 42, 227}, { 41, 43, 195}, { 25, 45, 146},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {100, 172, 245}, {165, 158, 246}, { 88, 137, 234},
+ { 44, 116, 203}, { 18, 85, 149}, { 7, 56, 92},
+ },
+ { // band 2
+ {117, 188, 247}, { 70, 155, 239}, { 18, 105, 204},
+ { 7, 78, 158}, { 2, 50, 111}, { 1, 38, 77},
+ },
+ { // band 3
+ {104, 207, 250}, { 54, 166, 241}, { 6, 110, 199},
+ { 1, 78, 155}, { 1, 45, 100}, { 1, 1, 1},
+ },
+ { // band 4
+ { 87, 216, 251}, { 30, 177, 243}, { 1, 114, 203},
+ { 1, 85, 157}, { 1, 53, 108}, {128, 128, 128},
+ },
+ { // band 5
+ { 80, 230, 253}, { 23, 193, 248}, { 1, 127, 215},
+ { 1, 94, 170}, { 1, 71, 59}, {128, 128, 128},
+ },
+ },
+ },
+ { // UV plane
+ { // Inter
+ { // band 0
+ {222, 9, 234}, {161, 20, 210}, {113, 30, 185},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {195, 120, 248}, {231, 124, 247}, {148, 116, 238},
+ { 64, 98, 207}, { 20, 70, 147}, { 87, 68, 100},
+ },
+ { // band 2
+ {186, 161, 250}, {124, 148, 245}, { 44, 123, 230},
+ { 23, 107, 205}, { 1, 80, 131}, {128, 128, 128},
+ },
+ { // band 3
+ {172, 196, 252}, {110, 160, 248}, { 37, 134, 235},
+ { 23, 125, 200}, {128, 254, 128}, {128, 128, 128},
+ },
+ { // band 4
+ {173, 209, 253}, {103, 175, 250}, { 1, 120, 240},
+ { 1, 146, 254}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 5
+ {184, 235, 254}, { 81, 186, 251}, {128, 109, 254},
+ {128, 254, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ { // Inter
+ { // band 0
+ {248, 8, 243}, {185, 11, 225}, {108, 11, 189},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {208, 158, 254}, {244, 147, 252}, {195, 132, 248},
+ {161, 122, 224}, {129, 114, 188}, { 59, 119, 159},
+ },
+ { // band 2
+ {202, 182, 253}, {143, 161, 251}, { 73, 115, 247},
+ {146, 175, 204}, {128, 1, 254}, {128, 128, 128},
+ },
+ { // band 3
+ {202, 204, 254}, {131, 174, 251}, { 18, 153, 207},
+ {128, 254, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 4
+ {192, 221, 254}, {114, 190, 254}, {128, 170, 254},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 5
+ {166, 236, 254}, {119, 200, 254}, {128, 128, 128},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ },
+ },
+ { // TX_SIZE 3
+ { // Y plane
+ { // Intra
+ { // band 0
+ { 30, 32, 144}, { 21, 35, 96}, { 4, 27, 55},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 35, 107, 172}, { 61, 104, 170}, { 33, 94, 160},
+ { 13, 80, 139}, { 2, 55, 97}, { 1, 28, 49},
+ },
+ { // band 2
+ { 51, 153, 195}, { 29, 129, 189}, { 9, 99, 163},
+ { 3, 75, 129}, { 1, 49, 88}, { 1, 29, 50},
+ },
+ { // band 3
+ { 53, 164, 210}, { 21, 134, 201}, { 3, 97, 164},
+ { 1, 69, 124}, { 1, 45, 82}, { 1, 31, 58},
+ },
+ { // band 4
+ { 47, 205, 234}, { 18, 158, 220}, { 2, 109, 177},
+ { 1, 78, 137}, { 1, 53, 101}, { 1, 34, 70},
+ },
+ { // band 5
+ { 55, 233, 245}, { 16, 179, 233}, { 1, 116, 191},
+ { 1, 79, 145}, { 1, 53, 101}, { 1, 37, 58},
+ },
+ },
+ { // Intra
+ { // band 0
+ { 36, 33, 227}, { 39, 28, 190}, { 18, 27, 134},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ { 76, 156, 235}, {184, 147, 235}, {114, 130, 220},
+ { 72, 112, 191}, { 42, 87, 144}, { 21, 65, 93},
+ },
+ { // band 2
+ { 96, 179, 240}, { 51, 149, 228}, { 12, 105, 191},
+ { 6, 74, 148}, { 1, 47, 100}, { 1, 29, 53},
+ },
+ { // band 3
+ { 88, 191, 242}, { 35, 154, 231}, { 3, 106, 187},
+ { 1, 74, 140}, { 1, 41, 84}, { 1, 25, 38},
+ },
+ { // band 4
+ { 77, 212, 249}, { 28, 171, 239}, { 2, 117, 199},
+ { 1, 79, 151}, { 1, 45, 99}, { 1, 1, 1},
+ },
+ { // band 5
+ { 77, 236, 252}, { 27, 190, 246}, { 2, 120, 203},
+ { 1, 78, 147}, { 1, 42, 72}, {128, 128, 128},
+ },
+ },
+ },
+ { // UV plane
+ { // Inter
+ { // band 0
+ {185, 11, 227}, {113, 30, 182}, { 57, 44, 144},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {151, 139, 244}, {212, 139, 241}, {124, 126, 231},
+ { 59, 104, 213}, { 26, 73, 158}, { 20, 45, 95},
+ },
+ { // band 2
+ {155, 163, 247}, {108, 152, 239}, { 39, 124, 214},
+ { 7, 109, 162}, { 29, 57, 128}, {128, 128, 128},
+ },
+ { // band 3
+ {158, 176, 250}, { 89, 164, 243}, { 11, 114, 196},
+ { 1, 96, 141}, { 1, 81, 118}, {128, 1, 1},
+ },
+ { // band 4
+ {148, 212, 251}, { 59, 174, 240}, { 2, 130, 203},
+ { 1, 70, 168}, { 1, 51, 106}, {128, 128, 128},
+ },
+ { // band 5
+ {104, 237, 252}, { 39, 190, 246}, { 1, 154, 220},
+ {128, 102, 1}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ { // Inter
+ { // band 0
+ {236, 6, 242}, {111, 6, 206}, { 36, 5, 161},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 1
+ {193, 193, 252}, {248, 182, 251}, {218, 150, 246},
+ {182, 134, 244}, {151, 137, 227}, { 45, 102, 195},
+ },
+ { // band 2
+ {188, 202, 251}, {125, 165, 249}, { 64, 75, 218},
+ { 1, 128, 254}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 3
+ {178, 225, 254}, {107, 188, 231}, { 21, 135, 233},
+ {128, 1, 254}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 4
+ {164, 227, 253}, { 55, 193, 251}, { 1, 111, 225},
+ {128, 128, 128}, {128, 128, 128}, {128, 128, 128},
+ },
+ { // band 5
+ {151, 243, 254}, { 50, 203, 254}, {128, 179, 254},
+ {128, 1, 254}, {128, 128, 128}, {128, 128, 128},
+ },
+ },
+ },
+ },
+ },
+};
+#else
+static const vp10_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = {  // Defaults for TX_4X4 when CONFIG_ENTROPY is off; copied into cm->fc->coef_probs[TX_4X4] by vp10_default_coef_probs().
+ { // Y plane
+ { // Intra (each ref type holds [band][context] triples of 3 values)
+ { // Band 0 (3 contexts)
+ { 195, 29, 183 }, { 84, 49, 136 }, { 8, 42, 71 }
+ }, { // Band 1 (bands 1-5 carry 6 contexts each)
+ { 31, 107, 169 }, { 35, 99, 159 }, { 17, 82, 140 },
+ { 8, 66, 114 }, { 2, 44, 76 }, { 1, 19, 32 }
+ }, { // Band 2
+ { 40, 132, 201 }, { 29, 114, 187 }, { 13, 91, 157 },
+ { 7, 75, 127 }, { 3, 58, 95 }, { 1, 28, 47 }
+ }, { // Band 3
+ { 69, 142, 221 }, { 42, 122, 201 }, { 15, 91, 159 },
+ { 6, 67, 121 }, { 1, 42, 77 }, { 1, 17, 31 }
+ }, { // Band 4
+ { 102, 148, 228 }, { 67, 117, 204 }, { 17, 82, 154 },
+ { 6, 59, 114 }, { 2, 39, 75 }, { 1, 15, 29 }
+ }, { // Band 5
+ { 156, 57, 233 }, { 119, 57, 212 }, { 58, 48, 163 },
+ { 29, 40, 124 }, { 12, 30, 81 }, { 3, 12, 31 }
+ }
+ }, { // Inter
+ { // Band 0
+ { 191, 107, 226 }, { 124, 117, 204 }, { 25, 99, 155 }
+ }, { // Band 1
+ { 29, 148, 210 }, { 37, 126, 194 }, { 8, 93, 157 },
+ { 2, 68, 118 }, { 1, 39, 69 }, { 1, 17, 33 }
+ }, { // Band 2
+ { 41, 151, 213 }, { 27, 123, 193 }, { 3, 82, 144 },
+ { 1, 58, 105 }, { 1, 32, 60 }, { 1, 13, 26 }
+ }, { // Band 3
+ { 59, 159, 220 }, { 23, 126, 198 }, { 4, 88, 151 },
+ { 1, 66, 114 }, { 1, 38, 71 }, { 1, 18, 34 }
+ }, { // Band 4
+ { 114, 136, 232 }, { 51, 114, 207 }, { 11, 83, 155 },
+ { 3, 56, 105 }, { 1, 33, 65 }, { 1, 17, 34 }
+ }, { // Band 5
+ { 149, 65, 234 }, { 121, 57, 215 }, { 61, 49, 166 },
+ { 28, 36, 114 }, { 12, 25, 76 }, { 3, 16, 42 }
+ }
+ }
+ }, { // UV plane
+ { // Intra
+ { // Band 0
+ { 214, 49, 220 }, { 132, 63, 188 }, { 42, 65, 137 }
+ }, { // Band 1
+ { 85, 137, 221 }, { 104, 131, 216 }, { 49, 111, 192 },
+ { 21, 87, 155 }, { 2, 49, 87 }, { 1, 16, 28 }
+ }, { // Band 2
+ { 89, 163, 230 }, { 90, 137, 220 }, { 29, 100, 183 },
+ { 10, 70, 135 }, { 2, 42, 81 }, { 1, 17, 33 }
+ }, { // Band 3
+ { 108, 167, 237 }, { 55, 133, 222 }, { 15, 97, 179 },
+ { 4, 72, 135 }, { 1, 45, 85 }, { 1, 19, 38 }
+ }, { // Band 4
+ { 124, 146, 240 }, { 66, 124, 224 }, { 17, 88, 175 },
+ { 4, 58, 122 }, { 1, 36, 75 }, { 1, 18, 37 }
+ }, { // Band 5
+ { 141, 79, 241 }, { 126, 70, 227 }, { 66, 58, 182 },
+ { 30, 44, 136 }, { 12, 34, 96 }, { 2, 20, 47 }
+ }
+ }, { // Inter
+ { // Band 0
+ { 229, 99, 249 }, { 143, 111, 235 }, { 46, 109, 192 }
+ }, { // Band 1
+ { 82, 158, 236 }, { 94, 146, 224 }, { 25, 117, 191 },
+ { 9, 87, 149 }, { 3, 56, 99 }, { 1, 33, 57 }
+ }, { // Band 2
+ { 83, 167, 237 }, { 68, 145, 222 }, { 10, 103, 177 },
+ { 2, 72, 131 }, { 1, 41, 79 }, { 1, 20, 39 }
+ }, { // Band 3
+ { 99, 167, 239 }, { 47, 141, 224 }, { 10, 104, 178 },
+ { 2, 73, 133 }, { 1, 44, 85 }, { 1, 22, 47 }
+ }, { // Band 4
+ { 127, 145, 243 }, { 71, 129, 228 }, { 17, 93, 177 },
+ { 3, 61, 124 }, { 1, 41, 84 }, { 1, 21, 52 }
+ }, { // Band 5
+ { 157, 78, 244 }, { 140, 72, 231 }, { 69, 58, 184 },
+ { 31, 44, 137 }, { 14, 38, 105 }, { 8, 23, 61 }
+ }
+ }
+ }
+};
+
+static const vp10_coeff_probs_model default_coef_probs_8x8[PLANE_TYPES] = {  // Defaults for TX_8X8 when CONFIG_ENTROPY is off; copied into cm->fc->coef_probs[TX_8X8] by vp10_default_coef_probs().
+ { // Y plane
+ { // Intra (each ref type holds [band][context] triples of 3 values)
+ { // Band 0 (3 contexts)
+ { 125, 34, 187 }, { 52, 41, 133 }, { 6, 31, 56 }
+ }, { // Band 1 (bands 1-5 carry 6 contexts each)
+ { 37, 109, 153 }, { 51, 102, 147 }, { 23, 87, 128 },
+ { 8, 67, 101 }, { 1, 41, 63 }, { 1, 19, 29 }
+ }, { // Band 2
+ { 31, 154, 185 }, { 17, 127, 175 }, { 6, 96, 145 },
+ { 2, 73, 114 }, { 1, 51, 82 }, { 1, 28, 45 }
+ }, { // Band 3
+ { 23, 163, 200 }, { 10, 131, 185 }, { 2, 93, 148 },
+ { 1, 67, 111 }, { 1, 41, 69 }, { 1, 14, 24 }
+ }, { // Band 4
+ { 29, 176, 217 }, { 12, 145, 201 }, { 3, 101, 156 },
+ { 1, 69, 111 }, { 1, 39, 63 }, { 1, 14, 23 }
+ }, { // Band 5
+ { 57, 192, 233 }, { 25, 154, 215 }, { 6, 109, 167 },
+ { 3, 78, 118 }, { 1, 48, 69 }, { 1, 21, 29 }
+ }
+ }, { // Inter
+ { // Band 0
+ { 202, 105, 245 }, { 108, 106, 216 }, { 18, 90, 144 }
+ }, { // Band 1
+ { 33, 172, 219 }, { 64, 149, 206 }, { 14, 117, 177 },
+ { 5, 90, 141 }, { 2, 61, 95 }, { 1, 37, 57 }
+ }, { // Band 2
+ { 33, 179, 220 }, { 11, 140, 198 }, { 1, 89, 148 },
+ { 1, 60, 104 }, { 1, 33, 57 }, { 1, 12, 21 }
+ }, { // Band 3
+ { 30, 181, 221 }, { 8, 141, 198 }, { 1, 87, 145 },
+ { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 20 }
+ }, { // Band 4
+ { 32, 186, 224 }, { 7, 142, 198 }, { 1, 86, 143 },
+ { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 22 }
+ }, { // Band 5
+ { 57, 192, 227 }, { 20, 143, 204 }, { 3, 96, 154 },
+ { 1, 68, 112 }, { 1, 42, 69 }, { 1, 19, 32 }
+ }
+ }
+ }, { // UV plane
+ { // Intra
+ { // Band 0
+ { 212, 35, 215 }, { 113, 47, 169 }, { 29, 48, 105 }
+ }, { // Band 1
+ { 74, 129, 203 }, { 106, 120, 203 }, { 49, 107, 178 },
+ { 19, 84, 144 }, { 4, 50, 84 }, { 1, 15, 25 }
+ }, { // Band 2
+ { 71, 172, 217 }, { 44, 141, 209 }, { 15, 102, 173 },
+ { 6, 76, 133 }, { 2, 51, 89 }, { 1, 24, 42 }
+ }, { // Band 3
+ { 64, 185, 231 }, { 31, 148, 216 }, { 8, 103, 175 },
+ { 3, 74, 131 }, { 1, 46, 81 }, { 1, 18, 30 }
+ }, { // Band 4
+ { 65, 196, 235 }, { 25, 157, 221 }, { 5, 105, 174 },
+ { 1, 67, 120 }, { 1, 38, 69 }, { 1, 15, 30 }
+ }, { // Band 5
+ { 65, 204, 238 }, { 30, 156, 224 }, { 7, 107, 177 },
+ { 2, 70, 124 }, { 1, 42, 73 }, { 1, 18, 34 }
+ }
+ }, { // Inter
+ { // Band 0
+ { 225, 86, 251 }, { 144, 104, 235 }, { 42, 99, 181 }
+ }, { // Band 1
+ { 85, 175, 239 }, { 112, 165, 229 }, { 29, 136, 200 },
+ { 12, 103, 162 }, { 6, 77, 123 }, { 2, 53, 84 }
+ }, { // Band 2
+ { 75, 183, 239 }, { 30, 155, 221 }, { 3, 106, 171 },
+ { 1, 74, 128 }, { 1, 44, 76 }, { 1, 17, 28 }
+ }, { // Band 3
+ { 73, 185, 240 }, { 27, 159, 222 }, { 2, 107, 172 },
+ { 1, 75, 127 }, { 1, 42, 73 }, { 1, 17, 29 }
+ }, { // Band 4
+ { 62, 190, 238 }, { 21, 159, 222 }, { 2, 107, 172 },
+ { 1, 72, 122 }, { 1, 40, 71 }, { 1, 18, 32 }
+ }, { // Band 5
+ { 61, 199, 240 }, { 27, 161, 226 }, { 4, 113, 180 },
+ { 1, 76, 129 }, { 1, 46, 80 }, { 1, 23, 41 }
+ }
+ }
+ }
+};
+
+static const vp10_coeff_probs_model default_coef_probs_16x16[PLANE_TYPES] = {  // Defaults for TX_16X16 when CONFIG_ENTROPY is off; copied into cm->fc->coef_probs[TX_16X16] by vp10_default_coef_probs().
+ { // Y plane
+ { // Intra (each ref type holds [band][context] triples of 3 values)
+ { // Band 0 (3 contexts)
+ { 7, 27, 153 }, { 5, 30, 95 }, { 1, 16, 30 }
+ }, { // Band 1 (bands 1-5 carry 6 contexts each)
+ { 50, 75, 127 }, { 57, 75, 124 }, { 27, 67, 108 },
+ { 10, 54, 86 }, { 1, 33, 52 }, { 1, 12, 18 }
+ }, { // Band 2
+ { 43, 125, 151 }, { 26, 108, 148 }, { 7, 83, 122 },
+ { 2, 59, 89 }, { 1, 38, 60 }, { 1, 17, 27 }
+ }, { // Band 3
+ { 23, 144, 163 }, { 13, 112, 154 }, { 2, 75, 117 },
+ { 1, 50, 81 }, { 1, 31, 51 }, { 1, 14, 23 }
+ }, { // Band 4
+ { 18, 162, 185 }, { 6, 123, 171 }, { 1, 78, 125 },
+ { 1, 51, 86 }, { 1, 31, 54 }, { 1, 14, 23 }
+ }, { // Band 5
+ { 15, 199, 227 }, { 3, 150, 204 }, { 1, 91, 146 },
+ { 1, 55, 95 }, { 1, 30, 53 }, { 1, 11, 20 }
+ }
+ }, { // Inter
+ { // Band 0
+ { 19, 55, 240 }, { 19, 59, 196 }, { 3, 52, 105 }
+ }, { // Band 1
+ { 41, 166, 207 }, { 104, 153, 199 }, { 31, 123, 181 },
+ { 14, 101, 152 }, { 5, 72, 106 }, { 1, 36, 52 }
+ }, { // Band 2
+ { 35, 176, 211 }, { 12, 131, 190 }, { 2, 88, 144 },
+ { 1, 60, 101 }, { 1, 36, 60 }, { 1, 16, 28 }
+ }, { // Band 3
+ { 28, 183, 213 }, { 8, 134, 191 }, { 1, 86, 142 },
+ { 1, 56, 96 }, { 1, 30, 53 }, { 1, 12, 20 }
+ }, { // Band 4
+ { 20, 190, 215 }, { 4, 135, 192 }, { 1, 84, 139 },
+ { 1, 53, 91 }, { 1, 28, 49 }, { 1, 11, 20 }
+ }, { // Band 5
+ { 13, 196, 216 }, { 2, 137, 192 }, { 1, 86, 143 },
+ { 1, 57, 99 }, { 1, 32, 56 }, { 1, 13, 24 }
+ }
+ }
+ }, { // UV plane
+ { // Intra
+ { // Band 0
+ { 211, 29, 217 }, { 96, 47, 156 }, { 22, 43, 87 }
+ }, { // Band 1
+ { 78, 120, 193 }, { 111, 116, 186 }, { 46, 102, 164 },
+ { 15, 80, 128 }, { 2, 49, 76 }, { 1, 18, 28 }
+ }, { // Band 2
+ { 71, 161, 203 }, { 42, 132, 192 }, { 10, 98, 150 },
+ { 3, 69, 109 }, { 1, 44, 70 }, { 1, 18, 29 }
+ }, { // Band 3
+ { 57, 186, 211 }, { 30, 140, 196 }, { 4, 93, 146 },
+ { 1, 62, 102 }, { 1, 38, 65 }, { 1, 16, 27 }
+ }, { // Band 4
+ { 47, 199, 217 }, { 14, 145, 196 }, { 1, 88, 142 },
+ { 1, 57, 98 }, { 1, 36, 62 }, { 1, 15, 26 }
+ }, { // Band 5
+ { 26, 219, 229 }, { 5, 155, 207 }, { 1, 94, 151 },
+ { 1, 60, 104 }, { 1, 36, 62 }, { 1, 16, 28 }
+ }
+ }, { // Inter
+ { // Band 0
+ { 233, 29, 248 }, { 146, 47, 220 }, { 43, 52, 140 }
+ }, { // Band 1
+ { 100, 163, 232 }, { 179, 161, 222 }, { 63, 142, 204 },
+ { 37, 113, 174 }, { 26, 89, 137 }, { 18, 68, 97 }
+ }, { // Band 2
+ { 85, 181, 230 }, { 32, 146, 209 }, { 7, 100, 164 },
+ { 3, 71, 121 }, { 1, 45, 77 }, { 1, 18, 30 }
+ }, { // Band 3
+ { 65, 187, 230 }, { 20, 148, 207 }, { 2, 97, 159 },
+ { 1, 68, 116 }, { 1, 40, 70 }, { 1, 14, 29 }
+ }, { // Band 4
+ { 40, 194, 227 }, { 8, 147, 204 }, { 1, 94, 155 },
+ { 1, 65, 112 }, { 1, 39, 66 }, { 1, 14, 26 }
+ }, { // Band 5
+ { 16, 208, 228 }, { 3, 151, 207 }, { 1, 98, 160 },
+ { 1, 67, 117 }, { 1, 41, 74 }, { 1, 17, 31 }
+ }
+ }
+ }
+};
+
+static const vp10_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = {  // Defaults for TX_32X32 when CONFIG_ENTROPY is off; copied into cm->fc->coef_probs[TX_32X32] by vp10_default_coef_probs().
+ { // Y plane
+ { // Intra (each ref type holds [band][context] triples of 3 values)
+ { // Band 0 (3 contexts)
+ { 17, 38, 140 }, { 7, 34, 80 }, { 1, 17, 29 }
+ }, { // Band 1 (bands 1-5 carry 6 contexts each)
+ { 37, 75, 128 }, { 41, 76, 128 }, { 26, 66, 116 },
+ { 12, 52, 94 }, { 2, 32, 55 }, { 1, 10, 16 }
+ }, { // Band 2
+ { 50, 127, 154 }, { 37, 109, 152 }, { 16, 82, 121 },
+ { 5, 59, 85 }, { 1, 35, 54 }, { 1, 13, 20 }
+ }, { // Band 3
+ { 40, 142, 167 }, { 17, 110, 157 }, { 2, 71, 112 },
+ { 1, 44, 72 }, { 1, 27, 45 }, { 1, 11, 17 }
+ }, { // Band 4
+ { 30, 175, 188 }, { 9, 124, 169 }, { 1, 74, 116 },
+ { 1, 48, 78 }, { 1, 30, 49 }, { 1, 11, 18 }
+ }, { // Band 5
+ { 10, 222, 223 }, { 2, 150, 194 }, { 1, 83, 128 },
+ { 1, 48, 79 }, { 1, 27, 45 }, { 1, 11, 17 }
+ }
+ }, { // Inter
+ { // Band 0
+ { 36, 41, 235 }, { 29, 36, 193 }, { 10, 27, 111 }
+ }, { // Band 1
+ { 85, 165, 222 }, { 177, 162, 215 }, { 110, 135, 195 },
+ { 57, 113, 168 }, { 23, 83, 120 }, { 10, 49, 61 }
+ }, { // Band 2
+ { 85, 190, 223 }, { 36, 139, 200 }, { 5, 90, 146 },
+ { 1, 60, 103 }, { 1, 38, 65 }, { 1, 18, 30 }
+ }, { // Band 3
+ { 72, 202, 223 }, { 23, 141, 199 }, { 2, 86, 140 },
+ { 1, 56, 97 }, { 1, 36, 61 }, { 1, 16, 27 }
+ }, { // Band 4
+ { 55, 218, 225 }, { 13, 145, 200 }, { 1, 86, 141 },
+ { 1, 57, 99 }, { 1, 35, 61 }, { 1, 13, 22 }
+ }, { // Band 5
+ { 15, 235, 212 }, { 1, 132, 184 }, { 1, 84, 139 },
+ { 1, 57, 97 }, { 1, 34, 56 }, { 1, 14, 23 }
+ }
+ }
+ }, { // UV plane
+ { // Intra
+ { // Band 0
+ { 181, 21, 201 }, { 61, 37, 123 }, { 10, 38, 71 }
+ }, { // Band 1
+ { 47, 106, 172 }, { 95, 104, 173 }, { 42, 93, 159 },
+ { 18, 77, 131 }, { 4, 50, 81 }, { 1, 17, 23 }
+ }, { // Band 2
+ { 62, 147, 199 }, { 44, 130, 189 }, { 28, 102, 154 },
+ { 18, 75, 115 }, { 2, 44, 65 }, { 1, 12, 19 }
+ }, { // Band 3
+ { 55, 153, 210 }, { 24, 130, 194 }, { 3, 93, 146 },
+ { 1, 61, 97 }, { 1, 31, 50 }, { 1, 10, 16 }
+ }, { // Band 4
+ { 49, 186, 223 }, { 17, 148, 204 }, { 1, 96, 142 },
+ { 1, 53, 83 }, { 1, 26, 44 }, { 1, 11, 17 }
+ }, { // Band 5
+ { 13, 217, 212 }, { 2, 136, 180 }, { 1, 78, 124 },
+ { 1, 50, 83 }, { 1, 29, 49 }, { 1, 14, 23 }
+ }
+ }, { // Inter
+ { // Band 0
+ { 197, 13, 247 }, { 82, 17, 222 }, { 25, 17, 162 }
+ }, { // Band 1
+ { 126, 186, 247 }, { 234, 191, 243 }, { 176, 177, 234 },
+ { 104, 158, 220 }, { 66, 128, 186 }, { 55, 90, 137 }
+ }, { // Band 2
+ { 111, 197, 242 }, { 46, 158, 219 }, { 9, 104, 171 },
+ { 2, 65, 125 }, { 1, 44, 80 }, { 1, 17, 91 }
+ }, { // Band 3
+ { 104, 208, 245 }, { 39, 168, 224 }, { 3, 109, 162 },
+ { 1, 79, 124 }, { 1, 50, 102 }, { 1, 43, 102 }
+ }, { // Band 4
+ { 84, 220, 246 }, { 31, 177, 231 }, { 2, 115, 180 },
+ { 1, 79, 134 }, { 1, 55, 77 }, { 1, 60, 79 }
+ }, { // Band 5
+ { 43, 243, 240 }, { 8, 180, 217 }, { 1, 115, 166 },
+ { 1, 84, 121 }, { 1, 51, 67 }, { 1, 16, 6 }
+ }
+ }
+ }
+};
+#endif // CONFIG_ENTROPY
+/* clang-format on */
+
+// Writes MODEL_NODES probabilities to |probs| by copying row (p - 1) of
+// vp10_pareto8_full. |p| must be non-zero; this is only checked by assert.
+static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) {
+  const size_t tail_bytes = MODEL_NODES * sizeof(*probs);
+  assert(p != 0);
+  memcpy(probs, vp10_pareto8_full[p - 1], tail_bytes);
+}
+
+// Expands |model| into |full|: the first UNCONSTRAINED_NODES entries are
+// taken from |model| as-is, and the entries after them are filled in by
+// extend_to_full_distribution() using model[PIVOT_NODE]. |full| may be the
+// same buffer as |model|, in which case the leading copy is skipped.
+void vp10_model_to_full_probs(const vpx_prob *model, vpx_prob *full) {
+  if (full != model) {
+    memcpy(full, model, UNCONSTRAINED_NODES * sizeof(vpx_prob));
+  }
+  extend_to_full_distribution(full + UNCONSTRAINED_NODES, model[PIVOT_NODE]);
+}
+
+#if CONFIG_ANS
+// Fills |cdf| for one coefficient context from its model probabilities:
+// pdf_model[1] is merged with row (pdf_model[2] - 1) of
+// vp10_pareto8_token_probs into a temporary token pdf, which is then
+// converted by rans_build_cdf_from_pdf(). pdf_model[2] must be non-zero; this
+// is only checked by assert.
+void vp10_build_token_cdfs(const vpx_prob *pdf_model, rans_dec_lut cdf) {
+  AnsP10 token_pdf[ENTROPY_TOKENS - 1];
+  const vpx_prob pivot = pdf_model[2];
+  assert(pivot != 0);
+  // TODO(aconverse): Investigate making the precision of the zero and EOB tree
+  // nodes 10-bits.
+  rans_merge_prob8_pdf(token_pdf, pdf_model[1],
+                       vp10_pareto8_token_probs[pivot - 1],
+                       ENTROPY_TOKENS - 2);
+  rans_build_cdf_from_pdf(token_pdf, cdf);
+}
+
+// Rebuilds fc->coef_cdfs from fc->coef_probs for every transform size
+// (TX_4X4 through TX_32X32), plane type, reference type, band and band
+// context.
+void vp10_coef_pareto_cdfs(FRAME_CONTEXT *fc) {
+  TX_SIZE tx;
+  int plane, ref, band, ctx;
+
+  for (tx = TX_4X4; tx <= TX_32X32; ++tx) {
+    for (plane = 0; plane < PLANE_TYPES; ++plane) {
+      for (ref = 0; ref < REF_TYPES; ++ref) {
+        for (band = 0; band < COEF_BANDS; ++band) {
+          // Only the first BAND_COEFF_CONTEXTS(band) contexts are visited.
+          for (ctx = 0; ctx < BAND_COEFF_CONTEXTS(band); ++ctx) {
+            vp10_build_token_cdfs(fc->coef_probs[tx][plane][ref][band][ctx],
+                                  fc->coef_cdfs[tx][plane][ref][band][ctx]);
+          }
+        }
+      }
+    }
+  }
+}
+#endif // CONFIG_ANS
+
+// Resets cm->fc->coef_probs to the built-in defaults. With CONFIG_ENTROPY the
+// defaults are one entry of default_qctx_coef_probs, chosen from
+// cm->base_qindex and clamped to the last of the QCTX_BINS entries; otherwise
+// the four per-transform-size default tables are copied. With CONFIG_ANS the
+// coefficient cdfs are rebuilt from the new probabilities afterwards.
+void vp10_default_coef_probs(VP10_COMMON *cm) {
+#if CONFIG_ENTROPY
+  const int rounded_q =
+      ROUND_POWER_OF_TWO(cm->base_qindex, 8 - QCTX_BIN_BITS);
+  const int qctx_bin = VPXMIN(rounded_q, QCTX_BINS - 1);
+  vp10_copy(cm->fc->coef_probs, default_qctx_coef_probs[qctx_bin]);
+#else
+  vp10_copy(cm->fc->coef_probs[TX_4X4], default_coef_probs_4x4);
+  vp10_copy(cm->fc->coef_probs[TX_8X8], default_coef_probs_8x8);
+  vp10_copy(cm->fc->coef_probs[TX_16X16], default_coef_probs_16x16);
+  vp10_copy(cm->fc->coef_probs[TX_32X32], default_coef_probs_32x32);
+#endif  // CONFIG_ENTROPY
+
+#if CONFIG_ANS
+  vp10_coef_pareto_cdfs(cm->fc);
+#endif  // CONFIG_ANS
+}
+
+// Adapts cm->fc->coef_probs[tx_size] for one transform size. For every plane
+// type, reference type, band and band context, each of the
+// UNCONSTRAINED_NODES probabilities is recomputed by vp10_merge_probs() from
+// the matching prior probability and the branch counts taken from
+// cm->counts.coef and cm->counts.eob_branch. The prior comes from the frame
+// context selected by cm->frame_context_idx, except that with CONFIG_ENTROPY
+// and cm->partial_prob_update set it is cm->starting_coef_probs[tx_size].
+// |count_sat| and |update_factor| are forwarded to vp10_merge_probs().
+static void adapt_coef_probs(VP10_COMMON *cm, TX_SIZE tx_size,
+                             unsigned int count_sat,
+                             unsigned int update_factor) {
+  const FRAME_CONTEXT *prev_ctx = &cm->frame_contexts[cm->frame_context_idx];
+  vp10_coeff_probs_model *const cur_probs = cm->fc->coef_probs[tx_size];
+#if CONFIG_ENTROPY
+  const vp10_coeff_probs_model *const prior_probs =
+      cm->partial_prob_update
+          ? (const vp10_coeff_probs_model *)cm->starting_coef_probs[tx_size]
+          : prev_ctx->coef_probs[tx_size];
+#else
+  const vp10_coeff_probs_model *const prior_probs =
+      prev_ctx->coef_probs[tx_size];
+#endif  // CONFIG_ENTROPY
+  const vp10_coeff_count_model *const token_counts =
+      (const vp10_coeff_count_model *)cm->counts.coef[tx_size];
+  const unsigned int(*eob_branch)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
+      (const unsigned int(*)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS])
+          cm->counts.eob_branch[tx_size];
+  int plane, ref, band, ctx, node;
+
+  for (plane = 0; plane < PLANE_TYPES; ++plane) {
+    for (ref = 0; ref < REF_TYPES; ++ref) {
+      for (band = 0; band < COEF_BANDS; ++band) {
+        for (ctx = 0; ctx < BAND_COEFF_CONTEXTS(band); ++ctx) {
+          const int n0 = token_counts[plane][ref][band][ctx][ZERO_TOKEN];
+          const int n1 = token_counts[plane][ref][band][ctx][ONE_TOKEN];
+          const int n2 = token_counts[plane][ref][band][ctx][TWO_TOKEN];
+          const int neob =
+              token_counts[plane][ref][band][ctx][EOB_MODEL_TOKEN];
+          // One {count, complementary count} pair per unconstrained node.
+          const unsigned int branch_ct[UNCONSTRAINED_NODES][2] = {
+            { neob, eob_branch[plane][ref][band][ctx] - neob },
+            { n0, n1 + n2 },
+            { n1, n2 }
+          };
+          for (node = 0; node < UNCONSTRAINED_NODES; ++node) {
+            cur_probs[plane][ref][band][ctx][node] = vp10_merge_probs(
+                prior_probs[plane][ref][band][ctx][node], branch_ct[node],
+                count_sat, update_factor);
+          }
+        }
+      }
+    }
+  }
+}
+
+// Adapts the coefficient probabilities of every transform size (TX_4X4
+// through TX_32X32). The saturation count and update factor passed to
+// adapt_coef_probs() use the *_AFTER_KEY* constants when the last frame was a
+// key frame and the regular constants otherwise; with CONFIG_ENTROPY a
+// partial update (cm->partial_prob_update == 1) always uses the regular
+// update factor. With CONFIG_ANS the coefficient cdfs are rebuilt afterwards.
+void vp10_adapt_coef_probs(VP10_COMMON *cm) {
+  const int after_key_frame = (cm->last_frame_type == KEY_FRAME);
+  unsigned int count_sat, update_factor;
+  TX_SIZE tx;
+
+#if CONFIG_ENTROPY
+  count_sat = after_key_frame ? COEF_COUNT_SAT_AFTER_KEY_BITS
+                              : COEF_COUNT_SAT_BITS;
+  /* adapt quickly after a key frame, unless this is a partial update */
+  update_factor = (after_key_frame && cm->partial_prob_update != 1)
+                      ? COEF_MAX_UPDATE_FACTOR_AFTER_KEY_BITS
+                      : COEF_MAX_UPDATE_FACTOR_BITS;
+#else
+  count_sat = after_key_frame ? COEF_COUNT_SAT_AFTER_KEY : COEF_COUNT_SAT;
+  /* adapt quickly after a key frame */
+  update_factor = after_key_frame ? COEF_MAX_UPDATE_FACTOR_AFTER_KEY
+                                  : COEF_MAX_UPDATE_FACTOR;
+#endif  // CONFIG_ENTROPY
+
+  for (tx = TX_4X4; tx <= TX_32X32; ++tx) {
+    adapt_coef_probs(cm, tx, count_sat, update_factor);
+  }
+#if CONFIG_ANS
+  vp10_coef_pareto_cdfs(cm->fc);
+#endif
+}
+
+#if CONFIG_ENTROPY
+// Runs a partial coefficient-probability adaptation: when
+// cm->refresh_frame_context is REFRESH_FRAME_CONTEXT_BACKWARD, sets
+// cm->partial_prob_update and calls vp10_adapt_coef_probs(); otherwise does
+// nothing. |mi_row| and |mi_col| are currently unused.
+void vp10_partial_adapt_probs(VP10_COMMON *cm, int mi_row, int mi_col) {
+  (void)mi_row;
+  (void)mi_col;
+
+  if (cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_BACKWARD) return;
+
+  cm->partial_prob_update = 1;
+  vp10_adapt_coef_probs(cm);
+}
+#endif // CONFIG_ENTROPY
diff --git a/av1/common/entropy.h b/av1/common/entropy.h
new file mode 100644
index 0000000..b0afd46
--- /dev/null
+++ b/av1/common/entropy.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ENTROPY_H_
+#define VP10_COMMON_ENTROPY_H_
+
+#include "aom/vpx_integer.h"
+#include "aom_dsp/prob.h"
+
+#if CONFIG_ANS
+#include "av1/common/ans.h"
+#endif // CONFIG_ANS
+#include "av1/common/common.h"
+#include "av1/common/enums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DIFF_UPDATE_PROB 252
+#define GROUP_DIFF_UPDATE_PROB 252
+
+#if CONFIG_ENTROPY
+#define COEF_PROBS_BUFS 16
+#define QCTX_BIN_BITS 2
+#define QCTX_BINS (1 << QCTX_BIN_BITS)
+#endif // CONFIG_ENTROPY
+
+// Coefficient token alphabet
+#define ZERO_TOKEN 0 // 0 Extra Bits 0+0
+#define ONE_TOKEN 1 // 1 Extra Bits 0+1
+#define TWO_TOKEN 2 // 2 Extra Bits 0+1
+#define THREE_TOKEN 3 // 3 Extra Bits 0+1
+#define FOUR_TOKEN 4 // 4 Extra Bits 0+1
+#define CATEGORY1_TOKEN 5 // 5-6 Extra Bits 1+1
+#define CATEGORY2_TOKEN 6 // 7-10 Extra Bits 2+1
+#define CATEGORY3_TOKEN 7 // 11-18 Extra Bits 3+1
+#define CATEGORY4_TOKEN 8 // 19-34 Extra Bits 4+1
+#define CATEGORY5_TOKEN 9 // 35-66 Extra Bits 5+1
+#define CATEGORY6_TOKEN 10 // 67+ Extra Bits 14+1
+#define EOB_TOKEN 11 // EOB Extra Bits 0+0
+
+#define ENTROPY_TOKENS 12
+
+#define ENTROPY_NODES 11
+
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_pt_energy_class[ENTROPY_TOKENS]);
+
+#define CAT1_MIN_VAL 5
+#define CAT2_MIN_VAL 7
+#define CAT3_MIN_VAL 11
+#define CAT4_MIN_VAL 19
+#define CAT5_MIN_VAL 35
+#define CAT6_MIN_VAL 67
+
+// Extra bit probabilities.
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat1_prob[1]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat2_prob[2]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat3_prob[3]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat4_prob[4]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat5_prob[5]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat6_prob[14]);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat1_prob_high10[1]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat2_prob_high10[2]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat3_prob_high10[3]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat4_prob_high10[4]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat5_prob_high10[5]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat6_prob_high10[16]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat1_prob_high12[1]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat2_prob_high12[2]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat3_prob_high12[3]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat4_prob_high12[4]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat5_prob_high12[5]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_cat6_prob_high12[18]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#define EOB_MODEL_TOKEN 3
+
+typedef struct {
+ const vpx_tree_index *tree;
+ const vpx_prob *prob;
+ int len;
+ int base_val;
+ const int16_t *cost;
+} vp10_extra_bit;
+
+// indexed by token value
+extern const vp10_extra_bit vp10_extra_bits[ENTROPY_TOKENS];
+#if CONFIG_VP9_HIGHBITDEPTH
+extern const vp10_extra_bit vp10_extra_bits_high10[ENTROPY_TOKENS];
+extern const vp10_extra_bit vp10_extra_bits_high12[ENTROPY_TOKENS];
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#define DCT_MAX_VALUE 16384
+#if CONFIG_VP9_HIGHBITDEPTH
+#define DCT_MAX_VALUE_HIGH10 65536
+#define DCT_MAX_VALUE_HIGH12 262144
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+/* Coefficients are predicted via a 3-dimensional probability table. */
+
+#define REF_TYPES 2 // intra=0, inter=1
+
+/* Middle dimension reflects the coefficient position within the transform. */
+#define COEF_BANDS 6
+
+/* Inside dimension is a measure of nearby complexity that reflects the energy
+   of nearby nonzero coefficients.  For the first coefficient (DC, unless
+ block type is 0), we look at the (already encoded) blocks above and to the
+ left of the current block. The context index is then the number (0,1,or 2)
+ of these blocks having nonzero coefficients.
+ After decoding a coefficient, the measure is determined by the size of the
+ most recently decoded coefficient.
+ Note that the intuitive meaning of this measure changes as coefficients
+ are decoded, e.g., prior to the first token, a zero means that my neighbors
+ are empty while, after the first token, because of the use of end-of-block,
+ a zero means we just decoded a zero and hence guarantees that a non-zero
+ coefficient will appear later in this block. However, this shift
+ in meaning is perfectly OK because our context depends also on the
+ coefficient band (and since zigzag positions 0, 1, and 2 are in
+ distinct bands). */
+
+#define COEFF_CONTEXTS 6
+#define BAND_COEFF_CONTEXTS(band) ((band) == 0 ? 3 : COEFF_CONTEXTS)
+
+// #define ENTROPY_STATS
+
+typedef unsigned int
+ vp10_coeff_count[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS][ENTROPY_TOKENS];
+typedef unsigned int
+ vp10_coeff_stats[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS][ENTROPY_NODES][2];
+
+#define SUBEXP_PARAM 4 /* Subexponential code parameter */
+#define MODULUS_PARAM 13 /* Modulus parameter */
+
+struct VP10Common;
+void vp10_default_coef_probs(struct VP10Common *cm);
+void vp10_adapt_coef_probs(struct VP10Common *cm);
+#if CONFIG_ENTROPY
+void vp10_partial_adapt_probs(struct VP10Common *cm, int mi_row, int mi_col);
+#endif // CONFIG_ENTROPY
+
+// This is the index in the scan order beyond which all coefficients for
+// 8x8 transform and above are in the top band.
+// This macro is currently unused but may be used by certain implementations
+#define MAXBAND_INDEX 21
+
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x8plus[1024]);
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x8_8x4[32]);
+#endif // CONFIG_EXT_TX
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x4[16]);
+
+DECLARE_ALIGNED(16, extern const uint16_t, band_count_table[TX_SIZES_ALL][8]);
+DECLARE_ALIGNED(16, extern const uint16_t,
+ band_cum_count_table[TX_SIZES_ALL][8]);
+
+static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
+  switch (tx_size) {  // select the band-translate table for this tx_size
+    case TX_4X4: return vp10_coefband_trans_4x4;
+#if CONFIG_EXT_TX
+    case TX_4X8: return vp10_coefband_trans_4x8_8x4;
+#endif  // CONFIG_EXT_TX
+    default: return vp10_coefband_trans_8x8plus;  // incl. TX_8X4 -- intended?
+  }
+}
+
+// NOTE(review): this comment describes 128 stored lists (ONE node probs
+// 1, 3, 5, 7, ..., 253, 255) with in-between probabilities interpolated
+// linearly, but the table below has COEFF_PROB_MODELS (255) rows; confirm.
+
+#define COEFF_PROB_MODELS 255
+
+#define UNCONSTRAINED_NODES 3
+
+#define PIVOT_NODE 2 // which node is pivot
+
+#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES)
+extern const vpx_tree_index vp10_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)];
+extern const vpx_prob vp10_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES];
+#if CONFIG_ANS
+extern const AnsP10
+ vp10_pareto8_token_probs[COEFF_PROB_MODELS][ENTROPY_TOKENS - 2];
+
+typedef rans_dec_lut coeff_cdf_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS];
+#endif // CONFIG_ANS
+
+typedef vpx_prob vp10_coeff_probs_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS]
+ [UNCONSTRAINED_NODES];
+
+typedef unsigned int vp10_coeff_count_model
+ [REF_TYPES][COEF_BANDS][COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
+
+void vp10_model_to_full_probs(const vpx_prob *model, vpx_prob *full);
+
+typedef char ENTROPY_CONTEXT;
+
+static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a,
+                                           ENTROPY_CONTEXT b) {
+  return (a != 0) + (b != 0);  // 0, 1 or 2: how many inputs are nonzero
+}
+
+static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
+                                      const ENTROPY_CONTEXT *l) {
+  ENTROPY_CONTEXT above_ec = 0, left_ec = 0;  // reduced to 0/1 flags below
+
+  switch (tx_size) {  // each side tests 1, 2, 4 or 8 context bytes at once
+    case TX_4X4:  // one context entry per side
+      above_ec = a[0] != 0;
+      left_ec = l[0] != 0;
+      break;
+#if CONFIG_EXT_TX
+    case TX_4X8:  // one entry above, two entries to the left
+      above_ec = a[0] != 0;
+      left_ec = !!*(const uint16_t *)l;  // NOTE(review): assumes aligned l
+      break;
+    case TX_8X4:  // two entries above, one entry to the left
+      above_ec = !!*(const uint16_t *)a;
+      left_ec = l[0] != 0;
+      break;
+    case TX_8X16:
+      above_ec = !!*(const uint16_t *)a;
+      left_ec = !!*(const uint32_t *)l;
+      break;
+    case TX_16X8:
+      above_ec = !!*(const uint32_t *)a;
+      left_ec = !!*(const uint16_t *)l;
+      break;
+    case TX_16X32:
+      above_ec = !!*(const uint32_t *)a;
+      left_ec = !!*(const uint64_t *)l;
+      break;
+    case TX_32X16:
+      above_ec = !!*(const uint64_t *)a;
+      left_ec = !!*(const uint32_t *)l;
+      break;
+#endif  // CONFIG_EXT_TX
+    case TX_8X8:  // square sizes read the same width on both sides
+      above_ec = !!*(const uint16_t *)a;
+      left_ec = !!*(const uint16_t *)l;
+      break;
+    case TX_16X16:
+      above_ec = !!*(const uint32_t *)a;
+      left_ec = !!*(const uint32_t *)l;
+      break;
+    case TX_32X32:
+      above_ec = !!*(const uint64_t *)a;
+      left_ec = !!*(const uint64_t *)l;
+      break;
+    default: assert(0 && "Invalid transform size."); break;
+  }
+  return combine_entropy_contexts(above_ec, left_ec);  // result is 0, 1 or 2
+}
+
+#if CONFIG_ANS
+struct frame_contexts;
+void vp10_coef_pareto_cdfs(struct frame_contexts *fc);
+#endif // CONFIG_ANS
+
+#if CONFIG_ENTROPY
+#define COEF_COUNT_SAT_BITS 5
+#define COEF_MAX_UPDATE_FACTOR_BITS 7
+#define COEF_COUNT_SAT_AFTER_KEY_BITS 5
+#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY_BITS 7
+#define MODE_MV_COUNT_SAT_BITS 5
+#define MODE_MV_MAX_UPDATE_FACTOR_BITS 7
+
+#else
+
+#define COEF_COUNT_SAT 24
+#define COEF_MAX_UPDATE_FACTOR 112
+#define COEF_COUNT_SAT_AFTER_KEY 24
+#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128
+
+#endif // CONFIG_ENTROPY
+
+static INLINE vpx_prob vp10_merge_probs(vpx_prob pre_prob,
+                                        const unsigned int ct[2],
+                                        unsigned int count_sat,
+                                        unsigned int max_update_factor) {
+#if CONFIG_ENTROPY  // here count_sat / max_update_factor are shift amounts
+  const vpx_prob prob = get_binary_prob(ct[0], ct[1]);
+  const unsigned int count =
+      VPXMIN(ct[0] + ct[1], (unsigned int)(1 << count_sat));  // <= 2^count_sat
+  const unsigned int factor = count << (max_update_factor - count_sat);
+  return weighted_prob(pre_prob, prob, factor);
+#else
+  return merge_probs(pre_prob, ct, count_sat, max_update_factor);
+#endif  // CONFIG_ENTROPY
+}
+
+static INLINE vpx_prob vp10_mode_mv_merge_probs(vpx_prob pre_prob,
+ const unsigned int ct[2]) {
+#if CONFIG_ENTROPY
+ return vp10_merge_probs(pre_prob, ct, MODE_MV_COUNT_SAT_BITS,
+ MODE_MV_MAX_UPDATE_FACTOR_BITS);
+#else
+ return mode_mv_merge_probs(pre_prob, ct);
+#endif // CONFIG_ENTROPY
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_ENTROPY_H_
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
new file mode 100644
index 0000000..98e26e7
--- /dev/null
+++ b/av1/common/entropymode.c
@@ -0,0 +1,1631 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "aom_mem/vpx_mem.h"
+
+#include "av1/common/reconinter.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/seg_common.h"
+
+const vpx_prob vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] =
+ { {
+ // above = dc
+ { 137, 30, 42, 148, 151, 207, 70, 52, 91 }, // left = dc
+ { 92, 45, 102, 136, 116, 180, 74, 90, 100 }, // left = v
+ { 73, 32, 19, 187, 222, 215, 46, 34, 100 }, // left = h
+ { 91, 30, 32, 116, 121, 186, 93, 86, 94 }, // left = d45
+ { 72, 35, 36, 149, 68, 206, 68, 63, 105 }, // left = d135
+ { 73, 31, 28, 138, 57, 124, 55, 122, 151 }, // left = d117
+ { 67, 23, 21, 140, 126, 197, 40, 37, 171 }, // left = d153
+ { 86, 27, 28, 128, 154, 212, 45, 43, 53 }, // left = d207
+ { 74, 32, 27, 107, 86, 160, 63, 134, 102 }, // left = d63
+ { 59, 67, 44, 140, 161, 202, 78, 67, 119 } // left = tm
+ },
+ {
+ // above = v
+ { 63, 36, 126, 146, 123, 158, 60, 90, 96 }, // left = dc
+ { 43, 46, 168, 134, 107, 128, 69, 142, 92 }, // left = v
+ { 44, 29, 68, 159, 201, 177, 50, 57, 77 }, // left = h
+ { 58, 38, 76, 114, 97, 172, 78, 133, 92 }, // left = d45
+ { 46, 41, 76, 140, 63, 184, 69, 112, 57 }, // left = d135
+ { 38, 32, 85, 140, 46, 112, 54, 151, 133 }, // left = d117
+ { 39, 27, 61, 131, 110, 175, 44, 75, 136 }, // left = d153
+ { 52, 30, 74, 113, 130, 175, 51, 64, 58 }, // left = d207
+ { 47, 35, 80, 100, 74, 143, 64, 163, 74 }, // left = d63
+ { 36, 61, 116, 114, 128, 162, 80, 125, 82 } // left = tm
+ },
+ {
+ // above = h
+ { 82, 26, 26, 171, 208, 204, 44, 32, 105 }, // left = dc
+ { 55, 44, 68, 166, 179, 192, 57, 57, 108 }, // left = v
+ { 42, 26, 11, 199, 241, 228, 23, 15, 85 }, // left = h
+ { 68, 42, 19, 131, 160, 199, 55, 52, 83 }, // left = d45
+ { 58, 50, 25, 139, 115, 232, 39, 52, 118 }, // left = d135
+ { 50, 35, 33, 153, 104, 162, 64, 59, 131 }, // left = d117
+ { 44, 24, 16, 150, 177, 202, 33, 19, 156 }, // left = d153
+ { 55, 27, 12, 153, 203, 218, 26, 27, 49 }, // left = d207
+ { 53, 49, 21, 110, 116, 168, 59, 80, 76 }, // left = d63
+ { 38, 72, 19, 168, 203, 212, 50, 50, 107 } // left = tm
+ },
+ {
+ // above = d45
+ { 103, 26, 36, 129, 132, 201, 83, 80, 93 }, // left = dc
+ { 59, 38, 83, 112, 103, 162, 98, 136, 90 }, // left = v
+ { 62, 30, 23, 158, 200, 207, 59, 57, 50 }, // left = h
+ { 67, 30, 29, 84, 86, 191, 102, 91, 59 }, // left = d45
+ { 60, 32, 33, 112, 71, 220, 64, 89, 104 }, // left = d135
+ { 53, 26, 34, 130, 56, 149, 84, 120, 103 }, // left = d117
+ { 53, 21, 23, 133, 109, 210, 56, 77, 172 }, // left = d153
+ { 77, 19, 29, 112, 142, 228, 55, 66, 36 }, // left = d207
+ { 61, 29, 29, 93, 97, 165, 83, 175, 162 }, // left = d63
+ { 47, 47, 43, 114, 137, 181, 100, 99, 95 } // left = tm
+ },
+ {
+ // above = d135
+ { 69, 23, 29, 128, 83, 199, 46, 44, 101 }, // left = dc
+ { 53, 40, 55, 139, 69, 183, 61, 80, 110 }, // left = v
+ { 40, 29, 19, 161, 180, 207, 43, 24, 91 }, // left = h
+ { 60, 34, 19, 105, 61, 198, 53, 64, 89 }, // left = d45
+ { 52, 31, 22, 158, 40, 209, 58, 62, 89 }, // left = d135
+ { 44, 31, 29, 147, 46, 158, 56, 102, 198 }, // left = d117
+ { 35, 19, 12, 135, 87, 209, 41, 45, 167 }, // left = d153
+ { 55, 25, 21, 118, 95, 215, 38, 39, 66 }, // left = d207
+ { 51, 38, 25, 113, 58, 164, 70, 93, 97 }, // left = d63
+ { 47, 54, 34, 146, 108, 203, 72, 103, 151 } // left = tm
+ },
+ {
+ // above = d117
+ { 64, 19, 37, 156, 66, 138, 49, 95, 133 }, // left = dc
+ { 46, 27, 80, 150, 55, 124, 55, 121, 135 }, // left = v
+ { 36, 23, 27, 165, 149, 166, 54, 64, 118 }, // left = h
+ { 53, 21, 36, 131, 63, 163, 60, 109, 81 }, // left = d45
+ { 40, 26, 35, 154, 40, 185, 51, 97, 123 }, // left = d135
+ { 35, 19, 34, 179, 19, 97, 48, 129, 124 }, // left = d117
+ { 36, 20, 26, 136, 62, 164, 33, 77, 154 }, // left = d153
+ { 45, 18, 32, 130, 90, 157, 40, 79, 91 }, // left = d207
+ { 45, 26, 28, 129, 45, 129, 49, 147, 123 }, // left = d63
+ { 38, 44, 51, 136, 74, 162, 57, 97, 121 } // left = tm
+ },
+ {
+ // above = d153
+ { 75, 17, 22, 136, 138, 185, 32, 34, 166 }, // left = dc
+ { 56, 39, 58, 133, 117, 173, 48, 53, 187 }, // left = v
+ { 35, 21, 12, 161, 212, 207, 20, 23, 145 }, // left = h
+ { 56, 29, 19, 117, 109, 181, 55, 68, 112 }, // left = d45
+ { 47, 29, 17, 153, 64, 220, 59, 51, 114 }, // left = d135
+ { 46, 16, 24, 136, 76, 147, 41, 64, 172 }, // left = d117
+ { 34, 17, 11, 108, 152, 187, 13, 15, 209 }, // left = d153
+ { 51, 24, 14, 115, 133, 209, 32, 26, 104 }, // left = d207
+ { 55, 30, 18, 122, 79, 179, 44, 88, 116 }, // left = d63
+ { 37, 49, 25, 129, 168, 164, 41, 54, 148 } // left = tm
+ },
+ {
+ // above = d207
+ { 82, 22, 32, 127, 143, 213, 39, 41, 70 }, // left = dc
+ { 62, 44, 61, 123, 105, 189, 48, 57, 64 }, // left = v
+ { 47, 25, 17, 175, 222, 220, 24, 30, 86 }, // left = h
+ { 68, 36, 17, 106, 102, 206, 59, 74, 74 }, // left = d45
+ { 57, 39, 23, 151, 68, 216, 55, 63, 58 }, // left = d135
+ { 49, 30, 35, 141, 70, 168, 82, 40, 115 }, // left = d117
+ { 51, 25, 15, 136, 129, 202, 38, 35, 139 }, // left = d153
+ { 68, 26, 16, 111, 141, 215, 29, 28, 28 }, // left = d207
+ { 59, 39, 19, 114, 75, 180, 77, 104, 42 }, // left = d63
+ { 40, 61, 26, 126, 152, 206, 61, 59, 93 } // left = tm
+ },
+ {
+ // above = d63
+ { 78, 23, 39, 111, 117, 170, 74, 124, 94 }, // left = dc
+ { 48, 34, 86, 101, 92, 146, 78, 179, 134 }, // left = v
+ { 47, 22, 24, 138, 187, 178, 68, 69, 59 }, // left = h
+ { 56, 25, 33, 105, 112, 187, 95, 177, 129 }, // left = d45
+ { 48, 31, 27, 114, 63, 183, 82, 116, 56 }, // left = d135
+ { 43, 28, 37, 121, 63, 123, 61, 192, 169 }, // left = d117
+ { 42, 17, 24, 109, 97, 177, 56, 76, 122 }, // left = d153
+ { 58, 18, 28, 105, 139, 182, 70, 92, 63 }, // left = d207
+ { 46, 23, 32, 74, 86, 150, 67, 183, 88 }, // left = d63
+ { 36, 38, 48, 92, 122, 165, 88, 137, 91 } // left = tm
+ },
+ {
+ // above = tm
+ { 65, 70, 60, 155, 159, 199, 61, 60, 81 }, // left = dc
+ { 44, 78, 115, 132, 119, 173, 71, 112, 93 }, // left = v
+ { 39, 38, 21, 184, 227, 206, 42, 32, 64 }, // left = h
+ { 58, 47, 36, 124, 137, 193, 80, 82, 78 }, // left = d45
+ { 49, 50, 35, 144, 95, 205, 63, 78, 59 }, // left = d135
+ { 41, 53, 52, 148, 71, 142, 65, 128, 51 }, // left = d117
+ { 40, 36, 28, 143, 143, 202, 40, 55, 137 }, // left = d153
+ { 52, 34, 29, 129, 183, 227, 42, 35, 43 }, // left = d207
+ { 42, 44, 44, 104, 105, 164, 64, 130, 80 }, // left = d63
+ { 43, 81, 53, 140, 169, 204, 68, 84, 72 } // left = tm
+ } };
+
+static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = {
+ { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8
+ { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16
+ { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32
+ { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32
+};
+
+static const vpx_prob default_uv_probs[INTRA_MODES][INTRA_MODES - 1] = {
+ { 120, 7, 76, 176, 208, 126, 28, 54, 103 }, // y = dc
+ { 48, 12, 154, 155, 139, 90, 34, 117, 119 }, // y = v
+ { 67, 6, 25, 204, 243, 158, 13, 21, 96 }, // y = h
+ { 97, 5, 44, 131, 176, 139, 48, 68, 97 }, // y = d45
+ { 83, 5, 42, 156, 111, 152, 26, 49, 152 }, // y = d135
+ { 80, 5, 58, 178, 74, 83, 33, 62, 145 }, // y = d117
+ { 86, 5, 32, 154, 192, 168, 14, 22, 163 }, // y = d153
+ { 85, 5, 32, 156, 216, 148, 19, 29, 73 }, // y = d207
+ { 77, 7, 64, 116, 132, 122, 37, 126, 120 }, // y = d63
+ { 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm
+};
+
+#if CONFIG_EXT_PARTITION_TYPES
+static const vpx_prob
+ default_partition_probs[PARTITION_CONTEXTS][EXT_PARTITION_TYPES - 1] = {
+ // 8x8 -> 4x4
+ { 199, 122, 141, 128, 128, 128, 128 }, // a/l both not split
+ { 147, 63, 159, 128, 128, 128, 128 }, // a split, l not split
+ { 148, 133, 118, 128, 128, 128, 128 }, // l split, a not split
+ { 121, 104, 114, 128, 128, 128, 128 }, // a/l both split
+ // 16x16 -> 8x8
+ { 174, 73, 87, 128, 128, 128, 128 }, // a/l both not split
+ { 92, 41, 83, 128, 128, 128, 128 }, // a split, l not split
+ { 82, 99, 50, 128, 128, 128, 128 }, // l split, a not split
+ { 53, 39, 39, 128, 128, 128, 128 }, // a/l both split
+ // 32x32 -> 16x16
+ { 177, 58, 59, 128, 128, 128, 128 }, // a/l both not split
+ { 68, 26, 63, 128, 128, 128, 128 }, // a split, l not split
+ { 52, 79, 25, 128, 128, 128, 128 }, // l split, a not split
+ { 17, 14, 12, 128, 128, 128, 128 }, // a/l both split
+ // 64x64 -> 32x32
+ { 222, 34, 30, 128, 128, 128, 128 }, // a/l both not split
+ { 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split
+ { 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split
+ { 10, 7, 6, 128, 128, 128, 128 }, // a/l both split
+#if CONFIG_EXT_PARTITION
+ // 128x128 -> 64x64
+ { 222, 34, 30, 128, 128, 128, 128 }, // a/l both not split
+ { 72, 16, 44, 128, 128, 128, 128 }, // a split, l not split
+ { 58, 32, 12, 128, 128, 128, 128 }, // l split, a not split
+ { 10, 7, 6, 128, 128, 128, 128 }, // a/l both split
+#endif // CONFIG_EXT_PARTITION
+ };
+#else
+static const vpx_prob
+ default_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] = {
+ // 8x8 -> 4x4
+ { 199, 122, 141 }, // a/l both not split
+ { 147, 63, 159 }, // a split, l not split
+ { 148, 133, 118 }, // l split, a not split
+ { 121, 104, 114 }, // a/l both split
+ // 16x16 -> 8x8
+ { 174, 73, 87 }, // a/l both not split
+ { 92, 41, 83 }, // a split, l not split
+ { 82, 99, 50 }, // l split, a not split
+ { 53, 39, 39 }, // a/l both split
+ // 32x32 -> 16x16
+ { 177, 58, 59 }, // a/l both not split
+ { 68, 26, 63 }, // a split, l not split
+ { 52, 79, 25 }, // l split, a not split
+ { 17, 14, 12 }, // a/l both split
+ // 64x64 -> 32x32
+ { 222, 34, 30 }, // a/l both not split
+ { 72, 16, 44 }, // a split, l not split
+ { 58, 32, 12 }, // l split, a not split
+ { 10, 7, 6 }, // a/l both split
+#if CONFIG_EXT_PARTITION
+ // 128x128 -> 64x64
+ { 222, 34, 30 }, // a/l both not split
+ { 72, 16, 44 }, // a split, l not split
+ { 58, 32, 12 }, // l split, a not split
+ { 10, 7, 6 }, // a/l both split
+#endif // CONFIG_EXT_PARTITION
+ };
+#endif // CONFIG_EXT_PARTITION_TYPES
+
+#if CONFIG_REF_MV
+static const vpx_prob default_newmv_prob[NEWMV_MODE_CONTEXTS] = {
+ 200, 180, 150, 150, 110, 70, 60,
+};
+
+static const vpx_prob default_zeromv_prob[ZEROMV_MODE_CONTEXTS] = {
+ 192, 64,
+};
+
+static const vpx_prob default_refmv_prob[REFMV_MODE_CONTEXTS] = {
+ 220, 220, 200, 200, 180, 128, 30, 220, 30,
+};
+
+static const vpx_prob default_drl_prob[DRL_MODE_CONTEXTS] = { 128, 160, 180,
+ 128, 160 };
+
+#if CONFIG_EXT_INTER
+static const vpx_prob default_new2mv_prob = 180;
+#endif // CONFIG_EXT_INTER
+#endif // CONFIG_REF_MV
+
+static const vpx_prob
+    default_inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1] = {
+#if CONFIG_EXT_INTER
+      // TODO(zoeliu): To adjust the initial default probs
+      { 2, 173, 34, 173 },  // 0 = both zero mv
+      { 7, 145, 85, 145 },  // 1 = one zero mv + one predicted mv
+      { 7, 166, 63, 166 },  // 2 = two predicted mvs
+      { 7, 94, 66, 128 },   // 3 = one predicted/zero and one new mv
+      { 8, 64, 46, 128 },   // 4 = two new mvs
+      { 17, 81, 31, 128 },  // 5 = one intra neighbour + x
+      { 25, 29, 30, 96 },   // 6 = two intra neighbours
+#else
+      { 2, 173, 34 },  // 0 = both zero mv
+      { 7, 145, 85 },  // 1 = one zero mv + one predicted mv
+      { 7, 166, 63 },  // 2 = two predicted mvs
+      { 7, 94, 66 },   // 3 = one predicted/zero and one new mv
+      { 8, 64, 46 },   // 4 = two new mvs
+      { 17, 81, 31 },  // 5 = one intra neighbour + x
+      { 25, 29, 30 },  // 6 = two intra neighbours
+#endif  // CONFIG_EXT_INTER
+    };
+
+#if CONFIG_EXT_INTER
+static const vpx_prob default_inter_compound_mode_probs
+ [INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES - 1] = {
+ { 2, 173, 68, 192, 64, 192, 128, 180, 180 }, // 0 = both zero mv
+ { 7, 145, 160, 192, 64, 192, 128, 180, 180 }, // 1 = 1 zero + 1 predicted
+ { 7, 166, 126, 192, 64, 192, 128, 180, 180 }, // 2 = two predicted mvs
+ { 7, 94, 132, 192, 64, 192, 128, 180, 180 }, // 3 = 1 pred/zero, 1 new
+ { 8, 64, 64, 192, 64, 192, 128, 180, 180 }, // 4 = two new mvs
+ { 17, 81, 52, 192, 64, 192, 128, 180, 180 }, // 5 = one intra neighbour
+ { 25, 29, 50, 192, 64, 192, 128, 180, 180 }, // 6 = two intra neighbours
+ };
+
+static const vpx_prob default_interintra_prob[BLOCK_SIZE_GROUPS] = {
+ 208, 208, 208, 208,
+};
+
+static const vpx_prob
+ default_interintra_mode_prob[BLOCK_SIZE_GROUPS][INTERINTRA_MODES - 1] = {
+ { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8
+ { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16
+ { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32
+ { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32
+ };
+
+static const vpx_prob default_wedge_interintra_prob[BLOCK_SIZES] = {
+ 208, 208, 208, 208, 208, 208, 216, 216, 216, 224, 224, 224, 240,
+#if CONFIG_EXT_PARTITION
+ 208, 208, 208
+#endif // CONFIG_EXT_PARTITION
+};
+
+static const vpx_prob default_wedge_interinter_prob[BLOCK_SIZES] = {
+ 208, 208, 208, 208, 208, 208, 216, 216, 216, 224, 224, 224, 240,
+#if CONFIG_EXT_PARTITION
+ 255, 255, 255
+#endif // CONFIG_EXT_PARTITION
+};
+#endif // CONFIG_EXT_INTER
+
+// Change this section appropriately once warped motion is supported
+#if CONFIG_OBMC && !CONFIG_WARPED_MOTION
+const vpx_tree_index vp10_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)] = {
+  -SIMPLE_TRANSLATION, -OBMC_CAUSAL
+};
+static const vpx_prob default_motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1] =
+    {
+      { 255 }, { 255 }, { 255 }, { 151 }, { 153 }, { 144 }, { 178 },
+      { 165 }, { 160 }, { 207 }, { 195 }, { 168 }, { 244 },
+#if CONFIG_EXT_PARTITION
+      { 252 }, { 252 }, { 252 },
+#endif  // CONFIG_EXT_PARTITION
+    };
+
+#elif !CONFIG_OBMC && CONFIG_WARPED_MOTION
+
+const vpx_tree_index vp10_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)] = {
+  -SIMPLE_TRANSLATION, -WARPED_CAUSAL
+};
+static const vpx_prob default_motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1] =
+    {
+      { 255 }, { 255 }, { 255 }, { 151 }, { 153 }, { 144 }, { 178 },
+      { 165 }, { 160 }, { 207 }, { 195 }, { 168 }, { 244 },
+#if CONFIG_EXT_PARTITION
+      { 252 }, { 252 }, { 252 },
+#endif  // CONFIG_EXT_PARTITION
+    };
+
+#elif CONFIG_OBMC && CONFIG_WARPED_MOTION
+
+const vpx_tree_index vp10_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)] = {
+  -SIMPLE_TRANSLATION, 2, -OBMC_CAUSAL, -WARPED_CAUSAL,
+};
+static const vpx_prob default_motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1] =
+    {
+      { 255, 200 }, { 255, 200 }, { 255, 200 }, { 151, 200 }, { 153, 200 },
+      { 144, 200 }, { 178, 200 }, { 165, 200 }, { 160, 200 }, { 207, 200 },
+      { 195, 200 }, { 168, 200 }, { 244, 200 },
+#if CONFIG_EXT_PARTITION
+      { 252, 200 }, { 252, 200 }, { 252, 200 },
+#endif  // CONFIG_EXT_PARTITION
+    };
+#endif  // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
+const vpx_tree_index vp10_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = {
+ -DC_PRED, 2, /* 0 = DC_NODE */
+ -TM_PRED, 4, /* 1 = TM_NODE */
+ -V_PRED, 6, /* 2 = V_NODE */
+ 8, 12, /* 3 = COM_NODE */
+ -H_PRED, 10, /* 4 = H_NODE */
+ -D135_PRED, -D117_PRED, /* 5 = D135_NODE */
+ -D45_PRED, 14, /* 6 = D45_NODE */
+ -D63_PRED, 16, /* 7 = D63_NODE */
+ -D153_PRED, -D207_PRED /* 8 = D153_NODE */
+};
+
+const vpx_tree_index vp10_inter_mode_tree[TREE_SIZE(INTER_MODES)] = {
+ -INTER_OFFSET(ZEROMV), 2,
+ -INTER_OFFSET(NEARESTMV), 4,
+#if CONFIG_EXT_INTER
+ -INTER_OFFSET(NEARMV), 6,
+ -INTER_OFFSET(NEWMV), -INTER_OFFSET(NEWFROMNEARMV)
+#else
+ -INTER_OFFSET(NEARMV), -INTER_OFFSET(NEWMV)
+#endif // CONFIG_EXT_INTER
+};
+
+#if CONFIG_EXT_INTER
+/* clang-format off */
+const vpx_tree_index vp10_interintra_mode_tree[TREE_SIZE(INTERINTRA_MODES)] = {
+ -II_DC_PRED, 2, /* 0 = II_DC_NODE */
+ -II_TM_PRED, 4, /* 1 = II_TM_NODE */
+ -II_V_PRED, 6, /* 2 = II_V_NODE */
+ 8, 12, /* 3 = II_COM_NODE */
+ -II_H_PRED, 10, /* 4 = II_H_NODE */
+ -II_D135_PRED, -II_D117_PRED, /* 5 = II_D135_NODE */
+ -II_D45_PRED, 14, /* 6 = II_D45_NODE */
+ -II_D63_PRED, 16, /* 7 = II_D63_NODE */
+ -II_D153_PRED, -II_D207_PRED /* 8 = II_D153_NODE */
+};
+
+const vpx_tree_index vp10_inter_compound_mode_tree
+ [TREE_SIZE(INTER_COMPOUND_MODES)] = {
+ -INTER_COMPOUND_OFFSET(ZERO_ZEROMV), 2,
+ -INTER_COMPOUND_OFFSET(NEAREST_NEARESTMV), 4,
+ 6, -INTER_COMPOUND_OFFSET(NEW_NEWMV),
+ 8, 12,
+ -INTER_COMPOUND_OFFSET(NEAR_NEARMV), 10,
+ -INTER_COMPOUND_OFFSET(NEAREST_NEARMV),
+ -INTER_COMPOUND_OFFSET(NEAR_NEARESTMV),
+ 14, 16,
+ -INTER_COMPOUND_OFFSET(NEAREST_NEWMV), -INTER_COMPOUND_OFFSET(NEW_NEARESTMV),
+ -INTER_COMPOUND_OFFSET(NEAR_NEWMV), -INTER_COMPOUND_OFFSET(NEW_NEARMV)
+};
+/* clang-format on */
+#endif // CONFIG_EXT_INTER
+
+const vpx_tree_index vp10_partition_tree[TREE_SIZE(PARTITION_TYPES)] = {
+ -PARTITION_NONE, 2, -PARTITION_HORZ, 4, -PARTITION_VERT, -PARTITION_SPLIT
+};
+
+#if CONFIG_EXT_PARTITION_TYPES
+/* clang-format off */
+const vpx_tree_index vp10_ext_partition_tree[TREE_SIZE(EXT_PARTITION_TYPES)] = {
+ -PARTITION_NONE, 2,
+ 6, 4,
+ 8, -PARTITION_SPLIT,
+ -PARTITION_HORZ, 10,
+ -PARTITION_VERT, 12,
+ -PARTITION_HORZ_A, -PARTITION_HORZ_B,
+ -PARTITION_VERT_A, -PARTITION_VERT_B
+};
+/* clang-format on */
+#endif // CONFIG_EXT_PARTITION_TYPES
+
+static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = {
+ 9, 102, 187, 225
+};
+
+static const vpx_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = {
+ 239, 183, 119, 96, 41
+};
+
+#if CONFIG_EXT_REFS
+static const vpx_prob default_comp_ref_p[REF_CONTEXTS][FWD_REFS - 1] = {
+ // TODO(zoeliu): To adjust the initial prob values.
+ { 33, 16, 16 },
+ { 77, 74, 74 },
+ { 142, 142, 142 },
+ { 172, 170, 170 },
+ { 238, 247, 247 }
+};
+static const vpx_prob default_comp_bwdref_p[REF_CONTEXTS][BWD_REFS - 1] = {
+ { 16 }, { 74 }, { 142 }, { 170 }, { 247 }
+};
+#else
+static const vpx_prob default_comp_ref_p[REF_CONTEXTS][COMP_REFS - 1] = {
+ { 50 }, { 126 }, { 123 }, { 221 }, { 226 }
+};
+#endif // CONFIG_EXT_REFS
+
+static const vpx_prob default_single_ref_p[REF_CONTEXTS][SINGLE_REFS - 1] = {
+#if CONFIG_EXT_REFS
+ { 33, 16, 16, 16, 16 },
+ { 77, 74, 74, 74, 74 },
+ { 142, 142, 142, 142, 142 },
+ { 172, 170, 170, 170, 170 },
+ { 238, 247, 247, 247, 247 }
+#else
+ { 33, 16 }, { 77, 74 }, { 142, 142 }, { 172, 170 }, { 238, 247 }
+#endif // CONFIG_EXT_REFS
+};
+
+const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)] = {
+ -TWO_COLORS, 2, -THREE_COLORS, 4, -FOUR_COLORS, 6,
+ -FIVE_COLORS, 8, -SIX_COLORS, 10, -SEVEN_COLORS, -EIGHT_COLORS,
+};
+
+// TODO(huisu): tune these probs
+const vpx_prob
+ vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] = {
+ { 96, 89, 100, 64, 77, 130 }, { 22, 15, 44, 16, 34, 82 },
+ { 30, 19, 57, 18, 38, 86 }, { 94, 36, 104, 23, 43, 92 },
+ { 116, 76, 107, 46, 65, 105 }, { 112, 82, 94, 40, 70, 112 },
+ { 147, 124, 123, 58, 69, 103 }, { 180, 113, 136, 49, 45, 114 },
+ { 107, 70, 87, 49, 154, 156 }, { 98, 105, 142, 63, 64, 152 },
+#if CONFIG_EXT_PARTITION
+ { 98, 105, 142, 63, 64, 152 }, { 98, 105, 142, 63, 64, 152 },
+ { 98, 105, 142, 63, 64, 152 },
+#endif // CONFIG_EXT_PARTITION
+ };
+
+const vpx_prob
+ vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1] =
+ {
+ { 160, 196, 228, 213, 175, 230 }, { 87, 148, 208, 141, 166, 163 },
+ { 72, 151, 204, 139, 155, 161 }, { 78, 135, 171, 104, 120, 173 },
+ { 59, 92, 131, 78, 92, 142 }, { 75, 118, 149, 84, 90, 128 },
+ { 89, 87, 92, 66, 66, 128 }, { 67, 53, 54, 55, 66, 93 },
+ { 120, 130, 83, 171, 75, 214 }, { 72, 55, 66, 68, 79, 107 },
+#if CONFIG_EXT_PARTITION
+ { 72, 55, 66, 68, 79, 107 }, { 72, 55, 66, 68, 79, 107 },
+ { 72, 55, 66, 68, 79, 107 },
+#endif // CONFIG_EXT_PARTITION
+ };
+
+const vpx_prob vp10_default_palette_y_mode_prob
+ [PALETTE_BLOCK_SIZES][PALETTE_Y_MODE_CONTEXTS] = {
+ { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 },
+ { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 },
+ { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 },
+ { 240, 180, 100 },
+#if CONFIG_EXT_PARTITION
+ { 240, 180, 100 }, { 240, 180, 100 }, { 240, 180, 100 },
+#endif // CONFIG_EXT_PARTITION
+ };
+
+const vpx_prob vp10_default_palette_uv_mode_prob[2] = { 253, 229 };
+
+const vpx_tree_index  // One tree per palette size: row k serves k + 2 colors.
+ vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)] = {  // Entries form pairs; a positive entry is the array index of the next pair, a negated constant names a color (presumably a leaf).
+ { // 2 colors
+ -PALETTE_COLOR_ONE, -PALETTE_COLOR_TWO },
+ { // 3 colors
+ -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, -PALETTE_COLOR_THREE },
+ { // 4 colors
+ -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE,
+ -PALETTE_COLOR_FOUR },
+ { // 5 colors
+ -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, 6,
+ -PALETTE_COLOR_FOUR, -PALETTE_COLOR_FIVE },
+ { // 6 colors
+ -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, 6,
+ -PALETTE_COLOR_FOUR, 8, -PALETTE_COLOR_FIVE, -PALETTE_COLOR_SIX },
+ { // 7 colors
+ -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, 6,
+ -PALETTE_COLOR_FOUR, 8, -PALETTE_COLOR_FIVE, 10, -PALETTE_COLOR_SIX,
+ -PALETTE_COLOR_SEVEN },
+ { // 8 colors
+ -PALETTE_COLOR_ONE, 2, -PALETTE_COLOR_TWO, 4, -PALETTE_COLOR_THREE, 6,
+ -PALETTE_COLOR_FOUR, 8, -PALETTE_COLOR_FIVE, 10, -PALETTE_COLOR_SIX, 12,
+ -PALETTE_COLOR_SEVEN, -PALETTE_COLOR_EIGHT },
+ };
+
+const vpx_prob vp10_default_palette_y_color_prob  // Default Y palette color-index probabilities.
+ [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = {  // Laid out as [palette size - 2][color context][entry].
+ {
+ // 2 colors (rows for k colors carry k - 1 leading values, then 255 when
+ { 230, 255, 128, 128, 128, 128, 128 },  // k < 8, then 128 filler; a few
+ { 214, 255, 128, 128, 128, 128, 128 },  // rows are 128 throughout).
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 240, 255, 128, 128, 128, 128, 128 },
+ { 73, 255, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 130, 255, 128, 128, 128, 128, 128 },
+ { 227, 255, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 188, 255, 128, 128, 128, 128, 128 },
+ { 75, 255, 128, 128, 128, 128, 128 },
+ { 250, 255, 128, 128, 128, 128, 128 },
+ { 223, 255, 128, 128, 128, 128, 128 },
+ { 252, 255, 128, 128, 128, 128, 128 },
+ },
+ {
+ // 3 colors
+ { 229, 137, 255, 128, 128, 128, 128 },
+ { 197, 120, 255, 128, 128, 128, 128 },
+ { 107, 195, 255, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 27, 151, 255, 128, 128, 128, 128 },
+ { 230, 130, 255, 128, 128, 128, 128 },
+ { 37, 230, 255, 128, 128, 128, 128 },
+ { 67, 221, 255, 128, 128, 128, 128 },
+ { 124, 230, 255, 128, 128, 128, 128 },
+ { 195, 109, 255, 128, 128, 128, 128 },
+ { 99, 122, 255, 128, 128, 128, 128 },
+ { 205, 208, 255, 128, 128, 128, 128 },
+ { 40, 235, 255, 128, 128, 128, 128 },
+ { 251, 132, 255, 128, 128, 128, 128 },
+ { 237, 186, 255, 128, 128, 128, 128 },
+ { 253, 112, 255, 128, 128, 128, 128 },
+ },
+ {
+ // 4 colors
+ { 195, 87, 128, 255, 128, 128, 128 },
+ { 143, 100, 123, 255, 128, 128, 128 },
+ { 94, 124, 119, 255, 128, 128, 128 },
+ { 77, 91, 130, 255, 128, 128, 128 },
+ { 39, 114, 178, 255, 128, 128, 128 },
+ { 222, 94, 125, 255, 128, 128, 128 },
+ { 44, 203, 132, 255, 128, 128, 128 },
+ { 68, 175, 122, 255, 128, 128, 128 },
+ { 110, 187, 124, 255, 128, 128, 128 },
+ { 152, 91, 128, 255, 128, 128, 128 },
+ { 70, 109, 181, 255, 128, 128, 128 },
+ { 133, 113, 164, 255, 128, 128, 128 },
+ { 47, 205, 133, 255, 128, 128, 128 },
+ { 247, 94, 136, 255, 128, 128, 128 },
+ { 205, 122, 146, 255, 128, 128, 128 },
+ { 251, 100, 141, 255, 128, 128, 128 },
+ },
+ {
+ // 5 colors
+ { 195, 65, 84, 125, 255, 128, 128 },
+ { 150, 76, 84, 121, 255, 128, 128 },
+ { 94, 110, 81, 117, 255, 128, 128 },
+ { 79, 85, 91, 139, 255, 128, 128 },
+ { 26, 102, 139, 127, 255, 128, 128 },
+ { 220, 73, 91, 119, 255, 128, 128 },
+ { 38, 203, 86, 127, 255, 128, 128 },
+ { 61, 186, 72, 124, 255, 128, 128 },
+ { 132, 199, 84, 128, 255, 128, 128 },
+ { 172, 52, 62, 120, 255, 128, 128 },
+ { 102, 89, 121, 122, 255, 128, 128 },
+ { 182, 48, 69, 186, 255, 128, 128 },
+ { 36, 206, 87, 126, 255, 128, 128 },
+ { 249, 55, 67, 122, 255, 128, 128 },
+ { 218, 88, 75, 122, 255, 128, 128 },
+ { 253, 64, 80, 119, 255, 128, 128 },
+ },
+ {
+ // 6 colors
+ { 182, 54, 64, 75, 118, 255, 128 },
+ { 126, 67, 70, 76, 116, 255, 128 },
+ { 79, 92, 67, 85, 120, 255, 128 },
+ { 63, 61, 81, 118, 132, 255, 128 },
+ { 21, 80, 105, 83, 119, 255, 128 },
+ { 215, 72, 74, 74, 111, 255, 128 },
+ { 50, 176, 63, 79, 120, 255, 128 },
+ { 72, 148, 66, 77, 120, 255, 128 },
+ { 105, 177, 57, 78, 130, 255, 128 },
+ { 150, 66, 66, 80, 127, 255, 128 },
+ { 81, 76, 109, 85, 116, 255, 128 },
+ { 113, 81, 62, 96, 148, 255, 128 },
+ { 54, 179, 69, 82, 121, 255, 128 },
+ { 244, 47, 48, 67, 118, 255, 128 },
+ { 198, 83, 53, 65, 121, 255, 128 },
+ { 250, 42, 51, 69, 110, 255, 128 },
+ },
+ {
+ // 7 colors
+ { 182, 45, 54, 62, 74, 113, 255 },
+ { 124, 63, 57, 62, 77, 114, 255 },
+ { 77, 80, 56, 66, 76, 117, 255 },
+ { 63, 57, 69, 98, 85, 131, 255 },
+ { 19, 81, 98, 63, 80, 116, 255 },
+ { 215, 56, 60, 63, 68, 105, 255 },
+ { 50, 174, 50, 60, 79, 118, 255 },
+ { 68, 151, 50, 58, 73, 117, 255 },
+ { 104, 182, 53, 57, 79, 127, 255 },
+ { 156, 50, 51, 63, 77, 111, 255 },
+ { 88, 67, 97, 59, 82, 120, 255 },
+ { 114, 81, 46, 65, 103, 132, 255 },
+ { 55, 166, 57, 66, 82, 120, 255 },
+ { 245, 34, 38, 43, 63, 114, 255 },
+ { 203, 68, 45, 47, 60, 118, 255 },
+ { 250, 35, 37, 47, 66, 110, 255 },
+ },
+ {
+ // 8 colors
+ { 180, 43, 46, 50, 56, 69, 109 },
+ { 116, 53, 51, 49, 57, 73, 115 },
+ { 79, 70, 49, 50, 59, 74, 117 },
+ { 60, 54, 57, 70, 62, 83, 129 },
+ { 20, 73, 85, 52, 66, 81, 119 },
+ { 213, 56, 52, 49, 53, 62, 104 },
+ { 48, 161, 41, 45, 56, 77, 116 },
+ { 68, 139, 40, 47, 54, 71, 116 },
+ { 123, 166, 42, 43, 52, 76, 130 },
+ { 153, 44, 44, 47, 54, 79, 129 },
+ { 87, 64, 83, 49, 60, 75, 127 },
+ { 131, 68, 43, 48, 73, 96, 130 },
+ { 55, 152, 45, 51, 64, 77, 113 },
+ { 243, 30, 28, 33, 41, 65, 114 },
+ { 202, 56, 35, 36, 42, 63, 123 },
+ { 249, 31, 29, 32, 45, 68, 111 },
+ }
+ };
+
+const vpx_prob vp10_default_palette_uv_color_prob  // Default UV palette color-index probabilities.
+ [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] = {  // Laid out as [palette size - 2][color context][entry].
+ {
+ // 2 colors (rows for k colors carry k - 1 leading values, then 255 when
+ { 228, 255, 128, 128, 128, 128, 128 },  // k < 8, then 128 filler; a few
+ { 195, 255, 128, 128, 128, 128, 128 },  // rows are 128 throughout).
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 228, 255, 128, 128, 128, 128, 128 },
+ { 71, 255, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 129, 255, 128, 128, 128, 128, 128 },
+ { 206, 255, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 136, 255, 128, 128, 128, 128, 128 },
+ { 98, 255, 128, 128, 128, 128, 128 },
+ { 236, 255, 128, 128, 128, 128, 128 },
+ { 222, 255, 128, 128, 128, 128, 128 },
+ { 249, 255, 128, 128, 128, 128, 128 },
+ },
+ {
+ // 3 colors
+ { 198, 136, 255, 128, 128, 128, 128 },
+ { 178, 105, 255, 128, 128, 128, 128 },
+ { 100, 206, 255, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128 },
+ { 12, 136, 255, 128, 128, 128, 128 },
+ { 219, 134, 255, 128, 128, 128, 128 },
+ { 50, 198, 255, 128, 128, 128, 128 },
+ { 61, 231, 255, 128, 128, 128, 128 },
+ { 110, 209, 255, 128, 128, 128, 128 },
+ { 173, 106, 255, 128, 128, 128, 128 },
+ { 145, 166, 255, 128, 128, 128, 128 },
+ { 156, 175, 255, 128, 128, 128, 128 },
+ { 69, 183, 255, 128, 128, 128, 128 },
+ { 241, 163, 255, 128, 128, 128, 128 },
+ { 224, 160, 255, 128, 128, 128, 128 },
+ { 246, 154, 255, 128, 128, 128, 128 },
+ },
+ {
+ // 4 colors
+ { 173, 88, 143, 255, 128, 128, 128 },
+ { 146, 81, 127, 255, 128, 128, 128 },
+ { 84, 134, 102, 255, 128, 128, 128 },
+ { 69, 138, 140, 255, 128, 128, 128 },
+ { 31, 103, 200, 255, 128, 128, 128 },
+ { 217, 101, 139, 255, 128, 128, 128 },
+ { 51, 174, 121, 255, 128, 128, 128 },
+ { 64, 177, 109, 255, 128, 128, 128 },
+ { 96, 179, 145, 255, 128, 128, 128 },
+ { 164, 77, 114, 255, 128, 128, 128 },
+ { 87, 94, 156, 255, 128, 128, 128 },
+ { 105, 57, 173, 255, 128, 128, 128 },
+ { 63, 158, 137, 255, 128, 128, 128 },
+ { 236, 102, 156, 255, 128, 128, 128 },
+ { 197, 115, 153, 255, 128, 128, 128 },
+ { 245, 106, 154, 255, 128, 128, 128 },
+ },
+ {
+ // 5 colors
+ { 179, 64, 97, 129, 255, 128, 128 },
+ { 137, 56, 88, 125, 255, 128, 128 },
+ { 82, 107, 61, 118, 255, 128, 128 },
+ { 59, 113, 86, 115, 255, 128, 128 },
+ { 23, 88, 118, 130, 255, 128, 128 },
+ { 213, 66, 90, 125, 255, 128, 128 },
+ { 37, 181, 103, 121, 255, 128, 128 },
+ { 47, 188, 61, 131, 255, 128, 128 },
+ { 104, 185, 103, 144, 255, 128, 128 },
+ { 163, 39, 76, 112, 255, 128, 128 },
+ { 94, 74, 131, 126, 255, 128, 128 },
+ { 142, 42, 103, 163, 255, 128, 128 },
+ { 53, 162, 99, 149, 255, 128, 128 },
+ { 239, 54, 84, 108, 255, 128, 128 },
+ { 203, 84, 110, 147, 255, 128, 128 },
+ { 248, 70, 105, 151, 255, 128, 128 },
+ },
+ {
+ // 6 colors
+ { 189, 50, 67, 90, 130, 255, 128 },
+ { 114, 50, 55, 90, 123, 255, 128 },
+ { 66, 76, 54, 82, 128, 255, 128 },
+ { 43, 69, 69, 80, 129, 255, 128 },
+ { 22, 59, 87, 88, 141, 255, 128 },
+ { 203, 49, 68, 87, 122, 255, 128 },
+ { 43, 157, 74, 104, 146, 255, 128 },
+ { 54, 138, 51, 95, 138, 255, 128 },
+ { 82, 171, 58, 102, 146, 255, 128 },
+ { 129, 38, 59, 64, 168, 255, 128 },
+ { 56, 67, 119, 92, 112, 255, 128 },
+ { 96, 62, 53, 132, 82, 255, 128 },
+ { 60, 147, 77, 108, 145, 255, 128 },
+ { 238, 76, 73, 93, 148, 255, 128 },
+ { 189, 86, 73, 103, 157, 255, 128 },
+ { 246, 62, 75, 83, 167, 255, 128 },
+ },
+ {
+ // 7 colors
+ { 179, 42, 51, 73, 99, 134, 255 },
+ { 119, 52, 52, 61, 64, 114, 255 },
+ { 53, 77, 35, 65, 71, 131, 255 },
+ { 38, 70, 51, 68, 89, 144, 255 },
+ { 23, 65, 128, 73, 97, 131, 255 },
+ { 210, 47, 52, 63, 81, 143, 255 },
+ { 42, 159, 57, 68, 98, 143, 255 },
+ { 49, 153, 45, 82, 93, 143, 255 },
+ { 81, 169, 52, 72, 113, 151, 255 },
+ { 136, 46, 35, 56, 75, 96, 255 },
+ { 57, 84, 109, 47, 107, 131, 255 },
+ { 128, 78, 57, 36, 128, 85, 255 },
+ { 54, 149, 68, 77, 94, 153, 255 },
+ { 243, 58, 50, 71, 81, 167, 255 },
+ { 189, 92, 64, 70, 121, 173, 255 },
+ { 248, 35, 38, 51, 82, 201, 255 },
+ },
+ {
+ // 8 colors
+ { 201, 40, 36, 42, 64, 92, 123 },
+ { 116, 43, 33, 43, 73, 102, 128 },
+ { 46, 77, 37, 69, 62, 78, 150 },
+ { 40, 65, 52, 50, 76, 89, 133 },
+ { 28, 48, 91, 17, 64, 77, 133 },
+ { 218, 43, 43, 37, 56, 72, 163 },
+ { 41, 155, 44, 83, 82, 129, 180 },
+ { 44, 141, 29, 55, 64, 89, 147 },
+ { 92, 166, 48, 45, 59, 126, 179 },
+ { 169, 35, 49, 41, 36, 99, 139 },
+ { 55, 77, 77, 56, 60, 75, 156 },
+ { 155, 81, 51, 64, 57, 182, 255 },
+ { 60, 134, 49, 49, 93, 128, 174 },
+ { 244, 98, 51, 46, 22, 73, 238 },
+ { 189, 70, 40, 87, 93, 79, 201 },
+ { 248, 54, 49, 40, 29, 42, 227 },
+ }
+ };
+
+static const int palette_color_context_lookup[PALETTE_COLOR_CONTEXTS] = {  // Score tuple (a, b, c, d) is stored as ((a * 11 + b) * 11 + c) * 11 + d; the array index is the context.
+ // (3, 0, 0, 0), (3, 2, 0, 0), (3, 3, 2, 0), (3, 3, 2, 2),
+ 3993, 4235, 4378, 4380,
+ // (4, 3, 3, 0), (5, 0, 0, 0), (5, 3, 0, 0), (5, 3, 2, 0),
+ 5720, 6655, 7018, 7040,
+ // (5, 5, 0, 0), (6, 2, 0, 0), (6, 2, 2, 0), (6, 4, 0, 0),
+ 7260, 8228, 8250, 8470,
+ // (7, 3, 0, 0), (8, 0, 0, 0), (8, 2, 0, 0), (10, 0, 0, 0)
+ 9680, 10648, 10890, 13310
+};
+
+const vpx_tree_index vp10_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)] = {  // One tree per maximum transform size; shorter trees leave the remaining entries zero-initialized.
+ {
+ // Max tx_size is 8X8
+ -TX_4X4, -TX_8X8,
+ },
+ {
+ // Max tx_size is 16X16
+ -TX_4X4, 2, -TX_8X8, -TX_16X16,
+ },
+ {
+ // Max tx_size is 32X32
+ -TX_4X4, 2, -TX_8X8, 4, -TX_16X16, -TX_32X32,
+ },
+};
+
+static const vpx_prob  // Default transform-size probabilities, copied to fc->tx_size_probs.
+ default_tx_size_prob[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1] = {  // [max tx size group][context][entry]; entries not listed are zero-initialized.
+ {
+ // Max tx_size is 8X8
+ { 100 },
+ { 66 },
+ },
+ {
+ // Max tx_size is 16X16
+ { 20, 152 },
+ { 15, 101 },
+ },
+ {
+ // Max tx_size is 32X32
+ { 3, 136, 37 },
+ { 5, 52, 13 },
+ },
+ };
+
+// Returns the palette color context, a value in [0, PALETTE_COLOR_CONTEXTS),
+// for the color-map position (r, c). The context is derived from the left,
+// above-left, above and above-right neighbors; color_map is read with a row
+// stride of cols and n is the palette size (at most PALETTE_MAX_SIZE).
+// color_order receives PALETTE_MAX_SIZE entries: the identity order permuted
+// by the same swaps that bring the four highest scores to the front.
+int vp10_get_palette_color_context(const uint8_t *color_map, int cols, int r,
+                                   int c, int n, int *color_order) {
+  const int has_left = c - 1 >= 0;
+  const int has_above = r - 1 >= 0;
+  const int has_right = c + 1 <= cols - 1;
+  int weights[4] = { 3, 2, 3, 2 };
+  int scores[PALETTE_MAX_SIZE + 10];
+  int color_neighbors[4];
+  int color_ctx = 0;
+  int k, pos;
+
+  assert(n <= PALETTE_MAX_SIZE);
+
+  // Neighbor color indices; -1 marks a neighbor outside the map.
+  color_neighbors[0] = has_left ? color_map[r * cols + c - 1] : -1;
+  color_neighbors[1] =
+      (has_left && has_above) ? color_map[(r - 1) * cols + c - 1] : -1;
+  color_neighbors[2] = has_above ? color_map[(r - 1) * cols + c] : -1;
+  color_neighbors[3] =
+      (has_above && has_right) ? color_map[(r - 1) * cols + c + 1] : -1;
+
+  // Every color starts at score 0; each present neighbor adds its weight.
+  memset(scores, 0, PALETTE_MAX_SIZE * sizeof(scores[0]));
+  for (k = 0; k < PALETTE_MAX_SIZE; ++k) color_order[k] = k;
+  for (k = 0; k < 4; ++k) {
+    if (color_neighbors[k] < 0) continue;
+    scores[color_neighbors[k]] += weights[k];
+  }
+
+  // Partial selection sort: bring the four highest scores among the first n
+  // colors to the front, mirroring each swap in color_order.
+  for (pos = 0; pos < 4; ++pos) {
+    int best = pos;
+    for (k = pos + 1; k < n; ++k) {
+      if (scores[k] > scores[best]) best = k;
+    }
+    if (best != pos) {
+      const int score_tmp = scores[pos];
+      const int order_tmp = color_order[pos];
+      scores[pos] = scores[best];
+      scores[best] = score_tmp;
+      color_order[pos] = color_order[best];
+      color_order[best] = order_tmp;
+    }
+  }
+
+  // Pack the four leading scores as base-11 digits and map the packed value
+  // to its position in palette_color_context_lookup.
+  for (k = 0; k < 4; ++k) color_ctx = color_ctx * 11 + scores[k];
+
+  for (k = 0; k < PALETTE_COLOR_CONTEXTS; ++k) {
+    if (color_ctx == palette_color_context_lookup[k]) {
+      color_ctx = k;
+      break;
+    }
+  }
+
+  // A packed value with no table entry falls back to context 0.
+  if (color_ctx >= PALETTE_COLOR_CONTEXTS) color_ctx = 0;
+
+  return color_ctx;
+}
+
+#if CONFIG_VAR_TX  // Defaults copied to fc->txfm_partition_prob.
+static const vpx_prob default_txfm_partition_probs[TXFM_PARTITION_CONTEXTS] = {
+ 192, 128, 64, 192, 128, 64, 192, 128, 64,  // The triple 192, 128, 64 three times.
+};
+#endif
+
+static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = { 192, 128, 64 };  // Copied to fc->skip_probs by init_mode_probs().
+
+#if CONFIG_EXT_INTERP  // Variant with four probabilities per context row.
+static const vpx_prob default_switchable_interp_prob
+ [SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1] = {
+#if CONFIG_DUAL_FILTER  // The same six rows, written out four times.
+ { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 },
+ { 36, 243, 48, 128 }, { 34, 16, 128, 128 }, { 149, 160, 128, 128 },
+
+ { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 },
+ { 36, 243, 48, 128 }, { 34, 16, 128, 128 }, { 149, 160, 128, 128 },
+
+ { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 },
+ { 36, 243, 48, 128 }, { 34, 16, 128, 128 }, { 149, 160, 128, 128 },
+
+ { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 },
+ { 36, 243, 48, 128 }, { 34, 16, 128, 128 }, { 149, 160, 128, 128 },
+#else
+ { 235, 192, 128, 128 }, { 36, 243, 208, 128 }, { 34, 16, 128, 128 },
+ { 36, 243, 48, 128 }, { 34, 16, 128, 128 }, { 149, 160, 128, 128 },
+#endif
+ };
+#else // CONFIG_EXT_INTERP
+#if CONFIG_DUAL_FILTER  // Two probabilities per row; groups two and four end in { 10, 3 }.
+static const vpx_prob default_switchable_interp_prob
+ [SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1] = {
+ { 235, 162 }, { 36, 255 }, { 34, 3 }, { 149, 144 },
+
+ { 235, 162 }, { 36, 255 }, { 34, 3 }, { 10, 3 },
+
+ { 235, 162 }, { 36, 255 }, { 34, 3 }, { 149, 144 },
+
+ { 235, 162 }, { 36, 255 }, { 34, 3 }, { 10, 3 },
+ };
+#else
+static const vpx_prob default_switchable_interp_prob
+ [SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS - 1] = {
+ { 235, 162 }, { 36, 255 }, { 34, 3 }, { 149, 144 },
+ };
+#endif
+#endif // CONFIG_EXT_INTERP
+
+#if CONFIG_EXT_TX
+/* clang-format off */
+const vpx_tree_index vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER]  // One transform-type tree per inter set.
+ [TREE_SIZE(TX_TYPES)] = {  // Sets 1, 2 and 3 name 16, 12 and 2 transform types.
+ { // ToDo(yaowu): remove unused entry 0.
+ 0
+ }, {
+ -IDTX, 2,
+ 4, 14,
+ 6, 8,
+ -V_DCT, -H_DCT,
+ 10, 12,
+ -V_ADST, -H_ADST,
+ -V_FLIPADST, -H_FLIPADST,
+ -DCT_DCT, 16,
+ 18, 24,
+ 20, 22,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 26, 28,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST
+ }, {
+ -IDTX, 2,
+ 4, 6,
+ -V_DCT, -H_DCT,
+ -DCT_DCT, 8,
+ 10, 16,
+ 12, 14,
+ -ADST_DCT, -DCT_ADST,
+ -FLIPADST_DCT, -DCT_FLIPADST,
+ 18, 20,
+ -ADST_ADST, -FLIPADST_FLIPADST,
+ -ADST_FLIPADST, -FLIPADST_ADST
+ }, {
+ -IDTX, -DCT_DCT,
+ }
+};
+
+const vpx_tree_index vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA]  // One transform-type tree per intra set.
+ [TREE_SIZE(TX_TYPES)] = {  // Sets 1 and 2 name 7 and 5 transform types.
+ { // ToDo(yaowu): remove unused entry 0.
+ 0
+ }, {
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ 6, 8,
+ -V_DCT, -H_DCT,
+ -ADST_ADST, 10,
+ -ADST_DCT, -DCT_ADST,
+ }, {
+ -IDTX, 2,
+ -DCT_DCT, 4,
+ -ADST_ADST, 6,
+ -ADST_DCT, -DCT_ADST,
+ }
+};
+/* clang-format on */
+
+static const vpx_prob  // Default inter transform-type probabilities: [set][tx size][entry].
+ default_inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1] = {  // Sets 1, 2 and 3 list 15, 11 and 1 values per row; the remainder is zero-initialized.
+ {
+ // ToDo(yaowu): remove unused entry 0.
+ { 0 },
+ { 0 },
+ { 0 },
+#if EXT_TX_SIZES == 4  // A fourth, identical row when there are four tx sizes.
+ { 0 },
+#endif
+ },
+ {
+ { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128,
+ 128 },
+ { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128,
+ 128 },
+ { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128,
+ 128 },
+#if EXT_TX_SIZES == 4
+ { 10, 24, 30, 128, 128, 128, 128, 112, 160, 128, 128, 128, 128, 128,
+ 128 },
+#endif
+ },
+ {
+ { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
+ { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
+ { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
+#if EXT_TX_SIZES == 4
+ { 10, 30, 128, 112, 160, 128, 128, 128, 128, 128, 128 },
+#endif
+ },
+ {
+ { 12 },
+ { 12 },
+ { 12 },
+#if EXT_TX_SIZES == 4
+ { 12 },
+#endif
+ }
+ };
+
+static const vpx_prob default_intra_ext_tx_prob  // Default intra transform-type probabilities.
+ [EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][TX_TYPES - 1] = {  // [set][tx size][intra mode][entry]; the groups of one set are identical.
+ {
+ // ToDo(yaowu): remove unused entry 0.
+ {
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ },
+ {
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ },
+ {
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+#if EXT_TX_SIZES == 4  // Closes the third tx-size group and opens a fourth.
+ },
+ {
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+ { 0 },
+#endif
+ },
+ },
+ {
+ {
+ { 8, 224, 32, 128, 64, 128 },
+ { 10, 32, 32, 128, 16, 192 },
+ { 10, 32, 32, 128, 16, 64 },
+ { 9, 200, 32, 128, 64, 128 },
+ { 8, 8, 32, 128, 224, 128 },
+ { 10, 32, 32, 128, 16, 192 },
+ { 10, 32, 32, 128, 16, 64 },
+ { 10, 23, 32, 128, 80, 176 },
+ { 10, 23, 32, 128, 80, 176 },
+ { 10, 32, 32, 128, 16, 64 },
+ },
+ {
+ { 8, 224, 32, 128, 64, 128 },
+ { 10, 32, 32, 128, 16, 192 },
+ { 10, 32, 32, 128, 16, 64 },
+ { 9, 200, 32, 128, 64, 128 },
+ { 8, 8, 32, 128, 224, 128 },
+ { 10, 32, 32, 128, 16, 192 },
+ { 10, 32, 32, 128, 16, 64 },
+ { 10, 23, 32, 128, 80, 176 },
+ { 10, 23, 32, 128, 80, 176 },
+ { 10, 32, 32, 128, 16, 64 },
+ },
+ {
+ { 8, 224, 32, 128, 64, 128 },
+ { 10, 32, 32, 128, 16, 192 },
+ { 10, 32, 32, 128, 16, 64 },
+ { 9, 200, 32, 128, 64, 128 },
+ { 8, 8, 32, 128, 224, 128 },
+ { 10, 32, 32, 128, 16, 192 },
+ { 10, 32, 32, 128, 16, 64 },
+ { 10, 23, 32, 128, 80, 176 },
+ { 10, 23, 32, 128, 80, 176 },
+ { 10, 32, 32, 128, 16, 64 },
+#if EXT_TX_SIZES == 4
+ },
+ {
+ { 8, 224, 32, 128, 64, 128 },
+ { 10, 32, 32, 128, 16, 192 },
+ { 10, 32, 32, 128, 16, 64 },
+ { 9, 200, 32, 128, 64, 128 },
+ { 8, 8, 32, 128, 224, 128 },
+ { 10, 32, 32, 128, 16, 192 },
+ { 10, 32, 32, 128, 16, 64 },
+ { 10, 23, 32, 128, 80, 176 },
+ { 10, 23, 32, 128, 80, 176 },
+ { 10, 32, 32, 128, 16, 64 },
+#endif
+ },
+ },
+ {
+ {
+ { 8, 224, 64, 128 },
+ { 10, 32, 16, 192 },
+ { 10, 32, 16, 64 },
+ { 9, 200, 64, 128 },
+ { 8, 8, 224, 128 },
+ { 10, 32, 16, 192 },
+ { 10, 32, 16, 64 },
+ { 10, 23, 80, 176 },
+ { 10, 23, 80, 176 },
+ { 10, 32, 16, 64 },
+ },
+ {
+ { 8, 224, 64, 128 },
+ { 10, 32, 16, 192 },
+ { 10, 32, 16, 64 },
+ { 9, 200, 64, 128 },
+ { 8, 8, 224, 128 },
+ { 10, 32, 16, 192 },
+ { 10, 32, 16, 64 },
+ { 10, 23, 80, 176 },
+ { 10, 23, 80, 176 },
+ { 10, 32, 16, 64 },
+ },
+ {
+ { 8, 224, 64, 128 },
+ { 10, 32, 16, 192 },
+ { 10, 32, 16, 64 },
+ { 9, 200, 64, 128 },
+ { 8, 8, 224, 128 },
+ { 10, 32, 16, 192 },
+ { 10, 32, 16, 64 },
+ { 10, 23, 80, 176 },
+ { 10, 23, 80, 176 },
+ { 10, 32, 16, 64 },
+#if EXT_TX_SIZES == 4
+ },
+ {
+ { 8, 224, 64, 128 },
+ { 10, 32, 16, 192 },
+ { 10, 32, 16, 64 },
+ { 9, 200, 64, 128 },
+ { 8, 8, 224, 128 },
+ { 10, 32, 16, 192 },
+ { 10, 32, 16, 64 },
+ { 10, 23, 80, 176 },
+ { 10, 23, 80, 176 },
+ { 10, 32, 16, 64 },
+#endif
+ },
+ },
+ };
+
+#else
+
+/* clang-format off */
+const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)] = {  // Transform-type tree used when CONFIG_EXT_TX is off; names four types.
+ -DCT_DCT, 2,
+ -ADST_ADST, 4,
+ -ADST_DCT, -DCT_ADST
+};
+/* clang-format on */
+
+static const vpx_prob  // Non-EXT_TX intra defaults; three outer rows are listed.
+ default_intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1] = {  // Adapted per [i][j] with i < EXT_TX_SIZES and j < TX_TYPES.
+ { { 240, 85, 128 }, { 4, 1, 248 }, { 4, 1, 8 }, { 4, 248, 128 } },
+ { { 244, 85, 128 }, { 8, 2, 248 }, { 8, 2, 8 }, { 8, 248, 128 } },
+ { { 248, 85, 128 }, { 16, 4, 248 }, { 16, 4, 8 }, { 16, 248, 128 } },
+ };
+
+static const vpx_prob default_inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1] = {  // Non-EXT_TX inter defaults, one row per tx size.
+ { 160, 85, 128 }, { 176, 85, 128 }, { 192, 85, 128 },
+};
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_INTRA  // Defaults and tree for the intra filter and ext-intra flags.
+static const vpx_prob
+ default_intra_filter_probs[INTRA_FILTERS + 1][INTRA_FILTERS - 1] = {  // Five rows of three probabilities.
+ { 98, 63, 60 }, { 98, 82, 80 }, { 94, 65, 103 },
+ { 49, 25, 24 }, { 72, 38, 50 },
+ };
+static const vpx_prob default_ext_intra_probs[2] = { 230, 230 };  // Adapted per plane type in vp10_adapt_intra_frame_probs().
+
+const vpx_tree_index vp10_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)] = {  // Names the four intra filters.
+ -INTRA_FILTER_LINEAR, 2, -INTRA_FILTER_8TAP, 4, -INTRA_FILTER_8TAP_SHARP,
+ -INTRA_FILTER_8TAP_SMOOTH,
+};
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_SUPERTX  // Defaults copied to fc->supertx_prob.
+static const vpx_prob
+ default_supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES] = {  // Adaptation starts at column 1, so column 0 keeps its default.
+ { 1, 160, 160, 170 }, { 1, 200, 200, 210 },
+ };
+#endif // CONFIG_SUPERTX
+
+// FIXME(someone) need real defaults here
+static const struct segmentation_probs default_seg_probs = {  // All entries are the neutral value 128.
+ { 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128 },  // Presumably tree_probs then pred_probs; confirm against the struct's member order.
+};
+
+static void init_mode_probs(FRAME_CONTEXT *fc) {  // Loads the default mode probabilities into *fc.
+ vp10_copy(fc->uv_mode_prob, default_uv_probs);
+ vp10_copy(fc->y_mode_prob, default_if_y_probs);
+ vp10_copy(fc->switchable_interp_prob, default_switchable_interp_prob);
+ vp10_copy(fc->partition_prob, default_partition_probs);
+ vp10_copy(fc->intra_inter_prob, default_intra_inter_p);
+ vp10_copy(fc->comp_inter_prob, default_comp_inter_p);
+ vp10_copy(fc->comp_ref_prob, default_comp_ref_p);
+#if CONFIG_EXT_REFS
+ vp10_copy(fc->comp_bwdref_prob, default_comp_bwdref_p);
+#endif // CONFIG_EXT_REFS
+ vp10_copy(fc->single_ref_prob, default_single_ref_p);
+ vp10_copy(fc->tx_size_probs, default_tx_size_prob);
+#if CONFIG_VAR_TX
+ vp10_copy(fc->txfm_partition_prob, default_txfm_partition_probs);
+#endif
+ vp10_copy(fc->skip_probs, default_skip_probs);
+#if CONFIG_REF_MV
+ vp10_copy(fc->newmv_prob, default_newmv_prob);
+ vp10_copy(fc->zeromv_prob, default_zeromv_prob);
+ vp10_copy(fc->refmv_prob, default_refmv_prob);
+ vp10_copy(fc->drl_prob, default_drl_prob);
+#if CONFIG_EXT_INTER
+ fc->new2mv_prob = default_new2mv_prob;  // Plain assignment rather than vp10_copy.
+#endif // CONFIG_EXT_INTER
+#endif // CONFIG_REF_MV
+ vp10_copy(fc->inter_mode_probs, default_inter_mode_probs);
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ vp10_copy(fc->motvar_prob, default_motvar_prob);
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+#if CONFIG_EXT_INTER
+ vp10_copy(fc->inter_compound_mode_probs, default_inter_compound_mode_probs);
+ vp10_copy(fc->interintra_prob, default_interintra_prob);
+ vp10_copy(fc->interintra_mode_prob, default_interintra_mode_prob);
+ vp10_copy(fc->wedge_interintra_prob, default_wedge_interintra_prob);
+ vp10_copy(fc->wedge_interinter_prob, default_wedge_interinter_prob);
+#endif // CONFIG_EXT_INTER
+#if CONFIG_SUPERTX
+ vp10_copy(fc->supertx_prob, default_supertx_prob);
+#endif // CONFIG_SUPERTX
+ vp10_copy(fc->seg.tree_probs, default_seg_probs.tree_probs);
+ vp10_copy(fc->seg.pred_probs, default_seg_probs.pred_probs);
+#if CONFIG_EXT_INTRA
+ vp10_copy(fc->ext_intra_probs, default_ext_intra_probs);
+ vp10_copy(fc->intra_filter_probs, default_intra_filter_probs);
+#endif // CONFIG_EXT_INTRA
+ vp10_copy(fc->inter_ext_tx_prob, default_inter_ext_tx_prob);  // Both ext-tx tables are defined in either CONFIG_EXT_TX branch, with different shapes.
+ vp10_copy(fc->intra_ext_tx_prob, default_intra_ext_tx_prob);
+}
+
+#if CONFIG_EXT_INTERP  // Tree naming five interpolation filters.
+const vpx_tree_index
+ vp10_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)] = {
+ -EIGHTTAP_REGULAR,
+ 2,
+ 4,
+ 6,
+ -EIGHTTAP_SMOOTH,
+ -EIGHTTAP_SMOOTH2,
+ -MULTITAP_SHARP,
+ -MULTITAP_SHARP2,
+ };
+#else  // Tree naming three interpolation filters.
+const vpx_tree_index vp10_switchable_interp_tree[TREE_SIZE(
+ SWITCHABLE_FILTERS)] = { -EIGHTTAP_REGULAR, 2, -EIGHTTAP_SMOOTH,
+ -MULTITAP_SHARP };
+#endif // CONFIG_EXT_INTERP
+
+// Adapts the inter-frame probabilities in cm->fc: each one is the matching
+// value of cm->frame_contexts[cm->frame_context_idx] merged with cm->counts.
+void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) {
+  int i, j;
+  FRAME_CONTEXT *fc = cm->fc;
+  const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
+  const FRAME_COUNTS *counts = &cm->counts;
+
+  for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
+    fc->intra_inter_prob[i] = vp10_mode_mv_merge_probs(
+        pre_fc->intra_inter_prob[i], counts->intra_inter[i]);
+  for (i = 0; i < COMP_INTER_CONTEXTS; i++)
+    fc->comp_inter_prob[i] = vp10_mode_mv_merge_probs(
+        pre_fc->comp_inter_prob[i], counts->comp_inter[i]);
+  // NOTE(review): comp_ref* use mode_mv_merge_probs (no vp10_ prefix); confirm.
+#if CONFIG_EXT_REFS
+  for (i = 0; i < REF_CONTEXTS; i++)
+    for (j = 0; j < (FWD_REFS - 1); j++)
+      fc->comp_ref_prob[i][j] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i][j],
+                                                    counts->comp_ref[i][j]);
+  for (i = 0; i < REF_CONTEXTS; i++)
+    for (j = 0; j < (BWD_REFS - 1); j++)
+      fc->comp_bwdref_prob[i][j] = mode_mv_merge_probs(
+          pre_fc->comp_bwdref_prob[i][j], counts->comp_bwdref[i][j]);
+#else
+  for (i = 0; i < REF_CONTEXTS; i++)
+    for (j = 0; j < (COMP_REFS - 1); j++)
+      fc->comp_ref_prob[i][j] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i][j],
+                                                    counts->comp_ref[i][j]);
+#endif  // CONFIG_EXT_REFS
+
+  for (i = 0; i < REF_CONTEXTS; i++)
+    for (j = 0; j < (SINGLE_REFS - 1); j++)
+      fc->single_ref_prob[i][j] = vp10_mode_mv_merge_probs(
+          pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]);
+
+#if CONFIG_REF_MV
+  for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
+    fc->newmv_prob[i] =
+        vp10_mode_mv_merge_probs(pre_fc->newmv_prob[i], counts->newmv_mode[i]);
+  for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i)
+    fc->zeromv_prob[i] = vp10_mode_mv_merge_probs(pre_fc->zeromv_prob[i],
+                                                  counts->zeromv_mode[i]);
+  for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
+    fc->refmv_prob[i] =
+        vp10_mode_mv_merge_probs(pre_fc->refmv_prob[i], counts->refmv_mode[i]);
+  for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
+    fc->drl_prob[i] =
+        vp10_mode_mv_merge_probs(pre_fc->drl_prob[i], counts->drl_mode[i]);
+#if CONFIG_EXT_INTER
+  fc->new2mv_prob =
+      vp10_mode_mv_merge_probs(pre_fc->new2mv_prob, counts->new2mv_mode);
+#endif  // CONFIG_EXT_INTER
+#else
+  for (i = 0; i < INTER_MODE_CONTEXTS; i++)
+    vpx_tree_merge_probs(vp10_inter_mode_tree, pre_fc->inter_mode_probs[i],
+                         counts->inter_mode[i], fc->inter_mode_probs[i]);
+#endif
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+  for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i)
+    vpx_tree_merge_probs(vp10_motvar_tree, pre_fc->motvar_prob[i],
+                         counts->motvar[i], fc->motvar_prob[i]);
+#endif  // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+#if CONFIG_SUPERTX
+  for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
+    for (j = 1; j < TX_SIZES; ++j) {  // Column 0 is not adapted.
+      fc->supertx_prob[i][j] = vp10_mode_mv_merge_probs(
+          pre_fc->supertx_prob[i][j], counts->supertx[i][j]);
+    }
+  }
+#endif  // CONFIG_SUPERTX
+
+#if CONFIG_EXT_INTER
+  for (i = 0; i < INTER_MODE_CONTEXTS; i++)
+    vpx_tree_merge_probs(
+        vp10_inter_compound_mode_tree, pre_fc->inter_compound_mode_probs[i],
+        counts->inter_compound_mode[i], fc->inter_compound_mode_probs[i]);
+  for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
+    if (is_interintra_allowed_bsize_group(i))
+      fc->interintra_prob[i] = vp10_mode_mv_merge_probs(
+          pre_fc->interintra_prob[i], counts->interintra[i]);
+  }
+  for (i = 0; i < BLOCK_SIZE_GROUPS; i++) {
+    vpx_tree_merge_probs(
+        vp10_interintra_mode_tree, pre_fc->interintra_mode_prob[i],
+        counts->interintra_mode[i], fc->interintra_mode_prob[i]);
+  }
+  for (i = 0; i < BLOCK_SIZES; ++i) {
+    if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i))
+      fc->wedge_interintra_prob[i] = vp10_mode_mv_merge_probs(
+          pre_fc->wedge_interintra_prob[i], counts->wedge_interintra[i]);
+  }
+  for (i = 0; i < BLOCK_SIZES; ++i) {
+    if (is_interinter_wedge_used(i))
+      fc->wedge_interinter_prob[i] = vp10_mode_mv_merge_probs(
+          pre_fc->wedge_interinter_prob[i], counts->wedge_interinter[i]);
+  }
+#endif  // CONFIG_EXT_INTER
+
+  for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
+    vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->y_mode_prob[i],
+                         counts->y_mode[i], fc->y_mode_prob[i]);
+
+  if (cm->interp_filter == SWITCHABLE) {
+    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
+      vpx_tree_merge_probs(
+          vp10_switchable_interp_tree, pre_fc->switchable_interp_prob[i],
+          counts->switchable_interp[i], fc->switchable_interp_prob[i]);
+  }
+}
+
+// Adapts the probabilities in cm->fc handled here: each one is the matching
+// value of cm->frame_contexts[cm->frame_context_idx] merged with cm->counts.
+void vp10_adapt_intra_frame_probs(VP10_COMMON *cm) {
+  int i, j;
+  FRAME_CONTEXT *fc = cm->fc;
+  const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
+  const FRAME_COUNTS *counts = &cm->counts;
+
+  if (cm->tx_mode == TX_MODE_SELECT) {
+    for (i = 0; i < TX_SIZES - 1; ++i) {
+      for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
+        vpx_tree_merge_probs(vp10_tx_size_tree[i], pre_fc->tx_size_probs[i][j],
+                             counts->tx_size[i][j], fc->tx_size_probs[i][j]);
+    }
+  }
+
+#if CONFIG_VAR_TX
+  if (cm->tx_mode == TX_MODE_SELECT)
+    for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
+      fc->txfm_partition_prob[i] = vp10_mode_mv_merge_probs(
+          pre_fc->txfm_partition_prob[i], counts->txfm_partition[i]);
+#endif
+
+  for (i = 0; i < SKIP_CONTEXTS; ++i)
+    fc->skip_probs[i] =
+        vp10_mode_mv_merge_probs(pre_fc->skip_probs[i], counts->skip[i]);
+
+#if CONFIG_EXT_TX
+  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+    int s;  // Set index; set 0 is skipped in both loops below.
+    for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+      if (use_inter_ext_tx_for_txsize[s][i]) {
+        vpx_tree_merge_probs(
+            vp10_ext_tx_inter_tree[s], pre_fc->inter_ext_tx_prob[s][i],
+            counts->inter_ext_tx[s][i], fc->inter_ext_tx_prob[s][i]);
+      }
+    }
+    for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+      if (use_intra_ext_tx_for_txsize[s][i]) {
+        for (j = 0; j < INTRA_MODES; ++j)
+          vpx_tree_merge_probs(
+              vp10_ext_tx_intra_tree[s], pre_fc->intra_ext_tx_prob[s][i][j],
+              counts->intra_ext_tx[s][i][j], fc->intra_ext_tx_prob[s][i][j]);
+      }
+    }
+  }
+#else
+  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+    for (j = 0; j < TX_TYPES; ++j)
+      vpx_tree_merge_probs(vp10_ext_tx_tree, pre_fc->intra_ext_tx_prob[i][j],
+                           counts->intra_ext_tx[i][j],
+                           fc->intra_ext_tx_prob[i][j]);
+  }
+  for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+    vpx_tree_merge_probs(vp10_ext_tx_tree, pre_fc->inter_ext_tx_prob[i],
+                         counts->inter_ext_tx[i], fc->inter_ext_tx_prob[i]);
+  }
+#endif  // CONFIG_EXT_TX
+
+  if (cm->seg.temporal_update) {  // Selects which segment-tree counts are used.
+    for (i = 0; i < PREDICTION_PROBS; i++)
+      fc->seg.pred_probs[i] = vp10_mode_mv_merge_probs(
+          pre_fc->seg.pred_probs[i], counts->seg.pred[i]);
+
+    vpx_tree_merge_probs(vp10_segment_tree, pre_fc->seg.tree_probs,
+                         counts->seg.tree_mispred, fc->seg.tree_probs);
+  } else {
+    vpx_tree_merge_probs(vp10_segment_tree, pre_fc->seg.tree_probs,
+                         counts->seg.tree_total, fc->seg.tree_probs);
+  }
+
+  for (i = 0; i < INTRA_MODES; ++i)
+    vpx_tree_merge_probs(vp10_intra_mode_tree, pre_fc->uv_mode_prob[i],
+                         counts->uv_mode[i], fc->uv_mode_prob[i]);
+
+#if CONFIG_EXT_PARTITION_TYPES  // Context 0 keeps the basic partition tree.
+  vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[0],
+                       counts->partition[0], fc->partition_prob[0]);
+  for (i = 1; i < PARTITION_CONTEXTS; i++)
+    vpx_tree_merge_probs(vp10_ext_partition_tree, pre_fc->partition_prob[i],
+                         counts->partition[i], fc->partition_prob[i]);
+#else
+  for (i = 0; i < PARTITION_CONTEXTS; i++)
+    vpx_tree_merge_probs(vp10_partition_tree, pre_fc->partition_prob[i],
+                         counts->partition[i], fc->partition_prob[i]);
+#endif  // CONFIG_EXT_PARTITION_TYPES
+
+#if CONFIG_EXT_INTRA
+  for (i = 0; i < PLANE_TYPES; ++i) {
+    fc->ext_intra_probs[i] = vp10_mode_mv_merge_probs(
+        pre_fc->ext_intra_probs[i], counts->ext_intra[i]);
+  }
+  for (i = 0; i < INTRA_FILTERS + 1; ++i)
+    vpx_tree_merge_probs(vp10_intra_filter_tree, pre_fc->intra_filter_probs[i],
+                         counts->intra_filter[i], fc->intra_filter_probs[i]);
+#endif  // CONFIG_EXT_INTRA
+}
+
+// Loads the default loop-filter deltas into *lf and sets both delta flags.
+static void set_default_lf_deltas(struct loopfilter *lf) {
+  lf->mode_deltas[0] = 0;
+  lf->mode_deltas[1] = 0;
+  lf->mode_ref_delta_enabled = 1;
+  lf->mode_ref_delta_update = 1;
+  // Intra gets +1, LAST gets 0, GOLDEN and ALTREF get -1.
+  lf->ref_deltas[INTRA_FRAME] = 1;
+  lf->ref_deltas[LAST_FRAME] = 0;
+#if CONFIG_EXT_REFS  // The extra references copy the LAST_FRAME delta.
+  lf->ref_deltas[LAST2_FRAME] = lf->ref_deltas[LAST_FRAME];
+  lf->ref_deltas[LAST3_FRAME] = lf->ref_deltas[LAST_FRAME];
+  lf->ref_deltas[BWDREF_FRAME] = lf->ref_deltas[LAST_FRAME];
+#endif  // CONFIG_EXT_REFS
+  lf->ref_deltas[GOLDEN_FRAME] = -1;
+  lf->ref_deltas[ALTREF_FRAME] = -1;
+}
+
+// Resets cm so that it no longer depends on earlier frames: segmentation,
+// segment maps, loop-filter deltas, default probabilities, saved contexts.
+void vp10_setup_past_independence(VP10_COMMON *cm) {
+  struct loopfilter *const lf = &cm->lf;
+  int i;
+
+  // Segmentation back to its default state: features cleared, delta coding.
+  vp10_clearall_segfeatures(&cm->seg);
+  cm->seg.abs_delta = SEGMENT_DELTADATA;
+
+  // Zero whichever segment maps exist (mi_rows * mi_cols bytes each).
+  if (!cm->frame_parallel_decode && cm->last_frame_seg_map)
+    memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
+  if (cm->current_frame_seg_map)
+    memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols));
+
+  // Loop filter: clear the remembered deltas, install the defaults, and use
+  // -1 as the last sharpness level to force a sharpness update.
+  vp10_zero(lf->last_ref_deltas);
+  vp10_zero(lf->last_mode_deltas);
+  set_default_lf_deltas(lf);
+  lf->last_sharpness_level = -1;
+#if CONFIG_LOOP_RESTORATION
+  if (cm->rst_info.bilateral_level) {
+    for (i = 0; i < cm->rst_internal.ntiles; ++i)
+      cm->rst_info.bilateral_level[i] = -1;
+  }
+#endif  // CONFIG_LOOP_RESTORATION
+
+  // Rebuild the working context from the built-in defaults.
+  vp10_default_coef_probs(cm);
+  init_mode_probs(cm->fc);
+  vp10_init_mv_probs(cm);
+  cm->fc->initialized = 1;
+
+  if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
+      cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL) {
+    // Every saved context becomes a copy of the fresh one.
+    for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc;
+  } else if (cm->reset_frame_context == RESET_FRAME_CONTEXT_CURRENT) {
+    // Only the context selected by frame_context_idx is overwritten.
+    cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+  }
+
+  // prev_mip will only be allocated in encoder.
+  if (frame_is_intra_only(cm) && cm->prev_mip && !cm->frame_parallel_decode)
+    memset(cm->prev_mip, 0,
+           cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip));
+  cm->frame_context_idx = 0;
+}
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
new file mode 100644
index 0000000..4616aa2
--- /dev/null
+++ b/av1/common/entropymode.h
@@ -0,0 +1,280 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ENTROPYMODE_H_
+#define VP10_COMMON_ENTROPYMODE_H_
+
+#include "av1/common/entropy.h"
+#include "av1/common/entropymv.h"
+#include "av1/common/filter.h"
+#include "av1/common/seg_common.h"
+#include "aom_dsp/vpx_filter.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BLOCK_SIZE_GROUPS 4
+
+#define TX_SIZE_CONTEXTS 2
+
+#define INTER_OFFSET(mode) ((mode)-NEARESTMV)
+#if CONFIG_EXT_INTER
+#define INTER_COMPOUND_OFFSET(mode) ((mode)-NEAREST_NEARESTMV)
+#endif // CONFIG_EXT_INTER
+
+#define PALETTE_COLOR_CONTEXTS 16
+#define PALETTE_MAX_SIZE 8
+#define PALETTE_BLOCK_SIZES (BLOCK_LARGEST - BLOCK_8X8 + 1)
+#define PALETTE_Y_MODE_CONTEXTS 3
+#define PALETTE_MAX_BLOCK_SIZE (64 * 64)
+
+struct VP10Common;
+
+struct seg_counts {
+ unsigned int tree_total[MAX_SEGMENTS];
+ unsigned int tree_mispred[MAX_SEGMENTS];
+ unsigned int pred[PREDICTION_PROBS][2];
+};
+
+typedef struct frame_contexts {
+ vpx_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1];
+ vpx_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1];
+#if CONFIG_EXT_PARTITION_TYPES
+ vpx_prob partition_prob[PARTITION_CONTEXTS][EXT_PARTITION_TYPES - 1];
+#else
+ vpx_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1];
+#endif
+ vp10_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES];
+#if CONFIG_ANS
+ coeff_cdf_model coef_cdfs[TX_SIZES][PLANE_TYPES];
+#endif
+ vpx_prob
+ switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS -
+ 1];
+
+#if CONFIG_REF_MV
+ vpx_prob newmv_prob[NEWMV_MODE_CONTEXTS];
+ vpx_prob zeromv_prob[ZEROMV_MODE_CONTEXTS];
+ vpx_prob refmv_prob[REFMV_MODE_CONTEXTS];
+ vpx_prob drl_prob[DRL_MODE_CONTEXTS];
+
+#if CONFIG_EXT_INTER
+ vpx_prob new2mv_prob;
+#endif // CONFIG_EXT_INTER
+#endif // CONFIG_REF_MV
+
+ vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1];
+#if CONFIG_EXT_INTER
+ vpx_prob
+ inter_compound_mode_probs[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES - 1];
+ vpx_prob interintra_prob[BLOCK_SIZE_GROUPS];
+ vpx_prob interintra_mode_prob[BLOCK_SIZE_GROUPS][INTERINTRA_MODES - 1];
+ vpx_prob wedge_interintra_prob[BLOCK_SIZES];
+ vpx_prob wedge_interinter_prob[BLOCK_SIZES];
+#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ vpx_prob motvar_prob[BLOCK_SIZES][MOTION_VARIATIONS - 1];
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS];
+ vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS];
+ vpx_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS - 1];
+#if CONFIG_EXT_REFS
+ vpx_prob comp_ref_prob[REF_CONTEXTS][FWD_REFS - 1];
+ vpx_prob comp_bwdref_prob[REF_CONTEXTS][BWD_REFS - 1];
+#else
+ vpx_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS - 1];
+#endif // CONFIG_EXT_REFS
+ vpx_prob tx_size_probs[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1];
+#if CONFIG_VAR_TX
+ vpx_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
+#endif
+ vpx_prob skip_probs[SKIP_CONTEXTS];
+#if CONFIG_REF_MV
+ nmv_context nmvc[NMV_CONTEXTS];
+#else
+ nmv_context nmvc;
+#endif
+ int initialized;
+#if CONFIG_EXT_TX
+ vpx_prob inter_ext_tx_prob[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES - 1];
+ vpx_prob
+ intra_ext_tx_prob[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][TX_TYPES -
+ 1];
+#else
+ vpx_prob intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1];
+ vpx_prob inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1];
+#endif // CONFIG_EXT_TX
+#if CONFIG_SUPERTX
+ vpx_prob supertx_prob[PARTITION_SUPERTX_CONTEXTS][TX_SIZES];
+#endif // CONFIG_SUPERTX
+ struct segmentation_probs seg;
+#if CONFIG_EXT_INTRA
+ vpx_prob ext_intra_probs[PLANE_TYPES];
+ vpx_prob intra_filter_probs[INTRA_FILTERS + 1][INTRA_FILTERS - 1];
+#endif // CONFIG_EXT_INTRA
+#if CONFIG_GLOBAL_MOTION
+ vpx_prob global_motion_types_prob[GLOBAL_MOTION_TYPES - 1];
+#endif // CONFIG_GLOBAL_MOTION
+} FRAME_CONTEXT;
+
+typedef struct FRAME_COUNTS {
+ // Note: This structure should only contain 'unsigned int' fields, or
+ // aggregates built solely from 'unsigned int' fields/elements
+ unsigned int kf_y_mode[INTRA_MODES][INTRA_MODES][INTRA_MODES];
+ unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
+ unsigned int uv_mode[INTRA_MODES][INTRA_MODES];
+#if CONFIG_EXT_PARTITION_TYPES
+ unsigned int partition[PARTITION_CONTEXTS][EXT_PARTITION_TYPES];
+#else
+ unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES];
+#endif
+ vp10_coeff_count_model coef[TX_SIZES][PLANE_TYPES];
+ unsigned int
+ eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS][COEFF_CONTEXTS];
+ unsigned int
+ switchable_interp[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
+#if CONFIG_REF_MV
+ unsigned int newmv_mode[NEWMV_MODE_CONTEXTS][2];
+ unsigned int zeromv_mode[ZEROMV_MODE_CONTEXTS][2];
+ unsigned int refmv_mode[REFMV_MODE_CONTEXTS][2];
+ unsigned int drl_mode[DRL_MODE_CONTEXTS][2];
+#if CONFIG_EXT_INTER
+ unsigned int new2mv_mode[2];
+#endif // CONFIG_EXT_INTER
+#endif
+
+ unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES];
+#if CONFIG_EXT_INTER
+ unsigned int inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
+ unsigned int interintra[BLOCK_SIZE_GROUPS][2];
+ unsigned int interintra_mode[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
+ unsigned int wedge_interintra[BLOCK_SIZES][2];
+ unsigned int wedge_interinter[BLOCK_SIZES][2];
+#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ unsigned int motvar[BLOCK_SIZES][MOTION_VARIATIONS];
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
+ unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
+ unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS - 1][2];
+#if CONFIG_EXT_REFS
+ unsigned int comp_ref[REF_CONTEXTS][FWD_REFS - 1][2];
+ unsigned int comp_bwdref[REF_CONTEXTS][BWD_REFS - 1][2];
+#else
+ unsigned int comp_ref[REF_CONTEXTS][COMP_REFS - 1][2];
+#endif // CONFIG_EXT_REFS
+ // TODO(any): tx_size_totals is only used by the encoder to decide whether
+ // to use forward updates for the coeff probs, and as such it does not really
+ // belong into this structure.
+ unsigned int tx_size_totals[TX_SIZES];
+ unsigned int tx_size[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
+#if CONFIG_VAR_TX
+ unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
+#endif
+ unsigned int skip[SKIP_CONTEXTS][2];
+#if CONFIG_REF_MV
+ nmv_context_counts mv[NMV_CONTEXTS];
+#else
+ nmv_context_counts mv;
+#endif
+#if CONFIG_EXT_TX
+#if CONFIG_RECT_TX
+ unsigned int tx_size_implied[TX_SIZES][TX_SIZES];
+#endif // CONFIG_RECT_TX
+ unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
+ unsigned int
+ intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES][TX_TYPES];
+#else
+ unsigned int intra_ext_tx[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
+ unsigned int inter_ext_tx[EXT_TX_SIZES][TX_TYPES];
+#endif // CONFIG_EXT_TX
+#if CONFIG_SUPERTX
+ unsigned int supertx[PARTITION_SUPERTX_CONTEXTS][TX_SIZES][2];
+ unsigned int supertx_size[TX_SIZES];
+#endif // CONFIG_SUPERTX
+ struct seg_counts seg;
+#if CONFIG_EXT_INTRA
+ unsigned int ext_intra[PLANE_TYPES][2];
+ unsigned int intra_filter[INTRA_FILTERS + 1][INTRA_FILTERS];
+#endif // CONFIG_EXT_INTRA
+} FRAME_COUNTS;
+
+extern const vpx_prob
+ vp10_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1];
+extern const vpx_prob vp10_default_palette_y_mode_prob[PALETTE_BLOCK_SIZES]
+ [PALETTE_Y_MODE_CONTEXTS];
+extern const vpx_prob vp10_default_palette_uv_mode_prob[2];
+extern const vpx_prob
+ vp10_default_palette_y_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1];
+extern const vpx_prob
+ vp10_default_palette_uv_size_prob[PALETTE_BLOCK_SIZES][PALETTE_SIZES - 1];
+extern const vpx_prob vp10_default_palette_y_color_prob
+ [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1];
+extern const vpx_prob vp10_default_palette_uv_color_prob
+ [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1];
+
+extern const vpx_tree_index vp10_intra_mode_tree[TREE_SIZE(INTRA_MODES)];
+extern const vpx_tree_index vp10_inter_mode_tree[TREE_SIZE(INTER_MODES)];
+#if CONFIG_EXT_INTER
+extern const vpx_tree_index
+ vp10_interintra_mode_tree[TREE_SIZE(INTERINTRA_MODES)];
+extern const vpx_tree_index
+ vp10_inter_compound_mode_tree[TREE_SIZE(INTER_COMPOUND_MODES)];
+#endif // CONFIG_EXT_INTER
+extern const vpx_tree_index vp10_partition_tree[TREE_SIZE(PARTITION_TYPES)];
+#if CONFIG_EXT_PARTITION_TYPES
+extern const vpx_tree_index
+ vp10_ext_partition_tree[TREE_SIZE(EXT_PARTITION_TYPES)];
+#endif
+extern const vpx_tree_index
+ vp10_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)];
+extern const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)];
+extern const vpx_tree_index
+ vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)];
+extern const vpx_tree_index
+ vp10_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)];
+#if CONFIG_EXT_INTRA
+extern const vpx_tree_index vp10_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)];
+#endif // CONFIG_EXT_INTRA
+#if CONFIG_EXT_TX
+extern const vpx_tree_index
+ vp10_ext_tx_inter_tree[EXT_TX_SETS_INTER][TREE_SIZE(TX_TYPES)];
+extern const vpx_tree_index
+ vp10_ext_tx_intra_tree[EXT_TX_SETS_INTRA][TREE_SIZE(TX_TYPES)];
+#else
+extern const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(TX_TYPES)];
+#endif // CONFIG_EXT_TX
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+extern const vpx_tree_index vp10_motvar_tree[TREE_SIZE(MOTION_VARIATIONS)];
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+void vp10_setup_past_independence(struct VP10Common *cm);
+
+void vp10_adapt_intra_frame_probs(struct VP10Common *cm);
+void vp10_adapt_inter_frame_probs(struct VP10Common *cm);
+
+// Returns the smallest i >= 1 with (1 << i) >= n, so every n <= 2 yields 1.
+// p is unsigned: doubling a signed int past INT_MAX (n > 2^30) would be UB.
+static INLINE int vp10_ceil_log2(int n) {
+  int i = 1;
+  unsigned int p = 2;
+  for (; n > 2 && p < (unsigned int)n; p <<= 1) ++i;
+  return i;
+}
+
+int vp10_get_palette_color_context(const uint8_t *color_map, int cols, int r,
+ int c, int n, int *color_order);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_ENTROPYMODE_H_
diff --git a/av1/common/entropymv.c b/av1/common/entropymv.c
new file mode 100644
index 0000000..f3dba3f
--- /dev/null
+++ b/av1/common/entropymv.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/onyxc_int.h"
+#include "av1/common/entropymv.h"
+
+// Integer pel reference mv threshold for use of high-precision 1/8 mv
+#define COMPANDED_MVREF_THRESH 8
+
+const vpx_tree_index vp10_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = {
+ -MV_JOINT_ZERO, 2, -MV_JOINT_HNZVZ, 4, -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ
+};
+
+/* clang-format off */
+const vpx_tree_index vp10_mv_class_tree[TREE_SIZE(MV_CLASSES)] = {
+ -MV_CLASS_0, 2,
+ -MV_CLASS_1, 4,
+ 6, 8,
+ -MV_CLASS_2, -MV_CLASS_3,
+ 10, 12,
+ -MV_CLASS_4, -MV_CLASS_5,
+ -MV_CLASS_6, 14,
+ 16, 18,
+ -MV_CLASS_7, -MV_CLASS_8,
+ -MV_CLASS_9, -MV_CLASS_10,
+};
+/* clang-format on */
+
+const vpx_tree_index vp10_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = {
+ -0, -1,
+};
+
+const vpx_tree_index vp10_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1,
+ 4, -2, -3 };
+
+static const nmv_context default_nmv_context = {
+#if CONFIG_REF_MV
+ { 1, 64, 96 },
+ 128,
+#else
+ { 32, 64, 96 },
+#endif
+ { {
+ // Vertical component
+ 128, // sign
+ { 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 }, // class
+ { 216 }, // class0
+ { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, // bits
+ { { 128, 128, 64 }, { 96, 112, 64 } }, // class0_fp
+ { 64, 96, 64 }, // fp
+ 160, // class0_hp bit
+ 128, // hp
+ },
+ {
+ // Horizontal component
+ 128, // sign
+ { 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 }, // class
+ { 208 }, // class0
+ { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, // bits
+ { { 128, 128, 64 }, { 96, 112, 64 } }, // class0_fp
+ { 64, 96, 64 }, // fp
+ 160, // class0_hp bit
+ 128, // hp
+ } },
+};
+
+static const uint8_t log_in_base_2[] = {
+ 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10
+};
+
+#if CONFIG_GLOBAL_MOTION
+const vpx_tree_index
+ vp10_global_motion_types_tree[TREE_SIZE(GLOBAL_MOTION_TYPES)] = {
+ -GLOBAL_ZERO, 2, -GLOBAL_TRANSLATION, 4, -GLOBAL_ROTZOOM, -GLOBAL_AFFINE
+ };
+
+static const vpx_prob default_global_motion_types_prob[GLOBAL_MOTION_TYPES -
+ 1] = { 224, 128, 128 };
+#endif // CONFIG_GLOBAL_MOTION
+
+static INLINE int mv_class_base(MV_CLASS_TYPE c) {  // class c's first value
+  return (c == MV_CLASS_0) ? 0 : (CLASS0_SIZE << (c + 2));
+}
+
+// Classifies z; when offset is non-NULL also stores z minus the class base.
+MV_CLASS_TYPE vp10_get_mv_class(int z, int *offset) {
+  MV_CLASS_TYPE cls = MV_CLASS_10;
+  if (z < CLASS0_SIZE * 4096) cls = (MV_CLASS_TYPE)log_in_base_2[z >> 3];
+  if (offset != NULL) *offset = z - mv_class_base(cls);
+  return cls;
+}
+
+// TODO(jingning): This stub ignores ref and always returns 1; it is
+// intentionally left as is for experimental purposes.
+int vp10_use_mv_hp(const MV *ref) {
+ (void)ref;
+ return 1;
+}
+
+// Adds incr to the comp_counts entries for nonzero v (hp ones only if usehp).
+static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr,
+                             int usehp) {
+  int offset, int_part, frac_part, hp_bit, mag_minus_1, mv_class;
+  const int is_negative = v < 0;
+  assert(v != 0); /* should not be zero */
+  comp_counts->sign[is_negative] += incr;
+  mag_minus_1 = (is_negative ? -v : v) - 1;
+  mv_class = vp10_get_mv_class(mag_minus_1, &offset);
+  comp_counts->classes[mv_class] += incr;
+
+  int_part = offset >> 3;        /* integer-pel bits of the offset */
+  frac_part = (offset >> 1) & 3; /* fractional-pel bits */
+  hp_bit = offset & 1;           /* high-precision bit */
+  if (mv_class != MV_CLASS_0) {
+    const int num_bits = mv_class + CLASS0_BITS - 1;
+    int bit;
+    for (bit = 0; bit < num_bits; ++bit)
+      comp_counts->bits[bit][(int_part >> bit) & 1] += incr;
+    comp_counts->fp[frac_part] += incr;
+    if (usehp) comp_counts->hp[hp_bit] += incr;
+    return;
+  }
+  comp_counts->class0[int_part] += incr;
+  comp_counts->class0_fp[int_part][frac_part] += incr;
+  if (usehp) comp_counts->class0_hp[hp_bit] += incr;
+}
+
+// Updates counts for mv; does nothing when counts is NULL. With
+// CONFIG_REF_MV a zero mv only bumps zero_rmv and is not counted further.
+void vp10_inc_mv(const MV *mv, nmv_context_counts *counts, const int usehp) {
+  MV_JOINT_TYPE joint;
+  if (counts == NULL) return;
+  joint = vp10_get_mv_joint(mv);
+
+#if CONFIG_REF_MV
+  ++counts->zero_rmv[joint == MV_JOINT_ZERO];
+  if (joint == MV_JOINT_ZERO) return;
+#endif
+  ++counts->joints[joint];
+  if (mv_joint_vertical(joint))
+    inc_mv_component(mv->row, &counts->comps[0], 1, usehp);
+  if (mv_joint_horizontal(joint))
+    inc_mv_component(mv->col, &counts->comps[1], 1, usehp);
+}
+
+// Adapts a single MV probability context: every probability written to fc is
+// the prior taken from pre_fc merged with the symbol counts in counts.
+//   fc       - context that receives the adapted probabilities
+//   pre_fc   - context that supplies the prior probabilities
+//   counts   - symbol counts merged into the priors
+//   allow_hp - nonzero to also adapt the high-precision probabilities
+static void adapt_nmv_context(nmv_context *fc, const nmv_context *pre_fc,
+                              const nmv_context_counts *counts,
+                              int allow_hp) {
+  int i, j;
+
+  // Joint type, i.e. which of the two components are nonzero.
+  vpx_tree_merge_probs(vp10_mv_joint_tree, pre_fc->joints, counts->joints,
+                       fc->joints);
+#if CONFIG_REF_MV
+  // Flag telling whether the whole vector is zero.
+  fc->zero_rmv = vp10_mode_mv_merge_probs(pre_fc->zero_rmv, counts->zero_rmv);
+#endif  // CONFIG_REF_MV
+
+  // comps[0] is the row (vertical) part, comps[1] the column (horizontal) one.
+  for (i = 0; i < 2; ++i) {
+    nmv_component *comp = &fc->comps[i];
+    const nmv_component *pre_comp = &pre_fc->comps[i];
+    const nmv_component_counts *c = &counts->comps[i];
+
+    // Sign, magnitude class, and the integer value within MV_CLASS_0.
+    comp->sign = vp10_mode_mv_merge_probs(pre_comp->sign, c->sign);
+    vpx_tree_merge_probs(vp10_mv_class_tree, pre_comp->classes, c->classes,
+                         comp->classes);
+    vpx_tree_merge_probs(vp10_mv_class0_tree, pre_comp->class0, c->class0,
+                         comp->class0);
+
+    // Integer offset bits, used by every class other than MV_CLASS_0.
+    for (j = 0; j < MV_OFFSET_BITS; ++j)
+      comp->bits[j] = vp10_mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]);
+
+    // Fractional-pel symbols: one tree per MV_CLASS_0 integer value, plus a
+    // single tree shared by all other classes.
+    for (j = 0; j < CLASS0_SIZE; ++j)
+      vpx_tree_merge_probs(vp10_mv_fp_tree, pre_comp->class0_fp[j],
+                           c->class0_fp[j], comp->class0_fp[j]);
+    vpx_tree_merge_probs(vp10_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
+
+    // The high-precision probabilities in fc stay untouched unless allow_hp.
+    if (allow_hp) {
+      comp->class0_hp =
+          vp10_mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp);
+      comp->hp = vp10_mode_mv_merge_probs(pre_comp->hp, c->hp);
+    }
+  }
+}
+
+// Adapts the motion vector probabilities of the current frame context.
+//   cm       - codec state; cm->fc is updated from cm->counts.mv, with
+//              cm->frame_contexts[cm->frame_context_idx] providing the priors
+//   allow_hp - nonzero to also adapt the high-precision probabilities
+// With CONFIG_REF_MV each of the NMV_CONTEXTS contexts is adapted separately;
+// otherwise there is only one context. The per-context work is shared through
+// adapt_nmv_context() so that both configurations stay in sync.
+void vp10_adapt_mv_probs(VP10_COMMON *cm, int allow_hp) {
+#if CONFIG_REF_MV
+  int idx;
+
+  for (idx = 0; idx < NMV_CONTEXTS; ++idx) {
+    nmv_context *fc = &cm->fc->nmvc[idx];
+    const nmv_context *pre_fc =
+        &cm->frame_contexts[cm->frame_context_idx].nmvc[idx];
+    const nmv_context_counts *counts = &cm->counts.mv[idx];
+
+    adapt_nmv_context(fc, pre_fc, counts, allow_hp);
+  }
+#else
+  nmv_context *fc = &cm->fc->nmvc;
+  const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc;
+  const nmv_context_counts *counts = &cm->counts.mv;
+
+  adapt_nmv_context(fc, pre_fc, counts, allow_hp);
+#endif  // CONFIG_REF_MV
+}
+
+void vp10_init_mv_probs(VP10_COMMON *cm) {  // load default MV probs into fc
+#if CONFIG_REF_MV
+  int ctx = 0;
+  while (ctx < NMV_CONTEXTS) cm->fc->nmvc[ctx++] = default_nmv_context;
+#else
+  cm->fc->nmvc = default_nmv_context;
+#endif
+#if CONFIG_GLOBAL_MOTION
+  vp10_copy(cm->fc->global_motion_types_prob, default_global_motion_types_prob);
+#endif  // CONFIG_GLOBAL_MOTION
+}
diff --git a/av1/common/entropymv.h b/av1/common/entropymv.h
new file mode 100644
index 0000000..c809a67
--- /dev/null
+++ b/av1/common/entropymv.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ENTROPYMV_H_
+#define VP10_COMMON_ENTROPYMV_H_
+
+#include "./vpx_config.h"
+
+#include "aom_dsp/prob.h"
+
+#include "av1/common/mv.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10Common;
+
+void vp10_init_mv_probs(struct VP10Common *cm);
+
+void vp10_adapt_mv_probs(struct VP10Common *cm, int usehp);
+int vp10_use_mv_hp(const MV *ref);
+
+#define MV_UPDATE_PROB 252
+
+/* Symbols for coding which components are zero jointly */
+#define MV_JOINTS 4
+typedef enum {
+ MV_JOINT_ZERO = 0, /* Zero vector */
+ MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */
+ MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */
+ MV_JOINT_HNZVNZ = 3, /* Both components nonzero */
+} MV_JOINT_TYPE;
+
+static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) {  // vert nonzero?
+  return type == MV_JOINT_HNZVNZ || type == MV_JOINT_HZVNZ;
+}
+
+static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) {  // hor nonzero?
+  return type == MV_JOINT_HNZVNZ || type == MV_JOINT_HNZVZ;
+}
+
+/* Symbols for coding magnitude class of nonzero components */
+#define MV_CLASSES 11
+typedef enum {
+ MV_CLASS_0 = 0, /* (0, 2] integer pel */
+ MV_CLASS_1 = 1, /* (2, 4] integer pel */
+ MV_CLASS_2 = 2, /* (4, 8] integer pel */
+ MV_CLASS_3 = 3, /* (8, 16] integer pel */
+ MV_CLASS_4 = 4, /* (16, 32] integer pel */
+ MV_CLASS_5 = 5, /* (32, 64] integer pel */
+ MV_CLASS_6 = 6, /* (64, 128] integer pel */
+ MV_CLASS_7 = 7, /* (128, 256] integer pel */
+ MV_CLASS_8 = 8, /* (256, 512] integer pel */
+ MV_CLASS_9 = 9, /* (512, 1024] integer pel */
+ MV_CLASS_10 = 10, /* (1024,2048] integer pel */
+} MV_CLASS_TYPE;
+
+#define CLASS0_BITS 1 /* bits at integer precision for class 0 */
+#define CLASS0_SIZE (1 << CLASS0_BITS)
+#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2)
+#define MV_FP_SIZE 4
+
+#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2)
+#define MV_MAX ((1 << MV_MAX_BITS) - 1)
+#define MV_VALS ((MV_MAX << 1) + 1)
+
+#define MV_IN_USE_BITS 14
+#define MV_UPP ((1 << MV_IN_USE_BITS) - 1)
+#define MV_LOW (-(1 << MV_IN_USE_BITS))
+
+extern const vpx_tree_index vp10_mv_joint_tree[];
+extern const vpx_tree_index vp10_mv_class_tree[];
+extern const vpx_tree_index vp10_mv_class0_tree[];
+extern const vpx_tree_index vp10_mv_fp_tree[];
+
+typedef struct {
+ vpx_prob sign;
+ vpx_prob classes[MV_CLASSES - 1];
+ vpx_prob class0[CLASS0_SIZE - 1];
+ vpx_prob bits[MV_OFFSET_BITS];
+ vpx_prob class0_fp[CLASS0_SIZE][MV_FP_SIZE - 1];
+ vpx_prob fp[MV_FP_SIZE - 1];
+ vpx_prob class0_hp;
+ vpx_prob hp;
+} nmv_component;
+
+typedef struct {
+ vpx_prob joints[MV_JOINTS - 1];
+#if CONFIG_REF_MV
+ vpx_prob zero_rmv;
+#endif
+ nmv_component comps[2];
+} nmv_context;
+
+// Classifies mv by which of its row/col components are nonzero.
+static INLINE MV_JOINT_TYPE vp10_get_mv_joint(const MV *mv) {
+  const int row_nonzero = mv->row != 0;
+  const int col_nonzero = mv->col != 0;
+  if (row_nonzero) return col_nonzero ? MV_JOINT_HNZVNZ : MV_JOINT_HZVNZ;
+  return col_nonzero ? MV_JOINT_HNZVZ : MV_JOINT_ZERO;
+}
+
+MV_CLASS_TYPE vp10_get_mv_class(int z, int *offset);
+
+typedef struct {
+ unsigned int sign[2];
+ unsigned int classes[MV_CLASSES];
+ unsigned int class0[CLASS0_SIZE];
+ unsigned int bits[MV_OFFSET_BITS][2];
+ unsigned int class0_fp[CLASS0_SIZE][MV_FP_SIZE];
+ unsigned int fp[MV_FP_SIZE];
+ unsigned int class0_hp[2];
+ unsigned int hp[2];
+} nmv_component_counts;
+
+typedef struct {
+ unsigned int joints[MV_JOINTS];
+#if CONFIG_REF_MV
+ unsigned int zero_rmv[2];
+#endif
+ nmv_component_counts comps[2];
+} nmv_context_counts;
+
+void vp10_inc_mv(const MV *mv, nmv_context_counts *mvctx, const int usehp);
+
+#if CONFIG_GLOBAL_MOTION
+extern const vpx_tree_index
+ vp10_global_motion_types_tree[TREE_SIZE(GLOBAL_MOTION_TYPES)];
+#endif // CONFIG_GLOBAL_MOTION
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_ENTROPYMV_H_
diff --git a/av1/common/enums.h b/av1/common/enums.h
new file mode 100644
index 0000000..8cdec8e
--- /dev/null
+++ b/av1/common/enums.h
@@ -0,0 +1,438 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ENUMS_H_
+#define VP10_COMMON_ENUMS_H_
+
+#include "./vpx_config.h"
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#undef MAX_SB_SIZE
+
+// Max superblock size
+#if CONFIG_EXT_PARTITION
+#define MAX_SB_SIZE_LOG2 7
+#else
+#define MAX_SB_SIZE_LOG2 6
+#endif // CONFIG_EXT_PARTITION
+#define MAX_SB_SIZE (1 << MAX_SB_SIZE_LOG2)
+#define MAX_SB_SQUARE (MAX_SB_SIZE * MAX_SB_SIZE)
+
+// Min superblock size
+#define MIN_SB_SIZE_LOG2 6
+
+// Pixels per Mode Info (MI) unit
+#define MI_SIZE_LOG2 3
+#define MI_SIZE (1 << MI_SIZE_LOG2)
+
+// MI-units per max superblock (MI Block - MIB)
+#define MAX_MIB_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2)
+#define MAX_MIB_SIZE (1 << MAX_MIB_SIZE_LOG2)
+
+// MI-units per min superblock
+#define MIN_MIB_SIZE_LOG2 (MIN_SB_SIZE_LOG2 - MI_SIZE_LOG2)
+
+// Mask to extract MI offset within max MIB
+#define MAX_MIB_MASK (MAX_MIB_SIZE - 1)
+#define MAX_MIB_MASK_2 (MAX_MIB_SIZE * 2 - 1)
+
+// Maximum number of tile rows and tile columns
+#if CONFIG_EXT_TILE
+#define MAX_TILE_ROWS 1024
+#define MAX_TILE_COLS 1024
+#else
+#define MAX_TILE_ROWS 4
+#define MAX_TILE_COLS 64
+#endif // CONFIG_EXT_TILE
+
+// Bitstream profiles indicated by 2-3 bits in the uncompressed header.
+// 00: Profile 0. 8-bit 4:2:0 only.
+// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0.
+// 01: Profile 2. 10-bit and 12-bit color only, with 4:2:0 sampling.
+// 110: Profile 3. 10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0
+// sampling.
+// 111: Undefined profile.
+typedef enum BITSTREAM_PROFILE {
+ PROFILE_0,
+ PROFILE_1,
+ PROFILE_2,
+ PROFILE_3,
+ MAX_PROFILES
+} BITSTREAM_PROFILE;
+
+#define BLOCK_4X4 0
+#define BLOCK_4X8 1
+#define BLOCK_8X4 2
+#define BLOCK_8X8 3
+#define BLOCK_8X16 4
+#define BLOCK_16X8 5
+#define BLOCK_16X16 6
+#define BLOCK_16X32 7
+#define BLOCK_32X16 8
+#define BLOCK_32X32 9
+#define BLOCK_32X64 10
+#define BLOCK_64X32 11
+#define BLOCK_64X64 12
+#if !CONFIG_EXT_PARTITION
+#define BLOCK_SIZES 13
+#else
+#define BLOCK_64X128 13
+#define BLOCK_128X64 14
+#define BLOCK_128X128 15
+#define BLOCK_SIZES 16
+#endif // !CONFIG_EXT_PARTITION
+#define BLOCK_INVALID BLOCK_SIZES
+#define BLOCK_LARGEST (BLOCK_SIZES - 1)
+typedef uint8_t BLOCK_SIZE;
+
+#if CONFIG_EXT_PARTITION_TYPES
+typedef enum PARTITION_TYPE {
+ PARTITION_NONE,
+ PARTITION_HORZ,
+ PARTITION_VERT,
+ PARTITION_SPLIT,
+ PARTITION_HORZ_A, // HORZ split and the top partition is split again
+ PARTITION_HORZ_B, // HORZ split and the bottom partition is split again
+ PARTITION_VERT_A, // VERT split and the left partition is split again
+ PARTITION_VERT_B, // VERT split and the right partition is split again
+ EXT_PARTITION_TYPES,
+ PARTITION_TYPES = PARTITION_SPLIT + 1,
+ PARTITION_INVALID = EXT_PARTITION_TYPES
+} PARTITION_TYPE;
+#else
+typedef enum PARTITION_TYPE {
+ PARTITION_NONE,
+ PARTITION_HORZ,
+ PARTITION_VERT,
+ PARTITION_SPLIT,
+ PARTITION_TYPES,
+ PARTITION_INVALID = PARTITION_TYPES
+} PARTITION_TYPE;
+#endif // CONFIG_EXT_PARTITION_TYPES
+
+typedef char PARTITION_CONTEXT;
+#define PARTITION_PLOFFSET 4 // number of probability models per block size
+#if CONFIG_EXT_PARTITION
+#define PARTITION_CONTEXTS (5 * PARTITION_PLOFFSET)
+#else
+#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
+#endif // CONFIG_EXT_PARTITION
+
+// block transform size
+typedef uint8_t TX_SIZE;
+#define TX_4X4 ((TX_SIZE)0) // 4x4 transform
+#define TX_8X8 ((TX_SIZE)1) // 8x8 transform
+#define TX_16X16 ((TX_SIZE)2) // 16x16 transform
+#define TX_32X32 ((TX_SIZE)3) // 32x32 transform
+#define TX_SIZES ((TX_SIZE)4)
+
+#if CONFIG_EXT_TX
+#define TX_4X8 ((TX_SIZE)4) // 4x8 transform
+#define TX_8X4 ((TX_SIZE)5) // 8x4 transform
+#define TX_8X16 ((TX_SIZE)6) // 8x16 transform
+#define TX_16X8 ((TX_SIZE)7) // 16x8 transform
+#define TX_16X32 ((TX_SIZE)8) // 16x32 transform
+#define TX_32X16 ((TX_SIZE)9) // 32x16 transform
+#define TX_SIZES_ALL ((TX_SIZE)10) // Includes rectangular transforms
+#else
+#define TX_SIZES_ALL ((TX_SIZE)4)
+#endif // CONFIG_EXT_TX
+
+#define MAX_TX_SIZE_LOG2 5
+#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
+#define MIN_TX_SIZE_LOG2 2
+#define MIN_TX_SIZE (1 << MIN_TX_SIZE_LOG2)
+#define MAX_TX_SQUARE (MAX_TX_SIZE * MAX_TX_SIZE)
+
+// Number of maximum size transform blocks in the maximum size superblock
+#define MAX_TX_BLOCKS_IN_MAX_SB_LOG2 ((MAX_SB_SIZE_LOG2 - MAX_TX_SIZE_LOG2) * 2)
+#define MAX_TX_BLOCKS_IN_MAX_SB (1 << MAX_TX_BLOCKS_IN_MAX_SB_LOG2)
+
+#define MAX_NUM_TXB (1 << (MAX_SB_SIZE_LOG2 - MIN_TX_SIZE_LOG2))
+
+// frame transform mode
+typedef enum {
+ ONLY_4X4 = 0, // only 4x4 transform used
+ ALLOW_8X8 = 1, // allow block transform size up to 8x8
+ ALLOW_16X16 = 2, // allow block transform size up to 16x16
+ ALLOW_32X32 = 3, // allow block transform size up to 32x32
+ TX_MODE_SELECT = 4, // transform specified for each block
+ TX_MODES = 5,
+} TX_MODE;
+
+// 1D tx types
+typedef enum {
+ DCT_1D = 0,
+ ADST_1D = 1,
+ FLIPADST_1D = 2,
+ IDTX_1D = 3,
+ TX_TYPES_1D = 4,
+} TX_TYPE_1D;
+
+typedef enum {
+ DCT_DCT = 0, // DCT in both horizontal and vertical
+ ADST_DCT = 1, // ADST in vertical, DCT in horizontal
+ DCT_ADST = 2, // DCT in vertical, ADST in horizontal
+ ADST_ADST = 3, // ADST in both directions
+#if CONFIG_EXT_TX
+ FLIPADST_DCT = 4,
+ DCT_FLIPADST = 5,
+ FLIPADST_FLIPADST = 6,
+ ADST_FLIPADST = 7,
+ FLIPADST_ADST = 8,
+ IDTX = 9,
+ V_DCT = 10,
+ H_DCT = 11,
+ V_ADST = 12,
+ H_ADST = 13,
+ V_FLIPADST = 14,
+ H_FLIPADST = 15,
+#endif // CONFIG_EXT_TX
+ TX_TYPES,
+} TX_TYPE;
+
+#if CONFIG_EXT_TX
+#define EXT_TX_SIZES 4 // number of sizes that use extended transforms
+#define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER
+#define EXT_TX_SETS_INTRA 3 // Sets of transform selections for INTRA
+#else
+#define EXT_TX_SIZES 3 // number of sizes that use extended transforms
+#endif // CONFIG_EXT_TX
+
+typedef enum {
+ VPX_LAST_FLAG = 1 << 0,
+#if CONFIG_EXT_REFS
+ VPX_LAST2_FLAG = 1 << 1,
+ VPX_LAST3_FLAG = 1 << 2,
+ VPX_GOLD_FLAG = 1 << 3,
+ VPX_BWD_FLAG = 1 << 4,
+ VPX_ALT_FLAG = 1 << 5,
+ VPX_REFFRAME_ALL = (1 << 6) - 1
+#else
+ VPX_GOLD_FLAG = 1 << 1,
+ VPX_ALT_FLAG = 1 << 2,
+ VPX_REFFRAME_ALL = (1 << 3) - 1
+#endif // CONFIG_EXT_REFS
+} VPX_REFFRAME;
+
+typedef enum { PLANE_TYPE_Y = 0, PLANE_TYPE_UV = 1, PLANE_TYPES } PLANE_TYPE;
+
+typedef enum {  // enumerator value == (number of colors - 2)
+ TWO_COLORS,  // 0
+ THREE_COLORS,
+ FOUR_COLORS,
+ FIVE_COLORS,
+ SIX_COLORS,
+ SEVEN_COLORS,
+ EIGHT_COLORS,  // 6
+ PALETTE_SIZES  // 7: number of palette sizes (a count, not a size)
+} PALETTE_SIZE;
+
+typedef enum {  // zero-based: PALETTE_COLOR_ONE is 0
+ PALETTE_COLOR_ONE,
+ PALETTE_COLOR_TWO,
+ PALETTE_COLOR_THREE,
+ PALETTE_COLOR_FOUR,
+ PALETTE_COLOR_FIVE,
+ PALETTE_COLOR_SIX,
+ PALETTE_COLOR_SEVEN,
+ PALETTE_COLOR_EIGHT,  // 7
+ PALETTE_COLORS  // 8: number of palette colors (a count, not a color)
+} PALETTE_COLOR;
+
+#define DC_PRED 0 // Average of above and left pixels
+#define V_PRED 1 // Vertical
+#define H_PRED 2 // Horizontal
+#define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi)
+#define D135_PRED 4 // Directional 135 deg = 180 - 45
+#define D117_PRED 5 // Directional 117 deg = 180 - 63
+#define D153_PRED 6 // Directional 153 deg = 180 - 27
+#define D207_PRED 7 // Directional 207 deg = 180 + 27
+#define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi)
+#define TM_PRED 9 // True-motion
+#define NEARESTMV 10  // the *MV modes continue the numbering right after TM_PRED
+#define NEARMV 11
+#define ZEROMV 12
+#define NEWMV 13
+#if CONFIG_EXT_INTER
+#define NEWFROMNEARMV 14
+#define NEAREST_NEARESTMV 15  // first of the two-part (X_Y) mode names
+#define NEAREST_NEARMV 16
+#define NEAR_NEARESTMV 17
+#define NEAR_NEARMV 18
+#define NEAREST_NEWMV 19
+#define NEW_NEARESTMV 20
+#define NEAR_NEWMV 21
+#define NEW_NEARMV 22
+#define ZERO_ZEROMV 23
+#define NEW_NEWMV 24
+#define MB_MODE_COUNT 25  // one past NEW_NEWMV
+#else
+#define MB_MODE_COUNT 14  // one past NEWMV
+#endif // CONFIG_EXT_INTER
+typedef uint8_t PREDICTION_MODE;  // wide enough for every mode value above (max 24)
+
+#define INTRA_MODES (TM_PRED + 1)  // 10: DC_PRED .. TM_PRED
+
+typedef enum {  // enumerator values shift with the CONFIG_* options enabled
+ SIMPLE_TRANSLATION = 0,
+#if CONFIG_OBMC
+ OBMC_CAUSAL, // 2-sided OBMC
+#endif // CONFIG_OBMC
+#if CONFIG_WARPED_MOTION
+ WARPED_CAUSAL, // 2-sided WARPED
+#endif // CONFIG_WARPED_MOTION
+ MOTION_VARIATIONS  // count: 1 plus one per enabled option above
+} MOTION_VARIATION;
+
+#if CONFIG_EXT_INTER
+typedef enum {  // same order and values (0..9) as the intra *_PRED macros
+ II_DC_PRED = 0,
+ II_V_PRED,
+ II_H_PRED,
+ II_D45_PRED,
+ II_D135_PRED,
+ II_D117_PRED,
+ II_D153_PRED,
+ II_D207_PRED,
+ II_D63_PRED,
+ II_TM_PRED,
+ INTERINTRA_MODES  // 10: count of II_* modes
+} INTERINTRA_MODE;
+
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_EXT_INTRA
+typedef enum {  // implicit values 0..9, same order as the intra *_PRED macros
+ FILTER_DC_PRED,
+ FILTER_V_PRED,
+ FILTER_H_PRED,
+ FILTER_D45_PRED,
+ FILTER_D135_PRED,
+ FILTER_D117_PRED,
+ FILTER_D153_PRED,
+ FILTER_D207_PRED,
+ FILTER_D63_PRED,
+ FILTER_TM_PRED,
+ EXT_INTRA_MODES,  // 10: count of FILTER_* modes
+} EXT_INTRA_MODE;
+
+#define FILTER_INTRA_MODES (FILTER_TM_PRED + 1)  // 10, same value as EXT_INTRA_MODES
+#define DIRECTIONAL_MODES (INTRA_MODES - 2)  // 8; presumably excludes DC_PRED and TM_PRED - confirm
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTER
+#define INTER_MODES (1 + NEWFROMNEARMV - NEARESTMV)  // 5: NEARESTMV..NEWFROMNEARMV
+#else
+#define INTER_MODES (1 + NEWMV - NEARESTMV)  // 4: NEARESTMV..NEWMV
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_EXT_INTER
+#define INTER_COMPOUND_MODES (1 + NEW_NEWMV - NEAREST_NEARESTMV)  // 10
+#endif // CONFIG_EXT_INTER
+
+#define SKIP_CONTEXTS 3
+
+#if CONFIG_REF_MV
+#define NMV_CONTEXTS 3
+
+#define NEWMV_MODE_CONTEXTS 7
+#define ZEROMV_MODE_CONTEXTS 2
+#define REFMV_MODE_CONTEXTS 9
+#define DRL_MODE_CONTEXTS 5
+
+#define ZEROMV_OFFSET 3  // bit offsets; presumably into one packed mode-context value - confirm
+#define REFMV_OFFSET 4
+
+#define NEWMV_CTX_MASK ((1 << ZEROMV_OFFSET) - 1)  // 0x7: the bits below ZEROMV_OFFSET
+#define ZEROMV_CTX_MASK ((1 << (REFMV_OFFSET - ZEROMV_OFFSET)) - 1)  // 0x1
+#define REFMV_CTX_MASK ((1 << (8 - REFMV_OFFSET)) - 1)  // 0xf: REFMV_OFFSET up to bit 8
+
+#define ALL_ZERO_FLAG_OFFSET 8  // the *_OFFSET values below start at bit 8
+#define SKIP_NEARESTMV_OFFSET 9
+#define SKIP_NEARMV_OFFSET 10
+#define SKIP_NEARESTMV_SUB8X8_OFFSET 11
+#endif  // CONFIG_REF_MV
+
+#define INTER_MODE_CONTEXTS 7
+
+/* MV reference candidate limit (old "Segment Feature Masks" label did not fit) */
+#define MAX_MV_REF_CANDIDATES 2
+
+#if CONFIG_REF_MV
+#define MAX_REF_MV_STACK_SIZE 16
+#if CONFIG_EXT_PARTITION
+#define REF_CAT_LEVEL 640  // 4x the value used without CONFIG_EXT_PARTITION
+#else
+#define REF_CAT_LEVEL 160
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_REF_MV
+
+#define INTRA_INTER_CONTEXTS 4
+#define COMP_INTER_CONTEXTS 5
+#define REF_CONTEXTS 5
+
+#if CONFIG_VAR_TX
+#define TXFM_PARTITION_CONTEXTS 9
+typedef TX_SIZE TXFM_CONTEXT;  // alias of TX_SIZE
+#endif  // CONFIG_VAR_TX
+
+#define NONE -1
+#define INTRA_FRAME 0
+#define LAST_FRAME 1  // reference frame indices are contiguous from here up to ALTREF_FRAME
+
+#if CONFIG_EXT_REFS
+#define LAST2_FRAME 2
+#define LAST3_FRAME 3
+#define GOLDEN_FRAME 4
+#define BWDREF_FRAME 5
+#define ALTREF_FRAME 6
+#define LAST_REF_FRAMES (LAST3_FRAME - LAST_FRAME + 1)  // 3: LAST, LAST2, LAST3
+#else
+#define GOLDEN_FRAME 2
+#define ALTREF_FRAME 3
+#endif // CONFIG_EXT_REFS
+
+#define INTER_REFS_PER_FRAME (ALTREF_FRAME - LAST_FRAME + 1)  // 6 with EXT_REFS, else 3
+#define TOTAL_REFS_PER_FRAME (ALTREF_FRAME - INTRA_FRAME + 1)  // adds INTRA_FRAME: 7 or 4
+
+#define FWD_REFS (GOLDEN_FRAME - LAST_FRAME + 1)  // LAST_FRAME..GOLDEN_FRAME: 4 or 2
+#define FWD_RF_OFFSET(ref) ((ref) - LAST_FRAME)  // |ref| parenthesized: safe for expression args
+#if CONFIG_EXT_REFS
+#define BWD_REFS (ALTREF_FRAME - BWDREF_FRAME + 1)  // 2: BWDREF_FRAME, ALTREF_FRAME
+#define BWD_RF_OFFSET(ref) ((ref) - BWDREF_FRAME)  // 0 for BWDREF_FRAME
+#else
+#define BWD_REFS 1
+#define BWD_RF_OFFSET(ref) ((ref) - ALTREF_FRAME)  // 0 for ALTREF_FRAME
+#endif // CONFIG_EXT_REFS
+
+#define SINGLE_REFS (FWD_REFS + BWD_REFS)  // 6 with EXT_REFS, else 3
+#define COMP_REFS (FWD_REFS * BWD_REFS)  // every forward/backward pairing: 8 or 2
+
+#if CONFIG_REF_MV
+#define MODE_CTX_REF_FRAMES (TOTAL_REFS_PER_FRAME + COMP_REFS)  // single entries plus compound pairs
+#else
+#define MODE_CTX_REF_FRAMES TOTAL_REFS_PER_FRAME
+#endif  // CONFIG_REF_MV
+
+#if CONFIG_SUPERTX
+#define PARTITION_SUPERTX_CONTEXTS 2
+#define MAX_SUPERTX_BLOCK_SIZE BLOCK_32X32
+#endif // CONFIG_SUPERTX
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_ENUMS_H_
diff --git a/av1/common/filter.c b/av1/common/filter.c
new file mode 100644
index 0000000..46eca5d
--- /dev/null
+++ b/av1/common/filter.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/filter.h"
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+ bilinear_filters[SUBPEL_SHIFTS]) = {  // 16 rows; only taps 3 and 4 are non-zero, summing to 128
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 },
+ { 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 },
+ { 0, 0, 0, 96, 32, 0, 0, 0 }, { 0, 0, 0, 88, 40, 0, 0, 0 },
+ { 0, 0, 0, 80, 48, 0, 0, 0 }, { 0, 0, 0, 72, 56, 0, 0, 0 },
+ { 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 56, 72, 0, 0, 0 },
+ { 0, 0, 0, 48, 80, 0, 0, 0 }, { 0, 0, 0, 40, 88, 0, 0, 0 },
+ { 0, 0, 0, 32, 96, 0, 0, 0 }, { 0, 0, 0, 24, 104, 0, 0, 0 },
+ { 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 }
+};
+
+#if USE_TEMPORALFILTER_12TAP
+DECLARE_ALIGNED(16, static const int16_t,
+ sub_pel_filters_temporalfilter_12[SUBPEL_SHIFTS][12]) = {
+ // intfilt 0.8; 16 rows of 12 taps, row 0 has a single tap of 128 at index 5
+ { 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 },
+ { 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0 },
+ { 0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0 },
+ { -1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1 },
+ { -1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1 },
+ { -1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1 },
+ { -1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1 },
+ { -1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1 },
+ { -1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1 },
+ { -1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1 },
+ { -1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1 },
+ { -1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1 },
+ { -1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1 },
+ { -1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1 },
+ { 0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0 },
+ { 0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0 },
+};
+#endif // USE_TEMPORALFILTER_12TAP
+
+#if CONFIG_EXT_INTERP
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8[SUBPEL_SHIFTS]) = {
+ // intfilt 0.575; CONFIG_EXT_INTERP variant, row 0 has a single tap of 128 at index 3
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -5, 126, 8, -3, 1, 0 },
+ { -1, 3, -10, 123, 18, -6, 2, -1 }, { -1, 4, -14, 118, 27, -9, 3, 0 },
+ { -1, 5, -16, 112, 37, -12, 4, -1 }, { -1, 5, -18, 105, 48, -14, 4, -1 },
+ { -1, 6, -19, 97, 58, -17, 5, -1 }, { -1, 6, -20, 88, 68, -18, 6, -1 },
+ { -1, 6, -19, 78, 78, -19, 6, -1 }, { -1, 6, -18, 68, 88, -20, 6, -1 },
+ { -1, 5, -17, 58, 97, -19, 6, -1 }, { -1, 4, -14, 48, 105, -18, 5, -1 },
+ { -1, 4, -12, 37, 112, -16, 5, -1 }, { 0, 3, -9, 27, 118, -14, 4, -1 },
+ { -1, 2, -6, 18, 123, -10, 3, -1 }, { 0, 1, -3, 8, 126, -5, 1, 0 },
+};
+
+#if CONFIG_EXT_INTRA  // in this file this variant is referenced only by vp10_intra_filter_kernels
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
+ // intfilt 0.8; row 0 has a single tap of 128 at index 3
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { -1, 2, -6, 127, 9, -4, 2, -1 },
+ { -2, 5, -12, 124, 18, -7, 4, -2 }, { -2, 7, -16, 119, 28, -11, 5, -2 },
+ { -3, 8, -19, 114, 38, -14, 7, -3 }, { -3, 9, -22, 107, 49, -17, 8, -3 },
+ { -4, 10, -23, 99, 60, -20, 10, -4 }, { -4, 11, -23, 90, 70, -22, 10, -4 },
+ { -4, 11, -23, 80, 80, -23, 11, -4 }, { -4, 10, -22, 70, 90, -23, 11, -4 },
+ { -4, 10, -20, 60, 99, -23, 10, -4 }, { -3, 8, -17, 49, 107, -22, 9, -3 },
+ { -3, 7, -14, 38, 114, -19, 8, -3 }, { -2, 5, -11, 28, 119, -16, 7, -2 },
+ { -2, 4, -7, 18, 124, -12, 5, -2 }, { -1, 2, -4, 9, 127, -6, 2, -1 },
+};
+#endif // CONFIG_EXT_INTRA
+
+DECLARE_ALIGNED(256, static const int16_t,
+ sub_pel_filters_10sharp[SUBPEL_SHIFTS][10]) = {
+ // intfilt 0.77; 16 rows of 10 taps, row 0 has a single tap of 128 at index 4
+ { 0, 0, 0, 0, 128, 0, 0, 0, 0, 0 },
+ { 0, -1, 3, -6, 127, 8, -4, 2, -1, 0 },
+ { 1, -2, 5, -12, 124, 18, -7, 3, -2, 0 },
+ { 1, -3, 7, -17, 119, 28, -11, 5, -2, 1 },
+ { 1, -4, 8, -20, 114, 38, -14, 7, -3, 1 },
+ { 1, -4, 9, -22, 107, 49, -17, 8, -4, 1 },
+ { 2, -5, 10, -24, 99, 59, -20, 9, -4, 2 },
+ { 2, -5, 10, -24, 90, 70, -22, 10, -5, 2 },
+ { 2, -5, 10, -23, 80, 80, -23, 10, -5, 2 },
+ { 2, -5, 10, -22, 70, 90, -24, 10, -5, 2 },
+ { 2, -4, 9, -20, 59, 99, -24, 10, -5, 2 },
+ { 1, -4, 8, -17, 49, 107, -22, 9, -4, 1 },
+ { 1, -3, 7, -14, 38, 114, -20, 8, -4, 1 },
+ { 1, -2, 5, -11, 28, 119, -17, 7, -3, 1 },
+ { 0, -2, 3, -7, 18, 124, -12, 5, -2, 1 },
+ { 0, -1, 2, -4, 8, 127, -6, 3, -1, 0 },
+};
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8smooth2[SUBPEL_SHIFTS]) = {
+ // freqmultiplier = 0.35; row 0 has a single tap of 128 at index 3
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { -1, 8, 31, 47, 34, 10, 0, -1 },
+ { -1, 7, 29, 46, 36, 12, 0, -1 }, { -1, 6, 28, 46, 37, 13, 0, -1 },
+ { -1, 5, 26, 46, 38, 14, 1, -1 }, { -1, 4, 25, 45, 39, 16, 1, -1 },
+ { -1, 4, 23, 44, 41, 17, 1, -1 }, { -1, 3, 21, 44, 42, 18, 2, -1 },
+ { -1, 2, 20, 43, 43, 20, 2, -1 }, { -1, 2, 18, 42, 44, 21, 3, -1 },
+ { -1, 1, 17, 41, 44, 23, 4, -1 }, { -1, 1, 16, 39, 45, 25, 4, -1 },
+ { -1, 1, 14, 38, 46, 26, 5, -1 }, { -1, 0, 13, 37, 46, 28, 6, -1 },
+ { -1, 0, 12, 36, 46, 29, 7, -1 }, { -1, 0, 10, 34, 47, 31, 8, -1 },
+};
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
+ // freqmultiplier = 0.75; CONFIG_EXT_INTERP variant, row 0 has a single tap of 128 at index 3
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 2, -10, 19, 95, 31, -11, 2, 0 },
+ { 2, -9, 14, 94, 37, -12, 2, 0 }, { 2, -8, 9, 92, 43, -12, 1, 1 },
+ { 2, -7, 5, 90, 49, -12, 1, 0 }, { 2, -5, 1, 86, 55, -12, 0, 1 },
+ { 1, -4, -2, 82, 61, -11, 0, 1 }, { 1, -3, -5, 77, 67, -9, -1, 1 },
+ { 1, -2, -7, 72, 72, -7, -2, 1 }, { 1, -1, -9, 67, 77, -5, -3, 1 },
+ { 1, 0, -11, 61, 82, -2, -4, 1 }, { 1, 0, -12, 55, 86, 1, -5, 2 },
+ { 0, 1, -12, 49, 90, 5, -7, 2 }, { 1, 1, -12, 43, 92, 9, -8, 2 },
+ { 0, 2, -12, 37, 94, 14, -9, 2 }, { 0, 2, -11, 31, 95, 19, -10, 2 },
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ sub_pel_filters_12sharp[SUBPEL_SHIFTS][12]) = {
+ // intfilt 0.85; 16 rows of 12 taps, row 0 has a single tap of 128 at index 5
+ { 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 },
+ { 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0 },
+ { -1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1 },
+ { -1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1 },
+ { -1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1 },
+ { -2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2 },
+ { -2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2 },
+ { -2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2 },
+ { -2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2 },
+ { -2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2 },
+ { -2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2 },
+ { -2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2 },
+ { -1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1 },
+ { -1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1 },
+ { -1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1 },
+ { 0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0 },
+};
+#else // CONFIG_EXT_INTERP
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8[SUBPEL_SHIFTS]) = {
+ // Lagrangian interpolation filter (variant used without CONFIG_EXT_INTERP)
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -5, 126, 8, -3, 1, 0 },
+ { -1, 3, -10, 122, 18, -6, 2, 0 }, { -1, 4, -13, 118, 27, -9, 3, -1 },
+ { -1, 4, -16, 112, 37, -11, 4, -1 }, { -1, 5, -18, 105, 48, -14, 4, -1 },
+ { -1, 5, -19, 97, 58, -16, 5, -1 }, { -1, 6, -19, 88, 68, -18, 5, -1 },
+ { -1, 6, -19, 78, 78, -19, 6, -1 }, { -1, 5, -18, 68, 88, -19, 6, -1 },
+ { -1, 5, -16, 58, 97, -19, 5, -1 }, { -1, 4, -14, 48, 105, -18, 5, -1 },
+ { -1, 4, -11, 37, 112, -16, 4, -1 }, { -1, 3, -9, 27, 118, -13, 4, -1 },
+ { 0, 2, -6, 18, 122, -10, 3, -1 }, { 0, 1, -3, 8, 126, -5, 1, 0 }
+};
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
+ // DCT based filter (variant used without CONFIG_EXT_INTERP)
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { -1, 3, -7, 127, 8, -3, 1, 0 },
+ { -2, 5, -13, 125, 17, -6, 3, -1 }, { -3, 7, -17, 121, 27, -10, 5, -2 },
+ { -4, 9, -20, 115, 37, -13, 6, -2 }, { -4, 10, -23, 108, 48, -16, 8, -3 },
+ { -4, 10, -24, 100, 59, -19, 9, -3 }, { -4, 11, -24, 90, 70, -21, 10, -4 },
+ { -4, 11, -23, 80, 80, -23, 11, -4 }, { -4, 10, -21, 70, 90, -24, 11, -4 },
+ { -3, 9, -19, 59, 100, -24, 10, -4 }, { -3, 8, -16, 48, 108, -23, 10, -4 },
+ { -2, 6, -13, 37, 115, -20, 9, -4 }, { -2, 5, -10, 27, 121, -17, 7, -3 },
+ { -1, 3, -6, 17, 125, -13, 5, -2 }, { 0, 1, -3, 8, 127, -7, 3, -1 }
+};
+
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
+ // freqmultiplier = 0.5 (variant used without CONFIG_EXT_INTERP)
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { -3, -1, 32, 64, 38, 1, -3, 0 },
+ { -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 26, 63, 43, 4, -4, 0 },
+ { -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 21, 60, 49, 7, -4, 0 },
+ { -1, -4, 18, 59, 51, 9, -4, 0 }, { -1, -4, 16, 57, 53, 12, -4, -1 },
+ { -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 12, 53, 57, 16, -4, -1 },
+ { 0, -4, 9, 51, 59, 18, -4, -1 }, { 0, -4, 7, 49, 60, 21, -3, -2 },
+ { 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 4, 43, 63, 26, -2, -2 },
+ { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 }
+};
+#endif // CONFIG_EXT_INTERP
+
+#if CONFIG_EXT_INTRA
+const InterpKernel *vp10_intra_filter_kernels[INTRA_FILTERS] = {  // indexed by INTRA_FILTER
+ bilinear_filters, // INTRA_FILTER_LINEAR
+ sub_pel_filters_8, // INTRA_FILTER_8TAP
+ sub_pel_filters_8sharp, // INTRA_FILTER_8TAP_SHARP
+ sub_pel_filters_8smooth, // INTRA_FILTER_8TAP_SMOOTH
+};
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTERP
+static const InterpFilterParams
+ vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {  // indexed by INTERP_FILTER value
+ { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS },  // 0: EIGHTTAP_REGULAR
+ { (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS },  // 1: EIGHTTAP_SMOOTH
+ { (const int16_t *)sub_pel_filters_10sharp, 10, SUBPEL_SHIFTS },  // 2: MULTITAP_SHARP
+ { (const int16_t *)sub_pel_filters_8smooth2, SUBPEL_TAPS, SUBPEL_SHIFTS },  // 3: EIGHTTAP_SMOOTH2
+ { (const int16_t *)sub_pel_filters_12sharp, 12, SUBPEL_SHIFTS },  // 4: MULTITAP_SHARP2
+ { (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS }  // 5: BILINEAR
+ };
+#else
+static const InterpFilterParams
+ vp10_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {  // indexed by INTERP_FILTER value
+ { (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS },  // 0: EIGHTTAP_REGULAR
+ { (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS },  // 1: EIGHTTAP_SMOOTH
+ { (const int16_t *)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS },  // 2: MULTITAP_SHARP
+ { (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS }  // 3: BILINEAR
+ };
+#endif // CONFIG_EXT_INTERP
+
+#if USE_TEMPORALFILTER_12TAP
+static const InterpFilterParams vp10_interp_temporalfilter_12tap = {  // kept outside the params list
+ (const int16_t *)sub_pel_filters_temporalfilter_12, 12, SUBPEL_SHIFTS
+};
+#endif // USE_TEMPORALFILTER_12TAP
+
+InterpFilterParams vp10_get_interp_filter_params(
+ const INTERP_FILTER interp_filter) {  // returns the parameter record by value
+#if USE_TEMPORALFILTER_12TAP
+ if (interp_filter == TEMPORALFILTER_12TAP)  // must be tested first: this value is past the list's last index
+ return vp10_interp_temporalfilter_12tap;
+#endif // USE_TEMPORALFILTER_12TAP
+ return vp10_interp_filter_params_list[interp_filter];  // no range check on interp_filter
+}
+
+const int16_t *vp10_get_interp_filter_kernel(
+ const INTERP_FILTER interp_filter) {  // returns the start of the filter's coefficient table
+#if USE_TEMPORALFILTER_12TAP
+ if (interp_filter == TEMPORALFILTER_12TAP)  // must be tested first: this value is past the list's last index
+ return vp10_interp_temporalfilter_12tap.filter_ptr;
+#endif // USE_TEMPORALFILTER_12TAP
+ return (const int16_t *)vp10_interp_filter_params_list[interp_filter]
+ .filter_ptr;  // no range check on interp_filter
+}
+
+SubpelFilterCoeffs vp10_get_subpel_filter_signal_dir(const InterpFilterParams p,
+ int index) {  // returns row |index| of the *_signal_dir table paired with p.filter_ptr, else NULL
+#if CONFIG_EXT_INTERP && HAVE_SSSE3
+ if (p.filter_ptr == (const int16_t *)sub_pel_filters_12sharp) {  // filter identified by table address
+ return &sub_pel_filters_12sharp_signal_dir[index][0];
+ }
+ if (p.filter_ptr == (const int16_t *)sub_pel_filters_10sharp) {
+ return &sub_pel_filters_10sharp_signal_dir[index][0];
+ }
+#endif
+#if USE_TEMPORALFILTER_12TAP && HAVE_SSSE3
+ if (p.filter_ptr == (const int16_t *)sub_pel_filters_temporalfilter_12) {
+ return &sub_pel_filters_temporalfilter_12_signal_dir[index][0];
+ }
+#endif
+ (void)p;  // silences unused-parameter warnings when both #if blocks are compiled out
+ (void)index;  // NOTE(review): |index| is not range-checked; filter.h declares 15 rows
+ return NULL;  // no matching table for this filter or build configuration
+}
+
+SubpelFilterCoeffs vp10_get_subpel_filter_ver_signal_dir(
+ const InterpFilterParams p, int index) {  // as above, but for the *_ver_signal_dir tables
+#if CONFIG_EXT_INTERP && HAVE_SSSE3
+ if (p.filter_ptr == (const int16_t *)sub_pel_filters_12sharp) {  // filter identified by table address
+ return &sub_pel_filters_12sharp_ver_signal_dir[index][0];
+ }
+ if (p.filter_ptr == (const int16_t *)sub_pel_filters_10sharp) {
+ return &sub_pel_filters_10sharp_ver_signal_dir[index][0];
+ }
+#endif
+#if USE_TEMPORALFILTER_12TAP && HAVE_SSSE3
+ if (p.filter_ptr == (const int16_t *)sub_pel_filters_temporalfilter_12) {
+ return &sub_pel_filters_temporalfilter_12_ver_signal_dir[index][0];
+ }
+#endif
+ (void)p;  // silences unused-parameter warnings when both #if blocks are compiled out
+ (void)index;  // NOTE(review): |index| is not range-checked; filter.h declares 15 rows
+ return NULL;  // no matching table for this filter or build configuration
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+HbdSubpelFilterCoeffs vp10_hbd_get_subpel_filter_ver_signal_dir(
+ const InterpFilterParams p, int index) {  // row |index| of the *_highbd_ver_signal_dir table, else NULL
+#if CONFIG_EXT_INTERP && HAVE_SSE4_1
+ if (p.filter_ptr == (const int16_t *)sub_pel_filters_12sharp) {  // filter identified by table address
+ return &sub_pel_filters_12sharp_highbd_ver_signal_dir[index][0];
+ }
+ if (p.filter_ptr == (const int16_t *)sub_pel_filters_10sharp) {
+ return &sub_pel_filters_10sharp_highbd_ver_signal_dir[index][0];
+ }
+#endif
+#if USE_TEMPORALFILTER_12TAP && HAVE_SSE4_1
+ if (p.filter_ptr == (const int16_t *)sub_pel_filters_temporalfilter_12) {
+ return &sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[index][0];
+ }
+#endif
+ (void)p;  // silences unused-parameter warnings when both #if blocks are compiled out
+ (void)index;  // NOTE(review): |index| is not range-checked; filter.h declares 15 rows
+ return NULL;  // no matching table for this filter or build configuration
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/filter.h b/av1/common/filter.h
new file mode 100644
index 0000000..39fad23
--- /dev/null
+++ b/av1/common/filter.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_FILTER_H_
+#define VP10_COMMON_FILTER_H_
+
+#include "./vpx_config.h"
+#include "aom/vpx_integer.h"
+#include "aom_dsp/vpx_filter.h"
+#include "aom_ports/mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define EIGHTTAP_REGULAR 0  // filter indices; filter.c orders its params list by these values
+#define EIGHTTAP_SMOOTH 1
+#define MULTITAP_SHARP 2
+
+#if CONFIG_EXT_INTERP
+#define EIGHTTAP_SMOOTH2 3
+#define MULTITAP_SHARP2 4
+
+#define MAX_SUBPEL_TAPS 12
+
+#define SUPPORT_NONINTERPOLATING_FILTERS 0 /* turn on for experimentation */
+#define SWITCHABLE_FILTERS 5 /* Number of switchable filters */
+#else
+#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
+#endif // CONFIG_EXT_INTERP
+
+#define USE_TEMPORALFILTER_12TAP 1
+#if USE_TEMPORALFILTER_12TAP
+#define TEMPORALFILTER_12TAP (SWITCHABLE_FILTERS + 1)  // NOTE(review): same value as SWITCHABLE below
+#endif  // USE_TEMPORALFILTER_12TAP
+
+// The codec can operate in four possible inter prediction filter modes:
+// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
+
+#define BILINEAR (SWITCHABLE_FILTERS)
+#define SWITCHABLE (SWITCHABLE_FILTERS + 1) /* the last one */
+#if CONFIG_DUAL_FILTER
+#define SWITCHABLE_FILTER_CONTEXTS ((SWITCHABLE_FILTERS + 1) * 4)
+#define INTER_FILTER_COMP_OFFSET (SWITCHABLE_FILTERS + 1)
+#define INTER_FILTER_DIR_OFFSET ((SWITCHABLE_FILTERS + 1) * 2)
+#else
+#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
+#endif  // CONFIG_DUAL_FILTER
+
+typedef uint8_t INTERP_FILTER;  // holds one of the filter index values defined above
+
+#if CONFIG_EXT_INTRA
+typedef enum {  // index into vp10_intra_filter_kernels
+ INTRA_FILTER_LINEAR,
+ INTRA_FILTER_8TAP,
+ INTRA_FILTER_8TAP_SHARP,
+ INTRA_FILTER_8TAP_SMOOTH,
+ INTRA_FILTERS,  // 4: number of intra filters (a count, not a filter)
+} INTRA_FILTER;
+
+extern const InterpKernel *vp10_intra_filter_kernels[INTRA_FILTERS];  // defined in filter.c
+#endif // CONFIG_EXT_INTRA
+
+typedef struct InterpFilterParams {
+ const int16_t *filter_ptr;  // first coefficient of the table; rows are |taps| entries long
+ uint16_t taps;  // coefficients per row (8, 10 or 12 in filter.c)
+ uint16_t subpel_shifts;  // filter.c always stores SUBPEL_SHIFTS here
+} InterpFilterParams;
+
+InterpFilterParams vp10_get_interp_filter_params(
+ const INTERP_FILTER interp_filter);  // returns the record by value
+
+const int16_t *vp10_get_interp_filter_kernel(const INTERP_FILTER interp_filter);  // table start only
+
+static INLINE const int16_t *vp10_get_interp_filter_subpel_kernel(
+    const InterpFilterParams filter_params, const int subpel) {
+  return &filter_params.filter_ptr[filter_params.taps * subpel];  // row |subpel|, |taps| entries per row
+}
+
+static INLINE int vp10_is_interpolating_filter(
+    const INTERP_FILTER interp_filter) {
+  const InterpFilterParams fp = vp10_get_interp_filter_params(interp_filter);
+  return fp.filter_ptr[fp.taps / 2 - 1] == 128;  // tap (taps / 2 - 1) of the first row
+}
+
+#if USE_TEMPORALFILTER_12TAP
+extern const int8_t sub_pel_filters_temporalfilter_12_signal_dir[15][2][16];
+extern const int8_t sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16];
+#if CONFIG_VP9_HIGHBITDEPTH
+extern const int16_t
+ sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8];
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // USE_TEMPORALFILTER_12TAP
+
+#if CONFIG_EXT_INTERP  // these tables are defined outside filter.c; 15 rows each
+extern const int8_t sub_pel_filters_12sharp_signal_dir[15][2][16];
+extern const int8_t sub_pel_filters_10sharp_signal_dir[15][2][16];
+extern const int8_t sub_pel_filters_12sharp_ver_signal_dir[15][6][16];
+extern const int8_t sub_pel_filters_10sharp_ver_signal_dir[15][6][16];
+#if CONFIG_VP9_HIGHBITDEPTH
+extern const int16_t sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8];
+extern const int16_t sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8];
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // CONFIG_EXT_INTERP
+
+typedef const int8_t (*SubpelFilterCoeffs)[16];  // pointer to one 16-entry int8_t group
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef const int16_t (*HbdSubpelFilterCoeffs)[8];  // pointer to one 8-entry int16_t group
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+SubpelFilterCoeffs vp10_get_subpel_filter_signal_dir(const InterpFilterParams p,
+ int index);  // NULL when no table matches p
+
+SubpelFilterCoeffs vp10_get_subpel_filter_ver_signal_dir(
+ const InterpFilterParams p, int index);  // NULL when no table matches p
+#if CONFIG_VP9_HIGHBITDEPTH
+HbdSubpelFilterCoeffs vp10_hbd_get_subpel_filter_ver_signal_dir(
+ const InterpFilterParams p, int index);  // NULL when no table matches p
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_FILTER_H_
diff --git a/av1/common/frame_buffers.c b/av1/common/frame_buffers.c
new file mode 100644
index 0000000..5c736a9
--- /dev/null
+++ b/av1/common/frame_buffers.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/frame_buffers.h"
+#include "aom_mem/vpx_mem.h"
+
+int vp10_alloc_internal_frame_buffers(InternalFrameBufferList *list) {
+  const int num_fb = VPX_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+  assert(list != NULL);
+  vp10_free_internal_frame_buffers(list);  // drop any previous allocation first
+  list->int_fb =
+      (InternalFrameBuffer *)vpx_calloc(num_fb, sizeof(*list->int_fb));
+  // Publish the count only on success, so the list never claims buffers that
+  // a later free or lookup would reach through a NULL |int_fb|.
+  list->num_internal_frame_buffers = (list->int_fb != NULL) ? num_fb : 0;
+  return (list->int_fb == NULL);  // 0 on success, 1 if the allocation failed
+}
+
+void vp10_free_internal_frame_buffers(InternalFrameBufferList *list) {
+  int i;
+  assert(list != NULL);
+  // |int_fb| may be NULL (never allocated, allocation failed, already freed).
+  if (list->int_fb != NULL) {
+    for (i = 0; i < list->num_internal_frame_buffers; ++i)
+      vpx_free(list->int_fb[i].data);
+    vpx_free(list->int_fb);
+  }
+  list->int_fb = NULL;
+  list->num_internal_frame_buffers = 0;  // keeps repeated frees and lookups safe
+}
+
+int vp10_get_frame_buffer(void *cb_priv, size_t min_size,
+                          vpx_codec_frame_buffer_t *fb) {
+  int i;
+  InternalFrameBufferList *const int_fb_list =
+      (InternalFrameBufferList *)cb_priv;
+  if (int_fb_list == NULL) return -1;
+
+  // Find a free frame buffer.
+  for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) {
+    if (!int_fb_list->int_fb[i].in_use) break;
+  }
+  // Every buffer is currently handed out.
+  if (i == int_fb_list->num_internal_frame_buffers) return -1;
+
+  if (int_fb_list->int_fb[i].size < min_size) {
+    // Grow via a temporary: on failure the old block and size stay consistent
+    uint8_t *const data =
+        (uint8_t *)vpx_realloc(int_fb_list->int_fb[i].data, min_size);
+    if (!data) return -1;  // assumes vpx_realloc keeps the old block on failure
+    int_fb_list->int_fb[i].data = data;
+    // This memset is needed for fixing valgrind error from C loop filter
+    // due to access uninitialized memory in frame border. It could be
+    // removed if border is totally removed.
+    memset(int_fb_list->int_fb[i].data, 0, min_size);
+    int_fb_list->int_fb[i].size = min_size;
+  }
+
+  fb->data = int_fb_list->int_fb[i].data;
+  fb->size = int_fb_list->int_fb[i].size;
+  int_fb_list->int_fb[i].in_use = 1;
+  // Set the frame buffer's private data to point at the internal frame buffer.
+  fb->priv = &int_fb_list->int_fb[i];
+  return 0;
+}
+
+int vp10_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) {
+  InternalFrameBuffer *const owner = (InternalFrameBuffer *)fb->priv;
+  (void)cb_priv;  // part of the callback signature, unused here
+  if (owner != NULL) owner->in_use = 0;  // mark the buffer free; a NULL priv is ignored
+  return 0;
+}
diff --git a/av1/common/frame_buffers.h b/av1/common/frame_buffers.h
new file mode 100644
index 0000000..6667132
--- /dev/null
+++ b/av1/common/frame_buffers.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_FRAME_BUFFERS_H_
+#define VP10_COMMON_FRAME_BUFFERS_H_
+
+#include "aom/vpx_frame_buffer.h"
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct InternalFrameBuffer {
+ uint8_t *data;  // heap block managed with vpx_realloc/vpx_free in frame_buffers.c
+ size_t size;  // size in bytes last recorded for |data|
+ int in_use;  // set to 1 when handed out, cleared by the release callback
+} InternalFrameBuffer;
+
+typedef struct InternalFrameBufferList {
+ int num_internal_frame_buffers;  // number of entries in |int_fb|
+ InternalFrameBuffer *int_fb;  // array allocated by vp10_alloc_internal_frame_buffers()
+} InternalFrameBufferList;
+
+// Initializes |list|. Returns 0 on success, 1 if the allocation fails.
+int vp10_alloc_internal_frame_buffers(InternalFrameBufferList *list);
+
+// Free any data allocated to the frame buffers.
+void vp10_free_internal_frame_buffers(InternalFrameBufferList *list);
+
+// Callback used by libaom to request an external frame buffer. |cb_priv| is
+// the callback private data, which points to an InternalFrameBufferList.
+// |min_size| is the minimum size in bytes needed to decode the next frame.
+// |fb| is a pointer to the frame buffer. Returns 0 on success, -1 on failure.
+int vp10_get_frame_buffer(void *cb_priv, size_t min_size,
+ vpx_codec_frame_buffer_t *fb);
+
+// Callback used by libaom when there are no references to the frame buffer.
+// |cb_priv| is not used. |fb| is a pointer to the frame buffer. Returns 0.
+int vp10_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_FRAME_BUFFERS_H_
diff --git a/av1/common/idct.c b/av1/common/idct.c
new file mode 100644
index 0000000..83b44d5
--- /dev/null
+++ b/av1/common/idct.c
@@ -0,0 +1,2230 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "av1/common/blockd.h"
+#include "av1/common/enums.h"
+#include "av1/common/idct.h"
+#include "av1/common/vp10_inv_txfm2d_cfg.h"
+#include "aom_dsp/inv_txfm.h"
+#include "aom_ports/mem.h"
+
+int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
+ const TX_SIZE tx_size) {  // returns 1 when tx_size maps to TX_32X32, else 0
+ (void)tx_type;  // unused
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ return txsize_sqr_up_map[tx_size] == TX_32X32;  // NOTE(review): same expression as the final return
+ }
+#else
+ (void)xd;  // only read in the high-bitdepth build
+#endif
+ return txsize_sqr_up_map[tx_size] == TX_32X32;
+}
+
+#if CONFIG_EXT_TX
+static void iidtx4_c(const tran_low_t *input, tran_low_t *output) {
+  int k = 0;  // 4 entries: input[k] * Sqrt2 passed through dct_const_round_shift()
+  for (; k < 4; ++k)
+    output[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
+}
+
+static void iidtx8_c(const tran_low_t *input, tran_low_t *output) {
+  int k = 0;  // 8 entries: plain doubling, no rounding step
+  for (; k < 8; ++k) output[k] = input[k] * 2;
+}
+
+static void iidtx16_c(const tran_low_t *input, tran_low_t *output) {
+  int k = 0;  // 16 entries: input[k] * 2 * Sqrt2 passed through dct_const_round_shift()
+  for (; k < 16; ++k)
+    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 2 * Sqrt2);
+}
+
+static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
+  int k = 0;  // 32 entries: plain multiplication by 4, no rounding step
+  for (; k < 32; ++k) output[k] = input[k] * 4;
+}
+
+// For use in lieu of DST
+static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
+  int k;
+  tran_low_t scaled_lo[16];
+  // output[0..15]: the second half of the input, multiplied by 4.
+  for (k = 0; k < 16; ++k) output[k] = input[16 + k] * 4;
+  // Multiply the first half of the input by sqrt(2). This loop stays after
+  // the one above so the original read/write order is preserved.
+  for (k = 0; k < 16; ++k)
+    scaled_lo[k] = (tran_low_t)dct_const_round_shift(input[k] * Sqrt2);
+  // output[16..31]: idct16_c() applied to the scaled first half.
+  idct16_c(scaled_lo, output + 16);
+  // Note overall scaling factor is 4 times orthogonal
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_iidtx4_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {  // high-bitdepth counterpart of iidtx4_c; |bd| is forwarded to HIGHBD_WRAPLOW
+ int i;
+ for (i = 0; i < 4; ++i)  // 4 entries: input[i] * Sqrt2, rounded, then wrapped
+ output[i] =
+ HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
+}
+
+static void highbd_iidtx8_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {  // same arithmetic as iidtx8_c
+ int i;
+ (void)bd;  // unused: no rounding or wrapping step here
+ for (i = 0; i < 8; ++i) output[i] = input[i] * 2;
+}
+
+static void highbd_iidtx16_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {  // high-bitdepth counterpart of iidtx16_c; |bd| is forwarded to HIGHBD_WRAPLOW
+ int i;
+ for (i = 0; i < 16; ++i)  // 16 entries: input[i] * 2 * Sqrt2, rounded, then wrapped
+ output[i] =
+ HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * 2 * Sqrt2), bd);
+}
+
+static void highbd_iidtx32_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {  // same arithmetic as iidtx32_c
+ int i;
+ (void)bd;  // unused: no rounding or wrapping step here
+ for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
+}
+
+static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {  // high-bitdepth counterpart of ihalfright32_c
+ int i;
+ tran_low_t inputhalf[16];  // first half of the input after the sqrt(2) scaling
+ for (i = 0; i < 16; ++i) {
+ output[i] = input[16 + i] * 4;  // output[0..15] <- 4 * input[16..31]
+ }
+ // Multiply input by sqrt(2)
+ for (i = 0; i < 16; ++i) {
+ inputhalf[i] =
+ HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * Sqrt2), bd);
+ }
+ vpx_highbd_idct16_c(inputhalf, output + 16, bd);  // result goes to output[16..31]
+ // Note overall scaling factor is 4 times orthogonal
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// Inverse identity transform and add: for IDTX, each of the bs x bs input
+// values is shifted right and added to |dest| via clip_pixel_add().
+static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                           int bs, int tx_type) {
+  int row, col;
+  const int shift = bs < 32 ? 3 : 2;
+  if (tx_type != IDTX) return;  // any other type leaves |dest| untouched
+  for (row = 0; row < bs; ++row) {
+    for (col = 0; col < bs; ++col)
+      dest[col] = clip_pixel_add(dest[col], input[col] >> shift);
+    dest += stride;  // next destination row
+    input += bs;     // input rows are stored back to back
+  }
+}
+
+#define FLIPUD_PTR(dest, stride, size) \
+ do { /* modifies both dest and stride in place; arguments must be lvalues */ \
+ (dest) = (dest) + ((size)-1) * (stride); /* move to the last of |size| rows */ \
+ (stride) = -(stride); /* then step backwards from there */ \
+ } while (0)
+
+static void maybe_flip_strides(uint8_t **dst, int *dstride, tran_low_t **src,
+ int *sstride, int tx_type, int sizey,
+ int sizex) {  // rewrites *dst/*dstride and/or *src/*sstride in place
+ // Note that the transpose of src will be added to dst. In order to LR
+ // flip the addends (in dst coordinates), we UD flip the src. To UD flip
+ // the addends, we UD flip the dst.
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case IDTX:
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST: break;  // no flip for these types
+ case FLIPADST_DCT:
+ case FLIPADST_ADST:
+ case V_FLIPADST:
+ // flip UD
+ FLIPUD_PTR(*dst, *dstride, sizey);
+ break;
+ case DCT_FLIPADST:
+ case ADST_FLIPADST:
+ case H_FLIPADST:
+ // flip LR
+ FLIPUD_PTR(*src, *sstride, sizex);
+ break;
+ case FLIPADST_FLIPADST:
+ // flip UD
+ FLIPUD_PTR(*dst, *dstride, sizey);
+ // flip LR
+ FLIPUD_PTR(*src, *sstride, sizex);
+ break;
+ default: assert(0); break;  // tx_type not listed above
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// 4-point high-bitdepth 1-D inverse transform ("idst4" per its name).
+// Stage 1 forms two cospi_16_64 products from input[3] and input[1] and one
+// cospi_8_64 / cospi_24_64 rotation of input[2] and input[0]; stage 2
+// combines the four rounded, wrapped intermediates into output[0..3].
+// Every input is read before any output is stored.
+void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
+  tran_high_t sum31, diff31, rot_lo, rot_hi;
+  tran_low_t s0, s1, s2, s3;
+  (void)bd;
+
+  // stage 1
+  sum31 = (input[3] + input[1]) * cospi_16_64;
+  diff31 = (input[3] - input[1]) * cospi_16_64;
+  rot_lo = input[2] * cospi_24_64 - input[0] * cospi_8_64;
+  rot_hi = input[2] * cospi_8_64 + input[0] * cospi_24_64;
+  s0 = HIGHBD_WRAPLOW(dct_const_round_shift(sum31), bd);
+  s1 = HIGHBD_WRAPLOW(dct_const_round_shift(diff31), bd);
+  s2 = HIGHBD_WRAPLOW(dct_const_round_shift(rot_lo), bd);
+  s3 = HIGHBD_WRAPLOW(dct_const_round_shift(rot_hi), bd);
+
+  // stage 2
+  output[0] = HIGHBD_WRAPLOW(s0 + s3, bd);
+  output[1] = HIGHBD_WRAPLOW(-s1 - s2, bd);
+  output[2] = HIGHBD_WRAPLOW(s1 - s2, bd);
+  output[3] = HIGHBD_WRAPLOW(s3 - s0, bd);
+}
+
+// 8-point high-bitdepth 1-D inverse transform ("idst8" per its name).
+// Reads input[0..7] in a permuted order, runs four butterfly stages that
+// ping-pong between step1[] and step2[], and writes output[0..7]; the
+// odd-indexed outputs are formed with negated terms. All inputs are consumed
+// in stage 1, before any output element is written.
+void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
+ tran_low_t step1[8], step2[8];
+ tran_high_t temp1, temp2;
+ // NOTE(review): bd is passed to HIGHBD_WRAPLOW below; presumably this cast
+ // covers builds where that macro drops its bd argument - confirm.
+ (void)bd;
+ // stage 1
+ // Odd-indexed inputs are copied through in reversed order...
+ step1[0] = input[7];
+ step1[2] = input[3];
+ step1[1] = input[5];
+ step1[3] = input[1];
+ // ...while even-indexed inputs go through two plane rotations.
+ temp1 = input[6] * cospi_28_64 - input[0] * cospi_4_64;
+ temp2 = input[6] * cospi_4_64 + input[0] * cospi_28_64;
+ step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = input[2] * cospi_12_64 - input[4] * cospi_20_64;
+ temp2 = input[2] * cospi_20_64 + input[4] * cospi_12_64;
+ step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ // stage 2
+ temp1 = (step1[0] + step1[2]) * cospi_16_64;
+ temp2 = (step1[0] - step1[2]) * cospi_16_64;
+ step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+ step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
+
+ // stage 3
+ step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd);
+ step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd);
+ step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd);
+ step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+ step1[7] = step2[7];
+
+ // stage 4
+ // Final butterflies; odd-indexed outputs carry negated terms.
+ output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+ output[1] = HIGHBD_WRAPLOW(-step1[1] - step1[6], bd);
+ output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+ output[3] = HIGHBD_WRAPLOW(-step1[3] - step1[4], bd);
+ output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+ output[5] = HIGHBD_WRAPLOW(-step1[2] + step1[5], bd);
+ output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+ output[7] = HIGHBD_WRAPLOW(-step1[0] + step1[7], bd);
+}
+
+// 16-point high-bitdepth 1-D inverse transform ("idst16" per its name).
+// Stage 1 loads input[0..15] into step1[] in a permuted order; stages 2-6 are
+// butterfly / rotation stages that ping-pong between step1[] and step2[];
+// stage 7 writes output[0..15], with the odd-indexed outputs formed from
+// negated terms. All inputs are consumed in stage 1, before any output
+// element is written.
+void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
+ // vp9_highbd_igentx16(input, output, bd, Tx16);
+ tran_low_t step1[16], step2[16];
+ tran_high_t temp1, temp2;
+ // NOTE(review): bd is passed to HIGHBD_WRAPLOW below; presumably this cast
+ // covers builds where that macro drops its bd argument - confirm.
+ (void)bd;
+
+ // stage 1
+ // Pure permutation of the input; no arithmetic yet.
+ step1[0] = input[15];
+ step1[1] = input[7];
+ step1[2] = input[11];
+ step1[3] = input[3];
+ step1[4] = input[13];
+ step1[5] = input[5];
+ step1[6] = input[9];
+ step1[7] = input[1];
+ step1[8] = input[14];
+ step1[9] = input[6];
+ step1[10] = input[10];
+ step1[11] = input[2];
+ step1[12] = input[12];
+ step1[13] = input[4];
+ step1[14] = input[8];
+ step1[15] = input[0];
+
+ // stage 2
+ // Elements 0..7 pass through; elements 8..15 are rotated in pairs
+ // (8,15), (9,14), (10,13), (11,12).
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+ temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+ step2[8] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[15] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+ temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+ step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+ temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+ step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+ temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+ step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ // stage 3
+ // Elements 0..3 pass through, 4..7 are rotated in pairs (4,7) and (5,6),
+ // and 8..15 are combined by add/subtract butterflies.
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+ temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+ step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+ temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+ step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+
+ step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd);
+ step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd);
+ step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd);
+ step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd);
+ step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd);
+ step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd);
+ step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd);
+ step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd);
+
+ // stage 4
+ temp1 = (step1[0] + step1[1]) * cospi_16_64;
+ temp2 = (step1[0] - step1[1]) * cospi_16_64;
+ step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+ step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
+
+ // Elements 8, 11, 12 and 15 pass through; (9,14) and (10,13) are rotated.
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+ temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+ step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+ temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+ step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ // stage 5
+ step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd);
+ step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd);
+ step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd);
+ step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+ step1[7] = step2[7];
+
+ step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd);
+ step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd);
+ step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd);
+ step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd);
+ step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd);
+ step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd);
+ step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd);
+ step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd);
+
+ // stage 6
+ step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+ step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd);
+ step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+ step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd);
+ step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd);
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+ temp2 = (step1[10] + step1[13]) * cospi_16_64;
+ step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+ temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+ temp2 = (step1[11] + step1[12]) * cospi_16_64;
+ step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ // stage 7
+ // Final butterflies pairing step2[k] with step2[15 - k]; odd-indexed
+ // outputs carry negated terms.
+ output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd);
+ output[1] = HIGHBD_WRAPLOW(-step2[1] - step2[14], bd);
+ output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd);
+ output[3] = HIGHBD_WRAPLOW(-step2[3] - step2[12], bd);
+ output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd);
+ output[5] = HIGHBD_WRAPLOW(-step2[5] - step2[10], bd);
+ output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd);
+ output[7] = HIGHBD_WRAPLOW(-step2[7] - step2[8], bd);
+ output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd);
+ output[9] = HIGHBD_WRAPLOW(-step2[6] + step2[9], bd);
+ output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd);
+ output[11] = HIGHBD_WRAPLOW(-step2[4] + step2[11], bd);
+ output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd);
+ output[13] = HIGHBD_WRAPLOW(-step2[2] + step2[13], bd);
+ output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd);
+ output[15] = HIGHBD_WRAPLOW(-step2[0] + step2[15], bd);
+}
+
+// High-bitdepth inverse identity transform and add. dest8 is converted with
+// CONVERT_TO_SHORTPTR and addressed as 16-bit samples. For tx_type == IDTX,
+// every coefficient of the bs x bs block is right-shifted (by 3 when
+// bs < 32, otherwise by 2) and added to the matching sample through
+// highbd_clip_pixel_add() at bit depth bd. Any other tx_type writes nothing.
+static void highbd_inv_idtx_add_c(const tran_low_t *input, uint8_t *dest8,
+                                  int stride, int bs, int tx_type, int bd) {
+  const int shift = (bs >= 32) ? 2 : 3;
+  uint16_t *const dest = CONVERT_TO_SHORTPTR(dest8);
+  int row, col;
+
+  if (tx_type != IDTX) return;
+
+  for (row = 0; row < bs; ++row) {
+    const tran_low_t *const src_row = input + row * bs;
+    uint16_t *const dst_row = dest + row * stride;
+    for (col = 0; col < bs; ++col) {
+      dst_row[col] =
+          highbd_clip_pixel_add(dst_row[col], src_row[col] >> shift, bd);
+    }
+  }
+}
+
+// 16-bit destination counterpart of the flip helper: adjusts the destination
+// and/or source base pointer and stride so that the final "sum with
+// destination" loop applies the flip implied by tx_type.
+//
+// The transpose of src is what gets added to dst. A left/right flip of the
+// addends (in dst coordinates) is therefore obtained by flipping src
+// up/down, and an up/down flip of the addends by flipping dst up/down.
+// Non-flipping transform types leave all four values unchanged; an unknown
+// tx_type trips assert(0).
+static void maybe_flip_strides16(uint16_t **dst, int *dstride, tran_low_t **src,
+                                 int *sstride, int tx_type, int sizey,
+                                 int sizex) {
+  int flip_ud = 0;
+  int flip_lr = 0;
+
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    case IDTX:
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST: break;
+    case FLIPADST_DCT:
+    case FLIPADST_ADST:
+    case V_FLIPADST: flip_ud = 1; break;
+    case DCT_FLIPADST:
+    case ADST_FLIPADST:
+    case H_FLIPADST: flip_lr = 1; break;
+    case FLIPADST_FLIPADST:
+      flip_ud = 1;
+      flip_lr = 1;
+      break;
+    default: assert(0); break;
+  }
+
+  if (flip_ud) {
+    // flip UD
+    FLIPUD_PTR(*dst, *dstride, sizey);
+  }
+  if (flip_lr) {
+    // flip LR
+    FLIPUD_PTR(*src, *sstride, sizex);
+  }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_TX
+
+// 4x4 inverse hybrid transform and add (C reference).
+// Applies the row transform selected by tx_type to each of the 4 input rows,
+// transposes, applies the column transform in place, optionally flips (only
+// with CONFIG_EXT_TX), and finally adds the residual, rounded with a shift
+// of 4, to dest using pixel clipping.
+void vp10_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                          int tx_type) {
+  static const transform_2d IHT_4[] = {
+    { idct4_c, idct4_c },    // DCT_DCT
+    { iadst4_c, idct4_c },   // ADST_DCT
+    { idct4_c, iadst4_c },   // DCT_ADST
+    { iadst4_c, iadst4_c },  // ADST_ADST
+#if CONFIG_EXT_TX
+    { iadst4_c, idct4_c },   // FLIPADST_DCT
+    { idct4_c, iadst4_c },   // DCT_FLIPADST
+    { iadst4_c, iadst4_c },  // FLIPADST_FLIPADST
+    { iadst4_c, iadst4_c },  // ADST_FLIPADST
+    { iadst4_c, iadst4_c },  // FLIPADST_ADST
+    { iidtx4_c, iidtx4_c },  // IDTX
+    { idct4_c, iidtx4_c },   // V_DCT
+    { iidtx4_c, idct4_c },   // H_DCT
+    { iadst4_c, iidtx4_c },  // V_ADST
+    { iidtx4_c, iadst4_c },  // H_ADST
+    { iadst4_c, iidtx4_c },  // V_FLIPADST
+    { iidtx4_c, iadst4_c },  // H_FLIPADST
+#endif  // CONFIG_EXT_TX
+  };
+  const transform_2d *const tx = &IHT_4[tx_type];
+  tran_low_t out[4][4];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 4;
+  int r, c;
+
+  // Row pass: one 1-D inverse transform per row of coefficients.
+  for (r = 0; r < 4; ++r) {
+    tx->rows(input + 4 * r, out[r]);
+  }
+
+  // In-place transpose (swap across the diagonal).
+  for (r = 0; r < 4; ++r) {
+    for (c = r + 1; c < 4; ++c) {
+      const tran_low_t swap = out[r][c];
+      out[r][c] = out[c][r];
+      out[c][r] = swap;
+    }
+  }
+
+  // Column pass, performed in place on each transposed row.
+  for (c = 0; c < 4; ++c) {
+    tx->cols(out[c], out[c]);
+  }
+
+#if CONFIG_EXT_TX
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
+#endif
+
+  // Accumulate the transposed result into the destination.
+  for (r = 0; r < 4; ++r) {
+    for (c = 0; c < 4; ++c) {
+      const int d = r * stride + c;
+      const int s = c * outstride + r;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4));
+    }
+  }
+}
+
+#if CONFIG_EXT_TX
+// 4x8 (4 columns, 8 rows) inverse hybrid transform and add (C reference).
+// Each of the 8 input rows goes through the 4-point row transform; the result
+// is scaled by Sqrt2 (with rounding) and stored transposed. The 8-point column
+// transform then runs in place, flips are applied as tx_type requires, and
+// the residual, rounded with a shift of 5, is added to dest with clipping.
+void vp10_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                          int tx_type) {
+  static const transform_2d IHT_4x8[] = {
+    { idct8_c, idct4_c },    // DCT_DCT
+    { iadst8_c, idct4_c },   // ADST_DCT
+    { idct8_c, iadst4_c },   // DCT_ADST
+    { iadst8_c, iadst4_c },  // ADST_ADST
+    { iadst8_c, idct4_c },   // FLIPADST_DCT
+    { idct8_c, iadst4_c },   // DCT_FLIPADST
+    { iadst8_c, iadst4_c },  // FLIPADST_FLIPADST
+    { iadst8_c, iadst4_c },  // ADST_FLIPADST
+    { iadst8_c, iadst4_c },  // FLIPADST_ADST
+    { iidtx8_c, iidtx4_c },  // IDTX
+    { idct8_c, iidtx4_c },   // V_DCT
+    { iidtx8_c, idct4_c },   // H_DCT
+    { iadst8_c, iidtx4_c },  // V_ADST
+    { iidtx8_c, iadst4_c },  // H_ADST
+    { iadst8_c, iidtx4_c },  // V_FLIPADST
+    { iidtx8_c, iadst4_c },  // H_FLIPADST
+  };
+
+  const int num_cols = 4;
+  const int num_rows = 8;
+  const transform_2d *const tx = &IHT_4x8[tx_type];
+  tran_low_t out[4][8], rowbuf[4];
+  tran_low_t *outp = &out[0][0];
+  int outstride = num_rows;
+  int r, c;
+
+  // Row pass, Sqrt2 scaling, and transpose into out[col][row].
+  for (r = 0; r < num_rows; ++r) {
+    tx->rows(input + r * num_cols, rowbuf);
+    for (c = 0; c < num_cols; ++c) {
+      out[c][r] = (tran_low_t)dct_const_round_shift(rowbuf[c] * Sqrt2);
+    }
+  }
+
+  // Column pass, in place.
+  for (c = 0; c < num_cols; ++c) {
+    tx->cols(out[c], out[c]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, num_rows,
+                     num_cols);
+
+  // Accumulate the transposed result into the destination.
+  for (r = 0; r < num_rows; ++r) {
+    for (c = 0; c < num_cols; ++c) {
+      const int d = r * stride + c;
+      const int s = c * outstride + r;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+    }
+  }
+}
+
+// 8x4 (8 columns, 4 rows) inverse hybrid transform and add (C reference).
+// Each of the 4 input rows goes through the 8-point row transform; the result
+// is scaled by Sqrt2 (with rounding) and stored transposed. The 4-point column
+// transform then runs in place, flips are applied as tx_type requires, and
+// the residual, rounded with a shift of 5, is added to dest with clipping.
+void vp10_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                          int tx_type) {
+  static const transform_2d IHT_8x4[] = {
+    { idct4_c, idct8_c },    // DCT_DCT
+    { iadst4_c, idct8_c },   // ADST_DCT
+    { idct4_c, iadst8_c },   // DCT_ADST
+    { iadst4_c, iadst8_c },  // ADST_ADST
+    { iadst4_c, idct8_c },   // FLIPADST_DCT
+    { idct4_c, iadst8_c },   // DCT_FLIPADST
+    { iadst4_c, iadst8_c },  // FLIPADST_FLIPADST
+    { iadst4_c, iadst8_c },  // ADST_FLIPADST
+    { iadst4_c, iadst8_c },  // FLIPADST_ADST
+    { iidtx4_c, iidtx8_c },  // IDTX
+    { idct4_c, iidtx8_c },   // V_DCT
+    { iidtx4_c, idct8_c },   // H_DCT
+    { iadst4_c, iidtx8_c },  // V_ADST
+    { iidtx4_c, iadst8_c },  // H_ADST
+    { iadst4_c, iidtx8_c },  // V_FLIPADST
+    { iidtx4_c, iadst8_c },  // H_FLIPADST
+  };
+
+  const int num_rows = 4;
+  const int num_cols = 8;
+  const transform_2d *const tx = &IHT_8x4[tx_type];
+  tran_low_t out[8][4], rowbuf[8];
+  tran_low_t *outp = &out[0][0];
+  int outstride = num_rows;
+  int r, c;
+
+  // Row pass, Sqrt2 scaling, and transpose into out[col][row].
+  for (r = 0; r < num_rows; ++r) {
+    tx->rows(input + r * num_cols, rowbuf);
+    for (c = 0; c < num_cols; ++c) {
+      out[c][r] = (tran_low_t)dct_const_round_shift(rowbuf[c] * Sqrt2);
+    }
+  }
+
+  // Column pass, in place.
+  for (c = 0; c < num_cols; ++c) {
+    tx->cols(out[c], out[c]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, num_rows,
+                     num_cols);
+
+  // Accumulate the transposed result into the destination.
+  for (r = 0; r < num_rows; ++r) {
+    for (c = 0; c < num_cols; ++c) {
+      const int d = r * stride + c;
+      const int s = c * outstride + r;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+    }
+  }
+}
+
+// 8x16 (8 columns, 16 rows) inverse hybrid transform and add (C reference).
+// Each of the 16 input rows goes through the 8-point row transform; the
+// result is scaled by Sqrt2 (with rounding) and stored transposed. The
+// 16-point column transform then runs in place, flips are applied as tx_type
+// requires, and the residual, rounded with a shift of 6, is added to dest
+// with clipping.
+void vp10_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                            int tx_type) {
+  static const transform_2d IHT_8x16[] = {
+    { idct16_c, idct8_c },    // DCT_DCT
+    { iadst16_c, idct8_c },   // ADST_DCT
+    { idct16_c, iadst8_c },   // DCT_ADST
+    { iadst16_c, iadst8_c },  // ADST_ADST
+    { iadst16_c, idct8_c },   // FLIPADST_DCT
+    { idct16_c, iadst8_c },   // DCT_FLIPADST
+    { iadst16_c, iadst8_c },  // FLIPADST_FLIPADST
+    { iadst16_c, iadst8_c },  // ADST_FLIPADST
+    { iadst16_c, iadst8_c },  // FLIPADST_ADST
+    { iidtx16_c, iidtx8_c },  // IDTX
+    { idct16_c, iidtx8_c },   // V_DCT
+    { iidtx16_c, idct8_c },   // H_DCT
+    { iadst16_c, iidtx8_c },  // V_ADST
+    { iidtx16_c, iadst8_c },  // H_ADST
+    { iadst16_c, iidtx8_c },  // V_FLIPADST
+    { iidtx16_c, iadst8_c },  // H_FLIPADST
+  };
+
+  const int num_cols = 8;
+  const int num_rows = 16;
+  const transform_2d *const tx = &IHT_8x16[tx_type];
+  tran_low_t out[8][16], rowbuf[8];
+  tran_low_t *outp = &out[0][0];
+  int outstride = num_rows;
+  int r, c;
+
+  // Row pass, Sqrt2 scaling, and transpose into out[col][row].
+  for (r = 0; r < num_rows; ++r) {
+    tx->rows(input + r * num_cols, rowbuf);
+    for (c = 0; c < num_cols; ++c) {
+      out[c][r] = (tran_low_t)dct_const_round_shift(rowbuf[c] * Sqrt2);
+    }
+  }
+
+  // Column pass, in place.
+  for (c = 0; c < num_cols; ++c) {
+    tx->cols(out[c], out[c]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, num_rows,
+                     num_cols);
+
+  // Accumulate the transposed result into the destination.
+  for (r = 0; r < num_rows; ++r) {
+    for (c = 0; c < num_cols; ++c) {
+      const int d = r * stride + c;
+      const int s = c * outstride + r;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+
+// 16x8 (16 columns, 8 rows) inverse hybrid transform and add (C reference).
+// Each of the 8 input rows goes through the 16-point row transform; the
+// result is scaled by Sqrt2 (with rounding) and stored transposed. The
+// 8-point column transform then runs in place, flips are applied as tx_type
+// requires, and the residual, rounded with a shift of 6, is added to dest
+// with clipping.
+void vp10_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                            int tx_type) {
+  static const transform_2d IHT_16x8[] = {
+    { idct8_c, idct16_c },    // DCT_DCT
+    { iadst8_c, idct16_c },   // ADST_DCT
+    { idct8_c, iadst16_c },   // DCT_ADST
+    { iadst8_c, iadst16_c },  // ADST_ADST
+    { iadst8_c, idct16_c },   // FLIPADST_DCT
+    { idct8_c, iadst16_c },   // DCT_FLIPADST
+    { iadst8_c, iadst16_c },  // FLIPADST_FLIPADST
+    { iadst8_c, iadst16_c },  // ADST_FLIPADST
+    { iadst8_c, iadst16_c },  // FLIPADST_ADST
+    { iidtx8_c, iidtx16_c },  // IDTX
+    { idct8_c, iidtx16_c },   // V_DCT
+    { iidtx8_c, idct16_c },   // H_DCT
+    { iadst8_c, iidtx16_c },  // V_ADST
+    { iidtx8_c, iadst16_c },  // H_ADST
+    { iadst8_c, iidtx16_c },  // V_FLIPADST
+    { iidtx8_c, iadst16_c },  // H_FLIPADST
+  };
+
+  const int num_rows = 8;
+  const int num_cols = 16;
+  const transform_2d *const tx = &IHT_16x8[tx_type];
+  tran_low_t out[16][8], rowbuf[16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = num_rows;
+  int r, c;
+
+  // Row pass, Sqrt2 scaling, and transpose into out[col][row].
+  for (r = 0; r < num_rows; ++r) {
+    tx->rows(input + r * num_cols, rowbuf);
+    for (c = 0; c < num_cols; ++c) {
+      out[c][r] = (tran_low_t)dct_const_round_shift(rowbuf[c] * Sqrt2);
+    }
+  }
+
+  // Column pass, in place.
+  for (c = 0; c < num_cols; ++c) {
+    tx->cols(out[c], out[c]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, num_rows,
+                     num_cols);
+
+  // Accumulate the transposed result into the destination.
+  for (r = 0; r < num_rows; ++r) {
+    for (c = 0; c < num_cols; ++c) {
+      const int d = r * stride + c;
+      const int s = c * outstride + r;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+
+// 16x32 (16 columns, 32 rows) inverse hybrid transform and add (C reference).
+// Each of the 32 input rows goes through the 16-point row transform; the
+// result is scaled by Sqrt2 (with rounding) and stored transposed. The
+// 32-point column transform then runs in place, flips are applied as tx_type
+// requires, and the residual, rounded with a shift of 6, is added to dest
+// with clipping.
+void vp10_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                             int tx_type) {
+  static const transform_2d IHT_16x32[] = {
+    { idct32_c, idct16_c },         // DCT_DCT
+    { ihalfright32_c, idct16_c },   // ADST_DCT
+    { idct32_c, iadst16_c },        // DCT_ADST
+    { ihalfright32_c, iadst16_c },  // ADST_ADST
+    { ihalfright32_c, idct16_c },   // FLIPADST_DCT
+    { idct32_c, iadst16_c },        // DCT_FLIPADST
+    { ihalfright32_c, iadst16_c },  // FLIPADST_FLIPADST
+    { ihalfright32_c, iadst16_c },  // ADST_FLIPADST
+    { ihalfright32_c, iadst16_c },  // FLIPADST_ADST
+    { iidtx32_c, iidtx16_c },       // IDTX
+    { idct32_c, iidtx16_c },        // V_DCT
+    { iidtx32_c, idct16_c },        // H_DCT
+    { ihalfright32_c, iidtx16_c },  // V_ADST
+    { iidtx32_c, iadst16_c },       // H_ADST
+    { ihalfright32_c, iidtx16_c },  // V_FLIPADST
+    { iidtx32_c, iadst16_c },       // H_FLIPADST
+  };
+
+  const int num_cols = 16;
+  const int num_rows = 32;
+  const transform_2d *const tx = &IHT_16x32[tx_type];
+  tran_low_t out[16][32], rowbuf[16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = num_rows;
+  int r, c;
+
+  // Row pass, Sqrt2 scaling, and transpose into out[col][row].
+  for (r = 0; r < num_rows; ++r) {
+    tx->rows(input + r * num_cols, rowbuf);
+    for (c = 0; c < num_cols; ++c) {
+      out[c][r] = (tran_low_t)dct_const_round_shift(rowbuf[c] * Sqrt2);
+    }
+  }
+
+  // Column pass, in place.
+  for (c = 0; c < num_cols; ++c) {
+    tx->cols(out[c], out[c]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, num_rows,
+                     num_cols);
+
+  // Accumulate the transposed result into the destination.
+  for (r = 0; r < num_rows; ++r) {
+    for (c = 0; c < num_cols; ++c) {
+      const int d = r * stride + c;
+      const int s = c * outstride + r;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+
+// 32x16 (32 columns, 16 rows) inverse hybrid transform and add (C reference).
+// Each of the 16 input rows goes through the 32-point row transform; the
+// result is scaled by Sqrt2 (with rounding) and stored transposed. The
+// 16-point column transform then runs in place, flips are applied as tx_type
+// requires, and the residual, rounded with a shift of 6, is added to dest
+// with clipping.
+void vp10_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                             int tx_type) {
+  static const transform_2d IHT_32x16[] = {
+    { idct16_c, idct32_c },         // DCT_DCT
+    { iadst16_c, idct32_c },        // ADST_DCT
+    { idct16_c, ihalfright32_c },   // DCT_ADST
+    { iadst16_c, ihalfright32_c },  // ADST_ADST
+    { iadst16_c, idct32_c },        // FLIPADST_DCT
+    { idct16_c, ihalfright32_c },   // DCT_FLIPADST
+    { iadst16_c, ihalfright32_c },  // FLIPADST_FLIPADST
+    { iadst16_c, ihalfright32_c },  // ADST_FLIPADST
+    { iadst16_c, ihalfright32_c },  // FLIPADST_ADST
+    { iidtx16_c, iidtx32_c },       // IDTX
+    { idct16_c, iidtx32_c },        // V_DCT
+    { iidtx16_c, idct32_c },        // H_DCT
+    { iadst16_c, iidtx32_c },       // V_ADST
+    { iidtx16_c, ihalfright32_c },  // H_ADST
+    { iadst16_c, iidtx32_c },       // V_FLIPADST
+    { iidtx16_c, ihalfright32_c },  // H_FLIPADST
+  };
+
+  const int num_rows = 16;
+  const int num_cols = 32;
+  const transform_2d *const tx = &IHT_32x16[tx_type];
+  tran_low_t out[32][16], rowbuf[32];
+  tran_low_t *outp = &out[0][0];
+  int outstride = num_rows;
+  int r, c;
+
+  // Row pass, Sqrt2 scaling, and transpose into out[col][row].
+  for (r = 0; r < num_rows; ++r) {
+    tx->rows(input + r * num_cols, rowbuf);
+    for (c = 0; c < num_cols; ++c) {
+      out[c][r] = (tran_low_t)dct_const_round_shift(rowbuf[c] * Sqrt2);
+    }
+  }
+
+  // Column pass, in place.
+  for (c = 0; c < num_cols; ++c) {
+    tx->cols(out[c], out[c]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, num_rows,
+                     num_cols);
+
+  // Accumulate the transposed result into the destination.
+  for (r = 0; r < num_rows; ++r) {
+    for (c = 0; c < num_cols; ++c) {
+      const int d = r * stride + c;
+      const int s = c * outstride + r;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+#endif // CONFIG_EXT_TX
+
+// 8x8 inverse hybrid transform and add (C reference).
+// Applies the row transform selected by tx_type to each of the 8 input rows,
+// transposes, applies the column transform in place, optionally flips (only
+// with CONFIG_EXT_TX), and finally adds the residual, rounded with a shift
+// of 5, to dest using pixel clipping.
+void vp10_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                          int tx_type) {
+  static const transform_2d IHT_8[] = {
+    { idct8_c, idct8_c },    // DCT_DCT
+    { iadst8_c, idct8_c },   // ADST_DCT
+    { idct8_c, iadst8_c },   // DCT_ADST
+    { iadst8_c, iadst8_c },  // ADST_ADST
+#if CONFIG_EXT_TX
+    { iadst8_c, idct8_c },   // FLIPADST_DCT
+    { idct8_c, iadst8_c },   // DCT_FLIPADST
+    { iadst8_c, iadst8_c },  // FLIPADST_FLIPADST
+    { iadst8_c, iadst8_c },  // ADST_FLIPADST
+    { iadst8_c, iadst8_c },  // FLIPADST_ADST
+    { iidtx8_c, iidtx8_c },  // IDTX
+    { idct8_c, iidtx8_c },   // V_DCT
+    { iidtx8_c, idct8_c },   // H_DCT
+    { iadst8_c, iidtx8_c },  // V_ADST
+    { iidtx8_c, iadst8_c },  // H_ADST
+    { iadst8_c, iidtx8_c },  // V_FLIPADST
+    { iidtx8_c, iadst8_c },  // H_FLIPADST
+#endif  // CONFIG_EXT_TX
+  };
+  const transform_2d *const tx = &IHT_8[tx_type];
+  tran_low_t out[8][8];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 8;
+  int r, c;
+
+  // Row pass: one 1-D inverse transform per row of coefficients.
+  for (r = 0; r < 8; ++r) {
+    tx->rows(input + 8 * r, out[r]);
+  }
+
+  // In-place transpose (swap across the diagonal).
+  for (r = 0; r < 8; ++r) {
+    for (c = r + 1; c < 8; ++c) {
+      const tran_low_t swap = out[r][c];
+      out[r][c] = out[c][r];
+      out[c][r] = swap;
+    }
+  }
+
+  // Column pass, performed in place on each transposed row.
+  for (c = 0; c < 8; ++c) {
+    tx->cols(out[c], out[c]);
+  }
+
+#if CONFIG_EXT_TX
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
+#endif
+
+  // Accumulate the transposed result into the destination.
+  for (r = 0; r < 8; ++r) {
+    for (c = 0; c < 8; ++c) {
+      const int d = r * stride + c;
+      const int s = c * outstride + r;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5));
+    }
+  }
+}
+
+// 16x16 inverse hybrid transform and add (C reference).
+// Applies the row transform selected by tx_type to each of the 16 input rows,
+// transposes, applies the column transform in place, optionally flips (only
+// with CONFIG_EXT_TX), and finally adds the residual, rounded with a shift
+// of 6, to dest using pixel clipping.
+void vp10_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
+                             int tx_type) {
+  static const transform_2d IHT_16[] = {
+    { idct16_c, idct16_c },    // DCT_DCT
+    { iadst16_c, idct16_c },   // ADST_DCT
+    { idct16_c, iadst16_c },   // DCT_ADST
+    { iadst16_c, iadst16_c },  // ADST_ADST
+#if CONFIG_EXT_TX
+    { iadst16_c, idct16_c },   // FLIPADST_DCT
+    { idct16_c, iadst16_c },   // DCT_FLIPADST
+    { iadst16_c, iadst16_c },  // FLIPADST_FLIPADST
+    { iadst16_c, iadst16_c },  // ADST_FLIPADST
+    { iadst16_c, iadst16_c },  // FLIPADST_ADST
+    { iidtx16_c, iidtx16_c },  // IDTX
+    { idct16_c, iidtx16_c },   // V_DCT
+    { iidtx16_c, idct16_c },   // H_DCT
+    { iadst16_c, iidtx16_c },  // V_ADST
+    { iidtx16_c, iadst16_c },  // H_ADST
+    { iadst16_c, iidtx16_c },  // V_FLIPADST
+    { iidtx16_c, iadst16_c },  // H_FLIPADST
+#endif  // CONFIG_EXT_TX
+  };
+  const transform_2d *const tx = &IHT_16[tx_type];
+  tran_low_t out[16][16];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 16;
+  int r, c;
+
+  // Row pass: one 1-D inverse transform per row of coefficients.
+  for (r = 0; r < 16; ++r) {
+    tx->rows(input + 16 * r, out[r]);
+  }
+
+  // In-place transpose (swap across the diagonal).
+  for (r = 0; r < 16; ++r) {
+    for (c = r + 1; c < 16; ++c) {
+      const tran_low_t swap = out[r][c];
+      out[r][c] = out[c][r];
+      out[c][r] = swap;
+    }
+  }
+
+  // Column pass, performed in place on each transposed row.
+  for (c = 0; c < 16; ++c) {
+    tx->cols(out[c], out[c]);
+  }
+
+#if CONFIG_EXT_TX
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
+#endif
+
+  // Accumulate the transposed result into the destination.
+  for (r = 0; r < 16; ++r) {
+    for (c = 0; c < 16; ++c) {
+      const int d = r * stride + c;
+      const int s = c * outstride + r;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+
+#if CONFIG_EXT_TX
+// 32x32 inverse hybrid transform and add (C reference).
+// Applies the row transform selected by tx_type to each of the 32 input rows,
+// transposes, applies the column transform in place, applies the flip that
+// tx_type requires, and adds the residual, rounded with a shift of 6, to dest
+// using pixel clipping.
+//
+// Every entry of the table must be a 32-point transform, because both passes
+// operate on 32-element vectors. The V_ADST / H_ADST / V_FLIPADST /
+// H_FLIPADST entries therefore use iidtx32_c for their identity dimension,
+// matching the IDTX, V_DCT and H_DCT entries. A 16-point identity there would
+// leave half of each vector unprocessed and, in the row pass, unwritten.
+void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
+                              int stride, int tx_type) {
+  static const transform_2d IHT_32[] = {
+    { idct32_c, idct32_c },              // DCT_DCT
+    { ihalfright32_c, idct32_c },        // ADST_DCT
+    { idct32_c, ihalfright32_c },        // DCT_ADST
+    { ihalfright32_c, ihalfright32_c },  // ADST_ADST
+    { ihalfright32_c, idct32_c },        // FLIPADST_DCT
+    { idct32_c, ihalfright32_c },        // DCT_FLIPADST
+    { ihalfright32_c, ihalfright32_c },  // FLIPADST_FLIPADST
+    { ihalfright32_c, ihalfright32_c },  // ADST_FLIPADST
+    { ihalfright32_c, ihalfright32_c },  // FLIPADST_ADST
+    { iidtx32_c, iidtx32_c },            // IDTX
+    { idct32_c, iidtx32_c },             // V_DCT
+    { iidtx32_c, idct32_c },             // H_DCT
+    { ihalfright32_c, iidtx32_c },       // V_ADST
+    { iidtx32_c, ihalfright32_c },       // H_ADST
+    { ihalfright32_c, iidtx32_c },       // V_FLIPADST
+    { iidtx32_c, ihalfright32_c },       // H_FLIPADST
+  };
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t out[32][32];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 32;
+
+  // inverse transform row vectors
+  for (i = 0; i < 32; ++i) {
+    IHT_32[tx_type].rows(input, out[i]);
+    input += 32;
+  }
+
+  // transpose
+  for (i = 1; i < 32; i++) {
+    for (j = 0; j < i; j++) {
+      tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // inverse transform column vectors (in place)
+  for (i = 0; i < 32; ++i) {
+    IHT_32[tx_type].cols(out[i], out[i]);
+  }
+
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
+
+  // Sum with the destination; outp is read transposed relative to dest.
+  for (i = 0; i < 32; ++i) {
+    for (j = 0; j < 32; ++j) {
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+#endif // CONFIG_EXT_TX
+
+// idct
+// Adds the inverse 4x4 DCT of input to dest, choosing the kernel from eob:
+// vpx_idct4x4_1_add when eob <= 1, otherwise vpx_idct4x4_16_add.
+void vp10_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                      int eob) {
+  if (eob <= 1) {
+    vpx_idct4x4_1_add(input, dest, stride);
+    return;
+  }
+  vpx_idct4x4_16_add(input, dest, stride);
+}
+
+// Adds the inverse 4x4 WHT of input to dest, choosing the kernel from eob:
+// vpx_iwht4x4_1_add when eob <= 1, otherwise vpx_iwht4x4_16_add.
+void vp10_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                      int eob) {
+  if (eob <= 1) {
+    vpx_iwht4x4_1_add(input, dest, stride);
+    return;
+  }
+  vpx_iwht4x4_16_add(input, dest, stride);
+}
+
+// Adds the inverse 8x8 DCT of input to dest, using eob to pick the cheapest
+// sufficient kernel:
+//   eob == 1        -> vpx_idct8x8_1_add  (DC-only block)
+//   other eob <= 12 -> vpx_idct8x8_12_add
+//   eob > 12        -> vpx_idct8x8_64_add
+//
+// If dc is 1, then input[0] is the reconstructed value and needs no
+// dequantization; in that case dc is also counted in eobs, i.e. eobs >= 1.
+// TODO(yunqingwang): "eobs = 1" case is also handled in vp10_short_idct8x8_c.
+// Combine that with code here.
+void vp10_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
+                      int eob) {
+  if (eob > 12) {
+    vpx_idct8x8_64_add(input, dest, stride);
+  } else if (eob != 1) {
+    vpx_idct8x8_12_add(input, dest, stride);
+  } else {
+    // DC only DCT coefficient
+    vpx_idct8x8_1_add(input, dest, stride);
+  }
+}
+
+// Adds the inverse 16x16 DCT of input to dest. The work can be reduced when
+// few coefficients are non-zero, so eob selects the kernel:
+//   eob == 1        -> vpx_idct16x16_1_add   (DC-only block)
+//   other eob <= 10 -> vpx_idct16x16_10_add
+//   eob > 10        -> vpx_idct16x16_256_add
+void vp10_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
+                        int eob) {
+  if (eob > 10) {
+    vpx_idct16x16_256_add(input, dest, stride);
+  } else if (eob != 1) {
+    vpx_idct16x16_10_add(input, dest, stride);
+  } else {
+    /* DC only DCT coefficient. */
+    vpx_idct16x16_1_add(input, dest, stride);
+  }
+}
+
+// Adds the inverse 32x32 DCT of input to dest, with eob selecting the kernel:
+//   eob == 1        -> vpx_idct32x32_1_add
+//   other eob <= 34 -> vpx_idct32x32_34_add
+//   eob > 34        -> vpx_idct32x32_1024_add
+void vp10_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
+                        int eob) {
+  if (eob > 34) {
+    vpx_idct32x32_1024_add(input, dest, stride);
+  } else if (eob != 1) {
+    // non-zero coeff only in upper-left 8x8
+    vpx_idct32x32_34_add(input, dest, stride);
+  } else {
+    vpx_idct32x32_1_add(input, dest, stride);
+  }
+}
+
+// Top-level 4x4 inverse transform + add dispatcher.
+// In lossless mode the block goes through vp10_iwht4x4_add (tx_type is
+// asserted to be DCT_DCT). Otherwise tx_type selects the implementation:
+//   DCT_DCT                        -> vp10_idct4x4_add (eob-aware)
+//   ADST and FLIPADST combinations -> vp10_iht4x4_16_add
+//   V_* and H_* types              -> vp10_iht4x4_16_add_c
+//   IDTX                           -> inv_idtx_add_c
+// Types other than the first four are only handled with CONFIG_EXT_TX; an
+// unhandled tx_type trips assert(0).
+void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
+                           int eob, TX_TYPE tx_type, int lossless) {
+  if (lossless) {
+    assert(tx_type == DCT_DCT);
+    vp10_iwht4x4_add(input, dest, stride, eob);
+    return;
+  }
+
+  switch (tx_type) {
+    case DCT_DCT:
+      vp10_idct4x4_add(input, dest, stride, eob);
+      break;
+#if CONFIG_EXT_TX
+    case IDTX:
+      inv_idtx_add_c(input, dest, stride, 4, tx_type);
+      break;
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+      // Use C version since DST only exists in C code
+      vp10_iht4x4_16_add_c(input, dest, stride, tx_type);
+      break;
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+#endif  // CONFIG_EXT_TX
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      vp10_iht4x4_16_add(input, dest, stride, tx_type);
+      break;
+    default: assert(0); break;
+  }
+}
+
+#if CONFIG_EXT_TX
+// 4x8 inverse transform + add: forwards every tx_type to vp10_iht4x8_32_add.
+// eob is accepted for signature uniformity and ignored.
+void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
+                           int eob, TX_TYPE tx_type) {
+  vp10_iht4x8_32_add(input, dest, stride, tx_type);
+  (void)eob;
+}
+
+// 8x4 inverse transform + add: forwards every tx_type to vp10_iht8x4_32_add.
+// eob is accepted for signature uniformity and ignored.
+void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
+                           int eob, TX_TYPE tx_type) {
+  vp10_iht8x4_32_add(input, dest, stride, tx_type);
+  (void)eob;
+}
+
+// 8x16 inverse transform + add: forwards every tx_type to
+// vp10_iht8x16_128_add. eob is accepted for signature uniformity and ignored.
+void vp10_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest, int stride,
+                            int eob, TX_TYPE tx_type) {
+  vp10_iht8x16_128_add(input, dest, stride, tx_type);
+  (void)eob;
+}
+
+// 16x8 inverse transform + add: forwards every tx_type to
+// vp10_iht16x8_128_add. eob is accepted for signature uniformity and ignored.
+void vp10_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest, int stride,
+                            int eob, TX_TYPE tx_type) {
+  vp10_iht16x8_128_add(input, dest, stride, tx_type);
+  (void)eob;
+}
+
+// 16x32 inverse transform + add: forwards every tx_type to
+// vp10_iht16x32_512_add. eob is accepted for signature uniformity and ignored.
+void vp10_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, TX_TYPE tx_type) {
+  vp10_iht16x32_512_add(input, dest, stride, tx_type);
+  (void)eob;
+}
+
+// 32x16 inverse transform + add: forwards every tx_type to
+// vp10_iht32x16_512_add. eob is accepted for signature uniformity and ignored.
+void vp10_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, TX_TYPE tx_type) {
+  vp10_iht32x16_512_add(input, dest, stride, tx_type);
+  (void)eob;
+}
+#endif // CONFIG_EXT_TX
+
+// Top-level 8x8 inverse transform + add dispatcher. tx_type selects the
+// implementation:
+//   DCT_DCT                        -> vp10_idct8x8_add (eob-aware)
+//   ADST and FLIPADST combinations -> vp10_iht8x8_64_add
+//   V_* and H_* types              -> vp10_iht8x8_64_add_c
+//   IDTX                           -> inv_idtx_add_c
+// Types other than the first four are only handled with CONFIG_EXT_TX; an
+// unhandled tx_type trips assert(0).
+void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
+                           int eob, TX_TYPE tx_type) {
+  switch (tx_type) {
+    case DCT_DCT:
+      vp10_idct8x8_add(input, dest, stride, eob);
+      break;
+#if CONFIG_EXT_TX
+    case IDTX:
+      inv_idtx_add_c(input, dest, stride, 8, tx_type);
+      break;
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+      // Use C version since DST only exists in C code
+      vp10_iht8x8_64_add_c(input, dest, stride, tx_type);
+      break;
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+#endif  // CONFIG_EXT_TX
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+      vp10_iht8x8_64_add(input, dest, stride, tx_type);
+      break;
+    default: assert(0); break;
+  }
+}
+
+void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, TX_TYPE tx_type) {
+  switch (tx_type) {
+    case DCT_DCT: vp10_idct16x16_add(input, dest, stride, eob); break;
+    case ADST_ADST:
+    case DCT_ADST:
+    case ADST_DCT: vp10_iht16x16_256_add(input, dest, stride, tx_type); break;
+#if CONFIG_EXT_TX
+    case IDTX: inv_idtx_add_c(input, dest, stride, 16, tx_type); break;
+    case FLIPADST_ADST:
+    case ADST_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case DCT_FLIPADST:
+    case FLIPADST_DCT:
+      vp10_iht16x16_256_add(input, dest, stride, tx_type);
+      break;
+    case H_FLIPADST:
+    case V_FLIPADST:
+    case H_ADST:
+    case V_ADST:
+    case H_DCT:
+    case V_DCT:
+      // The V_*/H_* types call the _c kernel by name, not the plain entry.
+      vp10_iht16x16_256_add_c(input, dest, stride, tx_type);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+
+void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, TX_TYPE tx_type) {
+  switch (tx_type) {
+    case DCT_DCT: vp10_idct32x32_add(input, dest, stride, eob); break;
+#if CONFIG_EXT_TX
+    case IDTX: inv_idtx_add_c(input, dest, stride, 32, tx_type); break;
+    case H_FLIPADST:
+    case V_FLIPADST:
+    case H_ADST:
+    case V_ADST:
+    case H_DCT:
+    case V_DCT:
+    case FLIPADST_ADST:
+    case ADST_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case DCT_FLIPADST:
+    case FLIPADST_DCT:
+    case ADST_ADST:
+    case DCT_ADST:
+    case ADST_DCT:  // every listed type other than DCT_DCT/IDTX lands here
+      vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_4[] = { // { cols, rows }, indexed by tx_type
+ { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // FLIPADST_DCT
+ { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST
+ { highbd_iidtx4_c, highbd_iidtx4_c }, // IDTX
+ { vpx_highbd_idct4_c, highbd_iidtx4_c }, // V_DCT
+ { highbd_iidtx4_c, vpx_highbd_idct4_c }, // H_DCT
+ { vpx_highbd_iadst4_c, highbd_iidtx4_c }, // V_ADST
+ { highbd_iidtx4_c, vpx_highbd_iadst4_c }, // H_ADST
+ { vpx_highbd_iadst4_c, highbd_iidtx4_c }, // V_FLIPADST
+ { highbd_iidtx4_c, vpx_highbd_iadst4_c }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+ };
+ // 4x4 block: row pass, transpose, column pass, then add the result to dest8.
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t tmp;
+ tran_low_t out[4][4];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 4;
+
+ // Row pass: the tx_type's row kernel reads 4 coefficients and fills out[i].
+ for (i = 0; i < 4; ++i) {
+ HIGH_IHT_4[tx_type].rows(input, out[i], bd);
+ input += 4;
+ }
+
+ // Transpose out[][] in place by swapping across the diagonal.
+ for (i = 1; i < 4; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // Column pass: the tx_type's column kernel runs in place on each out[i].
+ for (i = 0; i < 4; ++i) {
+ HIGH_IHT_4[tx_type].cols(out[i], out[i], bd);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 4, 4);
+#endif
+
+ // Add ROUND_POWER_OF_TWO(value, 4) to each pixel via highbd_clip_pixel_add.
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j) {
+ int d = i * stride + j; // stride may have been changed by the helper above
+ int s = j * outstride + i; // outp is read with i and j swapped
+ dest[d] =
+ highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 4), bd);
+ }
+ }
+}
+
+#if CONFIG_EXT_TX
+void vp10_highbd_iht4x8_32_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_4x8[] = { // { cols, rows }, indexed by tx_type
+ { vpx_highbd_idct8_c, vpx_highbd_idct4_c }, // DCT_DCT
+ { vpx_highbd_iadst8_c, vpx_highbd_idct4_c }, // ADST_DCT
+ { vpx_highbd_idct8_c, vpx_highbd_iadst4_c }, // DCT_ADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c }, // ADST_ADST
+ { vpx_highbd_iadst8_c, vpx_highbd_idct4_c }, // FLIPADST_DCT
+ { vpx_highbd_idct8_c, vpx_highbd_iadst4_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst4_c }, // FLIPADST_ADST
+ { highbd_iidtx8_c, highbd_iidtx4_c }, // IDTX
+ { vpx_highbd_idct8_c, highbd_iidtx4_c }, // V_DCT
+ { highbd_iidtx8_c, vpx_highbd_idct4_c }, // H_DCT
+ { vpx_highbd_iadst8_c, highbd_iidtx4_c }, // V_ADST
+ { highbd_iidtx8_c, vpx_highbd_iadst4_c }, // H_ADST
+ { vpx_highbd_iadst8_c, highbd_iidtx4_c }, // V_FLIPADST
+ { highbd_iidtx8_c, vpx_highbd_iadst4_c }, // H_FLIPADST
+ };
+ const int n = 4;
+ const int n2 = 8;
+ // 8 rows of 4: row pass stored transposed, column pass, then add to dest8.
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t out[4][8], outtmp[4];
+ tran_low_t *outp = &out[0][0];
+ int outstride = n2;
+
+ // Row pass: row kernel per input row; result * Sqrt2 is round-shifted and stored transposed.
+ for (i = 0; i < n2; ++i) {
+ HIGH_IHT_4x8[tx_type].rows(input, outtmp, bd);
+ for (j = 0; j < n; ++j) {
+ out[j][i] =
+ HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+ }
+ input += n;
+ }
+
+ // Column pass: column kernel in place on each of the n rows of out[][].
+ for (i = 0; i < n; ++i) {
+ HIGH_IHT_4x8[tx_type].cols(out[i], out[i], bd);
+ }
+
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+ // Add ROUND_POWER_OF_TWO(value, 5) to each pixel via highbd_clip_pixel_add.
+ for (i = 0; i < n2; ++i) {
+ for (j = 0; j < n; ++j) {
+ int d = i * stride + j; // stride may have been changed by the helper above
+ int s = j * outstride + i; // outp is read with i and j swapped
+ dest[d] =
+ highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
+ }
+ }
+}
+
+void vp10_highbd_iht8x4_32_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_8x4[] = { // { cols, rows }, indexed by tx_type
+ { vpx_highbd_idct4_c, vpx_highbd_idct8_c }, // DCT_DCT
+ { vpx_highbd_iadst4_c, vpx_highbd_idct8_c }, // ADST_DCT
+ { vpx_highbd_idct4_c, vpx_highbd_iadst8_c }, // DCT_ADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c }, // ADST_ADST
+ { vpx_highbd_iadst4_c, vpx_highbd_idct8_c }, // FLIPADST_DCT
+ { vpx_highbd_idct4_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst4_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST
+ { highbd_iidtx4_c, highbd_iidtx8_c }, // IDTX
+ { vpx_highbd_idct4_c, highbd_iidtx8_c }, // V_DCT
+ { highbd_iidtx4_c, vpx_highbd_idct8_c }, // H_DCT
+ { vpx_highbd_iadst4_c, highbd_iidtx8_c }, // V_ADST
+ { highbd_iidtx4_c, vpx_highbd_iadst8_c }, // H_ADST
+ { vpx_highbd_iadst4_c, highbd_iidtx8_c }, // V_FLIPADST
+ { highbd_iidtx4_c, vpx_highbd_iadst8_c }, // H_FLIPADST
+ };
+ const int n = 4;
+ const int n2 = 8;
+ // 4 rows of 8: row pass stored transposed, column pass, then add to dest8.
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t out[8][4], outtmp[8];
+ tran_low_t *outp = &out[0][0];
+ int outstride = n;
+
+ // Row pass: row kernel per input row; result * Sqrt2 is round-shifted and stored transposed.
+ for (i = 0; i < n; ++i) {
+ HIGH_IHT_8x4[tx_type].rows(input, outtmp, bd);
+ for (j = 0; j < n2; ++j) {
+ out[j][i] =
+ HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+ }
+ input += n2;
+ }
+
+ // Column pass: column kernel in place on each of the n2 rows of out[][].
+ for (i = 0; i < n2; ++i) {
+ HIGH_IHT_8x4[tx_type].cols(out[i], out[i], bd);
+ }
+
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+ // Add ROUND_POWER_OF_TWO(value, 5) to each pixel via highbd_clip_pixel_add.
+ for (i = 0; i < n; ++i) {
+ for (j = 0; j < n2; ++j) {
+ int d = i * stride + j; // stride may have been changed by the helper above
+ int s = j * outstride + i; // outp is read with i and j swapped
+ dest[d] =
+ highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
+ }
+ }
+}
+
+void vp10_highbd_iht8x16_128_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_8x16[] = { // { cols, rows }, indexed by tx_type
+ { vpx_highbd_idct16_c, vpx_highbd_idct8_c }, // DCT_DCT
+ { vpx_highbd_iadst16_c, vpx_highbd_idct8_c }, // ADST_DCT
+ { vpx_highbd_idct16_c, vpx_highbd_iadst8_c }, // DCT_ADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst8_c }, // ADST_ADST
+ { vpx_highbd_iadst16_c, vpx_highbd_idct8_c }, // FLIPADST_DCT
+ { vpx_highbd_idct16_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST
+ { highbd_iidtx16_c, highbd_iidtx8_c }, // IDTX
+ { vpx_highbd_idct16_c, highbd_iidtx8_c }, // V_DCT
+ { highbd_iidtx16_c, vpx_highbd_idct8_c }, // H_DCT
+ { vpx_highbd_iadst16_c, highbd_iidtx8_c }, // V_ADST
+ { highbd_iidtx16_c, vpx_highbd_iadst8_c }, // H_ADST
+ { vpx_highbd_iadst16_c, highbd_iidtx8_c }, // V_FLIPADST
+ { highbd_iidtx16_c, vpx_highbd_iadst8_c }, // H_FLIPADST
+ };
+ const int n = 8;
+ const int n2 = 16;
+ // 16 rows of 8: row pass stored transposed, column pass, then add to dest8.
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t out[8][16], outtmp[8];
+ tran_low_t *outp = &out[0][0];
+ int outstride = n2;
+
+ // Row pass: row kernel per input row; result * Sqrt2 is round-shifted and stored transposed.
+ for (i = 0; i < n2; ++i) {
+ HIGH_IHT_8x16[tx_type].rows(input, outtmp, bd);
+ for (j = 0; j < n; ++j)
+ out[j][i] =
+ HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+ input += n;
+ }
+
+ // Column pass: column kernel in place on each of the n rows of out[][].
+ for (i = 0; i < n; ++i) {
+ HIGH_IHT_8x16[tx_type].cols(out[i], out[i], bd);
+ }
+
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+ // Add ROUND_POWER_OF_TWO(value, 6) to each pixel via highbd_clip_pixel_add.
+ for (i = 0; i < n2; ++i) {
+ for (j = 0; j < n; ++j) {
+ int d = i * stride + j; // stride may have been changed by the helper above
+ int s = j * outstride + i; // outp is read with i and j swapped
+ dest[d] =
+ highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
+ }
+ }
+}
+
+void vp10_highbd_iht16x8_128_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_16x8[] = { // { cols, rows }, indexed by tx_type
+ { vpx_highbd_idct8_c, vpx_highbd_idct16_c }, // DCT_DCT
+ { vpx_highbd_iadst8_c, vpx_highbd_idct16_c }, // ADST_DCT
+ { vpx_highbd_idct8_c, vpx_highbd_iadst16_c }, // DCT_ADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst16_c }, // ADST_ADST
+ { vpx_highbd_iadst8_c, vpx_highbd_idct16_c }, // FLIPADST_DCT
+ { vpx_highbd_idct8_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST
+ { highbd_iidtx8_c, highbd_iidtx16_c }, // IDTX
+ { vpx_highbd_idct8_c, highbd_iidtx16_c }, // V_DCT
+ { highbd_iidtx8_c, vpx_highbd_idct16_c }, // H_DCT
+ { vpx_highbd_iadst8_c, highbd_iidtx16_c }, // V_ADST
+ { highbd_iidtx8_c, vpx_highbd_iadst16_c }, // H_ADST
+ { vpx_highbd_iadst8_c, highbd_iidtx16_c }, // V_FLIPADST
+ { highbd_iidtx8_c, vpx_highbd_iadst16_c }, // H_FLIPADST
+ };
+ const int n = 8;
+ const int n2 = 16;
+ // 8 rows of 16: row pass stored transposed, column pass, then add to dest8.
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t out[16][8], outtmp[16];
+ tran_low_t *outp = &out[0][0];
+ int outstride = n;
+
+ // Row pass: row kernel per input row; result * Sqrt2 is round-shifted and stored transposed.
+ for (i = 0; i < n; ++i) {
+ HIGH_IHT_16x8[tx_type].rows(input, outtmp, bd);
+ for (j = 0; j < n2; ++j)
+ out[j][i] =
+ HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+ input += n2;
+ }
+
+ // Column pass: column kernel in place on each of the n2 rows of out[][].
+ for (i = 0; i < n2; ++i) {
+ HIGH_IHT_16x8[tx_type].cols(out[i], out[i], bd);
+ }
+
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+ // Add ROUND_POWER_OF_TWO(value, 6) to each pixel via highbd_clip_pixel_add.
+ for (i = 0; i < n; ++i) {
+ for (j = 0; j < n2; ++j) {
+ int d = i * stride + j; // stride may have been changed by the helper above
+ int s = j * outstride + i; // outp is read with i and j swapped
+ dest[d] =
+ highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
+ }
+ }
+}
+
+void vp10_highbd_iht16x32_512_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_16x32[] = { // { cols, rows }, indexed by tx_type
+ { vpx_highbd_idct32_c, vpx_highbd_idct16_c }, // DCT_DCT
+ { highbd_ihalfright32_c, vpx_highbd_idct16_c }, // ADST_DCT
+ { vpx_highbd_idct32_c, vpx_highbd_iadst16_c }, // DCT_ADST
+ { highbd_ihalfright32_c, vpx_highbd_iadst16_c }, // ADST_ADST
+ { highbd_ihalfright32_c, vpx_highbd_idct16_c }, // FLIPADST_DCT
+ { vpx_highbd_idct32_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST
+ { highbd_ihalfright32_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST
+ { highbd_ihalfright32_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST
+ { highbd_ihalfright32_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST
+ { highbd_iidtx32_c, highbd_iidtx16_c }, // IDTX
+ { vpx_highbd_idct32_c, highbd_iidtx16_c }, // V_DCT
+ { highbd_iidtx32_c, vpx_highbd_idct16_c }, // H_DCT
+ { highbd_ihalfright32_c, highbd_iidtx16_c }, // V_ADST
+ { highbd_iidtx32_c, vpx_highbd_iadst16_c }, // H_ADST
+ { highbd_ihalfright32_c, highbd_iidtx16_c }, // V_FLIPADST
+ { highbd_iidtx32_c, vpx_highbd_iadst16_c }, // H_FLIPADST
+ };
+ const int n = 16;
+ const int n2 = 32;
+ // 32 rows of 16: row pass stored transposed, column pass, then add to dest8.
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t out[16][32], outtmp[16];
+ tran_low_t *outp = &out[0][0];
+ int outstride = n2;
+
+ // Row pass: row kernel per input row; result * Sqrt2 is round-shifted and stored transposed.
+ for (i = 0; i < n2; ++i) {
+ HIGH_IHT_16x32[tx_type].rows(input, outtmp, bd);
+ for (j = 0; j < n; ++j)
+ out[j][i] =
+ HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+ input += n;
+ }
+
+ // Column pass: column kernel in place on each of the n rows of out[][].
+ for (i = 0; i < n; ++i) {
+ HIGH_IHT_16x32[tx_type].cols(out[i], out[i], bd);
+ }
+
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n2, n);
+
+ // Add ROUND_POWER_OF_TWO(value, 6) to each pixel via highbd_clip_pixel_add.
+ for (i = 0; i < n2; ++i) {
+ for (j = 0; j < n; ++j) {
+ int d = i * stride + j; // stride may have been changed by the helper above
+ int s = j * outstride + i; // outp is read with i and j swapped
+ dest[d] =
+ highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
+ }
+ }
+}
+
+void vp10_highbd_iht32x16_512_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_32x16[] = { // { cols, rows }, indexed by tx_type
+ { vpx_highbd_idct16_c, vpx_highbd_idct32_c }, // DCT_DCT
+ { vpx_highbd_iadst16_c, vpx_highbd_idct32_c }, // ADST_DCT
+ { vpx_highbd_idct16_c, highbd_ihalfright32_c }, // DCT_ADST
+ { vpx_highbd_iadst16_c, highbd_ihalfright32_c }, // ADST_ADST
+ { vpx_highbd_iadst16_c, vpx_highbd_idct32_c }, // FLIPADST_DCT
+ { vpx_highbd_idct16_c, highbd_ihalfright32_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst16_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst16_c, highbd_ihalfright32_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst16_c, highbd_ihalfright32_c }, // FLIPADST_ADST
+ { highbd_iidtx16_c, highbd_iidtx32_c }, // IDTX
+ { vpx_highbd_idct16_c, highbd_iidtx32_c }, // V_DCT
+ { highbd_iidtx16_c, vpx_highbd_idct32_c }, // H_DCT
+ { vpx_highbd_iadst16_c, highbd_iidtx32_c }, // V_ADST
+ { highbd_iidtx16_c, highbd_ihalfright32_c }, // H_ADST
+ { vpx_highbd_iadst16_c, highbd_iidtx32_c }, // V_FLIPADST
+ { highbd_iidtx16_c, highbd_ihalfright32_c }, // H_FLIPADST
+ };
+ const int n = 16;
+ const int n2 = 32;
+ // 16 rows of 32: row pass stored transposed, column pass, then add to dest8.
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t out[32][16], outtmp[32];
+ tran_low_t *outp = &out[0][0];
+ int outstride = n;
+
+ // Row pass: row kernel per input row; result * Sqrt2 is round-shifted and stored transposed.
+ for (i = 0; i < n; ++i) {
+ HIGH_IHT_32x16[tx_type].rows(input, outtmp, bd);
+ for (j = 0; j < n2; ++j)
+ out[j][i] =
+ HIGHBD_WRAPLOW(highbd_dct_const_round_shift(outtmp[j] * Sqrt2), bd);
+ input += n2;
+ }
+
+ // Column pass: column kernel in place on each of the n2 rows of out[][].
+ for (i = 0; i < n2; ++i) {
+ HIGH_IHT_32x16[tx_type].cols(out[i], out[i], bd);
+ }
+
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, n, n2);
+
+ // Add ROUND_POWER_OF_TWO(value, 6) to each pixel via highbd_clip_pixel_add.
+ for (i = 0; i < n; ++i) {
+ for (j = 0; j < n2; ++j) {
+ int d = i * stride + j; // stride may have been changed by the helper above
+ int s = j * outstride + i; // outp is read with i and j swapped
+ dest[d] =
+ highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
+ }
+ }
+}
+#endif // CONFIG_EXT_TX
+
+void vp10_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_8[] = { // { cols, rows }, indexed by tx_type
+ { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // FLIPADST_DCT
+ { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }, // FLIPADST_ADST
+ { highbd_iidtx8_c, highbd_iidtx8_c }, // IDTX
+ { vpx_highbd_idct8_c, highbd_iidtx8_c }, // V_DCT
+ { highbd_iidtx8_c, vpx_highbd_idct8_c }, // H_DCT
+ { vpx_highbd_iadst8_c, highbd_iidtx8_c }, // V_ADST
+ { highbd_iidtx8_c, vpx_highbd_iadst8_c }, // H_ADST
+ { vpx_highbd_iadst8_c, highbd_iidtx8_c }, // V_FLIPADST
+ { highbd_iidtx8_c, vpx_highbd_iadst8_c }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+ };
+ // 8x8 block: row pass, transpose, column pass, then add the result to dest8.
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t tmp;
+ tran_low_t out[8][8];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 8;
+
+ // Row pass: the tx_type's row kernel reads 8 coefficients and fills out[i].
+ for (i = 0; i < 8; ++i) {
+ HIGH_IHT_8[tx_type].rows(input, out[i], bd);
+ input += 8;
+ }
+
+ // Transpose out[][] in place by swapping across the diagonal.
+ for (i = 1; i < 8; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // Column pass: the tx_type's column kernel runs in place on each out[i].
+ for (i = 0; i < 8; ++i) {
+ HIGH_IHT_8[tx_type].cols(out[i], out[i], bd);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 8, 8);
+#endif
+
+ // Add ROUND_POWER_OF_TWO(value, 5) to each pixel via highbd_clip_pixel_add.
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j) {
+ int d = i * stride + j; // stride may have been changed by the helper above
+ int s = j * outstride + i; // outp is read with i and j swapped
+ dest[d] =
+ highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 5), bd);
+ }
+ }
+}
+
+void vp10_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_16[] = { // { cols, rows }, indexed by tx_type
+ { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // FLIPADST_DCT
+ { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_FLIPADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_FLIPADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // ADST_FLIPADST
+ { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }, // FLIPADST_ADST
+ { highbd_iidtx16_c, highbd_iidtx16_c }, // IDTX
+ { vpx_highbd_idct16_c, highbd_iidtx16_c }, // V_DCT
+ { highbd_iidtx16_c, vpx_highbd_idct16_c }, // H_DCT
+ { vpx_highbd_iadst16_c, highbd_iidtx16_c }, // V_ADST
+ { highbd_iidtx16_c, vpx_highbd_iadst16_c }, // H_ADST
+ { vpx_highbd_iadst16_c, highbd_iidtx16_c }, // V_FLIPADST
+ { highbd_iidtx16_c, vpx_highbd_iadst16_c }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+ };
+ // 16x16 block: row pass, transpose, column pass, then add the result to dest8.
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t tmp;
+ tran_low_t out[16][16];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 16;
+
+ // Row pass: the tx_type's row kernel reads 16 coefficients and fills out[i].
+ for (i = 0; i < 16; ++i) {
+ HIGH_IHT_16[tx_type].rows(input, out[i], bd);
+ input += 16;
+ }
+
+ // Transpose out[][] in place by swapping across the diagonal.
+ for (i = 1; i < 16; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // Column pass: the tx_type's column kernel runs in place on each out[i].
+ for (i = 0; i < 16; ++i) {
+ HIGH_IHT_16[tx_type].cols(out[i], out[i], bd);
+ }
+
+#if CONFIG_EXT_TX
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 16, 16);
+#endif
+
+ // Add ROUND_POWER_OF_TWO(value, 6) to each pixel via highbd_clip_pixel_add.
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j) {
+ int d = i * stride + j; // stride may have been changed by the helper above
+ int s = j * outstride + i; // outp is read with i and j swapped
+ dest[d] =
+ highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
+ }
+ }
+}
+
+#if CONFIG_EXT_TX
+void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int tx_type, int bd) {
+ static const highbd_transform_2d HIGH_IHT_32[] = { // { cols, rows }, indexed by tx_type
+ { vpx_highbd_idct32_c, vpx_highbd_idct32_c }, // DCT_DCT
+ { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // ADST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_ADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_ADST
+ { highbd_ihalfright32_c, vpx_highbd_idct32_c }, // FLIPADST_DCT
+ { vpx_highbd_idct32_c, highbd_ihalfright32_c }, // DCT_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // ADST_FLIPADST
+ { highbd_ihalfright32_c, highbd_ihalfright32_c }, // FLIPADST_ADST
+ { highbd_iidtx32_c, highbd_iidtx32_c }, // IDTX
+ { vpx_highbd_idct32_c, highbd_iidtx32_c }, // V_DCT
+ { highbd_iidtx32_c, vpx_highbd_idct32_c }, // H_DCT
+ { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_ADST
+ { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_ADST
+ { highbd_ihalfright32_c, highbd_iidtx32_c }, // V_FLIPADST
+ { highbd_iidtx32_c, highbd_ihalfright32_c }, // H_FLIPADST
+ };
+ // 32x32 block: row pass, transpose, column pass, then add the result to dest8.
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+ int i, j;
+ tran_low_t tmp;
+ tran_low_t out[32][32];
+ tran_low_t *outp = &out[0][0];
+ int outstride = 32;
+
+ // Row pass: the tx_type's row kernel reads 32 coefficients and fills out[i].
+ for (i = 0; i < 32; ++i) {
+ HIGH_IHT_32[tx_type].rows(input, out[i], bd);
+ input += 32;
+ }
+
+ // Transpose out[][] in place by swapping across the diagonal.
+ for (i = 1; i < 32; i++) {
+ for (j = 0; j < i; j++) {
+ tmp = out[i][j];
+ out[i][j] = out[j][i];
+ out[j][i] = tmp;
+ }
+ }
+
+ // Column pass: the tx_type's column kernel runs in place on each out[i].
+ for (i = 0; i < 32; ++i) {
+ HIGH_IHT_32[tx_type].cols(out[i], out[i], bd);
+ }
+
+ maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32, 32);
+
+ // Add ROUND_POWER_OF_TWO(value, 6) to each pixel via highbd_clip_pixel_add.
+ for (i = 0; i < 32; ++i) {
+ for (j = 0; j < 32; ++j) {
+ int d = i * stride + j; // stride may have been changed by the helper above
+ int s = j * outstride + i; // outp is read with i and j swapped
+ dest[d] =
+ highbd_clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6), bd);
+ }
+ }
+}
+#endif // CONFIG_EXT_TX
+
+// idct
+void vp10_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, int bd) {
+  if (eob <= 1)  // eob of 0 or 1 takes the _1 kernel, anything larger the _16
+    vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
+  else
+    vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
+}
+
+void vp10_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, int bd) {
+  if (eob <= 1)  // eob of 0 or 1 takes the _1 kernel, anything larger the _16
+    vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
+  else
+    vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
+}
+
+void vp10_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
+                             int eob, int bd) {
+  // eob alone picks the kernel, so blocks with few non-zero coefficients can
+  // use a simpler routine:
+  //   eob > 10 -> _64, eob == 1 -> _1 (DC only), everything else -> _10.
+  // If dc is 1, then input[0] is the reconstructed value and needs no
+  // dequantization; dc is then counted in eobs, namely eobs >= 1.
+  //
+  // TODO(yunqingwang): "eobs = 1" case is also handled in vp10_short_idct8x8_c.
+  // Combine that with code here.
+  if (eob > 10) {
+    vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
+  } else if (eob != 1) {
+    vpx_highbd_idct8x8_10_add(input, dest, stride, bd);
+  } else {
+    vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
+  }
+}
+
+void vp10_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
+                               int stride, int eob, int bd) {
+  // eob alone picks the kernel, so blocks with few non-zero coefficients can
+  // use a simpler routine:
+  //   eob > 10 -> _256, eob == 1 -> _1 (DC only), everything else -> _10.
+  if (eob > 10) {
+    vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
+  } else if (eob != 1) {
+    vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
+  } else {
+    vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
+  }
+}
+
+void vp10_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
+                               int stride, int eob, int bd) {
+  // eob > 34 -> _1024, eob == 1 -> _1, else _34 (coeffs in upper-left 8x8).
+  if (eob > 34) {
+    vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
+  } else if (eob != 1) {
+    vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
+  } else {
+    vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
+  }
+}
+
+void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd, TX_TYPE tx_type,
+                                  int lossless) {
+  if (lossless) {
+    // Lossless goes to vp10_highbd_iwht4x4_add; tx_type is asserted DCT_DCT.
+    assert(tx_type == DCT_DCT);
+    vp10_highbd_iwht4x4_add(input, dest, stride, eob, bd);
+    return;
+  }
+  switch (tx_type) {
+    case ADST_ADST:
+    case DCT_ADST:
+    case ADST_DCT:
+    case DCT_DCT:
+      vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
+                              bd);
+      break;
+#if CONFIG_EXT_TX
+    case IDTX:
+      highbd_inv_idtx_add_c(input, dest, stride, 4, tx_type, bd);
+      break;
+    case FLIPADST_ADST:
+    case ADST_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case DCT_FLIPADST:
+    case FLIPADST_DCT:
+      vp10_inv_txfm2d_add_4x4(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
+                              bd);
+      break;
+    case H_FLIPADST:
+    case V_FLIPADST:
+    case H_ADST:
+    case V_ADST:
+    case H_DCT:
+    case V_DCT:
+      // The V_*/H_* types call the table-driven _c kernel by name.
+      vp10_highbd_iht4x4_16_add_c(input, dest, stride, tx_type, bd);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+
+#if CONFIG_EXT_TX
+void vp10_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd,
+                                  TX_TYPE tx_type) {
+  vp10_highbd_iht4x8_32_add_c(input, dest, stride, tx_type, bd);
+  (void)eob;  // not consulted here; the cast only marks the parameter as used
+}
+
+void vp10_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd,
+                                  TX_TYPE tx_type) {
+  vp10_highbd_iht8x4_32_add_c(input, dest, stride, tx_type, bd);
+  (void)eob;  // not consulted here; the cast only marks the parameter as used
+}
+
+void vp10_highbd_inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
+                                   int stride, int eob, int bd,
+                                   TX_TYPE tx_type) {
+  vp10_highbd_iht8x16_128_add_c(input, dest, stride, tx_type, bd);
+  (void)eob;  // not consulted here; the cast only marks the parameter as used
+}
+
+void vp10_highbd_inv_txfm_add_16x8(const tran_low_t *input, uint8_t *dest,
+                                   int stride, int eob, int bd,
+                                   TX_TYPE tx_type) {
+  vp10_highbd_iht16x8_128_add_c(input, dest, stride, tx_type, bd);
+  (void)eob;  // not consulted here; the cast only marks the parameter as used
+}
+
+void vp10_highbd_inv_txfm_add_16x32(const tran_low_t *input, uint8_t *dest,
+                                    int stride, int eob, int bd,
+                                    TX_TYPE tx_type) {
+  vp10_highbd_iht16x32_512_add_c(input, dest, stride, tx_type, bd);
+  (void)eob;  // not consulted here; the cast only marks the parameter as used
+}
+
+void vp10_highbd_inv_txfm_add_32x16(const tran_low_t *input, uint8_t *dest,
+                                    int stride, int eob, int bd,
+                                    TX_TYPE tx_type) {
+  vp10_highbd_iht32x16_512_add_c(input, dest, stride, tx_type, bd);
+  (void)eob;  // not consulted here; the cast only marks the parameter as used
+}
+#endif // CONFIG_EXT_TX
+
+void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
+                                  int stride, int eob, int bd,
+                                  TX_TYPE tx_type) {
+  switch (tx_type) {
+    case ADST_ADST:
+    case DCT_ADST:
+    case ADST_DCT:
+    case DCT_DCT:
+      vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
+                              bd);
+      break;
+#if CONFIG_EXT_TX
+    case IDTX:
+      highbd_inv_idtx_add_c(input, dest, stride, 8, tx_type, bd);
+      break;
+    case FLIPADST_ADST:
+    case ADST_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case DCT_FLIPADST:
+    case FLIPADST_DCT:
+      vp10_inv_txfm2d_add_8x8(input, CONVERT_TO_SHORTPTR(dest), stride, tx_type,
+                              bd);
+      break;
+    case H_FLIPADST:
+    case V_FLIPADST:
+    case H_ADST:
+    case V_ADST:
+    case H_DCT:
+    case V_DCT:
+      // The V_*/H_* types call the table-driven _c kernel by name.
+      vp10_highbd_iht8x8_64_add_c(input, dest, stride, tx_type, bd);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+  (void)eob;  // no branch above reads eob
+}
+
+void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
+                                    int stride, int eob, int bd,
+                                    TX_TYPE tx_type) {
+  switch (tx_type) {
+    case ADST_ADST:
+    case DCT_ADST:
+    case ADST_DCT:
+    case DCT_DCT:
+      vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
+                                tx_type, bd);
+      break;
+#if CONFIG_EXT_TX
+    case IDTX:
+      highbd_inv_idtx_add_c(input, dest, stride, 16, tx_type, bd);
+      break;
+    case FLIPADST_ADST:
+    case ADST_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case DCT_FLIPADST:
+    case FLIPADST_DCT:
+      vp10_inv_txfm2d_add_16x16(input, CONVERT_TO_SHORTPTR(dest), stride,
+                                tx_type, bd);
+      break;
+    case H_FLIPADST:
+    case V_FLIPADST:
+    case H_ADST:
+    case V_ADST:
+    case H_DCT:
+    case V_DCT:
+      // The V_*/H_* types call the table-driven _c kernel by name.
+      vp10_highbd_iht16x16_256_add_c(input, dest, stride, tx_type, bd);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+  (void)eob;  // no branch above reads eob
+}
+
+void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
+                                    int stride, int eob, int bd,
+                                    TX_TYPE tx_type) {
+  switch (tx_type) {
+    case DCT_DCT:
+      vp10_inv_txfm2d_add_32x32(input, CONVERT_TO_SHORTPTR(dest), stride,
+                                DCT_DCT, bd);
+      break;
+#if CONFIG_EXT_TX
+    case IDTX:
+      highbd_inv_idtx_add_c(input, dest, stride, 32, tx_type, bd);
+      break;
+    case H_FLIPADST:
+    case V_FLIPADST:
+    case H_ADST:
+    case V_ADST:
+    case H_DCT:
+    case V_DCT:
+    case FLIPADST_ADST:
+    case ADST_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case DCT_FLIPADST:
+    case FLIPADST_DCT:
+    case ADST_ADST:
+    case DCT_ADST:
+    case ADST_DCT:  // every listed type other than DCT_DCT/IDTX lands here
+      vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+  (void)eob;  // no branch above reads eob
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
+                  INV_TXFM_PARAM *inv_txfm_param) {
+  const TX_SIZE size = inv_txfm_param->tx_size;
+  const TX_TYPE type = inv_txfm_param->tx_type;
+  const int lossless = inv_txfm_param->lossless;
+  const int eob = inv_txfm_param->eob;
+  // Dispatch on transform size; each case forwards to the per-size routine.
+  switch (size) {
+    case TX_4X4:
+      // this is like vp10_short_idct4x4 but has a special case around eob<=1
+      // which is significant (not just an optimization) for the lossless
+      // case.
+      vp10_inv_txfm_add_4x4(input, dest, stride, eob, type, lossless);
+      break;
+    case TX_8X8:
+      vp10_inv_txfm_add_8x8(input, dest, stride, eob, type);
+      break;
+    case TX_16X16:
+      vp10_inv_txfm_add_16x16(input, dest, stride, eob, type);
+      break;
+    case TX_32X32:
+      vp10_inv_txfm_add_32x32(input, dest, stride, eob, type);
+      break;
+#if CONFIG_EXT_TX
+    case TX_4X8:
+      vp10_inv_txfm_add_4x8(input, dest, stride, eob, type);
+      break;
+    case TX_8X4:
+      vp10_inv_txfm_add_8x4(input, dest, stride, eob, type);
+      break;
+    case TX_8X16:
+      vp10_inv_txfm_add_8x16(input, dest, stride, eob, type);
+      break;
+    case TX_16X8:
+      vp10_inv_txfm_add_16x8(input, dest, stride, eob, type);
+      break;
+    case TX_16X32:
+      vp10_inv_txfm_add_16x32(input, dest, stride, eob, type);
+      break;
+    case TX_32X16:
+      vp10_inv_txfm_add_32x16(input, dest, stride, eob, type);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0 && "Invalid transform size"); break;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
+                         INV_TXFM_PARAM *inv_txfm_param) {
+  const TX_SIZE size = inv_txfm_param->tx_size;
+  const TX_TYPE type = inv_txfm_param->tx_type;
+  const int lossless = inv_txfm_param->lossless;
+  const int eob = inv_txfm_param->eob;
+  const int bd = inv_txfm_param->bd;
+  // Dispatch on transform size; each case forwards to the per-size routine.
+  switch (size) {
+    case TX_4X4:
+      // this is like vp10_short_idct4x4 but has a special case around eob<=1
+      // which is significant (not just an optimization) for the lossless
+      // case.
+      vp10_highbd_inv_txfm_add_4x4(input, dest, stride, eob, bd, type,
+                                   lossless);
+      break;
+    case TX_8X8:
+      vp10_highbd_inv_txfm_add_8x8(input, dest, stride, eob, bd, type);
+      break;
+    case TX_16X16:
+      vp10_highbd_inv_txfm_add_16x16(input, dest, stride, eob, bd, type);
+      break;
+    case TX_32X32:
+      vp10_highbd_inv_txfm_add_32x32(input, dest, stride, eob, bd, type);
+      break;
+#if CONFIG_EXT_TX
+    case TX_4X8:
+      vp10_highbd_inv_txfm_add_4x8(input, dest, stride, eob, bd, type);
+      break;
+    case TX_8X4:
+      vp10_highbd_inv_txfm_add_8x4(input, dest, stride, eob, bd, type);
+      break;
+    case TX_8X16:
+      vp10_highbd_inv_txfm_add_8x16(input, dest, stride, eob, bd, type);
+      break;
+    case TX_16X8:
+      vp10_highbd_inv_txfm_add_16x8(input, dest, stride, eob, bd, type);
+      break;
+    case TX_16X32:
+      vp10_highbd_inv_txfm_add_16x32(input, dest, stride, eob, bd, type);
+      break;
+    case TX_32X16:
+      vp10_highbd_inv_txfm_add_32x16(input, dest, stride, eob, bd, type);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0 && "Invalid transform size"); break;
+  }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/idct.h b/av1/common/idct.h
new file mode 100644
index 0000000..9b3be62
--- /dev/null
+++ b/av1/common/idct.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_IDCT_H_
+#define VP10_COMMON_IDCT_H_
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "av1/common/blockd.h"
+#include "av1/common/common.h"
+#include "av1/common/enums.h"
+#include "aom_dsp/inv_txfm.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_ports/mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct INV_TXFM_PARAM {
+ TX_TYPE tx_type;
+ TX_SIZE tx_size;
+ int eob;
+ int lossless;
+#if CONFIG_VP9_HIGHBITDEPTH
+ int bd;
+#endif
+} INV_TXFM_PARAM;
+
+typedef void (*transform_1d)(const tran_low_t *, tran_low_t *);
+
+typedef struct {
+ transform_1d cols, rows; // vertical and horizontal
+} transform_2d;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef void (*highbd_transform_1d)(const tran_low_t *, tran_low_t *, int bd);
+
+typedef struct {
+ highbd_transform_1d cols, rows; // vertical and horizontal
+} highbd_transform_2d;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#define MAX_TX_SCALE 1
+int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
+ const TX_SIZE tx_size);
+
+void vp10_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob);
+void vp10_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob);
+void vp10_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob);
+void vp10_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob);
+void vp10_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob);
+
+void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob, TX_TYPE tx_type, int lossless);
+#if CONFIG_EXT_TX
+void vp10_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob, TX_TYPE tx_type);
+void vp10_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob, TX_TYPE tx_type);
+#endif // CONFIG_EXT_TX
+void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob, TX_TYPE tx_type);
+void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob, TX_TYPE tx_type);
+void vp10_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob, TX_TYPE tx_type);
+void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
+ INV_TXFM_PARAM *inv_txfm_param);
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob, int bd);
+void vp10_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob, int bd);
+void vp10_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
+ int eob, int bd);
+void vp10_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd);
+void vp10_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd);
+void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd, TX_TYPE tx_type,
+ int lossless);
+#if CONFIG_EXT_TX
+void vp10_highbd_inv_txfm_add_4x8(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd, TX_TYPE tx_type);
+void vp10_highbd_inv_txfm_add_8x4(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd, TX_TYPE tx_type);
+#endif // CONFIG_EXT_TX
+void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd, TX_TYPE tx_type);
+void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd,
+ TX_TYPE tx_type);
+void vp10_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
+ int stride, int eob, int bd,
+ TX_TYPE tx_type);
+void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
+ INV_TXFM_PARAM *inv_txfm_param);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_IDCT_H_
diff --git a/av1/common/intra_filters.h b/av1/common/intra_filters.h
new file mode 100644
index 0000000..021fb8e
--- /dev/null
+++ b/av1/common/intra_filters.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_INTRA_FILTERS_H_
+#define VP10_COMMON_INTRA_FILTERS_H_
+
+#define FILTER_INTRA_PREC_BITS (10)
+
+static int filter_intra_taps_4[TX_SIZES][INTRA_MODES][4] = {
+ {
+ { 735, 881, -537, -54 },
+ { 1005, 519, -488, -11 },
+ { 383, 990, -343, -6 },
+ { 442, 805, -542, 319 },
+ { 658, 616, -133, -116 },
+ { 875, 442, -141, -151 },
+ { 386, 741, -23, -80 },
+ { 390, 1027, -446, 51 },
+ { 679, 606, -523, 262 },
+ { 903, 922, -778, -23 },
+ },
+ {
+ { 648, 803, -444, 16 },
+ { 972, 620, -576, 7 },
+ { 561, 967, -499, -5 },
+ { 585, 762, -468, 144 },
+ { 596, 619, -182, -9 },
+ { 895, 459, -176, -153 },
+ { 557, 722, -126, -129 },
+ { 601, 839, -523, 105 },
+ { 562, 709, -499, 251 },
+ { 803, 872, -695, 43 },
+ },
+ {
+ { 423, 728, -347, 111 },
+ { 963, 685, -665, 23 },
+ { 281, 1024, -480, 216 },
+ { 640, 596, -437, 78 },
+ { 429, 669, -259, 99 },
+ { 740, 646, -415, 23 },
+ { 568, 771, -346, 40 },
+ { 404, 833, -486, 209 },
+ { 398, 712, -423, 307 },
+ { 939, 935, -887, 17 },
+ },
+ {
+ { 477, 737, -393, 150 },
+ { 881, 630, -546, 67 },
+ { 506, 984, -443, -20 },
+ { 114, 459, -270, 528 },
+ { 433, 528, 14, 3 },
+ { 837, 470, -301, -30 },
+ { 181, 777, 89, -107 },
+ { -29, 716, -232, 259 },
+ { 589, 646, -495, 255 },
+ { 740, 884, -728, 77 },
+ },
+};
+
+#endif // VP10_COMMON_INTRA_FILTERS_H_
diff --git a/av1/common/loopfilter.c b/av1/common/loopfilter.c
new file mode 100644
index 0000000..e4636a5
--- /dev/null
+++ b/av1/common/loopfilter.c
@@ -0,0 +1,1697 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "av1/common/loopfilter.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/restoration.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+
+#include "av1/common/seg_common.h"
+
+// 64 bit masks for left transform size. Each 1 represents a position where
+// we should apply a loop filter across the left border of an 8x8 block
+// boundary.
+//
+// In the case of TX_16X16 (low order byte first) we end up with
+// a mask that looks like this:
+//
+// 10101010
+// 10101010
+// 10101010
+// 10101010
+// 10101010
+// 10101010
+// 10101010
+// 10101010
+//
+// A loopfilter should be applied to every other 8x8 horizontally.
+static const uint64_t left_64x64_txform_mask[TX_SIZES] = {
+ 0xffffffffffffffffULL, // TX_4X4
+ 0xffffffffffffffffULL, // TX_8x8
+ 0x5555555555555555ULL, // TX_16x16
+ 0x1111111111111111ULL, // TX_32x32
+};
+
+// 64 bit masks for above transform size. Each 1 represents a position where
+// we should apply a loop filter across the top border of an 8x8 block
+// boundary.
+//
+// In the case of TX_32x32 (low order byte first) we end up with
+// a mask that looks like this:
+//
+// 11111111
+// 00000000
+// 00000000
+// 00000000
+// 11111111
+// 00000000
+// 00000000
+// 00000000
+//
+// A loopfilter should be applied to every fourth row vertically.
+static const uint64_t above_64x64_txform_mask[TX_SIZES] = {
+ 0xffffffffffffffffULL, // TX_4X4
+ 0xffffffffffffffffULL, // TX_8x8
+ 0x00ff00ff00ff00ffULL, // TX_16x16
+ 0x000000ff000000ffULL, // TX_32x32
+};
+
+// 64 bit masks for prediction sizes (left). Each 1 represents the left
+// border of an 8x8 block. These are aligned to the right most
+// appropriate bit, and then shifted into place.
+//
+// In the case of BLOCK_16X32 (low order byte first) we end up with
+// a mask that looks like this:
+//
+// 10000000
+// 10000000
+// 10000000
+// 10000000
+// 00000000
+// 00000000
+// 00000000
+// 00000000
+static const uint64_t left_prediction_mask[BLOCK_SIZES] = {
+ 0x0000000000000001ULL, // BLOCK_4X4,
+ 0x0000000000000001ULL, // BLOCK_4X8,
+ 0x0000000000000001ULL, // BLOCK_8X4,
+ 0x0000000000000001ULL, // BLOCK_8X8,
+ 0x0000000000000101ULL, // BLOCK_8X16,
+ 0x0000000000000001ULL, // BLOCK_16X8,
+ 0x0000000000000101ULL, // BLOCK_16X16,
+ 0x0000000001010101ULL, // BLOCK_16X32,
+ 0x0000000000000101ULL, // BLOCK_32X16,
+ 0x0000000001010101ULL, // BLOCK_32X32,
+ 0x0101010101010101ULL, // BLOCK_32X64,
+ 0x0000000001010101ULL, // BLOCK_64X32,
+ 0x0101010101010101ULL, // BLOCK_64X64
+};
+
+// 64 bit mask to shift and set for each prediction size.
+static const uint64_t above_prediction_mask[BLOCK_SIZES] = {
+ 0x0000000000000001ULL, // BLOCK_4X4
+ 0x0000000000000001ULL, // BLOCK_4X8
+ 0x0000000000000001ULL, // BLOCK_8X4
+ 0x0000000000000001ULL, // BLOCK_8X8
+ 0x0000000000000001ULL, // BLOCK_8X16,
+ 0x0000000000000003ULL, // BLOCK_16X8
+ 0x0000000000000003ULL, // BLOCK_16X16
+ 0x0000000000000003ULL, // BLOCK_16X32,
+ 0x000000000000000fULL, // BLOCK_32X16,
+ 0x000000000000000fULL, // BLOCK_32X32,
+ 0x000000000000000fULL, // BLOCK_32X64,
+ 0x00000000000000ffULL, // BLOCK_64X32,
+ 0x00000000000000ffULL, // BLOCK_64X64
+};
+// 64 bit mask to shift and set for each prediction size. A bit is set for
+// each 8x8 block that would be in the left most block of the given block
+// size in the 64x64 block.
+static const uint64_t size_mask[BLOCK_SIZES] = {
+ 0x0000000000000001ULL, // BLOCK_4X4
+ 0x0000000000000001ULL, // BLOCK_4X8
+ 0x0000000000000001ULL, // BLOCK_8X4
+ 0x0000000000000001ULL, // BLOCK_8X8
+ 0x0000000000000101ULL, // BLOCK_8X16,
+ 0x0000000000000003ULL, // BLOCK_16X8
+ 0x0000000000000303ULL, // BLOCK_16X16
+ 0x0000000003030303ULL, // BLOCK_16X32,
+ 0x0000000000000f0fULL, // BLOCK_32X16,
+ 0x000000000f0f0f0fULL, // BLOCK_32X32,
+ 0x0f0f0f0f0f0f0f0fULL, // BLOCK_32X64,
+ 0x00000000ffffffffULL, // BLOCK_64X32,
+ 0xffffffffffffffffULL, // BLOCK_64X64
+};
+
+// These are used for masking the left and above borders.
+static const uint64_t left_border = 0x1111111111111111ULL;
+static const uint64_t above_border = 0x000000ff000000ffULL;
+
+// 16 bit masks for uv transform sizes.
+static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
+ 0xffff, // TX_4X4
+ 0xffff, // TX_8x8
+ 0x5555, // TX_16x16
+ 0x1111, // TX_32x32
+};
+
+static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
+ 0xffff, // TX_4X4
+ 0xffff, // TX_8x8
+ 0x0f0f, // TX_16x16
+ 0x000f, // TX_32x32
+};
+
+// 16 bit left mask to shift and set for each uv prediction size.
+static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = {
+ 0x0001, // BLOCK_4X4,
+ 0x0001, // BLOCK_4X8,
+ 0x0001, // BLOCK_8X4,
+ 0x0001, // BLOCK_8X8,
+ 0x0001, // BLOCK_8X16,
+ 0x0001, // BLOCK_16X8,
+ 0x0001, // BLOCK_16X16,
+ 0x0011, // BLOCK_16X32,
+ 0x0001, // BLOCK_32X16,
+ 0x0011, // BLOCK_32X32,
+ 0x1111, // BLOCK_32X64
+ 0x0011, // BLOCK_64X32,
+ 0x1111, // BLOCK_64X64
+};
+// 16 bit above mask to shift and set for each uv prediction size.
+static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = {
+ 0x0001, // BLOCK_4X4
+ 0x0001, // BLOCK_4X8
+ 0x0001, // BLOCK_8X4
+ 0x0001, // BLOCK_8X8
+ 0x0001, // BLOCK_8X16,
+ 0x0001, // BLOCK_16X8
+ 0x0001, // BLOCK_16X16
+ 0x0001, // BLOCK_16X32,
+ 0x0003, // BLOCK_32X16,
+ 0x0003, // BLOCK_32X32,
+ 0x0003, // BLOCK_32X64,
+ 0x000f, // BLOCK_64X32,
+ 0x000f, // BLOCK_64X64
+};
+
+// 16 bit mask to shift and set for each uv prediction size
+static const uint16_t size_mask_uv[BLOCK_SIZES] = {
+ 0x0001, // BLOCK_4X4
+ 0x0001, // BLOCK_4X8
+ 0x0001, // BLOCK_8X4
+ 0x0001, // BLOCK_8X8
+ 0x0001, // BLOCK_8X16,
+ 0x0001, // BLOCK_16X8
+ 0x0001, // BLOCK_16X16
+ 0x0011, // BLOCK_16X32,
+ 0x0003, // BLOCK_32X16,
+ 0x0033, // BLOCK_32X32,
+ 0x3333, // BLOCK_32X64,
+ 0x00ff, // BLOCK_64X32,
+ 0xffff, // BLOCK_64X64
+};
+static const uint16_t left_border_uv = 0x1111;
+static const uint16_t above_border_uv = 0x000f;
+
+static const int mode_lf_lut[MB_MODE_COUNT] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
+ 1, 1, 0, 1 // INTER_MODES (ZEROMV == 0)
+#if CONFIG_EXT_INTER
+ ,
+ 1, // NEWFROMNEARMV mode
+ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (ZERO_ZEROMV == 0)
+#endif // CONFIG_EXT_INTER
+};
+
+static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
+ int lvl;
+
+ // For each possible value for the loop filter fill out limits
+ for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
+ // Set loop filter parameters that control sharpness.
+ int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
+
+ if (sharpness_lvl > 0) {
+ if (block_inside_limit > (9 - sharpness_lvl))
+ block_inside_limit = (9 - sharpness_lvl);
+ }
+
+ if (block_inside_limit < 1) block_inside_limit = 1;
+
+ memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH);
+ memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
+ SIMD_WIDTH);
+ }
+}
+
+static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
+ const MB_MODE_INFO *mbmi) {
+#if CONFIG_SUPERTX
+ const int segment_id = VPXMIN(mbmi->segment_id, mbmi->segment_id_supertx);
+ assert(
+ IMPLIES(supertx_enabled(mbmi), mbmi->segment_id_supertx != MAX_SEGMENTS));
+ assert(IMPLIES(supertx_enabled(mbmi),
+ mbmi->segment_id_supertx <= mbmi->segment_id));
+#else
+ const int segment_id = mbmi->segment_id;
+#endif // CONFIG_SUPERTX
+ return lfi_n->lvl[segment_id][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
+}
+
+void vp10_loop_filter_init(VP10_COMMON *cm) {
+ loop_filter_info_n *lfi = &cm->lf_info;
+ struct loopfilter *lf = &cm->lf;
+ int lvl;
+
+ // init limits for given sharpness
+ update_sharpness(lfi, lf->sharpness_level);
+ lf->last_sharpness_level = lf->sharpness_level;
+
+ // init hev threshold const vectors
+ for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
+ memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
+}
+
+void vp10_loop_filter_frame_init(VP10_COMMON *cm, int default_filt_lvl) {
+ int seg_id;
+ // scale is the multiplier for lf_deltas:
+ // the multiplier is 1 when default_filt_lvl is between 0 and 31;
+ // 2 when default_filt_lvl is between 32 and 63
+ const int scale = 1 << (default_filt_lvl >> 5);
+ loop_filter_info_n *const lfi = &cm->lf_info;
+ struct loopfilter *const lf = &cm->lf;
+ const struct segmentation *const seg = &cm->seg;
+
+ // update limits if sharpness has changed
+ if (lf->last_sharpness_level != lf->sharpness_level) {
+ update_sharpness(lfi, lf->sharpness_level);
+ lf->last_sharpness_level = lf->sharpness_level;
+ }
+
+ for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
+ int lvl_seg = default_filt_lvl;
+ if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
+ const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
+ lvl_seg = clamp(
+ seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data, 0,
+ MAX_LOOP_FILTER);
+ }
+
+ if (!lf->mode_ref_delta_enabled) {
+ // we could get rid of this if we assume that deltas are set to
+ // zero when not in use; encoder always uses deltas
+ memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
+ } else {
+ int ref, mode;
+ const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
+ lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
+
+ for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
+ for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
+ const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
+ lf->mode_deltas[mode] * scale;
+ lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
+ }
+ }
+ }
+ }
+}
+
+static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s,
+ int pitch, unsigned int mask_16x16_l,
+ unsigned int mask_8x8_l,
+ unsigned int mask_4x4_l,
+ unsigned int mask_4x4_int_l,
+ const loop_filter_info_n *lfi_n,
+ const uint8_t *lfl) {
+ const int mask_shift = subsampling_factor ? 4 : 8;
+ const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
+ const int lfl_forward = subsampling_factor ? 4 : 8;
+
+ unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
+ unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
+ unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
+ unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
+ unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
+ unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
+ unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
+ unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
+ unsigned int mask;
+
+ for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
+ mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
+ mask; mask >>= 1) {
+ const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
+ const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
+
+ if (mask & 1) {
+ if ((mask_16x16_0 | mask_16x16_1) & 1) {
+ if ((mask_16x16_0 & mask_16x16_1) & 1) {
+ vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr);
+ } else if (mask_16x16_0 & 1) {
+ vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+ } else {
+ vpx_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
+ }
+ }
+
+ if ((mask_8x8_0 | mask_8x8_1) & 1) {
+ if ((mask_8x8_0 & mask_8x8_1) & 1) {
+ vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
+ } else if (mask_8x8_0 & 1) {
+ vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+ } else {
+ vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
+ }
+ }
+
+ if ((mask_4x4_0 | mask_4x4_1) & 1) {
+ if ((mask_4x4_0 & mask_4x4_1) & 1) {
+ vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
+ } else if (mask_4x4_0 & 1) {
+ vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
+ } else {
+ vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
+ }
+ }
+
+ if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
+ if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
+ vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
+ } else if (mask_4x4_int_0 & 1) {
+ vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr);
+ } else {
+ vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr);
+ }
+ }
+ }
+
+ s += 8;
+ lfl += 1;
+ mask_16x16_0 >>= 1;
+ mask_8x8_0 >>= 1;
+ mask_4x4_0 >>= 1;
+ mask_4x4_int_0 >>= 1;
+ mask_16x16_1 >>= 1;
+ mask_8x8_1 >>= 1;
+ mask_4x4_1 >>= 1;
+ mask_4x4_int_1 >>= 1;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_filter_selectively_vert_row2(
+ int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16_l,
+ unsigned int mask_8x8_l, unsigned int mask_4x4_l,
+ unsigned int mask_4x4_int_l, const loop_filter_info_n *lfi_n,
+ const uint8_t *lfl, int bd) {
+ const int mask_shift = subsampling_factor ? 4 : 8;
+ const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
+ const int lfl_forward = subsampling_factor ? 4 : 8;
+
+ unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
+ unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
+ unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
+ unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
+ unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
+ unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
+ unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
+ unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
+ unsigned int mask;
+
+ for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
+ mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
+ mask; mask >>= 1) {
+ const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
+ const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
+
+ if (mask & 1) {
+ if ((mask_16x16_0 | mask_16x16_1) & 1) {
+ if ((mask_16x16_0 & mask_16x16_1) & 1) {
+ vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, bd);
+ } else if (mask_16x16_0 & 1) {
+ vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, bd);
+ } else {
+ vpx_highbd_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr, bd);
+ }
+ }
+
+ if ((mask_8x8_0 | mask_8x8_1) & 1) {
+ if ((mask_8x8_0 & mask_8x8_1) & 1) {
+ vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr, bd);
+ } else if (mask_8x8_0 & 1) {
+ vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, bd);
+ } else {
+ vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr, bd);
+ }
+ }
+
+ if ((mask_4x4_0 | mask_4x4_1) & 1) {
+ if ((mask_4x4_0 & mask_4x4_1) & 1) {
+ vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr, bd);
+ } else if (mask_4x4_0 & 1) {
+ vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, bd);
+ } else {
+ vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr, bd);
+ }
+ }
+
+ if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
+ if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
+ vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, lfi1->mblim, lfi1->lim,
+ lfi1->hev_thr, bd);
+ } else if (mask_4x4_int_0 & 1) {
+ vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
+ lfi0->hev_thr, bd);
+ } else {
+ vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
+ lfi1->lim, lfi1->hev_thr, bd);
+ }
+ }
+ }
+
+ s += 8;
+ lfl += 1;
+ mask_16x16_0 >>= 1;
+ mask_8x8_0 >>= 1;
+ mask_4x4_0 >>= 1;
+ mask_4x4_int_0 >>= 1;
+ mask_16x16_1 >>= 1;
+ mask_8x8_1 >>= 1;
+ mask_4x4_1 >>= 1;
+ mask_4x4_int_1 >>= 1;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static void filter_selectively_horiz(
+ uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
+ unsigned int mask_4x4, unsigned int mask_4x4_int,
+ const loop_filter_info_n *lfi_n, const uint8_t *lfl) {
+ unsigned int mask;
+ int count;
+
+ for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
+ mask >>= count) {
+ const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+
+ count = 1;
+ if (mask & 1) {
+ if (mask_16x16 & 1) {
+ if ((mask_16x16 & 3) == 3) {
+ vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
+ count = 2;
+ } else {
+ vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
+ }
+ } else if (mask_8x8 & 1) {
+ if ((mask_8x8 & 3) == 3) {
+ // Next block's thresholds.
+ const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+
+ vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, lfin->mblim, lfin->lim,
+ lfin->hev_thr);
+
+ if ((mask_4x4_int & 3) == 3) {
+ vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, lfin->mblim,
+ lfin->lim, lfin->hev_thr);
+ } else {
+ if (mask_4x4_int & 1)
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
+ else if (mask_4x4_int & 2)
+ vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ lfin->lim, lfin->hev_thr);
+ }
+ count = 2;
+ } else {
+ vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+
+ if (mask_4x4_int & 1)
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
+ }
+ } else if (mask_4x4 & 1) {
+ if ((mask_4x4 & 3) == 3) {
+ // Next block's thresholds.
+ const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+
+ vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, lfin->mblim, lfin->lim,
+ lfin->hev_thr);
+ if ((mask_4x4_int & 3) == 3) {
+ vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, lfin->mblim,
+ lfin->lim, lfin->hev_thr);
+ } else {
+ if (mask_4x4_int & 1)
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
+ else if (mask_4x4_int & 2)
+ vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ lfin->lim, lfin->hev_thr);
+ }
+ count = 2;
+ } else {
+ vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+
+ if (mask_4x4_int & 1)
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
+ }
+ } else if (mask_4x4_int & 1) {
+ vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
+ }
+ }
+ s += 8 * count;
+ lfl += count;
+ mask_16x16 >>= count;
+ mask_8x8 >>= count;
+ mask_4x4 >>= count;
+ mask_4x4_int >>= count;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_filter_selectively_horiz(
+ uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
+ unsigned int mask_4x4, unsigned int mask_4x4_int,
+ const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) {
+ unsigned int mask;
+ int count;
+
+ for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
+ mask >>= count) {
+ const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+
+ count = 1;
+ if (mask & 1) {
+ if (mask_16x16 & 1) {
+ if ((mask_16x16 & 3) == 3) {
+ vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
+ count = 2;
+ } else {
+ vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
+ }
+ } else if (mask_8x8 & 1) {
+ if ((mask_8x8 & 3) == 3) {
+ // Next block's thresholds.
+ const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+
+ vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, lfin->mblim, lfin->lim,
+ lfin->hev_thr, bd);
+
+ if ((mask_4x4_int & 3) == 3) {
+ vpx_highbd_lpf_horizontal_4_dual(
+ s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+ lfin->mblim, lfin->lim, lfin->hev_thr, bd);
+ } else {
+ if (mask_4x4_int & 1) {
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, bd);
+ } else if (mask_4x4_int & 2) {
+ vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ lfin->lim, lfin->hev_thr, bd);
+ }
+ }
+ count = 2;
+ } else {
+ vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
+
+ if (mask_4x4_int & 1) {
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, bd);
+ }
+ }
+ } else if (mask_4x4 & 1) {
+ if ((mask_4x4 & 3) == 3) {
+ // Next block's thresholds.
+ const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
+
+ vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, lfin->mblim, lfin->lim,
+ lfin->hev_thr, bd);
+ if ((mask_4x4_int & 3) == 3) {
+ vpx_highbd_lpf_horizontal_4_dual(
+ s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+ lfin->mblim, lfin->lim, lfin->hev_thr, bd);
+ } else {
+ if (mask_4x4_int & 1) {
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, bd);
+ } else if (mask_4x4_int & 2) {
+ vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
+ lfin->lim, lfin->hev_thr, bd);
+ }
+ }
+ count = 2;
+ } else {
+ vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
+
+ if (mask_4x4_int & 1) {
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
+ lfi->lim, lfi->hev_thr, bd);
+ }
+ }
+ } else if (mask_4x4_int & 1) {
+ vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
+ }
+ }
+ s += 8 * count;
+ lfl += count;
+ mask_16x16 >>= count;
+ mask_8x8 >>= count;
+ mask_4x4 >>= count;
+ mask_4x4_int >>= count;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// This function ors into the current lfm structure, where to do loop
+// filters for the specific mi we are looking at. It uses information
+// including the block_size_type (32x16, 32x32, etc.), the transform size,
+// whether there were any coefficients encoded, and the loop filter strength
+// block we are currently looking at. Shift is used to position the
+// 1's we produce.
+// TODO(JBB) Need another function for different resolution color..
+static void build_masks(const loop_filter_info_n *const lfi_n,
+ const MODE_INFO *mi, const int shift_y,
+ const int shift_uv, LOOP_FILTER_MASK *lfm) {
+ const MB_MODE_INFO *mbmi = &mi->mbmi;
+ const BLOCK_SIZE block_size = mbmi->sb_type;
+ // TODO(debargha): Check if masks can be setup correctly when
+ // rectangular transforms are used with the EXT_TX expt.
+ const TX_SIZE tx_size_y = txsize_sqr_up_map[mbmi->tx_size];
+ const TX_SIZE tx_size_uv =
+ get_uv_tx_size_impl(mbmi->tx_size, block_size, 1, 1);
+ const int filter_level = get_filter_level(lfi_n, mbmi);
+ uint64_t *const left_y = &lfm->left_y[tx_size_y];
+ uint64_t *const above_y = &lfm->above_y[tx_size_y];
+ uint64_t *const int_4x4_y = &lfm->int_4x4_y;
+ uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
+ uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
+ uint16_t *const int_4x4_uv = &lfm->left_int_4x4_uv;
+ int i;
+
+ // If filter level is 0 we don't loop filter.
+ if (!filter_level) {
+ return;
+ } else {
+ const int w = num_8x8_blocks_wide_lookup[block_size];
+ const int h = num_8x8_blocks_high_lookup[block_size];
+ const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
+ const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
+
+ for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w);
+ }
+
+ // These set 1 in the current block size for the block size edges.
+ // For instance if the block size is 32x16, we'll set:
+ // above = 1111
+ // 0000
+ // and
+ // left = 1000
+ // = 1000
+ // NOTE : In this example the low bit is left most ( 1000 ) is stored as
+ // 1, not 8...
+ //
+ // U and V set things on a 16 bit scale.
+ //
+ *above_y |= above_prediction_mask[block_size] << shift_y;
+ *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
+ *left_y |= left_prediction_mask[block_size] << shift_y;
+ *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
+
+ // If the block has no coefficients and is not intra we skip applying
+ // the loop filter on block edges.
+ if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) return;
+
+ // Here we are adding a mask for the transform size. The transform
+ // size mask is set to be correct for a 64x64 prediction block size. We
+ // mask to match the size of the block we are working on and then shift it
+ // into place.
+ *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
+ << shift_y;
+ *above_uv |=
+ (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv])
+ << shift_uv;
+
+ *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y])
+ << shift_y;
+ *left_uv |= (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv])
+ << shift_uv;
+
+ // Here we are trying to determine what to do with the internal 4x4 block
+ // boundaries. These differ from the 4x4 boundaries on the outside edge of
+ // an 8x8 in that the internal ones can be skipped and don't depend on
+ // the prediction block size.
+ if (tx_size_y == TX_4X4)
+ *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
+
+ if (tx_size_uv == TX_4X4)
+ *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
+}
+
+// Same as the function above, except that only the y (luma) masks and the y
+// filter levels are updated. It exists because for blocks < 16x16 in size,
+// we only update u and v masks on the first block.
+static void build_y_mask(const loop_filter_info_n *const lfi_n,
+ const MODE_INFO *mi, const int shift_y,
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif // CONFIG_SUPERTX
+ LOOP_FILTER_MASK *lfm) {
+ const MB_MODE_INFO *mbmi = &mi->mbmi;
+ const TX_SIZE tx_size_y = txsize_sqr_up_map[mbmi->tx_size];
+#if CONFIG_SUPERTX
+ const BLOCK_SIZE block_size =  // NOTE(review): 3 * tx_size_y relies on enum layout
+ supertx_enabled ? (BLOCK_SIZE)(3 * tx_size_y) : mbmi->sb_type;
+#else
+ const BLOCK_SIZE block_size = mbmi->sb_type;
+#endif
+ const int filter_level = get_filter_level(lfi_n, mbmi);
+ uint64_t *const left_y = &lfm->left_y[tx_size_y];
+ uint64_t *const above_y = &lfm->above_y[tx_size_y];
+ uint64_t *const int_4x4_y = &lfm->int_4x4_y;
+ int i;
+ // A filter level of 0 leaves lfm untouched for this block.
+ if (!filter_level) {
+ return;
+ } else {
+ const int w = num_8x8_blocks_wide_lookup[block_size];
+ const int h = num_8x8_blocks_high_lookup[block_size];
+ const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
+ const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
+ // Record the level for each of the w x h 8x8 units starting at (row, col).
+ for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w);
+ }
+ // Prediction-block edges are marked before the skip test below.
+ *above_y |= above_prediction_mask[block_size] << shift_y;
+ *left_y |= left_prediction_mask[block_size] << shift_y;
+ // Skipped or coefficient-free inter blocks get no transform-edge bits.
+ if ((mbmi->skip || mbmi->has_no_coeffs) && is_inter_block(mbmi)) return;
+ // Transform-size masks, clipped to the block size and shifted into place.
+ *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
+ << shift_y;
+
+ *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y])
+ << shift_y;
+ // Internal 4x4 edges are only flagged for 4x4 transforms.
+ if (tx_size_y == TX_4X4)
+ *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
+}
+
+// This function sets up the bit masks (and per-8x8 filter levels) in *lfm for
+// the entire 64x64 region represented by mi_row, mi_col; mi points at it.
+// TODO(JBB): This function only works for yv12.
+void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col,
+ MODE_INFO **mi, const int mode_info_stride,
+ LOOP_FILTER_MASK *lfm) {
+ int idx_32, idx_16, idx_8;
+ const loop_filter_info_n *const lfi_n = &cm->lf_info;
+ MODE_INFO **mip = mi;
+ MODE_INFO **mip2 = mi;
+
+ // These are offsets to the next mi in the 64x64 block. It is what gets
+ // added to the mi ptr as we go through each loop. It helps us to avoid
+ // setting up special row and column counters for each index. The last step
+ // brings us out back to the starting position.
+ const int offset_32[] = { 4, (mode_info_stride << 2) - 4, 4,
+ -(mode_info_stride << 2) - 4 };
+ const int offset_16[] = { 2, (mode_info_stride << 1) - 2, 2,
+ -(mode_info_stride << 1) - 2 };
+ const int offset[] = { 1, mode_info_stride - 1, 1, -mode_info_stride - 1 };
+
+ // Following variables represent shifts to position the current block
+ // mask over the appropriate block. Luma masks hold 8 bits per row, so a left
+ // shift of 36 (4 * 8 + 4) moves the bits of the final 32x32 block in the
+ // 64x64 by 4 rows and 4 columns to the appropriate spot.
+ const int shift_32_y[] = { 0, 4, 32, 36 };
+ const int shift_16_y[] = { 0, 2, 16, 18 };
+ const int shift_8_y[] = { 0, 1, 8, 9 };
+ const int shift_32_uv[] = { 0, 2, 8, 10 };
+ const int shift_16_uv[] = { 0, 1, 4, 5 };
+ int i;
+ const int max_rows = VPXMIN(cm->mi_rows - mi_row, MAX_MIB_SIZE);
+ const int max_cols = VPXMIN(cm->mi_cols - mi_col, MAX_MIB_SIZE);
+#if CONFIG_EXT_PARTITION
+ assert(0 && "Not yet updated");
+#endif // CONFIG_EXT_PARTITION
+ // Start from cleared masks; blocks outside max_rows/max_cols are skipped.
+ vp10_zero(*lfm);
+ assert(mip[0] != NULL);
+
+ // TODO(jimbankoski): Try moving most of the following code into decode
+ // loop and storing lfm in the mbmi structure so that we don't have to go
+ // through the recursive loop structure multiple times.
+ switch (mip[0]->mbmi.sb_type) {
+ case BLOCK_64X64: build_masks(lfi_n, mip[0], 0, 0, lfm); break;
+ case BLOCK_64X32:
+ build_masks(lfi_n, mip[0], 0, 0, lfm);
+ mip2 = mip + mode_info_stride * 4;
+ if (4 >= max_rows) break;
+ build_masks(lfi_n, mip2[0], 32, 8, lfm);
+ break;
+ case BLOCK_32X64:
+ build_masks(lfi_n, mip[0], 0, 0, lfm);
+ mip2 = mip + 4;
+ if (4 >= max_cols) break;
+ build_masks(lfi_n, mip2[0], 4, 2, lfm);
+ break;
+ default:
+ for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
+ const int shift_y = shift_32_y[idx_32];
+ const int shift_uv = shift_32_uv[idx_32];
+ const int mi_32_col_offset = ((idx_32 & 1) << 2);
+ const int mi_32_row_offset = ((idx_32 >> 1) << 2);
+ if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
+ continue;
+ switch (mip[0]->mbmi.sb_type) {
+ case BLOCK_32X32:
+ build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ break;
+ case BLOCK_32X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+#if CONFIG_SUPERTX
+ if (supertx_enabled(&mip[0]->mbmi)) break;
+#endif
+ if (mi_32_row_offset + 2 >= max_rows) continue;
+ mip2 = mip + mode_info_stride * 2;
+ build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
+ break;
+ case BLOCK_16X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+#if CONFIG_SUPERTX
+ if (supertx_enabled(&mip[0]->mbmi)) break;
+#endif
+ if (mi_32_col_offset + 2 >= max_cols) continue;
+ mip2 = mip + 2;
+ build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
+ break;
+ default:
+#if CONFIG_SUPERTX
+ if (mip[0]->mbmi.tx_size == TX_32X32) {
+ build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ break;
+ }
+#endif
+ for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
+ const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
+ const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
+ const int mi_16_col_offset =
+ mi_32_col_offset + ((idx_16 & 1) << 1);
+ const int mi_16_row_offset =
+ mi_32_row_offset + ((idx_16 >> 1) << 1);
+
+ if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
+ continue;
+
+ switch (mip[0]->mbmi.sb_type) {
+ case BLOCK_16X16:
+ build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ break;
+ case BLOCK_16X8:
+#if CONFIG_SUPERTX
+ if (supertx_enabled(&mip[0]->mbmi)) break;
+#endif
+ build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ if (mi_16_row_offset + 1 >= max_rows) continue;
+ mip2 = mip + mode_info_stride;
+ build_y_mask(lfi_n, mip2[0], shift_y + 8,
+#if CONFIG_SUPERTX
+ 0,
+#endif
+ lfm);
+ break;
+ case BLOCK_8X16:
+#if CONFIG_SUPERTX
+ if (supertx_enabled(&mip[0]->mbmi)) break;
+#endif
+ build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ if (mi_16_col_offset + 1 >= max_cols) continue;
+ mip2 = mip + 1;
+ build_y_mask(lfi_n, mip2[0], shift_y + 1,
+#if CONFIG_SUPERTX
+ 0,
+#endif
+ lfm);
+ break;
+ default: {
+ const int shift_y =
+ shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0];
+#if CONFIG_SUPERTX
+ if (mip[0]->mbmi.tx_size == TX_16X16) {
+ build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ break;
+ }
+#endif
+ build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
+ mip += offset[0];
+ for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
+ const int shift_y = shift_32_y[idx_32] +
+ shift_16_y[idx_16] + shift_8_y[idx_8];
+ const int mi_8_col_offset =
+ mi_16_col_offset + ((idx_8 & 1));
+ const int mi_8_row_offset =
+ mi_16_row_offset + ((idx_8 >> 1));
+
+ if (mi_8_col_offset >= max_cols ||
+ mi_8_row_offset >= max_rows)
+ continue;
+ build_y_mask(lfi_n, mip[0], shift_y,
+#if CONFIG_SUPERTX
+ supertx_enabled(&mip[0]->mbmi),
+#endif
+ lfm);
+ }
+ break;
+ }
+ }
+ }
+ break;
+ }
+ }
+ break;
+ }
+ // The largest loopfilter we have is 16x16 so we use the 16x16 mask
+ // for 32x32 transforms also.
+ lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
+ lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
+ lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
+ lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];
+
+ // We do at least 8 tap filter on every 32x32 even if the transform size
+ // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and
+ // remove it from the 4x4.
+ lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
+ lfm->left_y[TX_4X4] &= ~left_border;
+ lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
+ lfm->above_y[TX_4X4] &= ~above_border;
+ lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
+ lfm->left_uv[TX_4X4] &= ~left_border_uv;
+ lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
+ lfm->above_uv[TX_4X4] &= ~above_border_uv;
+
+ // Bottom frame edge: this region extends past the last mi row.
+ if (mi_row + MAX_MIB_SIZE > cm->mi_rows) {
+ const uint64_t rows = cm->mi_rows - mi_row;
+
+ // Each 8x8 unit inside the border gets a 1 (uv row count is rounded up).
+ const uint64_t mask_y = (((uint64_t)1 << (rows << MAX_MIB_SIZE_LOG2)) - 1);
+ const uint16_t mask_uv =
+ (((uint16_t)1 << (((rows + 1) >> 1) << (MAX_MIB_SIZE_LOG2 - 1))) - 1);
+
+ // Remove values completely outside our border (TX_32X32 was folded above).
+ for (i = 0; i < TX_32X32; i++) {
+ lfm->left_y[i] &= mask_y;
+ lfm->above_y[i] &= mask_y;
+ lfm->left_uv[i] &= mask_uv;
+ lfm->above_uv[i] &= mask_uv;
+ }
+ lfm->int_4x4_y &= mask_y;
+ lfm->above_int_4x4_uv = lfm->left_int_4x4_uv & mask_uv;
+ // NOTE(review): above_int_4x4_uv is assigned from left_int_4x4_uv here, and
+ // only on this bottom-edge path; confirm. We don't apply a wide loop filter
+ // on the last uv block row. If set apply the shorter one instead.
+ if (rows == 1) {
+ lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
+ lfm->above_uv[TX_16X16] = 0;
+ }
+ if (rows == 5) {  // 0xff00 covers uv rows 2 and 3 (4 bits per uv row)
+ lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
+ lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
+ }
+ }
+ // Right frame edge: this region extends past the last mi column.
+ if (mi_col + MAX_MIB_SIZE > cm->mi_cols) {
+ const uint64_t columns = cm->mi_cols - mi_col;
+
+ // Each column inside the border gets a 1, the multiply copies the border
+ // to every mask row (8 bits per y row, 4 bits per uv row).
+ const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL;
+ const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;
+
+ // Internal edges are not applied on the last column of the image so
+ // we mask 1 more for the internal edges
+ const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;
+
+ // Remove the bits outside the image edge.
+ for (i = 0; i < TX_32X32; i++) {
+ lfm->left_y[i] &= mask_y;
+ lfm->above_y[i] &= mask_y;
+ lfm->left_uv[i] &= mask_uv;
+ lfm->above_uv[i] &= mask_uv;
+ }
+ lfm->int_4x4_y &= mask_y;
+ lfm->left_int_4x4_uv &= mask_uv_int;
+
+ // We don't apply a wide loop filter on the last uv column. If set
+ // apply the shorter one instead.
+ if (columns == 1) {
+ lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
+ lfm->left_uv[TX_16X16] = 0;
+ }
+ if (columns == 5) {  // 0xcccc covers uv columns 2 and 3 of every row
+ lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
+ lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
+ }
+ }
+ // We don't apply a loop filter on the first column in the image, mask that
+ // out.
+ if (mi_col == 0) {
+ for (i = 0; i < TX_32X32; i++) {
+ lfm->left_y[i] &= 0xfefefefefefefefeULL;
+ lfm->left_uv[i] &= 0xeeee;
+ }
+ }
+
+ // Assert if we try to apply 2 different loop filters at the same position.
+ assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
+ assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
+ assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
+ assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
+ assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8]));
+ assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
+ assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
+ assert(!(lfm->left_int_4x4_uv & lfm->left_uv[TX_16X16]));
+ assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
+ assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
+ assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
+ assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
+ assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
+ assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
+ assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
+ assert(!(lfm->above_int_4x4_uv & lfm->above_uv[TX_16X16]));
+}
+// Filters the vertical edges of one row of 8x8 units, one mask bit per unit.
+static void filter_selectively_vert(
+ uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
+ unsigned int mask_4x4, unsigned int mask_4x4_int,
+ const loop_filter_info_n *lfi_n, const uint8_t *lfl) {
+ unsigned int mask;
+ // Bit 0 of every mask refers to the unit at s; all masks shift together.
+ for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
+ mask >>= 1) {
+ const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+ // *lfl picks the thresholds; the widest requested filter wins.
+ if (mask & 1) {
+ if (mask_16x16 & 1) {
+ vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+ } else if (mask_8x8 & 1) {
+ vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+ } else if (mask_4x4 & 1) {
+ vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+ }
+ }
+ if (mask_4x4_int & 1)  // edge inside the unit, at s + 4
+ vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
+ s += 8;
+ lfl += 1;
+ mask_16x16 >>= 1;
+ mask_8x8 >>= 1;
+ mask_4x4 >>= 1;
+ mask_4x4_int >>= 1;
+ }
+}
+// 16-bit-sample variant of filter_selectively_vert(); bd is the bit depth.
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_filter_selectively_vert(
+ uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
+ unsigned int mask_4x4, unsigned int mask_4x4_int,
+ const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) {
+ unsigned int mask;
+ // Bit 0 of every mask refers to the unit at s; all masks shift together.
+ for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
+ mask >>= 1) {
+ const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
+ // *lfl picks the thresholds; the widest requested filter wins.
+ if (mask & 1) {
+ if (mask_16x16 & 1) {
+ vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+ bd);
+ } else if (mask_8x8 & 1) {
+ vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+ bd);
+ } else if (mask_4x4 & 1) {
+ vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
+ bd);
+ }
+ }
+ if (mask_4x4_int & 1)  // edge inside the unit, at s + 4
+ vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
+ s += 8;
+ lfl += 1;
+ mask_16x16 >>= 1;
+ mask_8x8 >>= 1;
+ mask_4x4 >>= 1;
+ mask_4x4_int >>= 1;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+// Filters one plane of the region at (mi_row, mi_col), for any subsampling.
+void vp10_filter_block_plane_non420(VP10_COMMON *cm,
+ struct macroblockd_plane *plane,
+ MODE_INFO **mib, int mi_row, int mi_col) {
+ const int ss_x = plane->subsampling_x;
+ const int ss_y = plane->subsampling_y;
+ const int row_step = 1 << ss_y;
+ const int col_step = 1 << ss_x;
+ struct buf_2d *const dst = &plane->dst;
+ uint8_t *const dst0 = dst->buf;
+ unsigned int mask_16x16[MAX_MIB_SIZE] = { 0 };
+ unsigned int mask_8x8[MAX_MIB_SIZE] = { 0 };
+ unsigned int mask_4x4[MAX_MIB_SIZE] = { 0 };
+ unsigned int mask_4x4_int[MAX_MIB_SIZE] = { 0 };
+ uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE];
+ int r, c;
+ // Pass 1: for each row, build every mask and filter the vertical edges.
+ for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) {
+ unsigned int mask_16x16_c = 0;
+ unsigned int mask_8x8_c = 0;
+ unsigned int mask_4x4_c = 0;
+ unsigned int border_mask;
+
+ // Determine the vertical edges that need filtering
+ for (c = 0; c < cm->mib_size && mi_col + c < cm->mi_cols; c += col_step) {
+ const MODE_INFO *mi = mib[c];
+ const MB_MODE_INFO *mbmi = &mi[0].mbmi;
+ const BLOCK_SIZE sb_type = mbmi->sb_type;
+ const int skip_this = mbmi->skip && is_inter_block(mbmi);
+ const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
+ const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);
+
+ // left edge of current unit is block/partition edge -> no skip
+ const int block_edge_left =
+ (num_4x4_blocks_wide_lookup[sb_type] > 1) ? !blk_col : 1;
+ const int skip_this_c = skip_this && !block_edge_left;
+ // top edge of current unit is block/partition edge -> no skip
+ const int block_edge_above =
+ (num_4x4_blocks_high_lookup[sb_type] > 1) ? !blk_row : 1;
+ const int skip_this_r = skip_this && !block_edge_above;
+
+#if CONFIG_VAR_TX
+ TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
+ ? get_uv_tx_size(mbmi, plane)
+ : mbmi->tx_size;
+#else
+ const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
+ ? get_uv_tx_size(mbmi, plane)
+ : mbmi->tx_size;
+#endif
+
+ const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
+ const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
+
+ TX_SIZE tx_size_c = num_4x4_blocks_wide_txsize_log2_lookup[tx_size];
+ TX_SIZE tx_size_r = num_4x4_blocks_high_txsize_log2_lookup[tx_size];
+
+ int tx_size_mask = 0;
+ // Filter level can vary per MI
+ if (!(lfl[r][c >> ss_x] = get_filter_level(&cm->lf_info, mbmi))) continue;
+ // (A zero level skips the rest, including the VAR_TX context updates.)
+ if (tx_size == TX_32X32)
+ tx_size_mask = 3;
+ else if (tx_size == TX_16X16)
+ tx_size_mask = 1;
+ else
+ tx_size_mask = 0;
+
+#if CONFIG_VAR_TX
+ if (is_inter_block(mbmi) && !mbmi->skip)
+ tx_size =
+ (plane->plane_type == PLANE_TYPE_UV)
+ ? get_uv_tx_size_impl(mbmi->inter_tx_size[blk_row][blk_col],
+ sb_type, ss_x, ss_y)
+ : mbmi->inter_tx_size[blk_row][blk_col];
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ tx_size_r =
+ VPXMIN(txsize_horz_map[tx_size], cm->above_txfm_context[mi_col + c]);
+ tx_size_c = VPXMIN(txsize_vert_map[tx_size],
+ cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK]);
+
+ cm->above_txfm_context[mi_col + c] = txsize_horz_map[tx_size];
+ cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK] =
+ txsize_vert_map[tx_size];
+#else
+ tx_size_r = VPXMIN(tx_size, cm->above_txfm_context[mi_col + c]);
+ tx_size_c =
+ VPXMIN(tx_size, cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK]);
+
+ cm->above_txfm_context[mi_col + c] = tx_size;
+ cm->left_txfm_context[(mi_row + r) & MAX_MIB_MASK] = tx_size;
+#endif
+#endif
+
+ // Build masks based on the transform size of each block
+ // handle vertical mask
+ if (tx_size_c == TX_32X32) {
+ if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
+ if (!skip_border_4x4_c)
+ mask_16x16_c |= 1 << (c >> ss_x);
+ else
+ mask_8x8_c |= 1 << (c >> ss_x);
+ }
+ } else if (tx_size_c == TX_16X16) {
+ if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
+ if (!skip_border_4x4_c)
+ mask_16x16_c |= 1 << (c >> ss_x);
+ else
+ mask_8x8_c |= 1 << (c >> ss_x);
+ }
+ } else {
+ // force 8x8 filtering on 32x32 boundaries
+ if (!skip_this_c && ((c >> ss_x) & tx_size_mask) == 0) {
+ if (tx_size_c == TX_8X8 || ((c >> ss_x) & 3) == 0)
+ mask_8x8_c |= 1 << (c >> ss_x);
+ else
+ mask_4x4_c |= 1 << (c >> ss_x);
+ }
+
+ if (!skip_this && tx_size_c < TX_8X8 && !skip_border_4x4_c &&
+ ((c >> ss_x) & tx_size_mask) == 0)
+ mask_4x4_int[r] |= 1 << (c >> ss_x);
+ }
+
+ // set horizontal mask
+ if (tx_size_r == TX_32X32) {
+ if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
+ if (!skip_border_4x4_r)
+ mask_16x16[r] |= 1 << (c >> ss_x);
+ else
+ mask_8x8[r] |= 1 << (c >> ss_x);
+ }
+ } else if (tx_size_r == TX_16X16) {
+ if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
+ if (!skip_border_4x4_r)
+ mask_16x16[r] |= 1 << (c >> ss_x);
+ else
+ mask_8x8[r] |= 1 << (c >> ss_x);
+ }
+ } else {
+ // force 8x8 filtering on 32x32 boundaries
+ if (!skip_this_r && ((r >> ss_y) & tx_size_mask) == 0) {
+ if (tx_size_r == TX_8X8 || ((r >> ss_y) & 3) == 0)
+ mask_8x8[r] |= 1 << (c >> ss_x);
+ else
+ mask_4x4[r] |= 1 << (c >> ss_x);
+ }
+ // NOTE(review): tests skip_border_4x4_c here, not skip_border_4x4_r.
+ if (!skip_this && tx_size_r < TX_8X8 && !skip_border_4x4_c &&
+ ((r >> ss_y) & tx_size_mask) == 0)
+ mask_4x4_int[r] |= 1 << (c >> ss_x);
+ }
+ }
+ // mask_4x4_int[r] now carries bits from both sections above.
+ // Disable filtering on the leftmost column
+ border_mask = ~(mi_col == 0);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ highbd_filter_selectively_vert(
+ CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
+ mask_16x16_c & border_mask, mask_8x8_c & border_mask,
+ mask_4x4_c & border_mask, mask_4x4_int[r], &cm->lf_info, &lfl[r][0],
+ (int)cm->bit_depth);
+ } else {
+ filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask,
+ mask_8x8_c & border_mask,
+ mask_4x4_c & border_mask, mask_4x4_int[r],
+ &cm->lf_info, &lfl[r][0]);
+ }
+#else
+ filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask,
+ mask_8x8_c & border_mask, mask_4x4_c & border_mask,
+ mask_4x4_int[r], &cm->lf_info, &lfl[r][0]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ dst->buf += MI_SIZE * dst->stride;
+ mib += row_step * cm->mi_stride;
+ }
+
+ // Pass 2: horizontal edges, reusing the per-row masks built above.
+ dst->buf = dst0;
+ for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) {
+ const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
+ const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
+
+ unsigned int mask_16x16_r;
+ unsigned int mask_8x8_r;
+ unsigned int mask_4x4_r;
+
+ if (mi_row + r == 0) {  // top frame row: only internal 4x4 edges remain
+ mask_16x16_r = 0;
+ mask_8x8_r = 0;
+ mask_4x4_r = 0;
+ } else {
+ mask_16x16_r = mask_16x16[r];
+ mask_8x8_r = mask_8x8[r];
+ mask_4x4_r = mask_4x4[r];
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
+ dst->stride, mask_16x16_r, mask_8x8_r,
+ mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+ &lfl[r][0], (int)cm->bit_depth);
+ } else {
+ filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
+ mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+ &lfl[r][0]);
+ }
+#else
+ filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
+ mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+ &lfl[r][0]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ dst->buf += MI_SIZE * dst->stride;
+ }
+}
+// Filters a non-subsampled plane of one region from the y masks in lfm.
+void vp10_filter_block_plane_ss00(VP10_COMMON *const cm,
+ struct macroblockd_plane *const plane,
+ int mi_row, LOOP_FILTER_MASK *lfm) {
+ struct buf_2d *const dst = &plane->dst;
+ uint8_t *const dst0 = dst->buf;
+ int r;
+ uint64_t mask_16x16 = lfm->left_y[TX_16X16];
+ uint64_t mask_8x8 = lfm->left_y[TX_8X8];
+ uint64_t mask_4x4 = lfm->left_y[TX_4X4];
+ uint64_t mask_4x4_int = lfm->int_4x4_y;
+
+ assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);
+
+ // Vertical pass: do 2 rows at one time
+ for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
+ unsigned int mask_16x16_l = mask_16x16 & 0xffff;
+ unsigned int mask_8x8_l = mask_8x8 & 0xffff;
+ unsigned int mask_4x4_l = mask_4x4 & 0xffff;
+ unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;
+
+// NOTE(review): no masking here; column 0 is cleared in vp10_setup_mask().
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ highbd_filter_selectively_vert_row2(
+ plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
+ mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
+ &lfm->lfl_y[r][0], (int)cm->bit_depth);
+ } else {
+ filter_selectively_vert_row2(
+ plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
+ mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r][0]);
+ }
+#else
+ filter_selectively_vert_row2(
+ plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
+ mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r][0]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ dst->buf += 2 * MI_SIZE * dst->stride;
+ mask_16x16 >>= 2 * MI_SIZE;  // drops 2 mask rows if MI_SIZE == 8 (confirm)
+ mask_8x8 >>= 2 * MI_SIZE;
+ mask_4x4 >>= 2 * MI_SIZE;
+ mask_4x4_int >>= 2 * MI_SIZE;
+ }
+
+ // Horizontal pass
+ dst->buf = dst0;
+ mask_16x16 = lfm->above_y[TX_16X16];
+ mask_8x8 = lfm->above_y[TX_8X8];
+ mask_4x4 = lfm->above_y[TX_4X4];
+ mask_4x4_int = lfm->int_4x4_y;
+ // One mask row (the low 8 bits) is consumed per iteration.
+ for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r++) {
+ unsigned int mask_16x16_r;
+ unsigned int mask_8x8_r;
+ unsigned int mask_4x4_r;
+
+ if (mi_row + r == 0) {  // top frame row: only internal 4x4 edges remain
+ mask_16x16_r = 0;
+ mask_8x8_r = 0;
+ mask_4x4_r = 0;
+ } else {
+ mask_16x16_r = mask_16x16 & 0xff;
+ mask_8x8_r = mask_8x8 & 0xff;
+ mask_4x4_r = mask_4x4 & 0xff;
+ }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ highbd_filter_selectively_horiz(
+ CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
+ mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r][0],
+ (int)cm->bit_depth);
+ } else {
+ filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
+ mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
+ &lfm->lfl_y[r][0]);
+ }
+#else
+ filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
+ mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
+ &lfm->lfl_y[r][0]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ dst->buf += MI_SIZE * dst->stride;
+ mask_16x16 >>= MI_SIZE;
+ mask_8x8 >>= MI_SIZE;
+ mask_4x4 >>= MI_SIZE;
+ mask_4x4_int >>= MI_SIZE;
+ }
+}
+// Filters a 4:2:0 chroma plane of one region from the uv masks in lfm.
+void vp10_filter_block_plane_ss11(VP10_COMMON *const cm,
+ struct macroblockd_plane *const plane,
+ int mi_row, LOOP_FILTER_MASK *lfm) {
+ struct buf_2d *const dst = &plane->dst;
+ uint8_t *const dst0 = dst->buf;
+ int r, c;
+
+ uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
+ uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
+ uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
+ uint16_t mask_4x4_int = lfm->left_int_4x4_uv;
+
+ assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
+ assert(plane->plane_type == PLANE_TYPE_UV);
+
+ // Vertical pass: do 2 uv rows (4 mi rows) at one time
+ for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 4) {
+ for (c = 0; c < (cm->mib_size >> 1); c++) {  // take every other y level
+ lfm->lfl_uv[r >> 1][c] = lfm->lfl_y[r][c << 1];
+ lfm->lfl_uv[(r + 2) >> 1][c] = lfm->lfl_y[r + 2][c << 1];
+ }
+
+ {
+ unsigned int mask_16x16_l = mask_16x16 & 0xff;
+ unsigned int mask_8x8_l = mask_8x8 & 0xff;
+ unsigned int mask_4x4_l = mask_4x4 & 0xff;
+ unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;
+
+// NOTE(review): no masking here; column 0 is cleared in vp10_setup_mask().
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ highbd_filter_selectively_vert_row2(
+ plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
+ mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
+ &lfm->lfl_uv[r >> 1][0], (int)cm->bit_depth);
+ } else {
+ filter_selectively_vert_row2(plane->subsampling_x, dst->buf,
+ dst->stride, mask_16x16_l, mask_8x8_l,
+ mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
+ &lfm->lfl_uv[r >> 1][0]);
+ }
+#else
+ filter_selectively_vert_row2(
+ plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
+ mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_uv[r >> 1][0]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ dst->buf += 2 * MI_SIZE * dst->stride;
+ mask_16x16 >>= MI_SIZE;
+ mask_8x8 >>= MI_SIZE;
+ mask_4x4 >>= MI_SIZE;
+ mask_4x4_int >>= MI_SIZE;
+ }
+ }
+
+ // Horizontal pass
+ dst->buf = dst0;
+ mask_16x16 = lfm->above_uv[TX_16X16];
+ mask_8x8 = lfm->above_uv[TX_8X8];
+ mask_4x4 = lfm->above_uv[TX_4X4];
+ mask_4x4_int = lfm->above_int_4x4_uv;
+ // One uv mask row (the low 4 bits) per iteration; r advances 2 mi rows.
+ for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
+ const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
+ const unsigned int mask_4x4_int_r =
+ skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
+ unsigned int mask_16x16_r;
+ unsigned int mask_8x8_r;
+ unsigned int mask_4x4_r;
+
+ if (mi_row + r == 0) {  // top frame row: only internal 4x4 edges remain
+ mask_16x16_r = 0;
+ mask_8x8_r = 0;
+ mask_4x4_r = 0;
+ } else {
+ mask_16x16_r = mask_16x16 & 0xf;
+ mask_8x8_r = mask_8x8 & 0xf;
+ mask_4x4_r = mask_4x4 & 0xf;
+ }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ highbd_filter_selectively_horiz(
+ CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
+ mask_4x4_r, mask_4x4_int_r, &cm->lf_info, &lfm->lfl_uv[r >> 1][0],
+ (int)cm->bit_depth);
+ } else {
+ filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
+ mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+ &lfm->lfl_uv[r >> 1][0]);
+ }
+#else
+ filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
+ mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
+ &lfm->lfl_uv[r >> 1][0]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ dst->buf += MI_SIZE * dst->stride;
+ mask_16x16 >>= MI_SIZE / 2;
+ mask_8x8 >>= MI_SIZE / 2;
+ mask_4x4 >>= MI_SIZE / 2;
+ mask_4x4_int >>= MI_SIZE / 2;
+ }
+}
+// Loop-filters the mode-info rows in [start, stop) across the frame width.
+void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP10_COMMON *cm,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
+ int start, int stop, int y_only) {
+#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
+ const int num_planes = y_only ? 1 : MAX_MB_PLANE;
+ int mi_row, mi_col;
+ // These configurations always take the generic per-plane filter.
+#if CONFIG_VAR_TX
+ memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols);
+#endif // CONFIG_VAR_TX
+ for (mi_row = start; mi_row < stop; mi_row += cm->mib_size) {
+ MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
+#if CONFIG_VAR_TX
+ memset(cm->left_txfm_context, TX_SIZES, MAX_MIB_SIZE);
+#endif // CONFIG_VAR_TX
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) {
+ int plane;
+
+ vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
+
+ for (plane = 0; plane < num_planes; ++plane)
+ vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, mi_row,
+ mi_col);
+ }
+ }
+#else
+ const int num_planes = y_only ? 1 : MAX_MB_PLANE;
+ int mi_row, mi_col;
+ enum lf_path path;
+ LOOP_FILTER_MASK lfm;
+ // Pick the chroma routine from the subsampling of planes[1].
+ if (y_only)
+ path = LF_PATH_444;
+ else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
+ path = LF_PATH_420;
+ else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
+ path = LF_PATH_444;
+ else
+ path = LF_PATH_SLOW;
+ // Note: this branch steps by MAX_MIB_SIZE, the one above by cm->mib_size.
+ for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
+ MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
+ int plane;
+
+ vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
+
+ // TODO(JBB): Make setup_mask work for non 420.
+ vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
+ // Plane 0 always uses the ss00 routine; the other planes follow path.
+ vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
+ for (plane = 1; plane < num_planes; ++plane) {
+ switch (path) {
+ case LF_PATH_420:
+ vp10_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
+ break;
+ case LF_PATH_444:
+ vp10_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
+ break;
+ case LF_PATH_SLOW:
+ vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
+ mi_row, mi_col);
+ break;
+ }
+ }
+ }
+ }
+#endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
+}
+// Filters the whole frame, or only a middle band when partial_frame is set.
+void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+ MACROBLOCKD *xd, int frame_filter_level, int y_only,
+ int partial_frame) {
+ int start_mi_row, end_mi_row, mi_rows_to_filter;
+ if (!frame_filter_level) return;  // level 0: nothing is filtered
+ start_mi_row = 0;
+ mi_rows_to_filter = cm->mi_rows;
+ if (partial_frame && cm->mi_rows > 8) {
+ start_mi_row = cm->mi_rows >> 1;
+ start_mi_row &= 0xfffffff8;  // round down to a multiple of 8
+ mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);  // at least 8 rows
+ }
+ end_mi_row = start_mi_row + mi_rows_to_filter;
+ vp10_loop_filter_frame_init(cm, frame_filter_level);
+ vp10_loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only);
+}
+// Points lf_data at frame_buffer/cm, zeroes its row range, copies planes.
+void vp10_loop_filter_data_reset(
+ LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer,
+ struct VP10Common *cm,
+ const struct macroblockd_plane planes[MAX_MB_PLANE]) {
+ lf_data->frame_buffer = frame_buffer;
+ lf_data->cm = cm;
+ lf_data->start = 0;  // start/stop/y_only are left for the caller to set
+ lf_data->stop = 0;
+ lf_data->y_only = 0;
+ memcpy(lf_data->planes, planes, sizeof(lf_data->planes));
+}
+// Runs vp10_loop_filter_rows() on the job described by lf_data; returns 1.
+int vp10_loop_filter_worker(LFWorkerData *const lf_data, void *unused) {
+ (void)unused;  // second argument is deliberately ignored
+ vp10_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
+ lf_data->start, lf_data->stop, lf_data->y_only);
+ return 1;
+}
diff --git a/av1/common/loopfilter.h b/av1/common/loopfilter.h
new file mode 100644
index 0000000..b85ed04
--- /dev/null
+++ b/av1/common/loopfilter.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_LOOPFILTER_H_
+#define VP10_COMMON_LOOPFILTER_H_
+
+#include "aom_ports/mem.h"
+#include "./vpx_config.h"
+
+#include "av1/common/blockd.h"
+#include "av1/common/restoration.h"
+#include "av1/common/seg_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_LOOP_FILTER 63  // lfthr[] below holds MAX_LOOP_FILTER + 1 entries.
+#define MAX_SHARPNESS 7
+
+#define SIMD_WIDTH 16  // Alignment and length of each threshold array below.
+
+#define MAX_MODE_LF_DELTAS 2  // Size of the mode delta tables (ZERO_MV, MV).
+// Selects the per-plane filtering routine used for planes 1 and up.
+enum lf_path {
+ LF_PATH_420,  // Uses vp10_filter_block_plane_ss11().
+ LF_PATH_444,  // Uses vp10_filter_block_plane_ss00().
+ LF_PATH_SLOW,  // Uses vp10_filter_block_plane_non420().
+};
+// Loop filter settings: level, sharpness and the reference/mode delta tables.
+struct loopfilter {
+ int filter_level;
+
+ int sharpness_level;
+ int last_sharpness_level;  // Presumably the previously applied value - confirm.
+
+ uint8_t mode_ref_delta_enabled;
+ uint8_t mode_ref_delta_update;
+
+ // 0 = Intra, Last, Last2+Last3(CONFIG_EXT_REFS),
+ // GF, BRF(CONFIG_EXT_REFS), ARF
+ signed char ref_deltas[TOTAL_REFS_PER_FRAME];
+ signed char last_ref_deltas[TOTAL_REFS_PER_FRAME];
+
+ // 0 = ZERO_MV, MV
+ signed char mode_deltas[MAX_MODE_LF_DELTAS];
+ signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
+};
+
+// Filter thresholds for one level. Every array is SIMD_WIDTH-aligned so that,
+// wherever it is declared or passed, it can be loaded into vector registers.
+typedef struct {
+ DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]);
+ DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]);
+ DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]);
+} loop_filter_thresh;
+// One threshold set per filter level, plus lvl[segment][ref frame][mode delta].
+typedef struct {
+ loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1];  // Indexed 0..MAX_LOOP_FILTER.
+ uint8_t lvl[MAX_SEGMENTS][TOTAL_REFS_PER_FRAME][MAX_MODE_LF_DELTAS];
+} loop_filter_info_n;
+
+// This structure holds bit masks for all 8x8 blocks in a 64x64 region.
+// Each 1 bit represents a position in which we want to apply the loop filter.
+// Left_ entries refer to whether we apply a filter on the border to the
+// left of the block. Above_ entries refer to whether or not to apply a
+// filter on the above border. Int_ entries refer to whether or not to
+// apply a filter on the 4x4 edges within the 8x8 block that each bit
+// represents.
+// Since each transform is accompanied by a potentially different type of
+// loop filter there is a different entry in the array for each transform size.
+typedef struct {
+ uint64_t left_y[TX_SIZES];  // Luma: one 64-bit mask per transform size.
+ uint64_t above_y[TX_SIZES];
+ uint64_t int_4x4_y;
+ uint16_t left_uv[TX_SIZES];  // Chroma: one 16-bit mask per transform size.
+ uint16_t above_uv[TX_SIZES];
+ uint16_t left_int_4x4_uv;
+ uint16_t above_int_4x4_uv;
+ uint8_t lfl_y[MAX_MIB_SIZE][MAX_MIB_SIZE];  // Presumably filter levels - confirm.
+ uint8_t lfl_uv[MAX_MIB_SIZE / 2][MAX_MIB_SIZE / 2];
+} LOOP_FILTER_MASK;
+
+/* assorted loopfilter functions which get used elsewhere */
+struct VP10Common;
+struct macroblockd;
+struct VP10LfSyncData;
+
+// This function sets up the bit masks for the entire 64x64 region represented
+// by mi_row, mi_col.
+void vp10_setup_mask(struct VP10Common *const cm, const int mi_row,
+ const int mi_col, MODE_INFO **mi_8x8,
+ const int mode_info_stride, LOOP_FILTER_MASK *lfm);
+// Filters one plane from lfm; used for plane 0 and for LF_PATH_444 chroma.
+void vp10_filter_block_plane_ss00(struct VP10Common *const cm,
+ struct macroblockd_plane *const plane,
+ int mi_row, LOOP_FILTER_MASK *lfm);
+// Filters one plane from lfm; used for chroma on the LF_PATH_420 path.
+void vp10_filter_block_plane_ss11(struct VP10Common *const cm,
+ struct macroblockd_plane *const plane,
+ int mi_row, LOOP_FILTER_MASK *lfm);
+// Variant that takes the MODE_INFO array instead of lfm; used by LF_PATH_SLOW.
+void vp10_filter_block_plane_non420(struct VP10Common *cm,
+ struct macroblockd_plane *plane,
+ MODE_INFO **mi_8x8, int mi_row, int mi_col);
+
+void vp10_loop_filter_init(struct VP10Common *cm);
+
+// Update the loop filter for the current frame.
+// This must be called before vp10_loop_filter_rows(). Callers of
+// vp10_loop_filter_frame() need not do so, because that function calls
+// this one directly.
+void vp10_loop_filter_frame_init(struct VP10Common *cm, int default_filt_lvl);
+
+void vp10_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct VP10Common *cm,
+ struct macroblockd *mbd, int filter_level,
+ int y_only, int partial_frame);
+
+// Apply the loop filter to mi rows [start, stop) of frame_buffer.
+void vp10_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
+ struct VP10Common *cm,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
+ int start, int stop, int y_only);
+// Argument bundle consumed by vp10_loop_filter_worker().
+typedef struct LoopFilterWorkerData {
+ YV12_BUFFER_CONFIG *frame_buffer;
+ struct VP10Common *cm;
+ struct macroblockd_plane planes[MAX_MB_PLANE];  // Copied in by ..._data_reset().
+
+ int start;  // First row handed to vp10_loop_filter_rows().
+ int stop;  // End of the row range handed to vp10_loop_filter_rows().
+ int y_only;
+} LFWorkerData;
+// Stores frame_buffer and cm, copies planes, and zeroes start/stop/y_only.
+void vp10_loop_filter_data_reset(
+ LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer,
+ struct VP10Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]);
+
+// Operates on the rows described by 'lf_data'. Always returns 1.
+int vp10_loop_filter_worker(LFWorkerData *const lf_data, void *unused);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_LOOPFILTER_H_
diff --git a/av1/common/mips/dspr2/itrans16_dspr2.c b/av1/common/mips/dspr2/itrans16_dspr2.c
new file mode 100644
index 0000000..c0b9b2a
--- /dev/null
+++ b/av1/common/mips/dspr2/itrans16_dspr2.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#include "./vp10_rtcd.h"
+#include "av1/common/common.h"
+#include "av1/common/blockd.h"
+#include "av1/common/idct.h"
+#include "aom_dsp/mips/inv_txfm_dspr2.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_ports/mem.h"
+// DSPr2 inverse 16x16 hybrid transform; the residual is added into dest.
+#if HAVE_DSPR2
+void vp10_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, int pitch,
+ int tx_type) {
+ int i, j;
+ DECLARE_ALIGNED(32, int16_t, out[16 * 16]);  // Output of the first 1-D pass.
+ int16_t *outptr = out;
+ int16_t temp_out[16];
+ uint32_t pos = 45;
+
+ /* bit position for extract from acc */
+ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos));
+
+ switch (tx_type) {
+ case DCT_DCT: // DCT in both horizontal and vertical
+ idct16_rows_dspr2(input, outptr, 16);
+ idct16_cols_add_blk_dspr2(out, dest, pitch);
+ break;
+ case ADST_DCT: // ADST in vertical, DCT in horizontal
+ idct16_rows_dspr2(input, outptr, 16);
+
+ outptr = out;
+
+ for (i = 0; i < 16; ++i) {
+ iadst16_dspr2(outptr, temp_out);
+
+ for (j = 0; j < 16; ++j)  // Round by 6 bits, add to dest column i, clip.
+ dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) +
+ dest[j * pitch + i]);
+ outptr += 16;
+ }
+ break;
+ case DCT_ADST: // DCT in vertical, ADST in horizontal
+ {
+ int16_t temp_in[16 * 16];
+
+ for (i = 0; i < 16; ++i) {
+ /* prefetch row */
+ prefetch_load((const uint8_t *)(input + 16));
+
+ iadst16_dspr2(input, outptr);
+ input += 16;
+ outptr += 16;
+ }
+
+ for (i = 0; i < 16; ++i)  // temp_in becomes the transpose of out.
+ for (j = 0; j < 16; ++j) temp_in[j * 16 + i] = out[i * 16 + j];
+
+ idct16_cols_add_blk_dspr2(temp_in, dest, pitch);
+ } break;
+ case ADST_ADST: // ADST in both directions
+ {
+ int16_t temp_in[16];
+
+ for (i = 0; i < 16; ++i) {
+ /* prefetch row */
+ prefetch_load((const uint8_t *)(input + 16));
+
+ iadst16_dspr2(input, outptr);
+ input += 16;
+ outptr += 16;
+ }
+
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];  // Column i of out.
+ iadst16_dspr2(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)  // Round by 6 bits, add to dest column i, clip.
+ dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) +
+ dest[j * pitch + i]);
+ }
+ } break;
+ // NOTE(review): the message below names a different function than this one.
+ default: printf("vp10_short_iht16x16_add_dspr2 : Invalid tx_type\n"); break;
+ }
+}
+#endif // #if HAVE_DSPR2
diff --git a/av1/common/mips/dspr2/itrans4_dspr2.c b/av1/common/mips/dspr2/itrans4_dspr2.c
new file mode 100644
index 0000000..dcb28c9
--- /dev/null
+++ b/av1/common/mips/dspr2/itrans4_dspr2.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#include "./vp10_rtcd.h"
+#include "av1/common/common.h"
+#include "av1/common/blockd.h"
+#include "av1/common/idct.h"
+#include "aom_dsp/mips/inv_txfm_dspr2.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_ports/mem.h"
+// DSPr2 inverse 4x4 hybrid transform; the residual is added into dest.
+#if HAVE_DSPR2
+void vp10_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest,
+ int dest_stride, int tx_type) {
+ int i, j;
+ DECLARE_ALIGNED(32, int16_t, out[4 * 4]);  // Output of the first 1-D pass.
+ int16_t *outptr = out;
+ int16_t temp_in[4 * 4], temp_out[4];
+ uint32_t pos = 45;
+
+ /* bit position for extract from acc */
+ __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
+ :
+ : [pos] "r"(pos));
+
+ switch (tx_type) {
+ case DCT_DCT: // DCT in both horizontal and vertical
+ vpx_idct4_rows_dspr2(input, outptr);
+ vpx_idct4_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ break;
+ case ADST_DCT: // ADST in vertical, DCT in horizontal
+ vpx_idct4_rows_dspr2(input, outptr);
+
+ outptr = out;
+
+ for (i = 0; i < 4; ++i) {
+ iadst4_dspr2(outptr, temp_out);
+
+ for (j = 0; j < 4; ++j)  // Round by 4 bits, add to dest column i, clip.
+ dest[j * dest_stride + i] = clip_pixel(
+ ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]);
+
+ outptr += 4;
+ }
+ break;
+ case DCT_ADST: // DCT in vertical, ADST in horizontal
+ for (i = 0; i < 4; ++i) {
+ iadst4_dspr2(input, outptr);
+ input += 4;
+ outptr += 4;
+ }
+
+ for (i = 0; i < 4; ++i) {  // temp_in becomes the transpose of out.
+ for (j = 0; j < 4; ++j) {
+ temp_in[i * 4 + j] = out[j * 4 + i];
+ }
+ }
+ vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
+ break;
+ case ADST_ADST: // ADST in both directions
+ for (i = 0; i < 4; ++i) {
+ iadst4_dspr2(input, outptr);
+ input += 4;
+ outptr += 4;
+ }
+
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];  // Column i of out.
+ iadst4_dspr2(temp_in, temp_out);
+
+ for (j = 0; j < 4; ++j)  // Round by 4 bits, add to dest column i, clip.
+ dest[j * dest_stride + i] = clip_pixel(
+ ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * dest_stride + i]);
+ }
+ break;
+ // NOTE(review): the message below names a different function than this one.
+ default: printf("vp10_short_iht4x4_add_dspr2 : Invalid tx_type\n"); break;
+ }
+}
+#endif // #if HAVE_DSPR2
diff --git a/av1/common/mips/dspr2/itrans8_dspr2.c b/av1/common/mips/dspr2/itrans8_dspr2.c
new file mode 100644
index 0000000..761d6f0
--- /dev/null
+++ b/av1/common/mips/dspr2/itrans8_dspr2.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#include "./vp10_rtcd.h"
+#include "av1/common/common.h"
+#include "av1/common/blockd.h"
+#include "aom_dsp/mips/inv_txfm_dspr2.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_ports/mem.h"
+// DSPr2 inverse 8x8 hybrid transform; the residual is added into dest.
+#if HAVE_DSPR2
+void vp10_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
+ int dest_stride, int tx_type) {
+ int i, j;
+ DECLARE_ALIGNED(32, int16_t, out[8 * 8]);  // Output of the first 1-D pass.
+ int16_t *outptr = out;
+ int16_t temp_in[8 * 8], temp_out[8];
+ uint32_t pos = 45;
+
+ /* bit position for extract from acc */
+ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos));
+
+ switch (tx_type) {
+ case DCT_DCT: // DCT in both horizontal and vertical
+ idct8_rows_dspr2(input, outptr, 8);
+ idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
+ break;
+ case ADST_DCT: // ADST in vertical, DCT in horizontal
+ idct8_rows_dspr2(input, outptr, 8);
+
+ for (i = 0; i < 8; ++i) {
+ iadst8_dspr2(&out[i * 8], temp_out);
+
+ for (j = 0; j < 8; ++j)  // Round by 5 bits, add to dest column i, clip.
+ dest[j * dest_stride + i] = clip_pixel(
+ ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]);
+ }
+ break;
+ case DCT_ADST: // DCT in vertical, ADST in horizontal
+ for (i = 0; i < 8; ++i) {
+ iadst8_dspr2(input, outptr);
+ input += 8;
+ outptr += 8;
+ }
+
+ for (i = 0; i < 8; ++i) {  // temp_in becomes the transpose of out.
+ for (j = 0; j < 8; ++j) {
+ temp_in[i * 8 + j] = out[j * 8 + i];
+ }
+ }
+ idct8_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
+ break;
+ case ADST_ADST: // ADST in both directions
+ for (i = 0; i < 8; ++i) {
+ iadst8_dspr2(input, outptr);
+ input += 8;
+ outptr += 8;
+ }
+
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];  // Column i of out.
+
+ iadst8_dspr2(temp_in, temp_out);
+
+ for (j = 0; j < 8; ++j)  // Round by 5 bits, add to dest column i, clip.
+ dest[j * dest_stride + i] = clip_pixel(
+ ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * dest_stride + i]);
+ }
+ break;
+ // NOTE(review): the message below names a different function than this one.
+ default: printf("vp10_short_iht8x8_add_dspr2 : Invalid tx_type\n"); break;
+ }
+}
+#endif // #if HAVE_DSPR2
diff --git a/av1/common/mips/msa/idct16x16_msa.c b/av1/common/mips/msa/idct16x16_msa.c
new file mode 100644
index 0000000..baa3a97
--- /dev/null
+++ b/av1/common/mips/msa/idct16x16_msa.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/enums.h"
+#include "aom_dsp/mips/inv_txfm_msa.h"
+// MSA inverse 16x16 hybrid transform: row pass into out[], column pass to dst.
+void vp10_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride, int32_t tx_type) {
+ int32_t i;
+ DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
+ int16_t *out_ptr = &out[0];
+
+ switch (tx_type) {
+ case DCT_DCT:
+ /* transform rows */
+ for (i = 0; i < 2; ++i) {
+ /* process 16 * 8 block (i << 7 steps by 128 coefficients = 8 rows) */
+ vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
+ }
+
+ /* transform columns */
+ for (i = 0; i < 2; ++i) {
+ /* process 8 * 16 block (i << 3 steps by 8 columns) */
+ vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
+ dst_stride);
+ }
+ break;
+ case ADST_DCT:
+ /* transform rows */
+ for (i = 0; i < 2; ++i) {
+ /* process 16 * 8 block */
+ vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
+ }
+
+ /* transform columns */
+ for (i = 0; i < 2; ++i) {  // Each pass covers 8 columns.
+ vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
+ (dst + (i << 3)), dst_stride);
+ }
+ break;
+ case DCT_ADST:
+ /* transform rows */
+ for (i = 0; i < 2; ++i) {
+ /* process 16 * 8 block */
+ vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
+ }
+
+ /* transform columns */
+ for (i = 0; i < 2; ++i) {
+ /* process 8 * 16 block */
+ vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
+ dst_stride);
+ }
+ break;
+ case ADST_ADST:
+ /* transform rows */
+ for (i = 0; i < 2; ++i) {
+ /* process 16 * 8 block */
+ vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
+ }
+
+ /* transform columns */
+ for (i = 0; i < 2; ++i) {  // Each pass covers 8 columns.
+ vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
+ (dst + (i << 3)), dst_stride);
+ }
+ break;
+ default: assert(0); break;  // Any other tx_type is rejected in debug builds.
+ }
+}
diff --git a/av1/common/mips/msa/idct4x4_msa.c b/av1/common/mips/msa/idct4x4_msa.c
new file mode 100644
index 0000000..0620df7
--- /dev/null
+++ b/av1/common/mips/msa/idct4x4_msa.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/enums.h"
+#include "aom_dsp/mips/inv_txfm_msa.h"
+// MSA inverse 4x4 hybrid transform; the rounded residual is added into dst.
+void vp10_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride, int32_t tx_type) {
+ v8i16 in0, in1, in2, in3;
+
+ /* load vector elements of 4x4 block */
+ LD4x4_SH(input, in0, in1, in2, in3);
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+
+ switch (tx_type) {
+ case DCT_DCT:
+ /* DCT in horizontal */
+ VPX_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ /* DCT in vertical */
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VPX_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ case ADST_DCT:
+ /* DCT in horizontal */
+ VPX_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ /* ADST in vertical */
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VPX_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ case DCT_ADST:
+ /* ADST in horizontal */
+ VPX_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ /* DCT in vertical */
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VPX_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ case ADST_ADST:
+ /* ADST in horizontal */
+ VPX_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ /* ADST in vertical */
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VPX_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ default: assert(0); break;  // Any other tx_type is rejected in debug builds.
+ }
+
+ /* final rounding (add 2^3, divide by 2^4) and shift */
+ SRARI_H4_SH(in0, in1, in2, in3, 4);
+ /* add block and store 4x4 */
+ ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);
+}
diff --git a/av1/common/mips/msa/idct8x8_msa.c b/av1/common/mips/msa/idct8x8_msa.c
new file mode 100644
index 0000000..5c62c4a
--- /dev/null
+++ b/av1/common/mips/msa/idct8x8_msa.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/enums.h"
+#include "aom_dsp/mips/inv_txfm_msa.h"
+// MSA inverse 8x8 hybrid transform; the rounded residual is added into dst.
+void vp10_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride, int32_t tx_type) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+
+ /* load vector elements of 8x8 block */
+ LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
+
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+
+ switch (tx_type) {
+ case DCT_DCT:
+ /* DCT in horizontal */
+ VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+ /* DCT in vertical */
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+ in3, in4, in5, in6, in7);
+ VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+ break;
+ case ADST_DCT:
+ /* DCT in horizontal */
+ VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+ /* ADST in vertical */
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+ in3, in4, in5, in6, in7);
+ VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ break;
+ case DCT_ADST:
+ /* ADST in horizontal */
+ VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ /* DCT in vertical */
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+ in3, in4, in5, in6, in7);
+ VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+ break;
+ case ADST_ADST:
+ /* ADST in horizontal */
+ VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ /* ADST in vertical */
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+ in3, in4, in5, in6, in7);
+ VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ break;
+ default: assert(0); break;  // Any other tx_type is rejected in debug builds.
+ }
+
+ /* final rounding (add 2^4, divide by 2^5) and shift */
+ SRARI_H4_SH(in0, in1, in2, in3, 5);
+ SRARI_H4_SH(in4, in5, in6, in7, 5);
+
+ /* add block and store 8x8: rows 0-3 first, then rows 4-7 */
+ VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
+ dst += (4 * dst_stride);
+ VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
+}
diff --git a/av1/common/mv.h b/av1/common/mv.h
new file mode 100644
index 0000000..dba3336
--- /dev/null
+++ b/av1/common/mv.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_MV_H_
+#define VP10_COMMON_MV_H_
+
+#include "av1/common/common.h"
+#include "aom_dsp/vpx_filter.h"
+#if CONFIG_GLOBAL_MOTION
+#include "av1/common/warped_motion.h"
+#endif // CONFIG_GLOBAL_MOTION
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Motion vector with 16-bit row and column components.
+typedef struct mv {
+ int16_t row;
+ int16_t col;
+} MV;
+// An MV overlaid with a single 32-bit integer view of the same storage.
+typedef union int_mv {
+ uint32_t as_int;  // Both 16-bit components viewed as one word.
+ MV as_mv;
+} int_mv; /* facilitates faster equality tests and copies */
+// Motion vector with 32-bit row and column components.
+typedef struct mv32 {
+ int32_t row;
+ int32_t col;
+} MV32;
+
+#if CONFIG_GLOBAL_MOTION
+// ALPHA here refers to parameters a and b in rotzoom model:
+// | a b|
+// |-b a|
+//
+// and a, b, c, d in affine model:
+// | a b|
+// | c d|
+//
+// Anything ending in PREC_BITS is the number of bits of precision
+// to maintain when converting from double to integer.
+//
+// The ABS parameters are used to create an upper and lower bound
+// for each parameter. In other words, after a parameter is integerized
+// it is clamped between -(1 << ABS_XXX_BITS) and (1 << ABS_XXX_BITS).
+//
+// XXX_PREC_DIFF and XXX_DECODE_FACTOR
+// are computed once here to prevent repetitive
+// computation on the decoder side. These are
+// to allow the global motion parameters to be encoded in a lower
+// precision than the warped model precision. This means that they
+// need to be changed to warped precision when they are decoded.
+//
+// XX_MIN, XX_MAX are also computed to avoid repeated computation
+
+#define GM_TRANS_PREC_BITS 5  // Coded precision of translation parameters.
+#define GM_TRANS_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_TRANS_PREC_BITS)
+#define GM_TRANS_DECODE_FACTOR (1 << GM_TRANS_PREC_DIFF)
+
+#define GM_ALPHA_PREC_BITS 5  // Coded precision of alpha parameters.
+#define GM_ALPHA_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_ALPHA_PREC_BITS)
+#define GM_ALPHA_DECODE_FACTOR (1 << GM_ALPHA_PREC_DIFF)
+
+#define GM_ABS_ALPHA_BITS 8
+#define GM_ABS_TRANS_BITS 8
+
+#define GM_TRANS_MAX (1 << GM_ABS_TRANS_BITS)
+#define GM_ALPHA_MAX (1 << GM_ABS_ALPHA_BITS)
+#define GM_TRANS_MIN (-GM_TRANS_MAX)  // Parenthesized: safe in any expression.
+#define GM_ALPHA_MIN (-GM_ALPHA_MAX)  // Parenthesized: safe in any expression.
+// Global motion model kinds; see gm_to_trans_type() for the warp mapping.
+typedef enum {
+ GLOBAL_ZERO = 0,  // Maps to UNKNOWN_TRANSFORM.
+ GLOBAL_TRANSLATION = 1,  // Maps to TRANSLATION.
+ GLOBAL_ROTZOOM = 2,  // Maps to ROTZOOM.
+ GLOBAL_AFFINE = 3,  // Maps to AFFINE.
+ GLOBAL_MOTION_TYPES  // Count of the model kinds above.
+} GLOBAL_MOTION_TYPE;
+// A global motion model: its kind plus the warp parameters.
+typedef struct {
+ GLOBAL_MOTION_TYPE gmtype;
+ WarpedMotionParams motion_params;  // get_gmtype() inspects wmmat[0..5] of this.
+} Global_Motion_Params;
+// Maps a global-motion model kind onto the matching warp transformation type.
+static INLINE TransformationType gm_to_trans_type(GLOBAL_MOTION_TYPE gmtype) {
+  // GLOBAL_ZERO carries no warp, so it reports UNKNOWN_TRANSFORM.
+  if (gmtype == GLOBAL_ZERO) return UNKNOWN_TRANSFORM;
+  if (gmtype == GLOBAL_TRANSLATION) return TRANSLATION;
+  if (gmtype == GLOBAL_ROTZOOM) return ROTZOOM;
+  if (gmtype == GLOBAL_AFFINE) return AFFINE;
+  // Any other value trips the assert; release builds fall through to UNKNOWN.
+  assert(0);
+  return UNKNOWN_TRANSFORM;
+}
+
+static INLINE GLOBAL_MOTION_TYPE get_gmtype(const Global_Motion_Params *gm) {
+  // Derives the model kind from which groups of wmmat[] entries are non-zero.
+  const WarpedMotionParams *const wm = &gm->motion_params;
+
+  // Any non-zero value in wmmat[4..5] selects the affine model.
+  if (wm->wmmat[4] != 0 || wm->wmmat[5] != 0) return GLOBAL_AFFINE;
+
+  // Otherwise a non-zero value in wmmat[2..3] selects rotzoom.
+  if (wm->wmmat[2] != 0 || wm->wmmat[3] != 0) return GLOBAL_ROTZOOM;
+
+  // Only wmmat[0..1] remain: translation if either is non-zero, else zero.
+  return (wm->wmmat[0] | wm->wmmat[1]) ? GLOBAL_TRANSLATION : GLOBAL_ZERO;
+}
+#endif // CONFIG_GLOBAL_MOTION
+
+#if CONFIG_REF_MV
+typedef struct candidate_mv {  // One entry of the reference MV candidate stack.
+ int_mv this_mv;
+ int_mv comp_mv;  // Second MV; filled for compound-reference candidates.
+ int_mv pred_mv;  // Filled from get_sub_block_pred_mv() for single references.
+ int weight;  // Accumulated each time the same MV is seen again.
+} CANDIDATE_MV;
+#endif
+// True iff *mv is (0, 0); compares fields instead of type-punning to uint32_t.
+static INLINE int is_zero_mv(const MV *mv) {
+  return mv->row == 0 && mv->col == 0;
+}
+// True iff *a and *b match in both components; no uint32_t type-punning.
+static INLINE int is_equal_mv(const MV *a, const MV *b) {
+  return a->row == b->row && a->col == b->col;
+}
+// Restricts each component of *mv, in place, via clamp() to its own bounds.
+static INLINE void clamp_mv(MV *mv, int min_col, int max_col, int min_row,
+                            int max_row) {
+  mv->row = clamp(mv->row, min_row, max_row);
+  mv->col = clamp(mv->col, min_col, max_col);
+}
+// Returns 1 when either component of *mv has any SUBPEL_MASK bit set, else 0.
+static INLINE int mv_has_subpel(const MV *mv) {
+  return ((mv->row | mv->col) & SUBPEL_MASK) != 0;
+}
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_MV_H_
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
new file mode 100644
index 0000000..836b065
--- /dev/null
+++ b/av1/common/mvref_common.c
@@ -0,0 +1,828 @@
+
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/mvref_common.h"
+
+#if CONFIG_REF_MV
+// Merges one candidate's MV(s) into ref_mv_stack without overrunning the stack.
+static uint8_t add_ref_mv_candidate(
+ const MODE_INFO *const candidate_mi, const MB_MODE_INFO *const candidate,
+ const MV_REFERENCE_FRAME rf[2], uint8_t *refmv_count,
+ CANDIDATE_MV *ref_mv_stack, const int use_hp, int len, int block, int col) {
+ const int weight = len;  // len is supplied by the scan_* callers.
+ int index = 0, ref;
+ int newmv_count = 0;
+
+ assert(2 * weight < REF_CAT_LEVEL);
+
+ if (rf[1] == NONE) {
+ // single reference frame
+ for (ref = 0; ref < 2; ++ref) {
+ if (candidate->ref_frame[ref] == rf[0]) {
+ int_mv this_refmv = get_sub_block_mv(candidate_mi, ref, col, block);
+ lower_mv_precision(&this_refmv.as_mv, use_hp);
+
+ for (index = 0; index < *refmv_count; ++index)
+ if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break;
+
+ if (index < *refmv_count) ref_mv_stack[index].weight += 2 * weight;
+
+ // Add a new item to the list, unless the stack is already full.
+ if (index == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
+ ref_mv_stack[index].this_mv = this_refmv;
+ ref_mv_stack[index].pred_mv =
+ get_sub_block_pred_mv(candidate_mi, ref, col, block);
+ ref_mv_stack[index].weight = 2 * weight;
+ ++(*refmv_count);
+
+#if CONFIG_EXT_INTER
+ if (candidate->mode == NEWMV || candidate->mode == NEWFROMNEARMV)
+#else
+ if (candidate->mode == NEWMV)
+#endif // CONFIG_EXT_INTER
+ ++newmv_count;
+ }
+
+ if (candidate_mi->mbmi.sb_type < BLOCK_8X8 && block >= 0) {
+ int alt_block = 3 - block;  // The opposite sub-block (0<->3, 1<->2).
+ this_refmv = get_sub_block_mv(candidate_mi, ref, col, alt_block);
+ lower_mv_precision(&this_refmv.as_mv, use_hp);
+
+ for (index = 0; index < *refmv_count; ++index)
+ if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break;
+
+ if (index < *refmv_count) ref_mv_stack[index].weight += weight;
+
+ // Add a new item to the list, unless the stack is already full.
+ if (index == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
+ ref_mv_stack[index].this_mv = this_refmv;
+ ref_mv_stack[index].pred_mv =
+ get_sub_block_pred_mv(candidate_mi, ref, col, alt_block);
+ ref_mv_stack[index].weight = weight;
+ ++(*refmv_count);
+
+#if CONFIG_EXT_INTER
+ if (candidate->mode == NEWMV || candidate->mode == NEWFROMNEARMV)
+#else
+ if (candidate->mode == NEWMV)
+#endif // CONFIG_EXT_INTER
+ ++newmv_count;
+ }
+ }
+ }
+ }
+ } else {
+ // compound reference frame
+ if (candidate->ref_frame[0] == rf[0] && candidate->ref_frame[1] == rf[1]) {
+ int_mv this_refmv[2];
+
+ for (ref = 0; ref < 2; ++ref) {
+ this_refmv[ref] = get_sub_block_mv(candidate_mi, ref, col, block);
+ lower_mv_precision(&this_refmv[ref].as_mv, use_hp);
+ }
+
+ for (index = 0; index < *refmv_count; ++index)
+ if ((ref_mv_stack[index].this_mv.as_int == this_refmv[0].as_int) &&
+ (ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int))
+ break;
+
+ if (index < *refmv_count) ref_mv_stack[index].weight += 2 * weight;
+
+ // Add a new item to the list, unless the stack is already full.
+ if (index == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
+ ref_mv_stack[index].this_mv = this_refmv[0];
+ ref_mv_stack[index].comp_mv = this_refmv[1];
+ ref_mv_stack[index].weight = 2 * weight;
+ ++(*refmv_count);
+
+#if CONFIG_EXT_INTER
+ if (candidate->mode == NEW_NEWMV)
+#else
+ if (candidate->mode == NEWMV)
+#endif // CONFIG_EXT_INTER
+ ++newmv_count;
+ }
+
+ if (candidate_mi->mbmi.sb_type < BLOCK_8X8 && block >= 0) {
+ int alt_block = 3 - block;  // The opposite sub-block (0<->3, 1<->2).
+ this_refmv[0] = get_sub_block_mv(candidate_mi, 0, col, alt_block);
+ this_refmv[1] = get_sub_block_mv(candidate_mi, 1, col, alt_block);
+
+ for (ref = 0; ref < 2; ++ref)
+ lower_mv_precision(&this_refmv[ref].as_mv, use_hp);
+
+ for (index = 0; index < *refmv_count; ++index)
+ if (ref_mv_stack[index].this_mv.as_int == this_refmv[0].as_int &&
+ ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int)
+ break;
+
+ if (index < *refmv_count) ref_mv_stack[index].weight += weight;
+
+ // Add a new item to the list, unless the stack is already full.
+ if (index == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
+ ref_mv_stack[index].this_mv = this_refmv[0];
+ ref_mv_stack[index].comp_mv = this_refmv[1];
+ ref_mv_stack[index].weight = weight;
+ ++(*refmv_count);
+
+#if CONFIG_EXT_INTER
+ if (candidate->mode == NEW_NEWMV)
+#else
+ if (candidate->mode == NEWMV)
+#endif // CONFIG_EXT_INTER
+ ++newmv_count;
+ }
+ }
+ }
+ }
+ return newmv_count;  // Count of entries appended from new-MV candidate modes.
+}
+// Walks the row at row_offset and offers each in-tile candidate to the stack.
+static uint8_t scan_row_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ const int mi_row, const int mi_col, int block,
+ const MV_REFERENCE_FRAME rf[2], int row_offset,
+ CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count) {
+ const TileInfo *const tile = &xd->tile;
+ int i;
+ uint8_t newmv_count = 0;
+
+ for (i = 0; i < xd->n8_w && *refmv_count < MAX_REF_MV_STACK_SIZE;) {
+ POSITION mi_pos;
+ mi_pos.row = row_offset;
+ mi_pos.col = i;
+
+ if (is_inside(tile, mi_col, mi_row, &mi_pos)) {
+ const MODE_INFO *const candidate_mi =
+ xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ const int len =  // Candidate width in 8x8 units, capped at xd->n8_w.
+ VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[candidate->sb_type]);
+
+ newmv_count += add_ref_mv_candidate(
+ candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
+ cm->allow_high_precision_mv, len, block, mi_pos.col);
+ i += len;  // Advance past the columns attributed to this candidate.
+ } else {
+ ++i;  // Position rejected by is_inside(): try the next column.
+ }
+ }
+
+ return newmv_count;  // Sum of the add_ref_mv_candidate() return values.
+}
+// Walks the column at col_offset and offers each in-tile candidate to the stack.
+static uint8_t scan_col_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ const int mi_row, const int mi_col, int block,
+ const MV_REFERENCE_FRAME rf[2], int col_offset,
+ CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count) {
+ const TileInfo *const tile = &xd->tile;
+ int i;
+ uint8_t newmv_count = 0;
+
+ for (i = 0; i < xd->n8_h && *refmv_count < MAX_REF_MV_STACK_SIZE;) {
+ POSITION mi_pos;
+ mi_pos.row = i;
+ mi_pos.col = col_offset;
+
+ if (is_inside(tile, mi_col, mi_row, &mi_pos)) {
+ const MODE_INFO *const candidate_mi =
+ xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ const int len =  // Candidate height in 8x8 units, capped at xd->n8_h.
+ VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[candidate->sb_type]);
+
+ newmv_count += add_ref_mv_candidate(
+ candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
+ cm->allow_high_precision_mv, len, block, mi_pos.col);
+ i += len;  // Advance past the rows attributed to this candidate.
+ } else {
+ ++i;  // Position rejected by is_inside(): try the next row.
+ }
+ }
+
+ return newmv_count;  // Sum of the add_ref_mv_candidate() return values.
+}
+// Offers the single candidate at (row_offset, col_offset) to the stack.
+static uint8_t scan_blk_mbmi(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ const int mi_row, const int mi_col, int block,
+ const MV_REFERENCE_FRAME rf[2], int row_offset,
+ int col_offset, CANDIDATE_MV *ref_mv_stack,
+ uint8_t *refmv_count) {
+ const TileInfo *const tile = &xd->tile;
+ POSITION mi_pos;
+ uint8_t newmv_count = 0;
+
+ mi_pos.row = row_offset;
+ mi_pos.col = col_offset;
+
+ if (is_inside(tile, mi_col, mi_row, &mi_pos) &&
+ *refmv_count < MAX_REF_MV_STACK_SIZE) {
+ const MODE_INFO *const candidate_mi =
+ xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ const int len = 1;  // A single position, so the weight basis is 1.
+
+ newmv_count += add_ref_mv_candidate(
+ candidate_mi, candidate, rf, refmv_count, ref_mv_stack,
+ cm->allow_high_precision_mv, len, block, mi_pos.col);
+ } // Analyze a single 8x8 block motion information.
+ return newmv_count;  // 0 when the position is outside or the stack is full.
+}
+
+// Returns 1 when the current block has a top-right neighbour that is already
+// decoded, 0 otherwise. bs is supplied by the caller and must be a positive
+// power of two.
+static int has_top_right(const MACROBLOCKD *xd, int mi_row, int mi_col,
+                         int bs) {
+  // Within a split partition only the bottom-right quadrant lacks a top right.
+  int available = !(mi_row & bs) || !(mi_col & bs);
+
+  // bs > 0 and bs is a power of 2
+  assert(bs > 0 && !(bs & (bs - 1)));
+
+  // For each 4x4 group of blocks, when the bottom right is decoded the blocks
+  // to the right have not been decoded, therefore the bottom right does not
+  // have a top right. Climb through larger sizes while the block sits in the
+  // right half; the walk ends at the first left-half position.
+  for (; bs < MAX_MIB_SIZE; bs <<= 1) {
+    if (!(mi_col & bs)) break;
+    if ((mi_col & (2 * bs)) && (mi_row & (2 * bs))) {
+      available = 0;
+      break;
+    }
+  }
+
+  // The left hand of two vertical rectangles always has a top right (as the
+  // block above will have been decoded).
+  if (xd->n8_w < xd->n8_h && !xd->is_sec_rect) available = 1;
+
+  // The bottom of two horizontal rectangles never has a top right (as the
+  // block to the right won't have been decoded).
+  if (xd->n8_w > xd->n8_h && xd->is_sec_rect) available = 0;
+
+#if CONFIG_EXT_PARTITION_TYPES
+  // The bottom left square of a Vertical A does not have a top right as it is
+  // decoded before the right hand rectangle of the partition. bs here holds
+  // whatever value the loop above left it at.
+  if (xd->mi[0]->mbmi.partition == PARTITION_VERT_A && (mi_row & bs) &&
+      !(mi_col & bs))
+    available = 0;
+#endif  // CONFIG_EXT_PARTITION_TYPES
+
+  return available;
+}
+
+// For each reference slot of candidate that uses ref_frame, looks its MV up
+// among the first stack entries and flags mode_context[ref_frame] when it
+// matches entry 0 (SKIP_NEARESTMV_OFFSET) or entry 1 (SKIP_NEARMV_OFFSET).
+static void handle_sec_rect_block(const MB_MODE_INFO *const candidate,
+                                  uint8_t refmv_count,
+                                  CANDIDATE_MV *ref_mv_stack,
+                                  MV_REFERENCE_FRAME ref_frame,
+                                  int16_t *mode_context) {
+  const int limit = VPXMIN(refmv_count, MAX_MV_REF_CANDIDATES);
+  int slot;
+
+  for (slot = 0; slot < 2; ++slot) {
+    int pos = 0;
+    if (candidate->ref_frame[slot] != ref_frame) continue;
+    // Linear search for the first stack entry equal to the candidate's MV.
+    while (pos < limit &&
+           candidate->mv[slot].as_int != ref_mv_stack[pos].this_mv.as_int)
+      ++pos;
+    if (pos >= limit) continue;
+
+    if (pos == 0) mode_context[ref_frame] |= (1 << SKIP_NEARESTMV_OFFSET);
+    if (pos == 1) mode_context[ref_frame] |= (1 << SKIP_NEARMV_OFFSET);
+  }
+}
+// Fills ref_mv_stack for ref_frame and sets mode_context[ref_frame].
+static void setup_ref_mv_list(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ MV_REFERENCE_FRAME ref_frame,
+ uint8_t *refmv_count, CANDIDATE_MV *ref_mv_stack,
+ int_mv *mv_ref_list, int block, int mi_row,
+ int mi_col, int16_t *mode_context) {
+ int idx, nearest_refmv_count = 0;
+ uint8_t newmv_count = 0;
+
+ CANDIDATE_MV tmp_mv;
+ int len, nr_len;
+
+ const MV_REF *const prev_frame_mvs_base =
+ cm->use_prev_frame_mvs
+ ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col
+ : NULL;
+
+ int bs = VPXMAX(xd->n8_w, xd->n8_h);
+ int has_tr = has_top_right(xd, mi_row, mi_col, bs);
+
+ MV_REFERENCE_FRAME rf[2];
+ vp10_set_ref_frame(rf, ref_frame);
+ // Start from an empty stack and a cleared context for this reference.
+ mode_context[ref_frame] = 0;
+ *refmv_count = 0;
+
+ // Scan the first above row mode info.
+ newmv_count = scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -1,
+ ref_mv_stack, refmv_count);
+ // Scan the first left column mode info.
+ newmv_count += scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -1,
+ ref_mv_stack, refmv_count);
+
+ // Check top-right boundary
+ if (has_tr)
+ newmv_count += scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, -1, 1,
+ ref_mv_stack, refmv_count);
+ // Entries collected so far come from the adjacent row/column/top-right.
+ nearest_refmv_count = *refmv_count;
+ // Tag those nearest entries by lifting their weight above REF_CAT_LEVEL.
+ for (idx = 0; idx < nearest_refmv_count; ++idx) {
+ assert(ref_mv_stack[idx].weight > 0 &&
+ ref_mv_stack[idx].weight < REF_CAT_LEVEL);
+ ref_mv_stack[idx].weight += REF_CAT_LEVEL;
+ }
+ // Add co-located MVs from the previous frame (single reference only).
+ if (prev_frame_mvs_base && cm->show_frame && cm->last_show_frame &&
+ rf[1] == NONE) {
+ int ref;
+ int blk_row, blk_col;
+
+ for (blk_row = 0; blk_row < xd->n8_h; ++blk_row) {
+ for (blk_col = 0; blk_col < xd->n8_w; ++blk_col) {
+ const MV_REF *prev_frame_mvs =
+ prev_frame_mvs_base + blk_row * cm->mi_cols + blk_col;
+
+ POSITION mi_pos;
+ mi_pos.row = blk_row;
+ mi_pos.col = blk_col;
+
+ if (!is_inside(&xd->tile, mi_col, mi_row, &mi_pos)) continue;
+
+ for (ref = 0; ref < 2; ++ref) {
+ if (prev_frame_mvs->ref_frame[ref] == ref_frame) {
+ int_mv this_refmv = prev_frame_mvs->mv[ref];
+ lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv);
+
+ for (idx = 0; idx < *refmv_count; ++idx)
+ if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int) break;
+
+ if (idx < *refmv_count) ref_mv_stack[idx].weight += 2;
+
+ if (idx == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
+ ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
+ ref_mv_stack[idx].weight = 2;
+ ++(*refmv_count);
+
+ if (abs(ref_mv_stack[idx].this_mv.as_mv.row) >= 8 ||
+ abs(ref_mv_stack[idx].this_mv.as_mv.col) >= 8)
+ mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
+ }
+ }
+ }
+ }
+ }
+ }
+ // Nothing was appended after the nearest scan: set the ZEROMV_OFFSET bit.
+ if (*refmv_count == nearest_refmv_count)
+ mode_context[ref_frame] |= (1 << ZEROMV_OFFSET);
+
+ // Analyze the top-left corner block mode info (call currently disabled).
+ // scan_blk_mbmi(cm, xd, mi_row, mi_col, block, ref_frame,
+ // -1, -1, ref_mv_stack, refmv_count);
+
+ // Scan the second outer area.
+ scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -2, ref_mv_stack,
+ refmv_count);
+ scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -2, ref_mv_stack,
+ refmv_count);
+
+ // Scan the third outer area.
+ scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -3, ref_mv_stack,
+ refmv_count);
+ scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -3, ref_mv_stack,
+ refmv_count);
+
+ // Scan the fourth outer area.
+ scan_row_mbmi(cm, xd, mi_row, mi_col, block, rf, -4, ref_mv_stack,
+ refmv_count);
+ // Scan the fourth left column mode info.
+ scan_col_mbmi(cm, xd, mi_row, mi_col, block, rf, -4, ref_mv_stack,
+ refmv_count);
+ // Encode candidate counts into the low bits and the REFMV_OFFSET field.
+ switch (nearest_refmv_count) {
+ case 0:
+ mode_context[ref_frame] |= 0;
+ if (*refmv_count >= 1) mode_context[ref_frame] |= 1;
+
+ if (*refmv_count == 1)
+ mode_context[ref_frame] |= (1 << REFMV_OFFSET);
+ else if (*refmv_count >= 2)
+ mode_context[ref_frame] |= (2 << REFMV_OFFSET);
+ break;
+ case 1:
+ mode_context[ref_frame] |= (newmv_count > 0) ? 2 : 3;
+
+ if (*refmv_count == 1)
+ mode_context[ref_frame] |= (3 << REFMV_OFFSET);
+ else if (*refmv_count >= 2)
+ mode_context[ref_frame] |= (4 << REFMV_OFFSET);
+ break;
+
+ case 2:
+ default:
+ if (newmv_count >= 2)
+ mode_context[ref_frame] |= 4;
+ else if (newmv_count == 1)
+ mode_context[ref_frame] |= 5;
+ else
+ mode_context[ref_frame] |= 6;
+
+ mode_context[ref_frame] |= (5 << REFMV_OFFSET);
+ break;
+ }
+
+ // Rank by likelihood: sort the nearest entries by descending weight.
+ len = nearest_refmv_count;
+ while (len > 0) {
+ nr_len = 0;
+ for (idx = 1; idx < len; ++idx) {
+ if (ref_mv_stack[idx - 1].weight < ref_mv_stack[idx].weight) {
+ tmp_mv = ref_mv_stack[idx - 1];
+ ref_mv_stack[idx - 1] = ref_mv_stack[idx];
+ ref_mv_stack[idx] = tmp_mv;
+ nr_len = idx;
+ }
+ }
+ len = nr_len;
+ }
+ // Sort the remaining (non-nearest) entries by descending weight as well.
+ len = *refmv_count;
+ while (len > nearest_refmv_count) {
+ nr_len = nearest_refmv_count;
+ for (idx = nearest_refmv_count + 1; idx < len; ++idx) {
+ if (ref_mv_stack[idx - 1].weight < ref_mv_stack[idx].weight) {
+ tmp_mv = ref_mv_stack[idx - 1];
+ ref_mv_stack[idx - 1] = ref_mv_stack[idx];
+ ref_mv_stack[idx] = tmp_mv;
+ nr_len = idx;
+ }
+ }
+ len = nr_len;
+ }
+
+ // TODO(jingning): Clean-up needed.
+ if (xd->is_sec_rect) {
+ if (xd->n8_w < xd->n8_h) {
+ const MODE_INFO *const candidate_mi = xd->mi[-1];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ handle_sec_rect_block(candidate, nearest_refmv_count, ref_mv_stack,
+ ref_frame, mode_context);
+ }
+
+ if (xd->n8_w > xd->n8_h) {
+ const MODE_INFO *const candidate_mi = xd->mi[-xd->mi_stride];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ handle_sec_rect_block(candidate, nearest_refmv_count, ref_mv_stack,
+ ref_frame, mode_context);
+ }
+ }
+ // Compound: clamp the whole stack. Single: export the top MVs, clamped.
+ if (rf[1] > NONE) {
+ for (idx = 0; idx < *refmv_count; ++idx) {
+ clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv, xd->n8_w << 3,
+ xd->n8_h << 3, xd);
+ clamp_mv_ref(&ref_mv_stack[idx].comp_mv.as_mv, xd->n8_w << 3,
+ xd->n8_h << 3, xd);
+ }
+ } else {
+ for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *refmv_count); ++idx) {
+ mv_ref_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int;
+ clamp_mv_ref(&mv_ref_list[idx].as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+ }
+ }
+}
+#endif
+
+// Searches the neighbourhood of a given MB/SB for up to two candidate
+// reference MVs for ref_frame; mv_ref_list slots left unused are zeroed.
+static void find_mv_refs_idx(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+ int_mv *mv_ref_list, int block, int mi_row,
+ int mi_col, find_mv_refs_sync sync,
+ void *const data, int16_t *mode_context) {
+ const int *ref_sign_bias = cm->ref_frame_sign_bias;
+ int i, refmv_count = 0;
+ const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
+ int different_ref_found = 0;
+ int context_counter = 0;
+ const MV_REF *const prev_frame_mvs =
+ cm->use_prev_frame_mvs
+ ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col
+ : NULL;
+ const TileInfo *const tile = &xd->tile;
+ const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type] << 3;
+ const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type] << 3;
+
+ // The nearest 2 blocks are treated differently
+ // if the size < 8x8 we get the mv from the bmi substructure,
+ // and we also need to keep a mode count.
+ for (i = 0; i < 2; ++i) {
+ const POSITION *const mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, mv_ref)) {
+ const MODE_INFO *const candidate_mi =
+ xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ // Keep counts for entropy encoding.
+ context_counter += mode_2_counter[candidate->mode];
+ different_ref_found = 1; // set for every in-tile neighbour
+
+ if (candidate->ref_frame[0] == ref_frame)
+ ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block),
+ refmv_count, mv_ref_list, bw, bh, xd, Done);
+ else if (candidate->ref_frame[1] == ref_frame)
+ ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block),
+ refmv_count, mv_ref_list, bw, bh, xd, Done);
+ }
+ }
+
+ // Check the rest of the neighbors in much the same way
+ // as before except we don't need to keep track of sub blocks or
+ // mode counts.
+ for (; i < MVREF_NEIGHBOURS; ++i) {
+ const POSITION *const mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, mv_ref)) {
+ const MB_MODE_INFO *const candidate =
+ &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
+ different_ref_found = 1;
+
+ if (candidate->ref_frame[0] == ref_frame)
+ ADD_MV_REF_LIST(candidate->mv[0], refmv_count, mv_ref_list, bw, bh, xd,
+ Done);
+ else if (candidate->ref_frame[1] == ref_frame)
+ ADD_MV_REF_LIST(candidate->mv[1], refmv_count, mv_ref_list, bw, bh, xd,
+ Done);
+ }
+ }
+
+// TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
+// on windows platform. The sync here is unnecessary if use_prev_frame_mvs
+// is 0. But after removing it, there will be hang in the unit test on windows
+// due to several threads waiting for a thread's signal.
+#if defined(_WIN32) && !HAVE_PTHREAD_H
+ if (cm->frame_parallel_decode && sync != NULL) {
+ sync(data, mi_row);
+ }
+#endif
+
+ // Check the last frame's mode and mv info.
+ if (cm->use_prev_frame_mvs) {
+ // Synchronize here for frame parallel decode if sync function is provided.
+ if (cm->frame_parallel_decode && sync != NULL) {
+ sync(data, mi_row);
+ }
+
+ if (prev_frame_mvs->ref_frame[0] == ref_frame) {
+ ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, bw, bh,
+ xd, Done);
+ } else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
+ ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, bw, bh,
+ xd, Done);
+ }
+ }
+
+ // Since we couldn't find 2 mvs from the same reference frame
+ // go back through the neighbors and find motion vectors from
+ // different reference frames.
+ if (different_ref_found) {
+ for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
+ const POSITION *mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, mv_ref)) {
+ const MB_MODE_INFO *const candidate =
+ &xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]->mbmi;
+
+ // If the candidate is INTRA we don't want to consider its mv.
+ IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
+ refmv_count, mv_ref_list, bw, bh, xd, Done);
+ }
+ }
+ }
+
+ // Since we still don't have a candidate we'll try the last frame.
+ if (cm->use_prev_frame_mvs) {
+ if (prev_frame_mvs->ref_frame[0] != ref_frame &&
+ prev_frame_mvs->ref_frame[0] > INTRA_FRAME) {
+ int_mv mv = prev_frame_mvs->mv[0];
+ if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] !=
+ ref_sign_bias[ref_frame]) {
+ mv.as_mv.row *= -1;
+ mv.as_mv.col *= -1;
+ }
+ ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done);
+ }
+
+ if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME &&
+ prev_frame_mvs->ref_frame[1] != ref_frame) {
+ int_mv mv = prev_frame_mvs->mv[1];
+ if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] !=
+ ref_sign_bias[ref_frame]) {
+ mv.as_mv.row *= -1;
+ mv.as_mv.col *= -1;
+ }
+ ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done);
+ }
+ }
+ // Common exit: store the context (if requested) and zero unused entries.
+Done:
+ if (mode_context)
+ mode_context[ref_frame] = counter_to_context[context_counter];
+ for (i = refmv_count; i < MAX_MV_REF_CANDIDATES; ++i)
+ mv_ref_list[i].as_int = 0;
+}
+
+#if CONFIG_EXT_INTER
+// Sums the mode counters of the two nearest neighbours of an MB/SB and stores
+// the resulting context in mode_context[ref_frame] (when mode_context is
+// non-NULL). mv_ref_list is cleared first and then receives up to two MVs
+// taken from those neighbours that reference ref_frame.
+void vp10_update_mv_context(const MACROBLOCKD *xd, MODE_INFO *mi,
+                            MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list,
+                            int block, int mi_row, int mi_col,
+                            int16_t *mode_context) {
+  const POSITION *const search = mv_ref_blocks[mi->mbmi.sb_type];
+  const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type] << 3;
+  const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type] << 3;
+  int counter = 0, found = 0, n;
+
+  // Blank the reference vector list
+  memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
+
+  // Only the two nearest positions are visited; sub-8x8 neighbours supply
+  // their MV from the bmi substructure via get_sub_block_mv().
+  for (n = 0; n < 2; ++n) {
+    const POSITION *const pos = &search[n];
+    const MODE_INFO *nb_mi;
+    int which;
+
+    if (!is_inside(&xd->tile, mi_col, mi_row, pos)) continue;
+    nb_mi = xd->mi[pos->col + pos->row * xd->mi_stride];
+
+    // Keep counts for entropy encoding.
+    counter += mode_2_counter[nb_mi->mbmi.mode];
+
+    if (nb_mi->mbmi.ref_frame[0] == ref_frame)
+      which = 0;
+    else if (nb_mi->mbmi.ref_frame[1] == ref_frame)
+      which = 1;
+    else
+      continue;
+    ADD_MV_REF_LIST(get_sub_block_mv(nb_mi, which, pos->col, block), found,
+                    mv_ref_list, bw, bh, xd, Done);
+  }
+
+Done:
+  if (mode_context) mode_context[ref_frame] = counter_to_context[counter];
+}
+#endif // CONFIG_EXT_INTER
+// Collects the reference MV candidates and mode context(s) for ref_frame.
+void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+#if CONFIG_REF_MV
+ uint8_t *ref_mv_count, CANDIDATE_MV *ref_mv_stack,
+#if CONFIG_EXT_INTER
+ int16_t *compound_mode_context,
+#endif // CONFIG_EXT_INTER
+#endif
+ int_mv *mv_ref_list, int mi_row, int mi_col,
+ find_mv_refs_sync sync, void *const data,
+ int16_t *mode_context) {
+#if CONFIG_REF_MV
+ int idx, all_zero = 1;
+#endif
+#if CONFIG_EXT_INTER
+ vp10_update_mv_context(xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col,
+#if CONFIG_REF_MV
+ compound_mode_context);
+#else
+ mode_context);
+#endif // CONFIG_REF_MV
+ find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col, sync,
+ data, NULL);
+#else
+ find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col, sync,
+ data, mode_context);
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_REF_MV
+ setup_ref_mv_list(cm, xd, ref_frame, ref_mv_count, ref_mv_stack, mv_ref_list,
+ -1, mi_row, mi_col, mode_context);
+ // Flag the context when every MV in mv_ref_list is zero.
+ for (idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx)
+ if (mv_ref_list[idx].as_int != 0) all_zero = 0;
+
+ if (all_zero) mode_context[ref_frame] |= (1 << ALL_ZERO_FLAG_OFFSET);
+#endif
+}
+
+// Lowers the precision of every candidate in mvlist (as directed by allow_hp),
+// then returns the first two entries through nearest_mv and near_mv.
+void vp10_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
+                            int_mv *near_mv) {
+  int_mv *cand;
+  for (cand = mvlist; cand != mvlist + MAX_MV_REF_CANDIDATES; ++cand)
+    lower_mv_precision(&cand->as_mv, allow_hp);
+  nearest_mv[0] = mvlist[0];
+  near_mv[0] = mvlist[1];
+}
+// Derives nearest_mv / near_mv for sub8x8 block index `block` of xd->mi[0].
+void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, int block,
+ int ref, int mi_row, int mi_col,
+#if CONFIG_REF_MV
+ CANDIDATE_MV *ref_mv_stack,
+ uint8_t *ref_mv_count,
+#endif
+#if CONFIG_EXT_INTER
+ int_mv *mv_list,
+#endif // CONFIG_EXT_INTER
+ int_mv *nearest_mv, int_mv *near_mv) {
+#if !CONFIG_EXT_INTER
+ int_mv mv_list[MAX_MV_REF_CANDIDATES];
+#endif // !CONFIG_EXT_INTER
+ MODE_INFO *const mi = xd->mi[0];
+ b_mode_info *bmi = mi->bmi;
+ int n;
+#if CONFIG_REF_MV
+ CANDIDATE_MV tmp_mv;
+ uint8_t idx;
+ uint8_t above_count = 0, left_count = 0;
+ MV_REFERENCE_FRAME rf[2] = { mi->mbmi.ref_frame[ref], NONE };
+ *ref_mv_count = 0;
+#endif
+
+ assert(MAX_MV_REF_CANDIDATES == 2);
+
+ find_mv_refs_idx(cm, xd, mi, mi->mbmi.ref_frame[ref], mv_list, block, mi_row,
+ mi_col, NULL, NULL, NULL);
+
+#if CONFIG_REF_MV
+ scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, -1, 0, ref_mv_stack,
+ ref_mv_count);
+ above_count = *ref_mv_count;
+ // Same probe for the left neighbour; left_count is what it alone added.
+ scan_blk_mbmi(cm, xd, mi_row, mi_col, block, rf, 0, -1, ref_mv_stack,
+ ref_mv_count);
+ left_count = *ref_mv_count - above_count;
+ // If above gave 2+ entries, swap the first left entry into slot 1.
+ if (above_count > 1 && left_count > 0) {
+ tmp_mv = ref_mv_stack[1];
+ ref_mv_stack[1] = ref_mv_stack[above_count];
+ ref_mv_stack[above_count] = tmp_mv;
+ }
+
+ for (idx = 0; idx < *ref_mv_count; ++idx)
+ clamp_mv_ref(&ref_mv_stack[idx].this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3,
+ xd);
+
+ for (idx = 0; idx < VPXMIN(MAX_MV_REF_CANDIDATES, *ref_mv_count); ++idx)
+ mv_list[idx].as_int = ref_mv_stack[idx].this_mv.as_int;
+#endif
+ // Default near_mv to zero; cases 1-3 only overwrite it on a differing MV.
+ near_mv->as_int = 0;
+ switch (block) {
+ case 0:
+ nearest_mv->as_int = mv_list[0].as_int;
+ near_mv->as_int = mv_list[1].as_int;
+ break;
+ case 1:
+ case 2:
+ nearest_mv->as_int = bmi[0].as_mv[ref].as_int;
+ for (n = 0; n < MAX_MV_REF_CANDIDATES; ++n)
+ if (nearest_mv->as_int != mv_list[n].as_int) {
+ near_mv->as_int = mv_list[n].as_int;
+ break;
+ }
+ break;
+ case 3: {
+ int_mv candidates[2 + MAX_MV_REF_CANDIDATES];
+ candidates[0] = bmi[1].as_mv[ref];
+ candidates[1] = bmi[0].as_mv[ref];
+ candidates[2] = mv_list[0];
+ candidates[3] = mv_list[1];
+
+ nearest_mv->as_int = bmi[2].as_mv[ref].as_int;
+ for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n)
+ if (nearest_mv->as_int != candidates[n].as_int) {
+ near_mv->as_int = candidates[n].as_int;
+ break;
+ }
+ break;
+ }
+ default: assert(0 && "Invalid block index.");
+ }
+}
diff --git a/av1/common/mvref_common.h b/av1/common/mvref_common.h
new file mode 100644
index 0000000..babd4f0
--- /dev/null
+++ b/av1/common/mvref_common.h
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP10_COMMON_MVREF_COMMON_H_
+#define VP10_COMMON_MVREF_COMMON_H_
+
+#include "av1/common/onyxc_int.h"
+#include "av1/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MVREF_NEIGHBOURS 8 // search positions per block size in mv_ref_blocks
+
+typedef struct position {
+ int row; // row offset; is_inside() adds it to mi_row
+ int col; // column offset; is_inside() adds it to mi_col
+} POSITION;
+
+typedef enum { // context values stored in counter_to_context[]
+ BOTH_ZERO = 0,
+ ZERO_PLUS_PREDICTED = 1,
+ BOTH_PREDICTED = 2,
+ NEW_PLUS_NON_INTRA = 3,
+ BOTH_NEW = 4,
+ INTRA_PLUS_NON_INTRA = 5,
+ BOTH_INTRA = 6,
+ INVALID_CASE = 9 // placeholder for counter sums without a context
+} motion_vector_context;
+
+// Per-mode weight used to build a context counter for the reference blocks:
+// 9 for an intra mode, 3 for a zero-MV mode, 1 for a mode that codes a new MV
+// and 0 for the remaining (nearest/near) modes. The weights of the nearest
+// neighbours are summed, and that single number is then converted into a
+// context with a single lookup ( counter_to_context ).
+static const int mode_2_counter[MB_MODE_COUNT] = {
+ 9, // DC_PRED
+ 9, // V_PRED
+ 9, // H_PRED
+ 9, // D45_PRED
+ 9, // D135_PRED
+ 9, // D117_PRED
+ 9, // D153_PRED
+ 9, // D207_PRED
+ 9, // D63_PRED
+ 9, // TM_PRED
+ 0, // NEARESTMV
+ 0, // NEARMV
+ 3, // ZEROMV
+ 1, // NEWMV
+#if CONFIG_EXT_INTER
+ 1, // NEWFROMNEARMV
+ 0, // NEAREST_NEARESTMV
+ 0, // NEAREST_NEARMV
+ 0, // NEAR_NEARESTMV
+ 0, // NEAR_NEARMV
+ 1, // NEAREST_NEWMV
+ 1, // NEW_NEARESTMV
+ 1, // NEAR_NEWMV
+ 1, // NEW_NEARMV
+ 3, // ZERO_ZEROMV
+ 1, // NEW_NEWMV
+#endif // CONFIG_EXT_INTER
+};
+
+// Indexed by the sum of the mode_2_counter weights (0, 1, 3 or 9 each) of at
+// most two neighbours, so the highest counter we need is 18. Sums that those
+// weights cannot produce are filled with INVALID_CASE (enum value 9).
+static const int counter_to_context[19] = {
+ BOTH_PREDICTED, // 0
+ NEW_PLUS_NON_INTRA, // 1
+ BOTH_NEW, // 2
+ ZERO_PLUS_PREDICTED, // 3
+ NEW_PLUS_NON_INTRA, // 4
+ INVALID_CASE, // 5
+ BOTH_ZERO, // 6
+ INVALID_CASE, // 7
+ INVALID_CASE, // 8
+ INTRA_PLUS_NON_INTRA, // 9
+ INTRA_PLUS_NON_INTRA, // 10
+ INVALID_CASE, // 11
+ INTRA_PLUS_NON_INTRA, // 12
+ INVALID_CASE, // 13
+ INVALID_CASE, // 14
+ INVALID_CASE, // 15
+ INVALID_CASE, // 16
+ INVALID_CASE, // 17
+ BOTH_INTRA // 18
+};
+// Neighbour search offsets as { row, col } pairs, one list per block size.
+static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
+ // 4X4
+ { { -1, 0 },
+ { 0, -1 },
+ { -1, -1 },
+ { -2, 0 },
+ { 0, -2 },
+ { -2, -1 },
+ { -1, -2 },
+ { -2, -2 } },
+ // 4X8
+ { { -1, 0 },
+ { 0, -1 },
+ { -1, -1 },
+ { -2, 0 },
+ { 0, -2 },
+ { -2, -1 },
+ { -1, -2 },
+ { -2, -2 } },
+ // 8X4
+ { { -1, 0 },
+ { 0, -1 },
+ { -1, -1 },
+ { -2, 0 },
+ { 0, -2 },
+ { -2, -1 },
+ { -1, -2 },
+ { -2, -2 } },
+ // 8X8
+ { { -1, 0 },
+ { 0, -1 },
+ { -1, -1 },
+ { -2, 0 },
+ { 0, -2 },
+ { -2, -1 },
+ { -1, -2 },
+ { -2, -2 } },
+ // 8X16
+ { { 0, -1 },
+ { -1, 0 },
+ { 1, -1 },
+ { -1, -1 },
+ { 0, -2 },
+ { -2, 0 },
+ { -2, -1 },
+ { -1, -2 } },
+ // 16X8
+ { { -1, 0 },
+ { 0, -1 },
+ { -1, 1 },
+ { -1, -1 },
+ { -2, 0 },
+ { 0, -2 },
+ { -1, -2 },
+ { -2, -1 } },
+ // 16X16
+ { { -1, 0 },
+ { 0, -1 },
+ { -1, 1 },
+ { 1, -1 },
+ { -1, -1 },
+ { -3, 0 },
+ { 0, -3 },
+ { -3, -3 } },
+ // 16X32
+ { { 0, -1 },
+ { -1, 0 },
+ { 2, -1 },
+ { -1, -1 },
+ { -1, 1 },
+ { 0, -3 },
+ { -3, 0 },
+ { -3, -3 } },
+ // 32X16
+ { { -1, 0 },
+ { 0, -1 },
+ { -1, 2 },
+ { -1, -1 },
+ { 1, -1 },
+ { -3, 0 },
+ { 0, -3 },
+ { -3, -3 } },
+ // 32X32
+ { { -1, 1 },
+ { 1, -1 },
+ { -1, 2 },
+ { 2, -1 },
+ { -1, -1 },
+ { -3, 0 },
+ { 0, -3 },
+ { -3, -3 } },
+ // 32X64
+ { { 0, -1 },
+ { -1, 0 },
+ { 4, -1 },
+ { -1, 2 },
+ { -1, -1 },
+ { 0, -3 },
+ { -3, 0 },
+ { 2, -1 } },
+ // 64X32
+ { { -1, 0 },
+ { 0, -1 },
+ { -1, 4 },
+ { 2, -1 },
+ { -1, -1 },
+ { -3, 0 },
+ { 0, -3 },
+ { -1, 2 } },
+ // 64X64
+ { { -1, 3 },
+ { 3, -1 },
+ { -1, 4 },
+ { 4, -1 },
+ { -1, -1 },
+ { -1, 0 },
+ { 0, -1 },
+ { -1, 6 } },
+#if CONFIG_EXT_PARTITION
+ // TODO(debargha/jingning) Making them twice the 32x64, .. ones above
+ // 64x128
+ { { 0, -2 },
+ { -2, 0 },
+ { 8, -2 },
+ { -2, 4 },
+ { -2, -2 },
+ { 0, -6 },
+ { -6, 0 },
+ { 4, -2 } },
+ // 128x64
+ { { -2, 0 },
+ { 0, -2 },
+ { -2, 8 },
+ { 4, -2 },
+ { -2, -2 },
+ { -6, 0 },
+ { 0, -6 },
+ { -2, 4 } },
+ // 128x128
+ { { -2, 6 },
+ { 6, -2 },
+ { -2, 8 },
+ { 8, -2 },
+ { -2, -2 },
+ { -2, 0 },
+ { 0, -2 },
+ { -2, 12 } },
+#endif // CONFIG_EXT_PARTITION
+};
+// Maps [block_idx][search_col == 0] to the neighbour's bmi[] sub-block index.
+static const int idx_n_column_to_subblock[4][2] = {
+ { 1, 2 }, { 1, 3 }, { 3, 2 }, { 3, 3 }
+};
+
+// Extra margin used by clamp_mv_ref(), in 1/8th pel units.
+#if CONFIG_EXT_PARTITION
+#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
+#else
+#define MV_BORDER (8 << 3) // Allow 8 pels in 1/8th pel units
+#endif // CONFIG_EXT_PARTITION
+
+static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
+  // Margin past each mb_to_*_edge: bw * 8 (or bh * 8) plus MV_BORDER.
+  const int mx = bw * 8 + MV_BORDER, my = bh * 8 + MV_BORDER;
+  clamp_mv(mv, xd->mb_to_left_edge - mx, xd->mb_to_right_edge + mx,
+           xd->mb_to_top_edge - my, xd->mb_to_bottom_edge + my);
+}
+
+// Returns the candidate's MV for reference slot which_mv. A sub8x8 candidate
+// queried with block_idx >= 0 yields the matching bmi[] sub-block MV instead.
+static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
+                                      int search_col, int block_idx) {
+  if (block_idx < 0 || candidate->mbmi.sb_type >= BLOCK_8X8)
+    return candidate->mbmi.mv[which_mv];
+
+  return candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
+      .as_mv[which_mv];
+}
+
+#if CONFIG_REF_MV
+static INLINE int_mv get_sub_block_pred_mv(const MODE_INFO *candidate,
+                                           int which_mv, int search_col,
+                                           int block_idx) {
+  // Sub8x8 candidates queried with block_idx >= 0 use the bmi[] entry.
+  if (block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8)
+    return candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
+        .pred_mv_s8[which_mv];
+  return candidate->mbmi.pred_mv[which_mv];
+}
+#endif
+
+// Returns mbmi->mv[ref], sign-flipped if the two frames' sign biases differ.
+static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
+                              const MV_REFERENCE_FRAME this_ref_frame,
+                              const int *ref_sign_bias) {
+  int_mv out = mbmi->mv[ref];
+  if (ref_sign_bias[mbmi->ref_frame[ref]] == ref_sign_bias[this_ref_frame])
+    return out;
+  out.as_mv.row *= -1;
+  out.as_mv.col *= -1;
+  return out;
+}
+// Clamp step used by ADD_MV_REF_LIST; forwards to clamp_mv_ref().
+#define CLIP_IN_ADD(mv, bw, bh, xd) clamp_mv_ref(mv, bw, bh, xd)
+
+// Stores mv (clamped via CLIP_IN_ADD) in slot refmv_count of mv_ref_list. When
+// an entry already exists and slot 1 now differs from slot 0, the count becomes
+// 2 and control jumps to Done; otherwise the count is (re)set to 1.
+#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, bw, bh, xd, Done) \
+  do { \
+    (mv_ref_list)[(refmv_count)] = (mv); \
+    CLIP_IN_ADD(&(mv_ref_list)[(refmv_count)].as_mv, (bw), (bh), (xd)); \
+    if ((refmv_count) && (mv_ref_list)[1].as_int != (mv_ref_list)[0].as_int) { \
+      (refmv_count) = 2; \
+      goto Done; \
+    } \
+    (refmv_count) = 1; \
+  } while (0)
+
+// If mbmi is an inter block, adds (via ADD_MV_REF_LIST) the sign-scaled MV of
+// each of its reference frames that differs from ref_frame; intra is skipped.
+#define IF_DIFF_REF_FRAME_ADD_MV(mbmi, ref_frame, ref_sign_bias, refmv_count, \
+                                 mv_ref_list, bw, bh, xd, Done) \
+  do { \
+    if (is_inter_block(mbmi)) { \
+      if ((mbmi)->ref_frame[0] != (ref_frame)) \
+        ADD_MV_REF_LIST(scale_mv((mbmi), 0, (ref_frame), (ref_sign_bias)), \
+                        refmv_count, mv_ref_list, bw, bh, xd, Done); \
+      if (has_second_ref(mbmi) && (mbmi)->ref_frame[1] != (ref_frame)) \
+        ADD_MV_REF_LIST(scale_mv((mbmi), 1, (ref_frame), (ref_sign_bias)), \
+                        refmv_count, mv_ref_list, bw, bh, xd, Done); \
+    } \
+  } while (0)
+
+// Returns nonzero when (mi_row, mi_col) displaced by *mi_pos still lies within
+// the tile's [mi_row_start, mi_row_end) x [mi_col_start, mi_col_end) range.
+static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row,
+                            const POSITION *mi_pos) {
+  const int row = mi_row + mi_pos->row;
+  const int col = mi_col + mi_pos->col;
+  return row >= tile->mi_row_start && col >= tile->mi_col_start &&
+         row < tile->mi_row_end && col < tile->mi_col_end;
+}
+
+static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
+  // High precision is kept only when allowed and vp10_use_mv_hp() agrees.
+  if (allow_hp && vp10_use_mv_hp(mv)) return;
+  // Otherwise move each odd component one step towards zero, making it even.
+  if (mv->row & 1) mv->row -= (mv->row > 0) ? 1 : -1;
+  if (mv->col & 1) mv->col -= (mv->col > 0) ? 1 : -1;
+}
+
+#if CONFIG_REF_MV
+// Returns 0 if the stack is empty or entry 0 is not above REF_CAT_LEVEL; else
+// 2 if its this_mv is within 4 of its pred_mv in both components, else 1.
+static INLINE int vp10_nmv_ctx(const uint8_t ref_mv_count,
+                               const CANDIDATE_MV *ref_mv_stack) {
+#if CONFIG_EXT_INTER
+  return 0;  // the context is always 0 in this configuration
+#endif
+  // Test the count first so entry 0 of an empty stack is never read.
+  if (ref_mv_count > 0 && ref_mv_stack[0].weight > REF_CAT_LEVEL) {
+    const MV *const cur = &ref_mv_stack[0].this_mv.as_mv;
+    const MV *const pred = &ref_mv_stack[0].pred_mv.as_mv;
+    if (abs(cur->row - pred->row) <= 4 && abs(cur->col - pred->col) <= 4)
+      return 2;
+    return 1;
+  }
+  return 0;
+}
+
+// Folds a reference pair into one type value: rf[0] itself when rf[1] is not
+// above INTRA_FRAME, else TOTAL_REFS_PER_FRAME plus an offset from both frames.
+static INLINE int8_t vp10_ref_frame_type(const MV_REFERENCE_FRAME *const rf) {
+  if (rf[1] <= INTRA_FRAME) return rf[0];
+
+  return TOTAL_REFS_PER_FRAME + FWD_RF_OFFSET(rf[0]) +
+         BWD_RF_OFFSET(rf[1]) * FWD_REFS;
+}
+
+// Compound reference pairs, indexed by (ref_frame_type - TOTAL_REFS_PER_FRAME).
+static const MV_REFERENCE_FRAME ref_frame_map[COMP_REFS][2] = {
+#if CONFIG_EXT_REFS
+  { LAST_FRAME, BWDREF_FRAME },  { LAST2_FRAME, BWDREF_FRAME },
+  { LAST3_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, BWDREF_FRAME },
+  { LAST_FRAME, ALTREF_FRAME },  { LAST2_FRAME, ALTREF_FRAME },
+  { LAST3_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME }
+#else
+  { LAST_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME }
+#endif
+};
+
+static INLINE void vp10_set_ref_frame(MV_REFERENCE_FRAME *rf,
+                                      int8_t ref_frame_type) {
+  if (ref_frame_type < TOTAL_REFS_PER_FRAME) {  // single reference
+    rf[0] = ref_frame_type;
+    rf[1] = NONE;
+    assert(ref_frame_type > INTRA_FRAME &&
+           ref_frame_type < TOTAL_REFS_PER_FRAME);
+  } else {  // compound pair looked up in ref_frame_map
+    rf[0] = ref_frame_map[ref_frame_type - TOTAL_REFS_PER_FRAME][0];
+    rf[1] = ref_frame_map[ref_frame_type - TOTAL_REFS_PER_FRAME][1];
+  }
+}
+
+// Picks the mode context out of mode_context. With block >= 0 only the low
+// byte of mode_context[rf[0]] is kept, plus the SKIP_NEARESTMV_SUB8X8 flag for
+// block > 0 when bsize lies strictly between BLOCK_4X4 and BLOCK_8X8.
+static INLINE int16_t vp10_mode_context_analyzer(
+    const int16_t *const mode_context, const MV_REFERENCE_FRAME *const rf,
+    BLOCK_SIZE bsize, int block) {
+  const int16_t primary = mode_context[rf[0]];
+  int16_t sub_ctx;
+
+  if (block < 0) {
+    if (rf[1] > INTRA_FRAME) return primary & (mode_context[rf[1]] | 0x00ff);
+    if (rf[0] == ALTREF_FRAME) return primary;
+    return primary & ~(mode_context[ALTREF_FRAME] & 0xfe00);
+  }
+
+  sub_ctx = primary & 0x00ff;
+  if (block > 0 && bsize < BLOCK_8X8 && bsize > BLOCK_4X4)
+    sub_ctx |= (1 << SKIP_NEARESTMV_SUB8X8_OFFSET);
+  return sub_ctx;
+}
+
+// Classifies the stack entries at ref_idx and ref_idx + 1 by whether their
+// weights reach REF_CAT_LEVEL and whether the two weights are equal:
+//   both reach it: 0 (equal) or 1;  only the first reaches it: 2;
+//   neither reaches it: 3 (equal) or 4;  only the second reaches it: 0.
+static INLINE uint8_t vp10_drl_ctx(const CANDIDATE_MV *ref_mv_stack,
+                                   int ref_idx) {
+  const int first_hi = ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL;
+  const int second_hi = ref_mv_stack[ref_idx + 1].weight >= REF_CAT_LEVEL;
+  const int same =
+      ref_mv_stack[ref_idx].weight == ref_mv_stack[ref_idx + 1].weight;
+
+  if (first_hi) {
+    // Only the first entry reaches the level.
+    if (!second_hi) return 2;
+    // Both entries reach the level.
+    return same ? 0 : 1;
+  }
+
+  // Neither entry reaches the level.
+  if (!second_hi) return same ? 3 : 4;
+
+  // First below the level, second at or above it.
+  return 0;
+}
+#endif
+// Hook invoked as sync(data, mi_row) before previous-frame MVs are read.
+typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
+void vp10_find_mv_refs(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+#if CONFIG_REF_MV
+ uint8_t *ref_mv_count, CANDIDATE_MV *ref_mv_stack,
+#if CONFIG_EXT_INTER
+ int16_t *compound_mode_context,
+#endif // CONFIG_EXT_INTER
+#endif
+ int_mv *mv_ref_list, int mi_row, int mi_col,
+ find_mv_refs_sync sync, void *const data,
+ int16_t *mode_context);
+
+// Lowers the precision of the candidates in mvlist according to allow_hp and
+// returns the first two entries as nearest_mv and near_mv. No scoring of the
+// candidates takes place.
+void vp10_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
+ int_mv *near_mv);
+// Derives nearest_mv / near_mv for sub8x8 block index `block` of xd->mi[0].
+void vp10_append_sub8x8_mvs_for_idx(VP10_COMMON *cm, MACROBLOCKD *xd, int block,
+ int ref, int mi_row, int mi_col,
+#if CONFIG_REF_MV
+ CANDIDATE_MV *ref_mv_stack,
+ uint8_t *ref_mv_count,
+#endif
+#if CONFIG_EXT_INTER
+ int_mv *mv_list,
+#endif // CONFIG_EXT_INTER
+ int_mv *nearest_mv, int_mv *near_mv);
+
+#if CONFIG_EXT_INTER
+// Sets mode_context[ref_frame] from the two nearest neighbours of an MB/SB.
+void vp10_update_mv_context(const MACROBLOCKD *xd, MODE_INFO *mi,
+ MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list,
+ int block, int mi_row, int mi_col,
+ int16_t *mode_context);
+#endif // CONFIG_EXT_INTER
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_MVREF_COMMON_H_
diff --git a/av1/common/od_dering.c b/av1/common/od_dering.c
new file mode 100644
index 0000000..b7a459c
--- /dev/null
+++ b/av1/common/od_dering.c
@@ -0,0 +1,362 @@
+/*Daala video codec
+Copyright (c) 2014-2016 Daala project contributors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <math.h>
+#include "dering.h"
+
+const od_dering_opt_vtbl OD_DERING_VTBL_C = { /* Plain-C filter set. */
+ { od_filter_dering_direction_4x4_c, od_filter_dering_direction_8x8_c },
+ { od_filter_dering_orthogonal_4x4_c, od_filter_dering_orthogonal_8x8_c }
+}; /* Row 0: directional pass, row 1: orthogonal pass; each is 4x4, 8x8. */
+
+/* Generated from gen_filter_tables.c. Indexed [dir][k], k = tap number. */
+const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
+ { -1 * OD_FILT_BSTRIDE + 1, -2 * OD_FILT_BSTRIDE + 2,
+ -3 * OD_FILT_BSTRIDE + 3 },
+ { 0 * OD_FILT_BSTRIDE + 1, -1 * OD_FILT_BSTRIDE + 2,
+ -1 * OD_FILT_BSTRIDE + 3 },
+ { 0 * OD_FILT_BSTRIDE + 1, 0 * OD_FILT_BSTRIDE + 2, 0 * OD_FILT_BSTRIDE + 3 },
+ { 0 * OD_FILT_BSTRIDE + 1, 1 * OD_FILT_BSTRIDE + 2, 1 * OD_FILT_BSTRIDE + 3 },
+ { 1 * OD_FILT_BSTRIDE + 1, 2 * OD_FILT_BSTRIDE + 2, 3 * OD_FILT_BSTRIDE + 3 },
+ { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 1, 3 * OD_FILT_BSTRIDE + 1 },
+ { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 0, 3 * OD_FILT_BSTRIDE + 0 },
+ { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE - 1, 3 * OD_FILT_BSTRIDE - 1 },
+}; /* Each entry is rows * OD_FILT_BSTRIDE + columns; used with + and -. */
+
+const double OD_DERING_GAIN_TABLE[OD_DERING_LEVELS] = { 0, 0.5, 0.707,
+ 1, 1.41, 2 };
+
+/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
+ The search minimizes the weighted variance along all the lines in a
+ particular direction, i.e. the squared error between the input and a
+ "predicted" block where each pixel is replaced by the average along a line
+ in a particular direction. Since each direction has the same sum(x^2) term,
+ that term is never computed. See Section 2, step 2, of:
+ http://jmvalin.ca/notes/intra_paint.pdf */
+static int od_dir_find8(const od_dering_in *img, int stride, int32_t *var,
+ int coeff_shift) {
+ int i;
+ int32_t cost[8] = { 0 }; /* Score per direction; the largest one wins. */
+ int partial[8][15] = { { 0 } }; /* Pixel sums per direction and line. */
+ int32_t best_cost = 0;
+ int best_dir = 0;
+ /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
+ The output is then 840 times larger, but we don't care for finding
+ the max. */
+ static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
+ for (i = 0; i < 8; i++) { /* Accumulate over the 8x8 block at img. */
+ int j;
+ for (j = 0; j < 8; j++) {
+ int x;
+ /* We subtract 128 here to reduce the maximum range of the squared
+ partial sums. */
+ x = (img[i * stride + j] >> coeff_shift) - 128;
+ partial[0][i + j] += x;
+ partial[1][i + j / 2] += x;
+ partial[2][i] += x;
+ partial[3][3 + i - j / 2] += x;
+ partial[4][7 + i - j] += x;
+ partial[5][3 - i / 2 + j] += x;
+ partial[6][j] += x;
+ partial[7][i / 2 + j] += x;
+ }
+ }
+ for (i = 0; i < 8; i++) { /* Directions 2 and 6: 8 lines of 8 pixels. */
+ cost[2] += partial[2][i] * partial[2][i];
+ cost[6] += partial[6][i] * partial[6][i];
+ }
+ cost[2] *= div_table[8];
+ cost[6] *= div_table[8];
+ for (i = 0; i < 7; i++) { /* Dirs 0, 4: lines i and 14 - i hold i + 1 pixels. */
+ cost[0] += (partial[0][i] * partial[0][i] +
+ partial[0][14 - i] * partial[0][14 - i]) *
+ div_table[i + 1];
+ cost[4] += (partial[4][i] * partial[4][i] +
+ partial[4][14 - i] * partial[4][14 - i]) *
+ div_table[i + 1];
+ }
+ cost[0] += partial[0][7] * partial[0][7] * div_table[8];
+ cost[4] += partial[4][7] * partial[4][7] * div_table[8];
+ for (i = 1; i < 8; i += 2) { /* Odd directions use lines 0..10 only. */
+ int j;
+ for (j = 0; j < 4 + 1; j++) {
+ cost[i] += partial[i][3 + j] * partial[i][3 + j];
+ }
+ cost[i] *= div_table[8];
+ for (j = 0; j < 4 - 1; j++) {
+ cost[i] += (partial[i][j] * partial[i][j] +
+ partial[i][10 - j] * partial[i][10 - j]) *
+ div_table[2 * j + 2];
+ }
+ }
+ for (i = 0; i < 8; i++) { /* Strict '>' means ties keep the lower index. */
+ if (cost[i] > best_cost) {
+ best_cost = cost[i];
+ best_dir = i;
+ }
+ }
+ /* Difference between the optimal variance and the variance along the
+ orthogonal direction. Again, the sum(x^2) terms cancel out. */
+ *var = best_cost - cost[(best_dir + 4) & 7];
+ /* We'd normally divide by 840, but dividing by 1024 is close enough
+ for what we're going to do with this. */
+ *var >>= 10;
+ return best_dir; /* Index 0..7 of the highest-scoring direction. */
+}
+
+#define OD_DERING_VERY_LARGE (30000)
+#define OD_DERING_INBUF_SIZE \
+ ((OD_BSIZE_MAX + 2 * OD_FILT_BORDER) * (OD_BSIZE_MAX + 2 * OD_FILT_BORDER))
+
+/* Smooth a (1 << ln) square block of in along direction dir, writing y. */
+void od_filter_dering_direction_c(int16_t *y, int ystride, const int16_t *in,
+ int ln, int threshold, int dir) {
+ int i;
+ int j;
+ int k;
+ static const int taps[3] = { 3, 2, 2 }; /* Weight of each offset pair. */
+ for (i = 0; i < 1 << ln; i++) {
+ for (j = 0; j < 1 << ln; j++) {
+ int16_t sum;
+ int16_t xx;
+ int16_t yy;
+ xx = in[i * OD_FILT_BSTRIDE + j]; /* in uses row stride OD_FILT_BSTRIDE. */
+ sum = 0;
+ for (k = 0; k < 3; k++) {
+ int16_t p0;
+ int16_t p1;
+ p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
+ xx;
+ p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
+ xx;
+ /* Only neighbours that differ by less than threshold contribute. */
+ if (abs(p0) < threshold) sum += taps[k] * p0;
+ if (abs(p1) < threshold) sum += taps[k] * p1;
+ }
+ yy = xx + ((sum + 8) >> 4); /* Add the weighted sum / 16, rounded. */
+ y[i * ystride + j] = yy;
+ }
+ }
+}
+
+void od_filter_dering_direction_4x4_c(int16_t *y, int ystride,
+ const int16_t *in, int threshold,
+ int dir) {
+ od_filter_dering_direction_c(y, ystride, in, 2, threshold, dir); /* ln = 2 */
+}
+
+void od_filter_dering_direction_8x8_c(int16_t *y, int ystride,
+ const int16_t *in, int threshold,
+ int dir) {
+ od_filter_dering_direction_c(y, ystride, in, 3, threshold, dir); /* ln = 3 */
+}
+
+/* Smooth in the direction orthogonal to what was detected. */
+void od_filter_dering_orthogonal_c(int16_t *y, int ystride, const int16_t *in,
+ const od_dering_in *x, int xstride, int ln,
+ int threshold, int dir) {
+ int i;
+ int j;
+ int offset;
+ if (dir > 0 && dir < 4) /* Directions 1..3 step by rows of in. */
+ offset = OD_FILT_BSTRIDE;
+ else /* Every other direction steps by columns. */
+ offset = 1;
+ for (i = 0; i < 1 << ln; i++) {
+ for (j = 0; j < 1 << ln; j++) {
+ int16_t athresh;
+ int16_t yy;
+ int16_t sum;
+ int16_t p;
+ /* Deringing orthogonal to the direction uses a tighter threshold
+ because we want to be conservative. We've presumably already
+ achieved some deringing, so the amount of change is expected
+ to be low. Also, since we might be filtering across an edge, we
+ want to make sure not to blur it. That being said, we might want
+ to be a little bit more aggressive on pure horizontal/vertical
+ since the ringing there tends to be directional, so it doesn't
+ get removed by the directional filtering. */
+ athresh = OD_MINI(
+ threshold, threshold / 3 +
+ abs(in[i * OD_FILT_BSTRIDE + j] - x[i * xstride + j]));
+ yy = in[i * OD_FILT_BSTRIDE + j];
+ sum = 0;
+ p = in[i * OD_FILT_BSTRIDE + j + offset] - yy; /* Neighbour at +1. */
+ if (abs(p) < athresh) sum += p;
+ p = in[i * OD_FILT_BSTRIDE + j - offset] - yy; /* Neighbour at -1. */
+ if (abs(p) < athresh) sum += p;
+ p = in[i * OD_FILT_BSTRIDE + j + 2 * offset] - yy; /* Neighbour at +2. */
+ if (abs(p) < athresh) sum += p;
+ p = in[i * OD_FILT_BSTRIDE + j - 2 * offset] - yy; /* Neighbour at -2. */
+ if (abs(p) < athresh) sum += p;
+ y[i * ystride + j] = yy + ((3 * sum + 8) >> 4); /* 3 * sum / 16, rounded. */
+ }
+ }
+}
+
+void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
+ const int16_t *in, const od_dering_in *x,
+ int xstride, int threshold, int dir) {
+ /* ln = 2 selects a 4x4 block. */
+ od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 2, threshold, dir);
+
+void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
+ const int16_t *in, const od_dering_in *x,
+ int xstride, int threshold, int dir) {
+ od_filter_dering_orthogonal_c(y, ystride, in, x, xstride, 3, threshold, dir);
+} /* The literal 3 above is ln, i.e. an 8x8 block. */
+
+/* This table approximates x^0.16 with the index being log2(x). It is clamped
+ to [.5, 3] (128 to 768 in Q8). The table is computed as:
+ round(256*min(3, max(.5, 1.08*(sqrt(2)*2.^([0:17]+8)/256/256).^.16))) */
+static const int16_t OD_THRESH_TABLE_Q8[18] = {
+ 128, 134, 150, 168, 188, 210, 234, 262, 292,
+ 327, 365, 408, 455, 509, 569, 635, 710, 768,
+};
+
+/* Compute deringing filter threshold for each 8x8 block based on the
+ directional variance difference. A high variance difference means that we
+ have a highly directional pattern (e.g. a high contrast edge), so we can
+ apply more deringing. A low variance means that we either have a low
+ contrast edge, or a non-directional texture, so we want to be careful not
+ to blur. */
+static void od_compute_thresh(int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
+ int threshold,
+ int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
+ int nhb, int nvb) {
+ int bx;
+ int by;
+ for (by = 0; by < nvb; by++) { /* nvb rows by nhb columns of blocks. */
+ for (bx = 0; bx < nhb; bx++) {
+ int v1;
+ /* We use the variance of 8x8 blocks to adjust the threshold. */
+ v1 = OD_MINI(32767, var[by][bx] >> 6);
+ /* Scale threshold by the Q8 table entry, rounding the >> 8. */
+ thresh[by][bx] = (threshold * OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
+ }
+ }
+}
+
+void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
+ const od_dering_in *x, int xstride, int nhb, int nvb, int sbx,
+ int sby, int nhsb, int nvsb, int xdec,
+ int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
+ unsigned char *bskip, int skip_stride, int threshold,
+ int overlap, int coeff_shift) {
+ int i;
+ int j;
+ int bx;
+ int by;
+ int16_t inbuf[OD_DERING_INBUF_SIZE];
+ int16_t *in;
+ int bsize;
+ int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
+ int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
+ bsize = 3 - xdec; /* log2 of the block edge used by the shifts below. */
+ in = inbuf + OD_FILT_BORDER * OD_FILT_BSTRIDE + OD_FILT_BORDER;
+ /* We avoid filtering the pixels for which some of the pixels to average
+ are outside the frame. We could change the filter instead, but it would
+ add special cases for any future vectorization. */
+ for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE;
+ for (i = -OD_FILT_BORDER * (sby != 0);
+ i < (nvb << bsize) + OD_FILT_BORDER * (sby != nvsb - 1); i++) {
+ for (j = -OD_FILT_BORDER * (sbx != 0);
+ j < (nhb << bsize) + OD_FILT_BORDER * (sbx != nhsb - 1); j++) {
+ in[i * OD_FILT_BSTRIDE + j] = x[i * xstride + j];
+ }
+ }
+ if (pli == 0) { /* pli 0: fill dir[][] and derive per-block thresholds. */
+ for (by = 0; by < nvb; by++) {
+ for (bx = 0; bx < nhb; bx++) {
+ dir[by][bx] = od_dir_find8(&x[8 * by * xstride + 8 * bx], xstride,
+ &var[by][bx], coeff_shift);
+ }
+ }
+ od_compute_thresh(thresh, threshold, var, nhb, nvb);
+ } else { /* Other pli: dir[][] is read as passed in; flat threshold. */
+ for (by = 0; by < nvb; by++) {
+ for (bx = 0; bx < nhb; bx++) {
+ thresh[by][bx] = threshold;
+ }
+ }
+ }
+ for (by = 0; by < nvb; by++) {
+ for (bx = 0; bx < nhb; bx++) {
+ int skip;
+#if defined(DAALA_ODINTRIN)
+ int xstart;
+ int ystart;
+ int xend;
+ int yend;
+ xstart = ystart = 0;
+ xend = yend = (2 >> xdec);
+ if (overlap) {
+ xstart -= (sbx != 0);
+ ystart -= (sby != 0);
+ xend += (sbx != nhsb - 1);
+ yend += (sby != nvsb - 1);
+ }
+ skip = 1;
+ /* We look at whether the current block and its 4x4 surrounding (due to
+ lapping) are skipped to avoid filtering the same content multiple
+ times. */
+ for (i = ystart; i < yend; i++) {
+ for (j = xstart; j < xend; j++) {
+ skip = skip && bskip[((by << 1 >> xdec) + i) * skip_stride +
+ (bx << 1 >> xdec) + j];
+ }
+ }
+#else
+ (void)overlap;
+ skip = bskip[by * skip_stride + bx];
+#endif
+ /* A zero threshold makes the _c filters copy pixels unchanged. */
+ if (skip) thresh[by][bx] = 0;
+ }
+ }
+ for (by = 0; by < nvb; by++) { /* Pass 1: directional filter, in -> y. */
+ for (bx = 0; bx < nhb; bx++) {
+ (vtbl->filter_dering_direction[bsize - OD_LOG_BSIZE0])(
+ &y[(by * ystride << bsize) + (bx << bsize)], ystride,
+ &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], thresh[by][bx],
+ dir[by][bx]);
+ }
+ }
+ for (i = 0; i < nvb << bsize; i++) { /* Copy pass-1 output back into in. */
+ for (j = 0; j < nhb << bsize; j++) {
+ in[i * OD_FILT_BSTRIDE + j] = y[i * ystride + j];
+ }
+ }
+ for (by = 0; by < nvb; by++) { /* Pass 2: orthogonal filter, in -> y. */
+ for (bx = 0; bx < nhb; bx++) {
+ (vtbl->filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
+ &y[(by * ystride << bsize) + (bx << bsize)], ystride,
+ &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
+ &x[(by * xstride << bsize) + (bx << bsize)], xstride, thresh[by][bx],
+ dir[by][bx]);
+ }
+ }
+}
diff --git a/av1/common/od_dering.h b/av1/common/od_dering.h
new file mode 100644
index 0000000..6bb3974
--- /dev/null
+++ b/av1/common/od_dering.h
@@ -0,0 +1,92 @@
+/*Daala video codec
+Copyright (c) 2003-2010 Daala project contributors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#if !defined(_dering_H)
+#define _dering_H (1)
+
+#include "odintrin.h"
+
+#if defined(DAALA_ODINTRIN)
+#include "filter.h"
+typedef int16_t od_dering_in;
+#endif
+
+#define OD_DERINGSIZES (2)
+
+#define OD_DERING_NO_CHECK_OVERLAP (0)
+#define OD_DERING_CHECK_OVERLAP (1)
+
+#define OD_DERING_LEVELS (6)
+extern const double OD_DERING_GAIN_TABLE[OD_DERING_LEVELS];
+
+#define OD_DERING_NBLOCKS (OD_BSIZE_MAX / 8)
+
+#define OD_FILT_BORDER (3)
+#define OD_FILT_BSTRIDE (OD_BSIZE_MAX + 2 * OD_FILT_BORDER)
+
+extern const int OD_DIRECTION_OFFSETS_TABLE[8][3];
+
+typedef void (*od_filter_dering_direction_func)(int16_t *y, int ystride,
+ const int16_t *in,
+ int threshold, int dir);
+typedef void (*od_filter_dering_orthogonal_func)(int16_t *y, int ystride,
+ const int16_t *in,
+ const od_dering_in *x,
+ int xstride, int threshold,
+ int dir);
+
+struct od_dering_opt_vtbl {
+ od_filter_dering_direction_func filter_dering_direction[OD_DERINGSIZES];
+ od_filter_dering_orthogonal_func filter_dering_orthogonal[OD_DERINGSIZES];
+};
+typedef struct od_dering_opt_vtbl od_dering_opt_vtbl;
+
+void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
+ const od_dering_in *x, int xstride, int nhb, int nvb, int sbx,
+ int sby, int nhsb, int nvsb, int xdec,
+ int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
+ unsigned char *bskip, int skip_stride, int threshold,
+ int overlap, int coeff_shift); /* nhb, nvb: block columns, block rows. */
+void od_filter_dering_direction_c(int16_t *y, int ystride, const int16_t *in,
+ int ln, int threshold, int dir);
+void od_filter_dering_orthogonal_c(int16_t *y, int ystride, const int16_t *in,
+ const od_dering_in *x, int xstride, int ln,
+ int threshold, int dir);
+
+extern const od_dering_opt_vtbl OD_DERING_VTBL_C;
+
+void od_filter_dering_direction_4x4_c(int16_t *y, int ystride,
+ const int16_t *in, int threshold,
+ int dir);
+void od_filter_dering_direction_8x8_c(int16_t *y, int ystride,
+ const int16_t *in, int threshold,
+ int dir);
+void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
+ const int16_t *in, const od_dering_in *x,
+ int xstride, int threshold, int dir);
+void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
+ const int16_t *in, const od_dering_in *x,
+ int xstride, int threshold, int dir);
+
+#endif
diff --git a/av1/common/odintrin.c b/av1/common/odintrin.c
new file mode 100644
index 0000000..b5bbaa6
--- /dev/null
+++ b/av1/common/odintrin.c
@@ -0,0 +1,552 @@
+/*Daala video codec
+Copyright (c) 2006-2010 Daala project contributors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
+
+#include "av1/common/odintrin.h"
+
+/*Constants for use with OD_DIVU_SMALL().
+ See \cite{Rob05} for details on computing these constants.
+ @INPROCEEDINGS{Rob05,
+ author="Arch D. Robison",
+ title="{N}-bit Unsigned Division via {N}-bit Multiply-Add",
+ booktitle="Proc. of the 17th IEEE Symposium on Computer Arithmetic
+ (ARITH'05)",
+ pages="131--139",
+ address="Cape Cod, MA",
+ month=Jun,
+ year=2005
+ }*/
+uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2] = {
+ { 0xFFFFFFFF, 0xFFFFFFFF }, { 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0xAAAAAAAB, 0 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0xCCCCCCCD, 0 }, { 0xAAAAAAAB, 0 },
+ { 0x92492492, 0x92492492 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0xE38E38E4, 0 }, { 0xCCCCCCCD, 0 },
+ { 0xBA2E8BA3, 0 }, { 0xAAAAAAAB, 0 },
+ { 0x9D89D89E, 0 }, { 0x92492492, 0x92492492 },
+ { 0x88888889, 0 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0xF0F0F0F1, 0 }, { 0xE38E38E4, 0 },
+ { 0xD79435E5, 0xD79435E5 }, { 0xCCCCCCCD, 0 },
+ { 0xC30C30C3, 0xC30C30C3 }, { 0xBA2E8BA3, 0 },
+ { 0xB21642C9, 0 }, { 0xAAAAAAAB, 0 },
+ { 0xA3D70A3E, 0 }, { 0x9D89D89E, 0 },
+ { 0x97B425ED, 0x97B425ED }, { 0x92492492, 0x92492492 },
+ { 0x8D3DCB09, 0 }, { 0x88888889, 0 },
+ { 0x84210842, 0x84210842 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0xF83E0F84, 0 }, { 0xF0F0F0F1, 0 },
+ { 0xEA0EA0EA, 0xEA0EA0EA }, { 0xE38E38E4, 0 },
+ { 0xDD67C8A6, 0xDD67C8A6 }, { 0xD79435E5, 0xD79435E5 },
+ { 0xD20D20D2, 0xD20D20D2 }, { 0xCCCCCCCD, 0 },
+ { 0xC7CE0C7D, 0 }, { 0xC30C30C3, 0xC30C30C3 },
+ { 0xBE82FA0C, 0 }, { 0xBA2E8BA3, 0 },
+ { 0xB60B60B6, 0xB60B60B6 }, { 0xB21642C9, 0 },
+ { 0xAE4C415D, 0 }, { 0xAAAAAAAB, 0 },
+ { 0xA72F053A, 0 }, { 0xA3D70A3E, 0 },
+ { 0xA0A0A0A1, 0 }, { 0x9D89D89E, 0 },
+ { 0x9A90E7D9, 0x9A90E7D9 }, { 0x97B425ED, 0x97B425ED },
+ { 0x94F2094F, 0x94F2094F }, { 0x92492492, 0x92492492 },
+ { 0x8FB823EE, 0x8FB823EE }, { 0x8D3DCB09, 0 },
+ { 0x8AD8F2FC, 0 }, { 0x88888889, 0 },
+ { 0x864B8A7E, 0 }, { 0x84210842, 0x84210842 },
+ { 0x82082082, 0x82082082 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0xFC0FC0FD, 0 }, { 0xF83E0F84, 0 },
+ { 0xF4898D60, 0 }, { 0xF0F0F0F1, 0 },
+ { 0xED7303B6, 0 }, { 0xEA0EA0EA, 0xEA0EA0EA },
+ { 0xE6C2B449, 0 }, { 0xE38E38E4, 0 },
+ { 0xE070381C, 0xE070381C }, { 0xDD67C8A6, 0xDD67C8A6 },
+ { 0xDA740DA8, 0 }, { 0xD79435E5, 0xD79435E5 },
+ { 0xD4C77B04, 0 }, { 0xD20D20D2, 0xD20D20D2 },
+ { 0xCF6474A9, 0 }, { 0xCCCCCCCD, 0 },
+ { 0xCA4587E7, 0 }, { 0xC7CE0C7D, 0 },
+ { 0xC565C87C, 0 }, { 0xC30C30C3, 0xC30C30C3 },
+ { 0xC0C0C0C1, 0 }, { 0xBE82FA0C, 0 },
+ { 0xBC52640C, 0 }, { 0xBA2E8BA3, 0 },
+ { 0xB81702E1, 0 }, { 0xB60B60B6, 0xB60B60B6 },
+ { 0xB40B40B4, 0xB40B40B4 }, { 0xB21642C9, 0 },
+ { 0xB02C0B03, 0 }, { 0xAE4C415D, 0 },
+ { 0xAC769184, 0xAC769184 }, { 0xAAAAAAAB, 0 },
+ { 0xA8E83F57, 0xA8E83F57 }, { 0xA72F053A, 0 },
+ { 0xA57EB503, 0 }, { 0xA3D70A3E, 0 },
+ { 0xA237C32B, 0xA237C32B }, { 0xA0A0A0A1, 0 },
+ { 0x9F1165E7, 0x9F1165E7 }, { 0x9D89D89E, 0 },
+ { 0x9C09C09C, 0x9C09C09C }, { 0x9A90E7D9, 0x9A90E7D9 },
+ { 0x991F1A51, 0x991F1A51 }, { 0x97B425ED, 0x97B425ED },
+ { 0x964FDA6C, 0x964FDA6C }, { 0x94F2094F, 0x94F2094F },
+ { 0x939A85C4, 0x939A85C4 }, { 0x92492492, 0x92492492 },
+ { 0x90FDBC09, 0x90FDBC09 }, { 0x8FB823EE, 0x8FB823EE },
+ { 0x8E78356D, 0x8E78356D }, { 0x8D3DCB09, 0 },
+ { 0x8C08C08C, 0x8C08C08C }, { 0x8AD8F2FC, 0 },
+ { 0x89AE408A, 0 }, { 0x88888889, 0 },
+ { 0x8767AB5F, 0x8767AB5F }, { 0x864B8A7E, 0 },
+ { 0x85340853, 0x85340853 }, { 0x84210842, 0x84210842 },
+ { 0x83126E98, 0 }, { 0x82082082, 0x82082082 },
+ { 0x81020408, 0x81020408 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0xFE03F810, 0 }, { 0xFC0FC0FD, 0 },
+ { 0xFA232CF3, 0 }, { 0xF83E0F84, 0 },
+ { 0xF6603D99, 0 }, { 0xF4898D60, 0 },
+ { 0xF2B9D649, 0 }, { 0xF0F0F0F1, 0 },
+ { 0xEF2EB720, 0 }, { 0xED7303B6, 0 },
+ { 0xEBBDB2A6, 0 }, { 0xEA0EA0EA, 0xEA0EA0EA },
+ { 0xE865AC7C, 0 }, { 0xE6C2B449, 0 },
+ { 0xE525982B, 0 }, { 0xE38E38E4, 0 },
+ { 0xE1FC780F, 0 }, { 0xE070381C, 0xE070381C },
+ { 0xDEE95C4D, 0 }, { 0xDD67C8A6, 0xDD67C8A6 },
+ { 0xDBEB61EF, 0 }, { 0xDA740DA8, 0 },
+ { 0xD901B204, 0 }, { 0xD79435E5, 0xD79435E5 },
+ { 0xD62B80D7, 0 }, { 0xD4C77B04, 0 },
+ { 0xD3680D37, 0 }, { 0xD20D20D2, 0xD20D20D2 },
+ { 0xD0B69FCC, 0 }, { 0xCF6474A9, 0 },
+ { 0xCE168A77, 0xCE168A77 }, { 0xCCCCCCCD, 0 },
+ { 0xCB8727C1, 0 }, { 0xCA4587E7, 0 },
+ { 0xC907DA4F, 0 }, { 0xC7CE0C7D, 0 },
+ { 0xC6980C6A, 0 }, { 0xC565C87C, 0 },
+ { 0xC4372F86, 0 }, { 0xC30C30C3, 0xC30C30C3 },
+ { 0xC1E4BBD6, 0 }, { 0xC0C0C0C1, 0 },
+ { 0xBFA02FE8, 0xBFA02FE8 }, { 0xBE82FA0C, 0 },
+ { 0xBD691047, 0xBD691047 }, { 0xBC52640C, 0 },
+ { 0xBB3EE722, 0 }, { 0xBA2E8BA3, 0 },
+ { 0xB92143FA, 0xB92143FA }, { 0xB81702E1, 0 },
+ { 0xB70FBB5A, 0xB70FBB5A }, { 0xB60B60B6, 0xB60B60B6 },
+ { 0xB509E68B, 0 }, { 0xB40B40B4, 0xB40B40B4 },
+ { 0xB30F6353, 0 }, { 0xB21642C9, 0 },
+ { 0xB11FD3B8, 0xB11FD3B8 }, { 0xB02C0B03, 0 },
+ { 0xAF3ADDC7, 0 }, { 0xAE4C415D, 0 },
+ { 0xAD602B58, 0xAD602B58 }, { 0xAC769184, 0xAC769184 },
+ { 0xAB8F69E3, 0 }, { 0xAAAAAAAB, 0 },
+ { 0xA9C84A48, 0 }, { 0xA8E83F57, 0xA8E83F57 },
+ { 0xA80A80A8, 0xA80A80A8 }, { 0xA72F053A, 0 },
+ { 0xA655C439, 0xA655C439 }, { 0xA57EB503, 0 },
+ { 0xA4A9CF1E, 0 }, { 0xA3D70A3E, 0 },
+ { 0xA3065E40, 0 }, { 0xA237C32B, 0xA237C32B },
+ { 0xA16B312F, 0 }, { 0xA0A0A0A1, 0 },
+ { 0x9FD809FE, 0 }, { 0x9F1165E7, 0x9F1165E7 },
+ { 0x9E4CAD24, 0 }, { 0x9D89D89E, 0 },
+ { 0x9CC8E161, 0 }, { 0x9C09C09C, 0x9C09C09C },
+ { 0x9B4C6F9F, 0 }, { 0x9A90E7D9, 0x9A90E7D9 },
+ { 0x99D722DB, 0 }, { 0x991F1A51, 0x991F1A51 },
+ { 0x9868C80A, 0 }, { 0x97B425ED, 0x97B425ED },
+ { 0x97012E02, 0x97012E02 }, { 0x964FDA6C, 0x964FDA6C },
+ { 0x95A02568, 0x95A02568 }, { 0x94F2094F, 0x94F2094F },
+ { 0x94458094, 0x94458094 }, { 0x939A85C4, 0x939A85C4 },
+ { 0x92F11384, 0x92F11384 }, { 0x92492492, 0x92492492 },
+ { 0x91A2B3C5, 0 }, { 0x90FDBC09, 0x90FDBC09 },
+ { 0x905A3863, 0x905A3863 }, { 0x8FB823EE, 0x8FB823EE },
+ { 0x8F1779DA, 0 }, { 0x8E78356D, 0x8E78356D },
+ { 0x8DDA5202, 0x8DDA5202 }, { 0x8D3DCB09, 0 },
+ { 0x8CA29C04, 0x8CA29C04 }, { 0x8C08C08C, 0x8C08C08C },
+ { 0x8B70344A, 0x8B70344A }, { 0x8AD8F2FC, 0 },
+ { 0x8A42F870, 0x8A42F870 }, { 0x89AE408A, 0 },
+ { 0x891AC73B, 0 }, { 0x88888889, 0 },
+ { 0x87F78088, 0 }, { 0x8767AB5F, 0x8767AB5F },
+ { 0x86D90545, 0 }, { 0x864B8A7E, 0 },
+ { 0x85BF3761, 0x85BF3761 }, { 0x85340853, 0x85340853 },
+ { 0x84A9F9C8, 0x84A9F9C8 }, { 0x84210842, 0x84210842 },
+ { 0x83993052, 0x83993052 }, { 0x83126E98, 0 },
+ { 0x828CBFBF, 0 }, { 0x82082082, 0x82082082 },
+ { 0x81848DA9, 0 }, { 0x81020408, 0x81020408 },
+ { 0x80808081, 0 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0xFF00FF01, 0 }, { 0xFE03F810, 0 },
+ { 0xFD08E551, 0 }, { 0xFC0FC0FD, 0 },
+ { 0xFB188566, 0 }, { 0xFA232CF3, 0 },
+ { 0xF92FB222, 0 }, { 0xF83E0F84, 0 },
+ { 0xF74E3FC3, 0 }, { 0xF6603D99, 0 },
+ { 0xF57403D6, 0 }, { 0xF4898D60, 0 },
+ { 0xF3A0D52D, 0 }, { 0xF2B9D649, 0 },
+ { 0xF1D48BCF, 0 }, { 0xF0F0F0F1, 0 },
+ { 0xF00F00F0, 0xF00F00F0 }, { 0xEF2EB720, 0 },
+ { 0xEE500EE5, 0xEE500EE5 }, { 0xED7303B6, 0 },
+ { 0xEC979119, 0 }, { 0xEBBDB2A6, 0 },
+ { 0xEAE56404, 0 }, { 0xEA0EA0EA, 0xEA0EA0EA },
+ { 0xE9396520, 0 }, { 0xE865AC7C, 0 },
+ { 0xE79372E3, 0 }, { 0xE6C2B449, 0 },
+ { 0xE5F36CB0, 0xE5F36CB0 }, { 0xE525982B, 0 },
+ { 0xE45932D8, 0 }, { 0xE38E38E4, 0 },
+ { 0xE2C4A689, 0 }, { 0xE1FC780F, 0 },
+ { 0xE135A9CA, 0 }, { 0xE070381C, 0xE070381C },
+ { 0xDFAC1F75, 0 }, { 0xDEE95C4D, 0 },
+ { 0xDE27EB2D, 0 }, { 0xDD67C8A6, 0xDD67C8A6 },
+ { 0xDCA8F159, 0 }, { 0xDBEB61EF, 0 },
+ { 0xDB2F171E, 0 }, { 0xDA740DA8, 0 },
+ { 0xD9BA4257, 0 }, { 0xD901B204, 0 },
+ { 0xD84A598F, 0 }, { 0xD79435E5, 0xD79435E5 },
+ { 0xD6DF43FD, 0 }, { 0xD62B80D7, 0 },
+ { 0xD578E97D, 0 }, { 0xD4C77B04, 0 },
+ { 0xD417328A, 0 }, { 0xD3680D37, 0 },
+ { 0xD2BA083C, 0 }, { 0xD20D20D2, 0xD20D20D2 },
+ { 0xD161543E, 0xD161543E }, { 0xD0B69FCC, 0 },
+ { 0xD00D00D0, 0xD00D00D0 }, { 0xCF6474A9, 0 },
+ { 0xCEBCF8BC, 0 }, { 0xCE168A77, 0xCE168A77 },
+ { 0xCD712753, 0 }, { 0xCCCCCCCD, 0 },
+ { 0xCC29786D, 0 }, { 0xCB8727C1, 0 },
+ { 0xCAE5D85F, 0xCAE5D85F }, { 0xCA4587E7, 0 },
+ { 0xC9A633FD, 0 }, { 0xC907DA4F, 0 },
+ { 0xC86A7890, 0xC86A7890 }, { 0xC7CE0C7D, 0 },
+ { 0xC73293D8, 0 }, { 0xC6980C6A, 0 },
+ { 0xC5FE7403, 0xC5FE7403 }, { 0xC565C87C, 0 },
+ { 0xC4CE07B0, 0xC4CE07B0 }, { 0xC4372F86, 0 },
+ { 0xC3A13DE6, 0xC3A13DE6 }, { 0xC30C30C3, 0xC30C30C3 },
+ { 0xC2780614, 0 }, { 0xC1E4BBD6, 0 },
+ { 0xC152500C, 0xC152500C }, { 0xC0C0C0C1, 0 },
+ { 0xC0300C03, 0xC0300C03 }, { 0xBFA02FE8, 0xBFA02FE8 },
+ { 0xBF112A8B, 0 }, { 0xBE82FA0C, 0 },
+ { 0xBDF59C92, 0 }, { 0xBD691047, 0xBD691047 },
+ { 0xBCDD535E, 0 }, { 0xBC52640C, 0 },
+ { 0xBBC8408D, 0 }, { 0xBB3EE722, 0 },
+ { 0xBAB65610, 0xBAB65610 }, { 0xBA2E8BA3, 0 },
+ { 0xB9A7862A, 0xB9A7862A }, { 0xB92143FA, 0xB92143FA },
+ { 0xB89BC36D, 0 }, { 0xB81702E1, 0 },
+ { 0xB79300B8, 0 }, { 0xB70FBB5A, 0xB70FBB5A },
+ { 0xB68D3134, 0xB68D3134 }, { 0xB60B60B6, 0xB60B60B6 },
+ { 0xB58A4855, 0xB58A4855 }, { 0xB509E68B, 0 },
+ { 0xB48A39D4, 0xB48A39D4 }, { 0xB40B40B4, 0xB40B40B4 },
+ { 0xB38CF9B0, 0xB38CF9B0 }, { 0xB30F6353, 0 },
+ { 0xB2927C2A, 0 }, { 0xB21642C9, 0 },
+ { 0xB19AB5C5, 0 }, { 0xB11FD3B8, 0xB11FD3B8 },
+ { 0xB0A59B42, 0 }, { 0xB02C0B03, 0 },
+ { 0xAFB321A1, 0xAFB321A1 }, { 0xAF3ADDC7, 0 },
+ { 0xAEC33E20, 0 }, { 0xAE4C415D, 0 },
+ { 0xADD5E632, 0xADD5E632 }, { 0xAD602B58, 0xAD602B58 },
+ { 0xACEB0F89, 0xACEB0F89 }, { 0xAC769184, 0xAC769184 },
+ { 0xAC02B00B, 0 }, { 0xAB8F69E3, 0 },
+ { 0xAB1CBDD4, 0 }, { 0xAAAAAAAB, 0 },
+ { 0xAA392F36, 0 }, { 0xA9C84A48, 0 },
+ { 0xA957FAB5, 0xA957FAB5 }, { 0xA8E83F57, 0xA8E83F57 },
+ { 0xA8791709, 0 }, { 0xA80A80A8, 0xA80A80A8 },
+ { 0xA79C7B17, 0 }, { 0xA72F053A, 0 },
+ { 0xA6C21DF7, 0 }, { 0xA655C439, 0xA655C439 },
+ { 0xA5E9F6ED, 0xA5E9F6ED }, { 0xA57EB503, 0 },
+ { 0xA513FD6C, 0 }, { 0xA4A9CF1E, 0 },
+ { 0xA4402910, 0xA4402910 }, { 0xA3D70A3E, 0 },
+ { 0xA36E71A3, 0 }, { 0xA3065E40, 0 },
+ { 0xA29ECF16, 0xA29ECF16 }, { 0xA237C32B, 0xA237C32B },
+ { 0xA1D13986, 0 }, { 0xA16B312F, 0 },
+ { 0xA105A933, 0 }, { 0xA0A0A0A1, 0 },
+ { 0xA03C1689, 0 }, { 0x9FD809FE, 0 },
+ { 0x9F747A15, 0x9F747A15 }, { 0x9F1165E7, 0x9F1165E7 },
+ { 0x9EAECC8D, 0x9EAECC8D }, { 0x9E4CAD24, 0 },
+ { 0x9DEB06C9, 0x9DEB06C9 }, { 0x9D89D89E, 0 },
+ { 0x9D2921C4, 0 }, { 0x9CC8E161, 0 },
+ { 0x9C69169B, 0x9C69169B }, { 0x9C09C09C, 0x9C09C09C },
+ { 0x9BAADE8E, 0x9BAADE8E }, { 0x9B4C6F9F, 0 },
+ { 0x9AEE72FD, 0 }, { 0x9A90E7D9, 0x9A90E7D9 },
+ { 0x9A33CD67, 0x9A33CD67 }, { 0x99D722DB, 0 },
+ { 0x997AE76B, 0x997AE76B }, { 0x991F1A51, 0x991F1A51 },
+ { 0x98C3BAC7, 0x98C3BAC7 }, { 0x9868C80A, 0 },
+ { 0x980E4156, 0x980E4156 }, { 0x97B425ED, 0x97B425ED },
+ { 0x975A7510, 0 }, { 0x97012E02, 0x97012E02 },
+ { 0x96A8500A, 0 }, { 0x964FDA6C, 0x964FDA6C },
+ { 0x95F7CC73, 0 }, { 0x95A02568, 0x95A02568 },
+ { 0x9548E498, 0 }, { 0x94F2094F, 0x94F2094F },
+ { 0x949B92DE, 0 }, { 0x94458094, 0x94458094 },
+ { 0x93EFD1C5, 0x93EFD1C5 }, { 0x939A85C4, 0x939A85C4 },
+ { 0x93459BE7, 0 }, { 0x92F11384, 0x92F11384 },
+ { 0x929CEBF5, 0 }, { 0x92492492, 0x92492492 },
+ { 0x91F5BCB9, 0 }, { 0x91A2B3C5, 0 },
+ { 0x91500915, 0x91500915 }, { 0x90FDBC09, 0x90FDBC09 },
+ { 0x90ABCC02, 0x90ABCC02 }, { 0x905A3863, 0x905A3863 },
+ { 0x90090090, 0x90090090 }, { 0x8FB823EE, 0x8FB823EE },
+ { 0x8F67A1E4, 0 }, { 0x8F1779DA, 0 },
+ { 0x8EC7AB3A, 0 }, { 0x8E78356D, 0x8E78356D },
+ { 0x8E2917E1, 0 }, { 0x8DDA5202, 0x8DDA5202 },
+ { 0x8D8BE340, 0 }, { 0x8D3DCB09, 0 },
+ { 0x8CF008CF, 0x8CF008CF }, { 0x8CA29C04, 0x8CA29C04 },
+ { 0x8C55841D, 0 }, { 0x8C08C08C, 0x8C08C08C },
+ { 0x8BBC50C9, 0 }, { 0x8B70344A, 0x8B70344A },
+ { 0x8B246A88, 0 }, { 0x8AD8F2FC, 0 },
+ { 0x8A8DCD20, 0 }, { 0x8A42F870, 0x8A42F870 },
+ { 0x89F8746A, 0 }, { 0x89AE408A, 0 },
+ { 0x89645C4F, 0x89645C4F }, { 0x891AC73B, 0 },
+ { 0x88D180CD, 0x88D180CD }, { 0x88888889, 0 },
+ { 0x883FDDF0, 0x883FDDF0 }, { 0x87F78088, 0 },
+ { 0x87AF6FD6, 0 }, { 0x8767AB5F, 0x8767AB5F },
+ { 0x872032AC, 0x872032AC }, { 0x86D90545, 0 },
+ { 0x869222B2, 0 }, { 0x864B8A7E, 0 },
+ { 0x86053C34, 0x86053C34 }, { 0x85BF3761, 0x85BF3761 },
+ { 0x85797B91, 0x85797B91 }, { 0x85340853, 0x85340853 },
+ { 0x84EEDD36, 0 }, { 0x84A9F9C8, 0x84A9F9C8 },
+ { 0x84655D9C, 0 }, { 0x84210842, 0x84210842 },
+ { 0x83DCF94E, 0 }, { 0x83993052, 0x83993052 },
+ { 0x8355ACE4, 0 }, { 0x83126E98, 0 },
+ { 0x82CF7504, 0 }, { 0x828CBFBF, 0 },
+ { 0x824A4E61, 0 }, { 0x82082082, 0x82082082 },
+ { 0x81C635BC, 0x81C635BC }, { 0x81848DA9, 0 },
+ { 0x814327E4, 0 }, { 0x81020408, 0x81020408 },
+ { 0x80C121B3, 0 }, { 0x80808081, 0 },
+ { 0x80402010, 0x80402010 }, { 0xFFFFFFFF, 0xFFFFFFFF },
+ { 0xFF803FE1, 0 }, { 0xFF00FF01, 0 },
+ { 0xFE823CA6, 0 }, { 0xFE03F810, 0 },
+ { 0xFD863087, 0 }, { 0xFD08E551, 0 },
+ { 0xFC8C15B5, 0 }, { 0xFC0FC0FD, 0 },
+ { 0xFB93E673, 0 }, { 0xFB188566, 0 },
+ { 0xFA9D9D20, 0 }, { 0xFA232CF3, 0 },
+ { 0xF9A9342D, 0 }, { 0xF92FB222, 0 },
+ { 0xF8B6A622, 0xF8B6A622 }, { 0xF83E0F84, 0 },
+ { 0xF7C5ED9D, 0 }, { 0xF74E3FC3, 0 },
+ { 0xF6D7054E, 0 }, { 0xF6603D99, 0 },
+ { 0xF5E9E7FD, 0 }, { 0xF57403D6, 0 },
+ { 0xF4FE9083, 0 }, { 0xF4898D60, 0 },
+ { 0xF414F9CE, 0 }, { 0xF3A0D52D, 0 },
+ { 0xF32D1EE0, 0 }, { 0xF2B9D649, 0 },
+ { 0xF246FACC, 0 }, { 0xF1D48BCF, 0 },
+ { 0xF16288B9, 0 }, { 0xF0F0F0F1, 0 },
+ { 0xF07FC3E0, 0xF07FC3E0 }, { 0xF00F00F0, 0xF00F00F0 },
+ { 0xEF9EA78C, 0 }, { 0xEF2EB720, 0 },
+ { 0xEEBF2F19, 0 }, { 0xEE500EE5, 0xEE500EE5 },
+ { 0xEDE155F4, 0 }, { 0xED7303B6, 0 },
+ { 0xED05179C, 0xED05179C }, { 0xEC979119, 0 },
+ { 0xEC2A6FA0, 0xEC2A6FA0 }, { 0xEBBDB2A6, 0 },
+ { 0xEB5159A0, 0 }, { 0xEAE56404, 0 },
+ { 0xEA79D14A, 0 }, { 0xEA0EA0EA, 0xEA0EA0EA },
+ { 0xE9A3D25E, 0xE9A3D25E }, { 0xE9396520, 0 },
+ { 0xE8CF58AB, 0 }, { 0xE865AC7C, 0 },
+ { 0xE7FC600F, 0 }, { 0xE79372E3, 0 },
+ { 0xE72AE476, 0 }, { 0xE6C2B449, 0 },
+ { 0xE65AE1DC, 0 }, { 0xE5F36CB0, 0xE5F36CB0 },
+ { 0xE58C544A, 0 }, { 0xE525982B, 0 },
+ { 0xE4BF37D9, 0 }, { 0xE45932D8, 0 },
+ { 0xE3F388AF, 0 }, { 0xE38E38E4, 0 },
+ { 0xE32942FF, 0 }, { 0xE2C4A689, 0 },
+ { 0xE260630B, 0 }, { 0xE1FC780F, 0 },
+ { 0xE198E520, 0 }, { 0xE135A9CA, 0 },
+ { 0xE0D2C59A, 0 }, { 0xE070381C, 0xE070381C },
+ { 0xE00E00E0, 0xE00E00E0 }, { 0xDFAC1F75, 0 },
+ { 0xDF4A9369, 0 }, { 0xDEE95C4D, 0 },
+ { 0xDE8879B3, 0 }, { 0xDE27EB2D, 0 },
+ { 0xDDC7B04D, 0 }, { 0xDD67C8A6, 0xDD67C8A6 },
+ { 0xDD0833CE, 0 }, { 0xDCA8F159, 0 },
+ { 0xDC4A00DD, 0 }, { 0xDBEB61EF, 0 },
+ { 0xDB8D1428, 0 }, { 0xDB2F171E, 0 },
+ { 0xDAD16A6B, 0 }, { 0xDA740DA8, 0 },
+ { 0xDA17006D, 0xDA17006D }, { 0xD9BA4257, 0 },
+ { 0xD95DD300, 0 }, { 0xD901B204, 0 },
+ { 0xD8A5DEFF, 0 }, { 0xD84A598F, 0 },
+ { 0xD7EF2152, 0 }, { 0xD79435E5, 0xD79435E5 },
+ { 0xD73996E9, 0 }, { 0xD6DF43FD, 0 },
+ { 0xD6853CC1, 0 }, { 0xD62B80D7, 0 },
+ { 0xD5D20FDF, 0 }, { 0xD578E97D, 0 },
+ { 0xD5200D52, 0xD5200D52 }, { 0xD4C77B04, 0 },
+ { 0xD46F3235, 0 }, { 0xD417328A, 0 },
+ { 0xD3BF7BA9, 0 }, { 0xD3680D37, 0 },
+ { 0xD310E6DB, 0 }, { 0xD2BA083C, 0 },
+ { 0xD2637101, 0 }, { 0xD20D20D2, 0xD20D20D2 },
+ { 0xD1B71759, 0 }, { 0xD161543E, 0xD161543E },
+ { 0xD10BD72C, 0 }, { 0xD0B69FCC, 0 },
+ { 0xD061ADCA, 0 }, { 0xD00D00D0, 0xD00D00D0 },
+ { 0xCFB8988C, 0 }, { 0xCF6474A9, 0 },
+ { 0xCF1094D4, 0 }, { 0xCEBCF8BC, 0 },
+ { 0xCE69A00D, 0 }, { 0xCE168A77, 0xCE168A77 },
+ { 0xCDC3B7A9, 0xCDC3B7A9 }, { 0xCD712753, 0 },
+ { 0xCD1ED924, 0 }, { 0xCCCCCCCD, 0 },
+ { 0xCC7B0200, 0 }, { 0xCC29786D, 0 },
+ { 0xCBD82FC7, 0 }, { 0xCB8727C1, 0 },
+ { 0xCB36600D, 0 }, { 0xCAE5D85F, 0xCAE5D85F },
+ { 0xCA95906C, 0 }, { 0xCA4587E7, 0 },
+ { 0xC9F5BE86, 0 }, { 0xC9A633FD, 0 },
+ { 0xC956E803, 0xC956E803 }, { 0xC907DA4F, 0 },
+ { 0xC8B90A96, 0 }, { 0xC86A7890, 0xC86A7890 },
+ { 0xC81C23F5, 0xC81C23F5 }, { 0xC7CE0C7D, 0 },
+ { 0xC78031E0, 0xC78031E0 }, { 0xC73293D8, 0 },
+ { 0xC6E5321D, 0 }, { 0xC6980C6A, 0 },
+ { 0xC64B2278, 0xC64B2278 }, { 0xC5FE7403, 0xC5FE7403 },
+ { 0xC5B200C6, 0 }, { 0xC565C87C, 0 },
+ { 0xC519CAE0, 0xC519CAE0 }, { 0xC4CE07B0, 0xC4CE07B0 },
+ { 0xC4827EA8, 0xC4827EA8 }, { 0xC4372F86, 0 },
+ { 0xC3EC1A06, 0 }, { 0xC3A13DE6, 0xC3A13DE6 },
+ { 0xC3569AE6, 0 }, { 0xC30C30C3, 0xC30C30C3 },
+ { 0xC2C1FF3E, 0 }, { 0xC2780614, 0 },
+ { 0xC22E4507, 0 }, { 0xC1E4BBD6, 0 },
+ { 0xC19B6A42, 0 }, { 0xC152500C, 0xC152500C },
+ { 0xC1096CF6, 0 }, { 0xC0C0C0C1, 0 },
+ { 0xC0784B2F, 0 }, { 0xC0300C03, 0xC0300C03 },
+ { 0xBFE80300, 0 }, { 0xBFA02FE8, 0xBFA02FE8 },
+ { 0xBF589280, 0 }, { 0xBF112A8B, 0 },
+ { 0xBEC9F7CE, 0 }, { 0xBE82FA0C, 0 },
+ { 0xBE3C310C, 0 }, { 0xBDF59C92, 0 },
+ { 0xBDAF3C64, 0 }, { 0xBD691047, 0xBD691047 },
+ { 0xBD231803, 0 }, { 0xBCDD535E, 0 },
+ { 0xBC97C21E, 0xBC97C21E }, { 0xBC52640C, 0 },
+ { 0xBC0D38EE, 0xBC0D38EE }, { 0xBBC8408D, 0 },
+ { 0xBB837AB1, 0 }, { 0xBB3EE722, 0 },
+ { 0xBAFA85A9, 0xBAFA85A9 }, { 0xBAB65610, 0xBAB65610 },
+ { 0xBA725820, 0xBA725820 }, { 0xBA2E8BA3, 0 },
+ { 0xB9EAF063, 0 }, { 0xB9A7862A, 0xB9A7862A },
+ { 0xB9644CC4, 0 }, { 0xB92143FA, 0xB92143FA },
+ { 0xB8DE6B9A, 0 }, { 0xB89BC36D, 0 },
+ { 0xB8594B41, 0 }, { 0xB81702E1, 0 },
+ { 0xB7D4EA19, 0xB7D4EA19 }, { 0xB79300B8, 0 },
+ { 0xB7514689, 0 }, { 0xB70FBB5A, 0xB70FBB5A },
+ { 0xB6CE5EF9, 0xB6CE5EF9 }, { 0xB68D3134, 0xB68D3134 },
+ { 0xB64C31D9, 0 }, { 0xB60B60B6, 0xB60B60B6 },
+ { 0xB5CABD9B, 0 }, { 0xB58A4855, 0xB58A4855 },
+ { 0xB54A00B5, 0xB54A00B5 }, { 0xB509E68B, 0 },
+ { 0xB4C9F9A5, 0 }, { 0xB48A39D4, 0xB48A39D4 },
+ { 0xB44AA6E9, 0xB44AA6E9 }, { 0xB40B40B4, 0xB40B40B4 },
+ { 0xB3CC0706, 0 }, { 0xB38CF9B0, 0xB38CF9B0 },
+ { 0xB34E1884, 0 }, { 0xB30F6353, 0 },
+ { 0xB2D0D9EF, 0 }, { 0xB2927C2A, 0 },
+ { 0xB25449D7, 0 }, { 0xB21642C9, 0 },
+ { 0xB1D866D1, 0xB1D866D1 }, { 0xB19AB5C5, 0 },
+ { 0xB15D2F76, 0 }, { 0xB11FD3B8, 0xB11FD3B8 },
+ { 0xB0E2A260, 0xB0E2A260 }, { 0xB0A59B42, 0 },
+ { 0xB068BE31, 0 }, { 0xB02C0B03, 0 },
+ { 0xAFEF818C, 0 }, { 0xAFB321A1, 0xAFB321A1 },
+ { 0xAF76EB19, 0 }, { 0xAF3ADDC7, 0 },
+ { 0xAEFEF982, 0 }, { 0xAEC33E20, 0 },
+ { 0xAE87AB76, 0xAE87AB76 }, { 0xAE4C415D, 0 },
+ { 0xAE10FFA9, 0 }, { 0xADD5E632, 0xADD5E632 },
+ { 0xAD9AF4D0, 0 }, { 0xAD602B58, 0xAD602B58 },
+ { 0xAD2589A4, 0 }, { 0xACEB0F89, 0xACEB0F89 },
+ { 0xACB0BCE1, 0xACB0BCE1 }, { 0xAC769184, 0xAC769184 },
+ { 0xAC3C8D4A, 0 }, { 0xAC02B00B, 0 },
+ { 0xABC8F9A0, 0xABC8F9A0 }, { 0xAB8F69E3, 0 },
+ { 0xAB5600AC, 0 }, { 0xAB1CBDD4, 0 },
+ { 0xAAE3A136, 0 }, { 0xAAAAAAAB, 0 },
+ { 0xAA71DA0D, 0 }, { 0xAA392F36, 0 },
+ { 0xAA00AA01, 0 }, { 0xA9C84A48, 0 },
+ { 0xA9900FE6, 0 }, { 0xA957FAB5, 0xA957FAB5 },
+ { 0xA9200A92, 0xA9200A92 }, { 0xA8E83F57, 0xA8E83F57 },
+ { 0xA8B098E0, 0xA8B098E0 }, { 0xA8791709, 0 },
+ { 0xA841B9AD, 0 }, { 0xA80A80A8, 0xA80A80A8 },
+ { 0xA7D36BD8, 0 }, { 0xA79C7B17, 0 },
+ { 0xA765AE44, 0 }, { 0xA72F053A, 0 },
+ { 0xA6F87FD6, 0xA6F87FD6 }, { 0xA6C21DF7, 0 },
+ { 0xA68BDF79, 0 }, { 0xA655C439, 0xA655C439 },
+ { 0xA61FCC16, 0xA61FCC16 }, { 0xA5E9F6ED, 0xA5E9F6ED },
+ { 0xA5B4449D, 0 }, { 0xA57EB503, 0 },
+ { 0xA54947FE, 0 }, { 0xA513FD6C, 0 },
+ { 0xA4DED52C, 0xA4DED52C }, { 0xA4A9CF1E, 0 },
+ { 0xA474EB1F, 0xA474EB1F }, { 0xA4402910, 0xA4402910 },
+ { 0xA40B88D0, 0 }, { 0xA3D70A3E, 0 },
+ { 0xA3A2AD39, 0xA3A2AD39 }, { 0xA36E71A3, 0 },
+ { 0xA33A575A, 0xA33A575A }, { 0xA3065E40, 0 },
+ { 0xA2D28634, 0 }, { 0xA29ECF16, 0xA29ECF16 },
+ { 0xA26B38C9, 0 }, { 0xA237C32B, 0xA237C32B },
+ { 0xA2046E1F, 0xA2046E1F }, { 0xA1D13986, 0 },
+ { 0xA19E2540, 0 }, { 0xA16B312F, 0 },
+ { 0xA1385D35, 0 }, { 0xA105A933, 0 },
+ { 0xA0D3150C, 0 }, { 0xA0A0A0A1, 0 },
+ { 0xA06E4BD4, 0xA06E4BD4 }, { 0xA03C1689, 0 },
+ { 0xA00A00A0, 0xA00A00A0 }, { 0x9FD809FE, 0 },
+ { 0x9FA63284, 0 }, { 0x9F747A15, 0x9F747A15 },
+ { 0x9F42E095, 0x9F42E095 }, { 0x9F1165E7, 0x9F1165E7 },
+ { 0x9EE009EE, 0x9EE009EE }, { 0x9EAECC8D, 0x9EAECC8D },
+ { 0x9E7DADA9, 0 }, { 0x9E4CAD24, 0 },
+ { 0x9E1BCAE3, 0 }, { 0x9DEB06C9, 0x9DEB06C9 },
+ { 0x9DBA60BB, 0x9DBA60BB }, { 0x9D89D89E, 0 },
+ { 0x9D596E54, 0x9D596E54 }, { 0x9D2921C4, 0 },
+ { 0x9CF8F2D1, 0x9CF8F2D1 }, { 0x9CC8E161, 0 },
+ { 0x9C98ED58, 0 }, { 0x9C69169B, 0x9C69169B },
+ { 0x9C395D10, 0x9C395D10 }, { 0x9C09C09C, 0x9C09C09C },
+ { 0x9BDA4124, 0x9BDA4124 }, { 0x9BAADE8E, 0x9BAADE8E },
+ { 0x9B7B98C0, 0 }, { 0x9B4C6F9F, 0 },
+ { 0x9B1D6311, 0x9B1D6311 }, { 0x9AEE72FD, 0 },
+ { 0x9ABF9F48, 0x9ABF9F48 }, { 0x9A90E7D9, 0x9A90E7D9 },
+ { 0x9A624C97, 0 }, { 0x9A33CD67, 0x9A33CD67 },
+ { 0x9A056A31, 0 }, { 0x99D722DB, 0 },
+ { 0x99A8F74C, 0 }, { 0x997AE76B, 0x997AE76B },
+ { 0x994CF320, 0x994CF320 }, { 0x991F1A51, 0x991F1A51 },
+ { 0x98F15CE7, 0 }, { 0x98C3BAC7, 0x98C3BAC7 },
+ { 0x989633DB, 0x989633DB }, { 0x9868C80A, 0 },
+ { 0x983B773B, 0 }, { 0x980E4156, 0x980E4156 },
+ { 0x97E12644, 0x97E12644 }, { 0x97B425ED, 0x97B425ED },
+ { 0x97874039, 0 }, { 0x975A7510, 0 },
+ { 0x972DC45B, 0 }, { 0x97012E02, 0x97012E02 },
+ { 0x96D4B1EF, 0 }, { 0x96A8500A, 0 },
+ { 0x967C083B, 0 }, { 0x964FDA6C, 0x964FDA6C },
+ { 0x9623C686, 0x9623C686 }, { 0x95F7CC73, 0 },
+ { 0x95CBEC1B, 0 }, { 0x95A02568, 0x95A02568 },
+ { 0x95747844, 0 }, { 0x9548E498, 0 },
+ { 0x951D6A4E, 0 }, { 0x94F2094F, 0x94F2094F },
+ { 0x94C6C187, 0 }, { 0x949B92DE, 0 },
+ { 0x94707D3F, 0 }, { 0x94458094, 0x94458094 },
+ { 0x941A9CC8, 0x941A9CC8 }, { 0x93EFD1C5, 0x93EFD1C5 },
+ { 0x93C51F76, 0 }, { 0x939A85C4, 0x939A85C4 },
+ { 0x9370049C, 0 }, { 0x93459BE7, 0 },
+ { 0x931B4B91, 0 }, { 0x92F11384, 0x92F11384 },
+ { 0x92C6F3AC, 0x92C6F3AC }, { 0x929CEBF5, 0 },
+ { 0x9272FC48, 0x9272FC48 }, { 0x92492492, 0x92492492 },
+ { 0x921F64BF, 0 }, { 0x91F5BCB9, 0 },
+ { 0x91CC2C6C, 0x91CC2C6C }, { 0x91A2B3C5, 0 },
+ { 0x917952AF, 0 }, { 0x91500915, 0x91500915 },
+ { 0x9126D6E5, 0 }, { 0x90FDBC09, 0x90FDBC09 },
+ { 0x90D4B86F, 0 }, { 0x90ABCC02, 0x90ABCC02 },
+ { 0x9082F6B0, 0 }, { 0x905A3863, 0x905A3863 },
+ { 0x9031910A, 0 }, { 0x90090090, 0x90090090 },
+ { 0x8FE086E3, 0 }, { 0x8FB823EE, 0x8FB823EE },
+ { 0x8F8FD7A0, 0 }, { 0x8F67A1E4, 0 },
+ { 0x8F3F82A8, 0x8F3F82A8 }, { 0x8F1779DA, 0 },
+ { 0x8EEF8766, 0 }, { 0x8EC7AB3A, 0 },
+ { 0x8E9FE542, 0x8E9FE542 }, { 0x8E78356D, 0x8E78356D },
+ { 0x8E509BA8, 0x8E509BA8 }, { 0x8E2917E1, 0 },
+ { 0x8E01AA05, 0 }, { 0x8DDA5202, 0x8DDA5202 },
+ { 0x8DB30FC6, 0x8DB30FC6 }, { 0x8D8BE340, 0 },
+ { 0x8D64CC5C, 0 }, { 0x8D3DCB09, 0 },
+ { 0x8D16DF35, 0x8D16DF35 }, { 0x8CF008CF, 0x8CF008CF },
+ { 0x8CC947C5, 0 }, { 0x8CA29C04, 0x8CA29C04 },
+ { 0x8C7C057D, 0 }, { 0x8C55841D, 0 },
+ { 0x8C2F17D2, 0x8C2F17D2 }, { 0x8C08C08C, 0x8C08C08C },
+ { 0x8BE27E39, 0x8BE27E39 }, { 0x8BBC50C9, 0 },
+ { 0x8B963829, 0x8B963829 }, { 0x8B70344A, 0x8B70344A },
+ { 0x8B4A451A, 0 }, { 0x8B246A88, 0 },
+ { 0x8AFEA483, 0x8AFEA483 }, { 0x8AD8F2FC, 0 },
+ { 0x8AB355E0, 0x8AB355E0 }, { 0x8A8DCD20, 0 },
+ { 0x8A6858AB, 0 }, { 0x8A42F870, 0x8A42F870 },
+ { 0x8A1DAC60, 0x8A1DAC60 }, { 0x89F8746A, 0 },
+ { 0x89D3507D, 0 }, { 0x89AE408A, 0 },
+ { 0x89894480, 0 }, { 0x89645C4F, 0x89645C4F },
+ { 0x893F87E8, 0x893F87E8 }, { 0x891AC73B, 0 },
+ { 0x88F61A37, 0x88F61A37 }, { 0x88D180CD, 0x88D180CD },
+ { 0x88ACFAEE, 0 }, { 0x88888889, 0 },
+ { 0x8864298F, 0 }, { 0x883FDDF0, 0x883FDDF0 },
+ { 0x881BA59E, 0 }, { 0x87F78088, 0 },
+ { 0x87D36EA0, 0 }, { 0x87AF6FD6, 0 },
+ { 0x878B841B, 0 }, { 0x8767AB5F, 0x8767AB5F },
+ { 0x8743E595, 0 }, { 0x872032AC, 0x872032AC },
+ { 0x86FC9296, 0x86FC9296 }, { 0x86D90545, 0 },
+ { 0x86B58AA8, 0 }, { 0x869222B2, 0 },
+ { 0x866ECD53, 0x866ECD53 }, { 0x864B8A7E, 0 },
+ { 0x86285A23, 0x86285A23 }, { 0x86053C34, 0x86053C34 },
+ { 0x85E230A3, 0x85E230A3 }, { 0x85BF3761, 0x85BF3761 },
+ { 0x859C5060, 0x859C5060 }, { 0x85797B91, 0x85797B91 },
+ { 0x8556B8E7, 0x8556B8E7 }, { 0x85340853, 0x85340853 },
+ { 0x851169C7, 0x851169C7 }, { 0x84EEDD36, 0 },
+ { 0x84CC6290, 0 }, { 0x84A9F9C8, 0x84A9F9C8 },
+ { 0x8487A2D1, 0 }, { 0x84655D9C, 0 },
+ { 0x84432A1B, 0x84432A1B }, { 0x84210842, 0x84210842 },
+ { 0x83FEF802, 0x83FEF802 }, { 0x83DCF94E, 0 },
+ { 0x83BB0C18, 0 }, { 0x83993052, 0x83993052 },
+ { 0x837765F0, 0x837765F0 }, { 0x8355ACE4, 0 },
+ { 0x83340520, 0x83340520 }, { 0x83126E98, 0 },
+ { 0x82F0E93D, 0x82F0E93D }, { 0x82CF7504, 0 },
+ { 0x82AE11DE, 0 }, { 0x828CBFBF, 0 },
+ { 0x826B7E99, 0x826B7E99 }, { 0x824A4E61, 0 },
+ { 0x82292F08, 0 }, { 0x82082082, 0x82082082 },
+ { 0x81E722C2, 0x81E722C2 }, { 0x81C635BC, 0x81C635BC },
+ { 0x81A55963, 0 }, { 0x81848DA9, 0 },
+ { 0x8163D283, 0 }, { 0x814327E4, 0 },
+ { 0x81228DBF, 0 }, { 0x81020408, 0x81020408 },
+ { 0x80E18AB3, 0 }, { 0x80C121B3, 0 },
+ { 0x80A0C8FB, 0x80A0C8FB }, { 0x80808081, 0 },
+ { 0x80604836, 0x80604836 }, { 0x80402010, 0x80402010 },
+ { 0x80200802, 0x80200802 }, { 0xFFFFFFFF, 0xFFFFFFFF }
+};
diff --git a/av1/common/odintrin.h b/av1/common/odintrin.h
new file mode 100644
index 0000000..87b1a36
--- /dev/null
+++ b/av1/common/odintrin.h
@@ -0,0 +1,48 @@
+#ifndef VP10_COMMON_ODINTRIN_H_
+#define VP10_COMMON_ODINTRIN_H_
+
+#include "av1/common/enums.h"
+#include "aom/vpx_integer.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_ports/bitops.h"
+
+/*Smallest blocks are 4x4, i.e. the log2 of the smallest block side is 2.*/
+#define OD_LOG_BSIZE0 (2)
+/*There are 5 block sizes total (4x4, 8x8, 16x16, 32x32 and 64x64).*/
+#define OD_NBSIZES (5)
+/*The log of the maximum length of the side of a block.*/
+#define OD_LOG_BSIZE_MAX (OD_LOG_BSIZE0 + OD_NBSIZES - 1)
+/*The maximum length of the side of a block.*/
+#define OD_BSIZE_MAX (1 << OD_LOG_BSIZE_MAX)
+
+typedef int od_coeff;
+
+typedef int16_t od_dering_in;
+
+#define OD_DIVU_DMAX (1024) /*Number of rows in OD_DIVU_SMALL_CONSTS.*/
+
+extern uint32_t OD_DIVU_SMALL_CONSTS[OD_DIVU_DMAX][2];
+
+/*Unsigned _x / _d by table multiply; row _d - 1 is read, so _d must be >= 1.*/
+#define OD_DIVU_SMALL(_x, _d) \
+ ((uint32_t)((OD_DIVU_SMALL_CONSTS[(_d)-1][0] * (uint64_t)(_x) + \
+ OD_DIVU_SMALL_CONSTS[(_d)-1][1]) >> \
+ 32) >> \
+ (OD_ILOG(_d) - 1))
+
+#define OD_DIVU(_x, _d) \
+ (((_d) < OD_DIVU_DMAX) ? (OD_DIVU_SMALL((_x), (_d))) : ((_x) / (_d)))
+
+#define OD_MINI VPXMIN
+#define OD_CLAMPI(min, val, max) clamp((val), (min), (max))
+
+#define OD_CLZ0 (1)
+#define OD_CLZ(x) (-get_msb(x))
+#define OD_ILOG_NZ(x) (OD_CLZ0 - OD_CLZ(x)) /*Expands to 1 + get_msb(x).*/
+/*Note that __builtin_clz is not defined when x == 0, according to the gcc
+ documentation (and that of the x86 BSR instruction that implements it).
+ OD_CLZ() here wraps get_msb(), presumably with the same limit (TODO confirm),
+ so we define a special version of the macro to use when x can be zero.*/
+#define OD_ILOG(x) ((x) ? OD_ILOG_NZ(x) : 0)
+
+#endif
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
new file mode 100644
index 0000000..55a8112
--- /dev/null
+++ b/av1/common/onyxc_int.h
@@ -0,0 +1,740 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_ONYXC_INT_H_
+#define VP10_COMMON_ONYXC_INT_H_
+
+#include "./vpx_config.h"
+#include "aom/internal/vpx_codec_internal.h"
+#include "aom_util/vpx_thread.h"
+#include "./vp10_rtcd.h"
+#include "av1/common/alloccommon.h"
+#include "av1/common/loopfilter.h"
+#include "av1/common/entropymv.h"
+#include "av1/common/entropy.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/mv.h"
+#include "av1/common/frame_buffers.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/tile_common.h"
+#include "av1/common/restoration.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define REF_FRAMES_LOG2 3
+#define REF_FRAMES (1 << REF_FRAMES_LOG2)
+
+// 4 scratch frames for the new frames to support a maximum of 4 cores decoding
+// in parallel, 3 for scaled references on the encoder.
+// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number
+// of framebuffers.
+// TODO(jkoleszar): These 3 extra references could probably come from the
+// normal reference pool.
+#define FRAME_BUFFERS (REF_FRAMES + 7)  // 7 = 4 scratch + 3 scaled (see above)
+
+#if CONFIG_EXT_REFS
+#define FRAME_CONTEXTS_LOG2 3
+#else
+#define FRAME_CONTEXTS_LOG2 2
+#endif  // CONFIG_EXT_REFS
+
+#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2)
+
+#define NUM_PING_PONG_BUFFERS 2  // Length of VP10Common::seg_map_array.
+
+typedef enum {  // Type of VP10Common::reference_mode.
+ SINGLE_REFERENCE = 0,
+ COMPOUND_REFERENCE = 1,
+ REFERENCE_MODE_SELECT = 2,
+ REFERENCE_MODES = 3,  // Equals the number of modes listed above.
+} REFERENCE_MODE;
+
+typedef enum {  // Type of VP10Common::reset_frame_context.
+ RESET_FRAME_CONTEXT_NONE = 0,
+ RESET_FRAME_CONTEXT_CURRENT = 1,
+ RESET_FRAME_CONTEXT_ALL = 2,
+} RESET_FRAME_CONTEXT_MODE;
+
+typedef enum {  // Type of VP10Common::refresh_frame_context.
+ /**
+ * Update frame context to values resulting from forward probability
+ * updates signaled in the frame header
+ */
+ REFRESH_FRAME_CONTEXT_FORWARD,
+ /**
+ * Update frame context to values resulting from backward probability
+ * updates based on entropy/counts in the decoded frame
+ */
+ REFRESH_FRAME_CONTEXT_BACKWARD,
+} REFRESH_FRAME_CONTEXT_MODE;
+
+typedef struct {  // Element type of RefCntBuffer::mvs.
+ int_mv mv[2];
+ MV_REFERENCE_FRAME ref_frame[2];
+} MV_REF;
+
+typedef struct {
+ int ref_count;  // 0 marks the buffer as free (see get_free_fb()).
+ MV_REF *mvs;
+ int mi_rows;
+ int mi_cols;
+ vpx_codec_frame_buffer_t raw_frame_buffer;
+ YV12_BUFFER_CONFIG buf;
+
+ // The following variables will only be used in frame parallel decode.
+
+ // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means
+ // that no FrameWorker owns, or is decoding, this buffer.
+ VPxWorker *frame_worker_owner;
+
+ // row and col indicate which position frame has been decoded to in real
+ // pixel unit. They are reset to -1 when decoding begins and set to INT_MAX
+ // when the frame is fully decoded.
+ int row;
+ int col;
+} RefCntBuffer;
+
+typedef struct BufferPool {
+// Protect BufferPool from being accessed by several FrameWorkers at
+// the same time during frame parallel decode.
+// TODO(hkuang): Try to use atomic variable instead of locking the whole pool.
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t pool_mutex;  // Taken by lock_buffer_pool().
+#endif
+
+ // Private data associated with the frame buffer callbacks.
+ void *cb_priv;
+
+ vpx_get_frame_buffer_cb_fn_t get_fb_cb;
+ vpx_release_frame_buffer_cb_fn_t release_fb_cb;
+
+ RefCntBuffer frame_bufs[FRAME_BUFFERS];  // Entry is free if ref_count == 0.
+
+ // Frame buffers allocated internally by the codec.
+ InternalFrameBufferList int_frame_buffers;
+} BufferPool;
+
+typedef struct VP10Common {
+ struct vpx_internal_error_info error;
+ vpx_color_space_t color_space;
+ int color_range;
+ int width;
+ int height;
+ int render_width;
+ int render_height;
+ int last_width;
+ int last_height;
+
+ // TODO(jkoleszar): this implies chroma ss right now, but could vary per
+ // plane. Revisit as part of the future change to YV12_BUFFER_CONFIG to
+ // support additional planes.
+ int subsampling_x;
+ int subsampling_y;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ // Marks if we need to use 16bit frame buffers (1: yes, 0: no).
+ int use_highbitdepth;
+#endif
+#if CONFIG_CLPF
+ int clpf;
+#endif
+
+ YV12_BUFFER_CONFIG *frame_to_show;
+ RefCntBuffer *prev_frame;
+
+ // TODO(hkuang): Combine this with cur_buf in macroblockd.
+ RefCntBuffer *cur_frame;
+
+ int ref_frame_map[REF_FRAMES]; /* reference slot -> fb_idx; < 0 if unset */
+
+ // Prepare ref_frame_map for the next frame.
+ // Only used in frame parallel decode.
+ int next_ref_frame_map[REF_FRAMES];
+
+ // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and
+ // roll new_fb_idx into it.
+
+ // Each Inter frame can reference INTER_REFS_PER_FRAME buffers
+ RefBuffer frame_refs[INTER_REFS_PER_FRAME];
+
+ int new_fb_idx;  // Index into buffer_pool->frame_bufs.
+
+ FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/
+#if CONFIG_EXT_REFS
+ // frame type of the frame before last frame
+ FRAME_TYPE last2_frame_type;
+ // TODO(zoeliu): To check whether last3_frame_type is still needed.
+ // frame type of the frame two frames before last frame
+ FRAME_TYPE last3_frame_type;
+#endif // CONFIG_EXT_REFS
+ FRAME_TYPE frame_type;
+
+ int show_frame;
+ int last_show_frame;
+ int show_existing_frame;
+#if CONFIG_EXT_REFS
+ // Flag for a frame used as a reference - not written to the bitstream
+ int is_reference_frame;
+#endif // CONFIG_EXT_REFS
+
+ // Flag signaling that the frame is encoded using only INTRA modes.
+ uint8_t intra_only;
+ uint8_t last_intra_only;
+
+ int allow_high_precision_mv;
+
+ int allow_screen_content_tools;
+
+ // Flag signaling which frame contexts should be reset to default values.
+ RESET_FRAME_CONTEXT_MODE reset_frame_context;
+
+ // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in
+ // MODE_INFO (8-pixel) units.
+ int MBs;
+ int mb_rows, mi_rows;
+ int mb_cols, mi_cols;
+ int mi_stride;  // Set to calc_mi_size(mi_cols) by vp10_set_mb_mi().
+
+ /* profile settings */
+ TX_MODE tx_mode;
+
+ int base_qindex;
+ int y_dc_delta_q;
+ int uv_dc_delta_q;
+ int uv_ac_delta_q;
+ int16_t y_dequant[MAX_SEGMENTS][2];
+ int16_t uv_dequant[MAX_SEGMENTS][2];
+
+#if CONFIG_AOM_QM
+ // Global quant matrix tables
+ qm_val_t *giqmatrix[NUM_QM_LEVELS][2][2][TX_SIZES];
+ qm_val_t *gqmatrix[NUM_QM_LEVELS][2][2][TX_SIZES];
+
+ // Local quant matrix tables for each frame
+ qm_val_t *y_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
+ qm_val_t *uv_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
+ // Encoder
+ qm_val_t *y_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
+ qm_val_t *uv_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
+
+ int using_qmatrix;
+ int min_qmlevel;
+ int max_qmlevel;
+#endif
+#if CONFIG_NEW_QUANT
+ dequant_val_type_nuq y_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS];
+ dequant_val_type_nuq uv_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS];
+#endif
+
+ /* We allocate a MODE_INFO struct for each macroblock, together with
+ an extra row on top and column on the left to simplify prediction. */
+ int mi_alloc_size;
+ MODE_INFO *mip; /* Base of allocated array */
+ MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
+
+ // TODO(agrange): Move prev_mi into encoder structure.
+ // prev_mip and prev_mi will only be allocated in encoder.
+ MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
+ MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
+
+ // Separate mi functions between encoder and decoder.
+ int (*alloc_mi)(struct VP10Common *cm, int mi_size);
+ void (*free_mi)(struct VP10Common *cm);
+ void (*setup_mi)(struct VP10Common *cm);
+
+ // Grid of pointers to 8x8 MODE_INFO structs. Any 8x8 not in the visible
+ // area will be NULL.
+ MODE_INFO **mi_grid_base;
+ MODE_INFO **mi_grid_visible;
+ MODE_INFO **prev_mi_grid_base;
+ MODE_INFO **prev_mi_grid_visible;
+
+ // Whether to use previous frame's motion vectors for prediction.
+ int use_prev_frame_mvs;
+
+ // Persistent mb segment id map used in prediction.
+ int seg_map_idx;
+ int prev_seg_map_idx;
+
+ uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS];
+ uint8_t *last_frame_seg_map;
+ uint8_t *current_frame_seg_map;
+ int seg_map_alloc_size;
+
+ INTERP_FILTER interp_filter;
+
+ loop_filter_info_n lf_info;
+#if CONFIG_LOOP_RESTORATION
+ RestorationInfo rst_info;
+ RestorationInternal rst_internal;
+#endif // CONFIG_LOOP_RESTORATION
+
+ // Flag signaling how frame contexts should be updated at the end of
+ // a frame decode
+ REFRESH_FRAME_CONTEXT_MODE refresh_frame_context;
+
+ int ref_frame_sign_bias[TOTAL_REFS_PER_FRAME]; /* Two state 0, 1 */
+
+ struct loopfilter lf;
+ struct segmentation seg;
+
+ int frame_parallel_decode; // frame-based threading.
+
+// Context probabilities for reference frame prediction
+#if CONFIG_EXT_REFS
+ MV_REFERENCE_FRAME comp_fwd_ref[FWD_REFS];
+ MV_REFERENCE_FRAME comp_bwd_ref[BWD_REFS];
+#else
+ MV_REFERENCE_FRAME comp_fixed_ref;
+ MV_REFERENCE_FRAME comp_var_ref[COMP_REFS];
+#endif // CONFIG_EXT_REFS
+ REFERENCE_MODE reference_mode;
+
+ FRAME_CONTEXT *fc; /* this frame entropy */
+ FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS
+ unsigned int frame_context_idx; /* Context to use/update */
+ FRAME_COUNTS counts;
+
+#if CONFIG_ENTROPY
+ // The initial probabilities for a frame, before any subframe backward update,
+ // and after forward update.
+ vp10_coeff_probs_model starting_coef_probs[TX_SIZES][PLANE_TYPES];
+ // Number of subframe backward updates already done
+ uint8_t coef_probs_update_idx;
+ // Signal if the backward update is subframe or end-of-frame
+ uint8_t partial_prob_update;
+ // Frame level flag to turn on/off subframe backward update
+ uint8_t do_subframe_update;
+#endif // CONFIG_ENTROPY
+
+ unsigned int current_video_frame;
+ BITSTREAM_PROFILE profile;
+
+ // VPX_BITS_8 in profile 0 or 1, VPX_BITS_10 or VPX_BITS_12 in profile 2 or 3.
+ vpx_bit_depth_t bit_depth;
+ vpx_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer
+
+ int error_resilient_mode;
+
+#if !CONFIG_EXT_TILE
+ int log2_tile_cols, log2_tile_rows;
+#endif // !CONFIG_EXT_TILE
+ int tile_cols, tile_rows;
+ int tile_width, tile_height; // In MI units
+
+ int byte_alignment;
+ int skip_loop_filter;
+
+ // Private data associated with the frame buffer callbacks.
+ void *cb_priv;
+ vpx_get_frame_buffer_cb_fn_t get_fb_cb;
+ vpx_release_frame_buffer_cb_fn_t release_fb_cb;
+
+ // Handles memory for the codec.
+ InternalFrameBufferList int_frame_buffers;
+
+ // External BufferPool passed from outside.
+ BufferPool *buffer_pool;
+
+ PARTITION_CONTEXT *above_seg_context;
+ ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT *above_txfm_context;
+ TXFM_CONTEXT left_txfm_context[MAX_MIB_SIZE];
+#endif
+ int above_context_alloc_cols;
+
+ // scratch memory for intraonly/keyframe forward updates from default tables
+ // - this is intentionally not placed in FRAME_CONTEXT since it's reset upon
+ // each keyframe and not used afterwards
+ vpx_prob kf_y_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1];
+#if CONFIG_GLOBAL_MOTION
+ Global_Motion_Params global_motion[TOTAL_REFS_PER_FRAME];
+#endif
+
+ BLOCK_SIZE sb_size; // Size of the superblock used for this frame
+ int mib_size; // Size of the superblock in units of MI blocks
+ int mib_size_log2; // Log 2 of above.
+#if CONFIG_DERING
+ int dering_level;
+#endif
+} VP10_COMMON;
+
+// Acquires pool->pool_mutex; does nothing unless CONFIG_MULTITHREAD is set.
+// TODO(hkuang): Drop the pool-wide lock once frame ref counts are atomic.
+static INLINE void lock_buffer_pool(BufferPool *const pool) {
+#if CONFIG_MULTITHREAD
+  pthread_mutex_lock(&pool->pool_mutex);
+#else
+  (void)pool;  // BufferPool has no mutex member in this configuration.
+#endif
+}
+
+static INLINE void unlock_buffer_pool(BufferPool *const pool) {
+#if CONFIG_MULTITHREAD
+  pthread_mutex_unlock(&pool->pool_mutex);  // Pairs with lock_buffer_pool().
+#else
+  (void)pool;  // BufferPool has no mutex member in this configuration.
+#endif
+}
+
+static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP10_COMMON *cm, int index) {
+  const int in_range = index >= 0 && index < REF_FRAMES;
+  const int fb_idx = in_range ? cm->ref_frame_map[index] : -1;  // -1: no slot
+  assert(fb_idx < FRAME_BUFFERS);
+  return fb_idx < 0 ? NULL : &cm->buffer_pool->frame_bufs[fb_idx].buf;
+}
+
+static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(
+ const VP10_COMMON *const cm) {
+ return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf;  // idx not checked
+}
+
+static INLINE int get_free_fb(VP10_COMMON *cm) {
+  BufferPool *const pool = cm->buffer_pool;
+  int free_idx = INVALID_IDX;  // Returned as-is when no buffer is free.
+  int k;
+
+  // Scan and claim while holding the pool lock (a real mutex only when
+  // CONFIG_MULTITHREAD is set).
+  lock_buffer_pool(pool);
+  for (k = 0; k < FRAME_BUFFERS; ++k) {
+    if (pool->frame_bufs[k].ref_count != 0) continue;
+    // First unreferenced buffer: take the initial reference for the caller.
+    pool->frame_bufs[k].ref_count = 1;
+    free_idx = k;
+    break;
+  }
+  unlock_buffer_pool(pool);
+  return free_idx;
+}
+
+static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
+  // Release the reference held via the old index (negative: none was held).
+  if (*idx >= 0) {
+    RefCntBuffer *const old_buf = &bufs[*idx];
+    if (old_buf->ref_count > 0) --old_buf->ref_count;
+  }
+  // Retarget the slot, then take a reference on the new buffer.
+  *idx = new_idx;
+  ++bufs[new_idx].ref_count;
+}
+
+static INLINE int mi_cols_aligned_to_sb(const VP10_COMMON *cm) {
+ return ALIGN_POWER_OF_TWO(cm->mi_cols, cm->mib_size_log2);  // in MI units
+}
+
+static INLINE int mi_rows_aligned_to_sb(const VP10_COMMON *cm) {
+ return ALIGN_POWER_OF_TWO(cm->mi_rows, cm->mib_size_log2);  // in MI units
+}
+
+static INLINE int frame_is_intra_only(const VP10_COMMON *const cm) {
+ return cm->frame_type == KEY_FRAME || cm->intra_only;  // yields 0 or 1
+}
+
+static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd,
+ tran_low_t *dqcoeff) {
+ int i;  // plane index
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ xd->plane[i].dqcoeff = dqcoeff;  // every plane shares this one buffer
+ xd->above_context[i] = cm->above_context[i];  // pointer alias, not a copy
+ if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
+ memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
+#if CONFIG_AOM_QM
+ memcpy(xd->plane[i].seg_iqmatrix, cm->y_iqmatrix, sizeof(cm->y_iqmatrix));
+#endif
+
+#if CONFIG_NEW_QUANT
+ memcpy(xd->plane[i].seg_dequant_nuq, cm->y_dequant_nuq,
+ sizeof(cm->y_dequant_nuq));
+#endif
+ } else {  // every plane that is not PLANE_TYPE_Y takes the uv tables
+ memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant));
+#if CONFIG_AOM_QM
+ memcpy(xd->plane[i].seg_iqmatrix, cm->uv_iqmatrix,
+ sizeof(cm->uv_iqmatrix));
+#endif
+#if CONFIG_NEW_QUANT
+ memcpy(xd->plane[i].seg_dequant_nuq, cm->uv_dequant_nuq,
+ sizeof(cm->uv_dequant_nuq));
+#endif
+ }
+ xd->fc = cm->fc;  // loop-invariant; simply repeated for each plane
+ }
+
+ xd->above_seg_context = cm->above_seg_context;
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context;
+#endif
+ xd->mi_stride = cm->mi_stride;
+ xd->error_info = &cm->error;  // xd reports errors into cm's error struct
+}
+
+static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
+ const int above_idx = mi_col * 2;  // two context entries per mi column
+ const int left_idx = (mi_row * 2) & MAX_MIB_MASK_2;  // masked row offset
+ int i;  // plane index
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ struct macroblockd_plane *const pd = &xd->plane[i];
+ pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x];
+ pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y];
+ }
+}
+
+static INLINE int calc_mi_size(int len) {
+ // len is in mi units; vp10_set_mb_mi() uses the result as cm->mi_stride.
+ return len + MAX_MIB_SIZE;
+}
+
+static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
+ int mi_row, int bh, int mi_col, int bw,
+ int mi_rows, int mi_cols) {
+ xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);  // *8: 1/8 pel? (confirm)
+ xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8;
+ xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
+ xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8;
+
+ // Are edges available for intra prediction? (Neighbour inside this tile.)
+ xd->up_available = (mi_row > tile->mi_row_start);
+ xd->left_available = (mi_col > tile->mi_col_start);
+ if (xd->up_available) {
+ xd->above_mi = xd->mi[-xd->mi_stride];
+ // above_mi may be NULL in encoder's first pass.
+ xd->above_mbmi = xd->above_mi ? &xd->above_mi->mbmi : NULL;
+ } else {
+ xd->above_mi = NULL;
+ xd->above_mbmi = NULL;
+ }
+
+ if (xd->left_available) {
+ xd->left_mi = xd->mi[-1];
+ // left_mi may be NULL in encoder's first pass.
+ xd->left_mbmi = xd->left_mi ? &xd->left_mi->mbmi : NULL;
+ } else {
+ xd->left_mi = NULL;
+ xd->left_mbmi = NULL;
+ }
+
+ xd->n8_h = bh;  // block height, in the same units as mi_row
+ xd->n8_w = bw;  // block width, in the same units as mi_col
+#if CONFIG_REF_MV
+ xd->is_sec_rect = 0;  // becomes 1 if not aligned to the longer side
+ if (xd->n8_w < xd->n8_h)
+ if (mi_col & (xd->n8_h - 1)) xd->is_sec_rect = 1;
+
+ if (xd->n8_w > xd->n8_h)
+ if (mi_row & (xd->n8_w - 1)) xd->is_sec_rect = 1;
+#endif  // CONFIG_REF_MV
+}
+
+static INLINE const vpx_prob *get_y_mode_probs(const VP10_COMMON *cm,
+ const MODE_INFO *mi,
+ const MODE_INFO *above_mi,
+ const MODE_INFO *left_mi,
+ int block) {
+ const PREDICTION_MODE above = vp10_above_block_mode(mi, above_mi, block);
+ const PREDICTION_MODE left = vp10_left_block_mode(mi, left_mi, block);
+ return cm->kf_y_prob[above][left];  // row selected by (above, left) modes
+}
+
+static INLINE void update_partition_context(MACROBLOCKD *xd, int mi_row,
+ int mi_col, BLOCK_SIZE subsize,
+ BLOCK_SIZE bsize) {
+ PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col;
+ PARTITION_CONTEXT *const left_ctx =
+ xd->left_seg_context + (mi_row & MAX_MIB_MASK);
+
+#if CONFIG_EXT_PARTITION_TYPES
+ const int bw = num_8x8_blocks_wide_lookup[bsize];
+ const int bh = num_8x8_blocks_high_lookup[bsize];
+ memset(above_ctx, partition_context_lookup[subsize].above, bw);
+ memset(left_ctx, partition_context_lookup[subsize].left, bh);
+#else
+ // num_4x4_blocks_wide_lookup[bsize] / 2
+ const int bs = num_8x8_blocks_wide_lookup[bsize];
+
+ // Update the partition context at the end nodes: set the partition bits
+ // of block sizes larger than the current one to one, and the partition
+ // bits of smaller block sizes to zero.
+ memset(above_ctx, partition_context_lookup[subsize].above, bs);
+ memset(left_ctx, partition_context_lookup[subsize].left, bs);
+#endif // CONFIG_EXT_PARTITION_TYPES
+}
+
+#if CONFIG_EXT_PARTITION_TYPES
+static INLINE void update_ext_partition_context(MACROBLOCKD *xd, int mi_row,
+ int mi_col, BLOCK_SIZE subsize,
+ BLOCK_SIZE bsize,
+ PARTITION_TYPE partition) {
+ if (bsize >= BLOCK_8X8) {  // smaller block sizes leave the context untouched
+ const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+ switch (partition) {
+ case PARTITION_SPLIT:  // updated here only when bsize is BLOCK_8X8
+ if (bsize != BLOCK_8X8) break;  // BLOCK_8X8: fall through
+ case PARTITION_NONE:
+ case PARTITION_HORZ:
+ case PARTITION_VERT:
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+ break;
+ case PARTITION_HORZ_A:
+ update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
+ update_partition_context(xd, mi_row + hbs, mi_col, subsize, subsize);
+ break;
+ case PARTITION_HORZ_B:
+ update_partition_context(xd, mi_row, mi_col, subsize, subsize);
+ update_partition_context(xd, mi_row + hbs, mi_col, bsize2, subsize);
+ break;
+ case PARTITION_VERT_A:
+ update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
+ update_partition_context(xd, mi_row, mi_col + hbs, subsize, subsize);
+ break;
+ case PARTITION_VERT_B:
+ update_partition_context(xd, mi_row, mi_col, subsize, subsize);
+ update_partition_context(xd, mi_row, mi_col + hbs, bsize2, subsize);
+ break;
+ default: assert(0 && "Invalid partition type");
+ }
+ }
+}
+#endif // CONFIG_EXT_PARTITION_TYPES
+
+static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row,
+ int mi_col, BLOCK_SIZE bsize) {
+ const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col;
+ const PARTITION_CONTEXT *left_ctx =
+ xd->left_seg_context + (mi_row & MAX_MIB_MASK);
+ const int bsl = mi_width_log2_lookup[bsize];
+ int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1;
+
+ assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
+ assert(bsl >= 0);  // note: bsl was already used as a shift count above
+
+ return (left * 2 + above) + bsl * PARTITION_PLOFFSET;  // 0..3, plus offset
+}
+
+static INLINE void vp10_zero_above_context(VP10_COMMON *const cm,
+ int mi_col_start, int mi_col_end) {
+ const int width = mi_col_end - mi_col_start;  // in mi columns
+
+ const int offset_y = 2 * mi_col_start;  // two entries per mi column
+ const int width_y = 2 * width;
+ const int offset_uv = offset_y >> cm->subsampling_x;  // for planes 1 and 2
+ const int width_uv = width_y >> cm->subsampling_x;
+
+ vp10_zero_array(cm->above_context[0] + offset_y, width_y);
+ vp10_zero_array(cm->above_context[1] + offset_uv, width_uv);
+ vp10_zero_array(cm->above_context[2] + offset_uv, width_uv);
+
+ vp10_zero_array(cm->above_seg_context + mi_col_start, width);
+
+#if CONFIG_VAR_TX
+ vp10_zero_array(cm->above_txfm_context + mi_col_start, width);
+#endif // CONFIG_VAR_TX
+}
+
+static INLINE void vp10_zero_left_context(MACROBLOCKD *const xd) {
+ vp10_zero(xd->left_context);
+ vp10_zero(xd->left_seg_context);
+#if CONFIG_VAR_TX
+ vp10_zero(xd->left_txfm_context_buffer);
+#endif  // CONFIG_VAR_TX
+}
+
+#if CONFIG_VAR_TX
+static INLINE void set_txfm_ctx(TXFM_CONTEXT *txfm_ctx, TX_SIZE tx_size,
+                                int len) {
+  int k = 0;  // Fills txfm_ctx[0 .. len - 1]; nothing is written if len <= 0.
+  while (k < len) txfm_ctx[k++] = tx_size;
+}
+
+static INLINE void set_txfm_ctxs(TX_SIZE tx_size, int n8_w, int n8_h,
+ const MACROBLOCKD *xd) {  // xd is const; the arrays it points to are written
+ set_txfm_ctx(xd->above_txfm_context, txsize_horz_map[tx_size], n8_w);
+ set_txfm_ctx(xd->left_txfm_context, txsize_vert_map[tx_size], n8_h);
+}
+
+static INLINE void txfm_partition_update(TXFM_CONTEXT *above_ctx,
+                                         TXFM_CONTEXT *left_ctx,
+                                         TX_SIZE tx_size) {
+  // One count (the 8x8-unit height of tx_size's block) covers both arrays.
+  const int n8 = num_8x8_blocks_high_lookup[txsize_to_bsize[tx_size]];
+  int k;
+  for (k = 0; k < n8; ++k) {
+    above_ctx[k] = tx_size;
+    left_ctx[k] = tx_size;
+  }
+}
+
+static INLINE int txfm_partition_context(TXFM_CONTEXT *above_ctx,
+                                         TXFM_CONTEXT *left_ctx,
+                                         TX_SIZE tx_size) {
+  // Three values per transform size: a base of (tx_size - 1) * 3 plus the
+  // number of neighbours (above, left) whose stored value is below tx_size.
+  return (tx_size - 1) * 3 + (*above_ctx < tx_size) + (*left_ctx < tx_size);
+}
+#endif
+
+static INLINE PARTITION_TYPE get_partition(const VP10_COMMON *const cm,
+ const int mi_row, const int mi_col,
+ const BLOCK_SIZE bsize) {
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) {
+ return PARTITION_INVALID;  // position lies outside the frame's mi grid
+ } else {
+ const int offset = mi_row * cm->mi_stride + mi_col;
+ MODE_INFO **mi = cm->mi_grid_visible + offset;
+ const MB_MODE_INFO *const mbmi = &mi[0]->mbmi;
+ const int bsl = b_width_log2_lookup[bsize];
+ const PARTITION_TYPE partition = partition_lookup[bsl][mbmi->sb_type];
+#if !CONFIG_EXT_PARTITION_TYPES
+ return partition;  // the table lookup is the final answer in this build
+#else
+ const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;  // half block width
+
+ assert(cm->mi_grid_visible[offset] == &cm->mi[offset]);
+
+ if (partition != PARTITION_NONE && bsize > BLOCK_8X8 &&
+ mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
+ const BLOCK_SIZE h = get_subsize(bsize, PARTITION_HORZ_A);
+ const BLOCK_SIZE v = get_subsize(bsize, PARTITION_VERT_A);
+ const MB_MODE_INFO *const mbmi_right = &mi[hbs]->mbmi;
+ const MB_MODE_INFO *const mbmi_below = &mi[hbs * cm->mi_stride]->mbmi;
+ if (mbmi->sb_type == h) {
+ return mbmi_below->sb_type == h ? PARTITION_HORZ : PARTITION_HORZ_B;
+ } else if (mbmi->sb_type == v) {
+ return mbmi_right->sb_type == v ? PARTITION_VERT : PARTITION_VERT_B;
+ } else if (mbmi_below->sb_type == h) {
+ return PARTITION_HORZ_A;
+ } else if (mbmi_right->sb_type == v) {
+ return PARTITION_VERT_A;
+ } else {
+ return PARTITION_SPLIT;
+ }
+ }
+
+ return partition;  // refinement above skipped: keep the table lookup
+#endif // !CONFIG_EXT_PARTITION_TYPES
+ }
+}
+
+static INLINE void set_sb_size(VP10_COMMON *const cm,
+ const BLOCK_SIZE sb_size) {
+ cm->sb_size = sb_size;
+ cm->mib_size = num_8x8_blocks_wide_lookup[cm->sb_size];  // in MI blocks
+ cm->mib_size_log2 = mi_width_log2_lookup[cm->sb_size];  // log2 of mib_size
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_ONYXC_INT_H_
diff --git a/av1/common/pred_common.c b/av1/common/pred_common.c
new file mode 100644
index 0000000..0e1045e
--- /dev/null
+++ b/av1/common/pred_common.c
@@ -0,0 +1,1384 @@
+
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/common.h"
+#include "av1/common/pred_common.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/seg_common.h"
+
+// Returns a context number for the given MB prediction signal
+#if CONFIG_DUAL_FILTER
+static INTERP_FILTER get_ref_filter_type(const MODE_INFO *mi,
+ const MACROBLOCKD *xd, int dir,
+ MV_REFERENCE_FRAME ref_frame) {
+ // The neighbour `mi` supplies a filter only through a reference slot that
+ // equals ref_frame and for which has_subpel_mv_component() is non-zero;
+ // otherwise SWITCHABLE_FILTERS is returned. Both probes run up front.
+ const MB_MODE_INFO *const nb = &mi->mbmi;
+ const int dir_lsb = dir & 0x01;
+ const int subpel_ref0 = has_subpel_mv_component(mi, xd, dir);
+ const int subpel_ref1 = has_subpel_mv_component(mi, xd, dir + 2);
+
+ if (subpel_ref0 && nb->ref_frame[0] == ref_frame)
+ return nb->interp_filter[dir_lsb];
+ if (subpel_ref1 && nb->ref_frame[1] == ref_frame)
+ return nb->interp_filter[dir_lsb + 2];
+ return SWITCHABLE_FILTERS;
+}
+
+int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir) {
+ // Context = compound offset + direction offset + merged neighbour filter.
+ // SWITCHABLE_FILTERS doubles as the "no usable neighbour filter" marker.
+ const MB_MODE_INFO *const cur = &xd->mi[0]->mbmi;
+ const MV_REFERENCE_FRAME ref_frame = cur->ref_frame[dir < 2 ? 0 : 1];
+ const int comp_offset =
+ (cur->ref_frame[1] > INTRA_FRAME) * INTER_FILTER_COMP_OFFSET;
+ const int dir_offset = (dir & 0x01) * INTER_FILTER_DIR_OFFSET;
+ int left_filter = SWITCHABLE_FILTERS;
+ int above_filter = SWITCHABLE_FILTERS;
+ int merged;
+
+ if (xd->left_available)
+ left_filter = get_ref_filter_type(xd->mi[-1], xd, dir, ref_frame);
+
+ if (xd->up_available)
+ above_filter =
+ get_ref_filter_type(xd->mi[-xd->mi_stride], xd, dir, ref_frame);
+
+ // Agreeing neighbours give their common value; a lone informative
+ // neighbour wins; two different informative neighbours cancel out.
+ if (left_filter == above_filter)
+ merged = left_filter;
+ else if (left_filter == SWITCHABLE_FILTERS)
+ merged = above_filter;
+ else if (above_filter == SWITCHABLE_FILTERS)
+ merged = left_filter;
+ else
+ merged = SWITCHABLE_FILTERS;
+
+ return comp_offset + dir_offset + merged;
+}
+#else
+int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
+ // A neighbour contributes its interp_filter only when it is available and
+ // inter coded; otherwise it is represented by SWITCHABLE_FILTERS.
+ int left_filter = SWITCHABLE_FILTERS;
+ int above_filter = SWITCHABLE_FILTERS;
+
+ if (xd->left_available && is_inter_block(xd->left_mbmi))
+ left_filter = xd->left_mbmi->interp_filter;
+
+ if (xd->up_available && is_inter_block(xd->above_mbmi))
+ above_filter = xd->above_mbmi->interp_filter;
+
+ // Equal values pass through unchanged.
+ if (left_filter == above_filter) return left_filter;
+
+ // From here the two differ, so at most one is SWITCHABLE_FILTERS: return
+ // the other one when that is the case.
+ if (left_filter == SWITCHABLE_FILTERS) return above_filter;
+ if (above_filter == SWITCHABLE_FILTERS) return left_filter;
+
+ // Two different real filters carry no usable prediction.
+ return SWITCHABLE_FILTERS;
+}
+#endif
+
+#if CONFIG_EXT_INTRA
+// Obtain the reference filter type from the above/left neighbor blocks.
+static INTRA_FILTER get_ref_intra_filter(const MB_MODE_INFO *ref_mbmi) {
+ INTRA_FILTER ref_type = INTRA_FILTERS;  // returned unchanged when no filter type is derived
+ // Neighbours with sb_type below BLOCK_8X8 never contribute a filter type.
+ if (ref_mbmi->sb_type >= BLOCK_8X8) {
+ PREDICTION_MODE mode = ref_mbmi->mode;
+ if (is_inter_block(ref_mbmi)) {  // inter neighbour: translate its interpolation filter
+#if CONFIG_DUAL_FILTER
+ switch (ref_mbmi->interp_filter[0]) {
+#else
+ switch (ref_mbmi->interp_filter) {
+#endif
+ case EIGHTTAP_REGULAR: ref_type = INTRA_FILTER_8TAP; break;
+ case EIGHTTAP_SMOOTH: ref_type = INTRA_FILTER_8TAP_SMOOTH; break;
+ case MULTITAP_SHARP: ref_type = INTRA_FILTER_8TAP_SHARP; break;
+ case BILINEAR: ref_type = INTRA_FILTERS; break;
+ default: break;
+ }
+ } else {  // intra neighbour: only non-DC, non-TM modes are considered
+ if (mode != DC_PRED && mode != TM_PRED) {
+ int p_angle =
+ mode_to_angle_map[mode] + ref_mbmi->angle_delta[0] * ANGLE_STEP;
+ if (vp10_is_intra_filter_switchable(p_angle)) {  // take the stored filter only for such angles
+ ref_type = ref_mbmi->intra_filter;
+ }
+ }
+ }
+ }
+ return ref_type;
+}
+
+int vp10_get_pred_context_intra_interp(const MACROBLOCKD *xd) {
+ // INTRA_FILTERS stands for "this neighbour offers no filter hint".
+ const int from_left = xd->left_available
+ ? get_ref_intra_filter(xd->left_mbmi)
+ : INTRA_FILTERS;
+ const int from_above = xd->up_available
+ ? get_ref_intra_filter(xd->above_mbmi)
+ : INTRA_FILTERS;
+
+ // Same on both sides: that value. One side silent: the other side.
+ // Two different hints: fall back to INTRA_FILTERS.
+ if (from_left == from_above) return from_left;
+ if (from_left == INTRA_FILTERS) return from_above;
+ if (from_above == INTRA_FILTERS) return from_left;
+ return INTRA_FILTERS;
+}
+#endif // CONFIG_EXT_INTRA
+
+// The mode info data structure has a one element border above and to the
+// left of the entries corresponding to real macroblocks.
+// The prediction flags in these dummy entries are initialized to 0.
+// 0 - inter/inter, inter/--, --/inter, --/--
+// 1 - intra/inter, inter/intra
+// 2 - intra/--, --/intra
+// 3 - intra/intra
+int vp10_get_intra_inter_context(const MACROBLOCKD *xd) {
+ const int up_ok = xd->up_available;
+ const int left_ok = xd->left_available;
+
+ // Neither neighbour is available.
+ if (!up_ok && !left_ok) return 0;
+
+ // Exactly one neighbour: 2 if it is intra, 0 if it is inter.
+ if (!up_ok || !left_ok)
+ return 2 * !is_inter_block(up_ok ? xd->above_mbmi : xd->left_mbmi);
+
+ // Both neighbours: 3 if both are intra, 1 if exactly one is, else 0.
+ if (is_inter_block(xd->above_mbmi))
+ return !is_inter_block(xd->left_mbmi);
+ return is_inter_block(xd->left_mbmi) ? 1 : 3;
+}
+
+#if CONFIG_EXT_REFS
+
+#define CHECK_BWDREF_OR_ALTREF(ref_frame) \
+ (((ref_frame) == BWDREF_FRAME) || ((ref_frame) == ALTREF_FRAME))
+ // Note: the macro above names its argument twice, so pass side-effect-free expressions only.
+int vp10_get_reference_mode_context(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int ctx;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+ // cm is not read in this build configuration; the cast silences the unused-parameter warning.
+ (void)cm;
+ // Result: ctx in [0, COMP_INTER_CONTEXTS), derived from the above/left neighbours only.
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ if (has_above && has_left) { // both edges available
+ if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi))
+ // neither edge uses comp pred (0/1)
+ ctx = CHECK_BWDREF_OR_ALTREF(above_mbmi->ref_frame[0]) ^
+ CHECK_BWDREF_OR_ALTREF(left_mbmi->ref_frame[0]);
+ else if (!has_second_ref(above_mbmi))
+ // one of two edges uses comp pred (2/3); above is the non-compound one
+ ctx = 2 + (CHECK_BWDREF_OR_ALTREF(above_mbmi->ref_frame[0]) ||
+ !is_inter_block(above_mbmi));
+ else if (!has_second_ref(left_mbmi))
+ // one of two edges uses comp pred (2/3); left is the non-compound one
+ ctx = 2 + (CHECK_BWDREF_OR_ALTREF(left_mbmi->ref_frame[0]) ||
+ !is_inter_block(left_mbmi));
+ else // both edges use comp pred (4)
+ ctx = 4;
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!has_second_ref(edge_mbmi))
+ // edge does not use comp pred (0/1)
+ ctx = CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]);
+ else
+ // edge uses comp pred (3)
+ ctx = 3;
+ } else { // no edges available (1)
+ ctx = 1;
+ }
+ assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS);
+ return ctx;
+}
+
+#else // CONFIG_EXT_REFS
+
+int vp10_get_reference_mode_context(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {  // Returns ctx in [0, COMP_INTER_CONTEXTS) from the above/left neighbours.
+ int ctx;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ if (has_above && has_left) { // both edges available
+ if (!has_second_ref(above_mbmi) && !has_second_ref(left_mbmi))
+ // neither edge uses comp pred (0/1): 1 when exactly one first ref is cm->comp_fixed_ref
+ ctx = (above_mbmi->ref_frame[0] == cm->comp_fixed_ref) ^
+ (left_mbmi->ref_frame[0] == cm->comp_fixed_ref);
+ else if (!has_second_ref(above_mbmi))
+ // one of two edges uses comp pred (2/3); above is the non-compound one
+ ctx = 2 + (above_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
+ !is_inter_block(above_mbmi));
+ else if (!has_second_ref(left_mbmi))
+ // one of two edges uses comp pred (2/3); left is the non-compound one
+ ctx = 2 + (left_mbmi->ref_frame[0] == cm->comp_fixed_ref ||
+ !is_inter_block(left_mbmi));
+ else // both edges use comp pred (4)
+ ctx = 4;
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!has_second_ref(edge_mbmi))
+ // edge does not use comp pred (0/1)
+ ctx = edge_mbmi->ref_frame[0] == cm->comp_fixed_ref;
+ else
+ // edge uses comp pred (3)
+ ctx = 3;
+ } else { // no edges available (1)
+ ctx = 1;
+ }
+ assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS);
+ return ctx;
+}
+
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+
+// TODO(zoeliu): Future work will be conducted to optimize the context design
+// for the coding of the reference frames.
+// Non-zero when ref_frame is LAST_FRAME or LAST2_FRAME. The argument is parenthesized but named twice: pass no side effects.
+#define CHECK_LAST_OR_LAST2(ref_frame) \
+ (((ref_frame) == LAST_FRAME) || ((ref_frame) == LAST2_FRAME))
+// Non-zero when ref_frame is GOLDEN_FRAME or LAST3_FRAME. Same argument rules as CHECK_LAST_OR_LAST2.
+#define CHECK_GOLDEN_OR_LAST3(ref_frame) \
+ (((ref_frame) == GOLDEN_FRAME) || ((ref_frame) == LAST3_FRAME))
+
+// Returns a context number for the given MB prediction signal.
+// The signal tells whether the first reference frame of a compound mode is
+// one of GOLDEN/LAST3 or one of LAST/LAST2.
+//
+// NOTE(zoeliu): The probability modelled is that of ref_frame[0] being either
+// GOLDEN_FRAME or LAST3_FRAME.
+int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int above_in_image = xd->up_available;
+ const int left_in_image = xd->left_available;
+ // Result is in [0, REF_CONTEXTS); 0 is used when the neighbours agree on a GOLDEN/LAST3 reference.
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialised to 0.
+ const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
+ const int fwd_ref_sign_idx = !bwd_ref_sign_idx;  // ref_frame[] slot read for compound neighbours below
+
+ if (above_in_image && left_in_image) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra (2)
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;  // the inter-coded neighbour
+
+ if (!has_second_ref(edge_mbmi)) // single pred (1/3)
+ pred_context =
+ 1 + 2 * (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]));
+ else // comp pred (1/3)
+ pred_context = 1 +
+ 2 * (!CHECK_GOLDEN_OR_LAST3(
+ edge_mbmi->ref_frame[fwd_ref_sign_idx]));
+ } else { // inter/inter
+ const int l_sg = !has_second_ref(left_mbmi);
+ const int a_sg = !has_second_ref(above_mbmi);
+ const MV_REFERENCE_FRAME frfa =
+ a_sg ? above_mbmi->ref_frame[0]
+ : above_mbmi->ref_frame[fwd_ref_sign_idx];
+ const MV_REFERENCE_FRAME frfl =
+ l_sg ? left_mbmi->ref_frame[0]
+ : left_mbmi->ref_frame[fwd_ref_sign_idx];
+ // frfa / frfl: the reference taken from the above / left neighbour.
+ if (frfa == frfl && CHECK_GOLDEN_OR_LAST3(frfa)) {
+ pred_context = 0;
+ } else if (l_sg && a_sg) { // single/single
+ if ((CHECK_BWDREF_OR_ALTREF(frfa) && CHECK_LAST_OR_LAST2(frfl)) ||
+ (CHECK_BWDREF_OR_ALTREF(frfl) && CHECK_LAST_OR_LAST2(frfa))) {
+ pred_context = 4;
+ } else if (CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl)) {
+ pred_context = 1;
+ } else {
+ pred_context = 3;
+ }
+ } else if (l_sg || a_sg) { // single/comp
+ const MV_REFERENCE_FRAME frfc = l_sg ? frfa : frfl;  // reference of the compound neighbour
+ const MV_REFERENCE_FRAME rfs = a_sg ? frfa : frfl;  // reference of the single neighbour
+
+ if (CHECK_GOLDEN_OR_LAST3(frfc) && !CHECK_GOLDEN_OR_LAST3(rfs))
+ pred_context = 1;
+ else if (CHECK_GOLDEN_OR_LAST3(rfs) && !CHECK_GOLDEN_OR_LAST3(frfc))
+ pred_context = 2;
+ else
+ pred_context = 4;
+ } else { // comp/comp
+ if ((CHECK_LAST_OR_LAST2(frfa) && CHECK_LAST_OR_LAST2(frfl))) {
+ pred_context = 4;
+ } else {
+ // NOTE(zoeliu): Following assert may be removed once confirmed.
+ assert(CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl));
+ pred_context = 2;
+ }
+ }
+ }
+ } else if (above_in_image || left_in_image) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi)) {
+ pred_context = 2;
+ } else {
+ if (has_second_ref(edge_mbmi))  // compound edge: 0 or 4
+ pred_context =
+ 4 *
+ (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[fwd_ref_sign_idx]));
+ else  // single edge: 0 or 3
+ pred_context = 3 * (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]));
+ }
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+ return pred_context;
+}
+
+// Returns a context number for the given MB prediction signal.
+// The signal tells whether the first reference frame of a compound mode is
+// LAST, given that it is already known to be either LAST or LAST2.
+//
+// NOTE(zoeliu): The probability modelled is that of ref_frame[0] being
+// LAST_FRAME, conditioned on it being either LAST_FRAME or LAST2_FRAME.
+int vp10_get_pred_context_comp_ref_p1(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int above_in_image = xd->up_available;
+ const int left_in_image = xd->left_available;
+ // Result is in [0, REF_CONTEXTS); 0 is used when the neighbours agree on LAST_FRAME.
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialised to 0.
+ const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
+ const int fwd_ref_sign_idx = !bwd_ref_sign_idx;  // ref_frame[] slot read for compound neighbours below
+
+ if (above_in_image && left_in_image) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra (2)
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;  // the inter-coded neighbour
+
+ if (!has_second_ref(edge_mbmi)) // single pred (1/3)
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != LAST_FRAME);
+ else // comp pred (1/3)
+ pred_context =
+ 1 + 2 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != LAST_FRAME);
+ } else { // inter/inter
+ const int l_sg = !has_second_ref(left_mbmi);
+ const int a_sg = !has_second_ref(above_mbmi);
+ const MV_REFERENCE_FRAME frfa =
+ a_sg ? above_mbmi->ref_frame[0]
+ : above_mbmi->ref_frame[fwd_ref_sign_idx];
+ const MV_REFERENCE_FRAME frfl =
+ l_sg ? left_mbmi->ref_frame[0]
+ : left_mbmi->ref_frame[fwd_ref_sign_idx];
+ // frfa / frfl: the reference taken from the above / left neighbour.
+ if (frfa == frfl && frfa == LAST_FRAME)
+ pred_context = 0;
+ else if (l_sg && a_sg) { // single/single
+ if (frfa == LAST_FRAME || frfl == LAST_FRAME)
+ pred_context = 1;
+ else if (CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl))
+ pred_context = 2 + (frfa != frfl);
+ else if (frfa == frfl ||
+ (CHECK_BWDREF_OR_ALTREF(frfa) && CHECK_BWDREF_OR_ALTREF(frfl)))
+ pred_context = 3;
+ else
+ pred_context = 4;
+ } else if (l_sg || a_sg) { // single/comp
+ const MV_REFERENCE_FRAME frfc = l_sg ? frfa : frfl;  // reference of the compound neighbour
+ const MV_REFERENCE_FRAME rfs = a_sg ? frfa : frfl;  // reference of the single neighbour
+
+ if (frfc == LAST_FRAME && rfs != LAST_FRAME)
+ pred_context = 1;
+ else if (rfs == LAST_FRAME && frfc != LAST_FRAME)
+ pred_context = 2;
+ else
+ pred_context =
+ 3 + (frfc == LAST2_FRAME || CHECK_GOLDEN_OR_LAST3(rfs));
+ } else { // comp/comp
+ if (frfa == LAST_FRAME || frfl == LAST_FRAME)
+ pred_context = 2;
+ else
+ pred_context =
+ 3 + (CHECK_GOLDEN_OR_LAST3(frfa) || CHECK_GOLDEN_OR_LAST3(frfl));
+ }
+ }
+ } else if (above_in_image || left_in_image) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi)) {
+ pred_context = 2;
+ } else {
+ if (has_second_ref(edge_mbmi)) {  // compound edge: 0 or 4
+ pred_context =
+ 4 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != LAST_FRAME);
+ } else {  // single edge: 0 for LAST, 3 for GOLDEN/LAST3, otherwise 2
+ if (edge_mbmi->ref_frame[0] == LAST_FRAME)
+ pred_context = 0;
+ else
+ pred_context = 2 + CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]);
+ }
+ }
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+ return pred_context;
+}
+
+// Returns a context number for the given MB prediction signal.
+// The signal tells whether the first reference frame of a compound mode is
+// GOLDEN, given that it is already known to be either GOLDEN or LAST3.
+//
+// NOTE(zoeliu): The probability modelled is that of ref_frame[0] being
+// GOLDEN_FRAME, conditioned on it being either GOLDEN or LAST3.
+int vp10_get_pred_context_comp_ref_p2(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int above_in_image = xd->up_available;
+ const int left_in_image = xd->left_available;
+ // Result is in [0, REF_CONTEXTS); 0 is used when the neighbours agree on GOLDEN_FRAME.
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialised to 0.
+ const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
+ const int fwd_ref_sign_idx = !bwd_ref_sign_idx;  // ref_frame[] slot read for compound neighbours below
+
+ if (above_in_image && left_in_image) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra (2)
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;  // the inter-coded neighbour
+
+ if (!has_second_ref(edge_mbmi)) // single pred (1/3)
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != GOLDEN_FRAME);
+ else // comp pred (1/3)
+ pred_context =
+ 1 + 2 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != GOLDEN_FRAME);
+ } else { // inter/inter
+ const int l_sg = !has_second_ref(left_mbmi);
+ const int a_sg = !has_second_ref(above_mbmi);
+ const MV_REFERENCE_FRAME frfa =
+ a_sg ? above_mbmi->ref_frame[0]
+ : above_mbmi->ref_frame[fwd_ref_sign_idx];
+ const MV_REFERENCE_FRAME frfl =
+ l_sg ? left_mbmi->ref_frame[0]
+ : left_mbmi->ref_frame[fwd_ref_sign_idx];
+ // frfa / frfl: the reference taken from the above / left neighbour.
+ if (frfa == frfl && frfa == GOLDEN_FRAME)
+ pred_context = 0;
+ else if (l_sg && a_sg) { // single/single
+ if (frfa == GOLDEN_FRAME || frfl == GOLDEN_FRAME)
+ pred_context = 1;
+ else if (CHECK_LAST_OR_LAST2(frfa) || CHECK_LAST_OR_LAST2(frfl))
+ pred_context = 2 + (frfa != frfl);
+ else if (frfa == frfl ||
+ (CHECK_BWDREF_OR_ALTREF(frfa) && CHECK_BWDREF_OR_ALTREF(frfl)))
+ pred_context = 3;
+ else
+ pred_context = 4;
+ } else if (l_sg || a_sg) { // single/comp
+ const MV_REFERENCE_FRAME frfc = l_sg ? frfa : frfl;  // reference of the compound neighbour
+ const MV_REFERENCE_FRAME rfs = a_sg ? frfa : frfl;  // reference of the single neighbour
+
+ if (frfc == GOLDEN_FRAME && rfs != GOLDEN_FRAME)
+ pred_context = 1;
+ else if (rfs == GOLDEN_FRAME && frfc != GOLDEN_FRAME)
+ pred_context = 2;
+ else
+ pred_context = 3 + (frfc == LAST3_FRAME || CHECK_LAST_OR_LAST2(rfs));
+ } else { // comp/comp
+ if (frfa == GOLDEN_FRAME || frfl == GOLDEN_FRAME)
+ pred_context = 2;
+ else
+ pred_context =
+ 3 + (CHECK_LAST_OR_LAST2(frfa) || CHECK_LAST_OR_LAST2(frfl));
+ }
+ }
+ } else if (above_in_image || left_in_image) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi)) {
+ pred_context = 2;
+ } else {
+ if (has_second_ref(edge_mbmi)) {  // compound edge: 0 or 4
+ pred_context =
+ 4 * (edge_mbmi->ref_frame[fwd_ref_sign_idx] != GOLDEN_FRAME);
+ } else {  // single edge: 0 for GOLDEN, 3 for LAST/LAST2, otherwise 2
+ if (edge_mbmi->ref_frame[0] == GOLDEN_FRAME)
+ pred_context = 0;
+ else
+ pred_context = 2 + CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]);
+ }
+ }
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+ return pred_context;
+}
+
+// Returns a context number for the given MB prediction signal
+int vp10_get_pred_context_comp_bwdref_p(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int above_in_image = xd->up_available;
+ const int left_in_image = xd->left_available;
+ // Result is in [0, REF_CONTEXTS); neighbour references are compared against cm->comp_bwd_ref[1].
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ const int bwd_ref_sign_idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
+ const int fwd_ref_sign_idx = !bwd_ref_sign_idx;  // the other ref_frame[] slot of a compound neighbour
+
+ if (above_in_image && left_in_image) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra (2)
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;  // the inter-coded neighbour
+
+ if (!has_second_ref(edge_mbmi)) // single pred (1/3)
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_bwd_ref[1]);
+ else // comp pred (1/3)
+ pred_context =
+ 1 +
+ 2 * (edge_mbmi->ref_frame[bwd_ref_sign_idx] != cm->comp_bwd_ref[1]);
+ } else { // inter/inter
+ const int l_comp = has_second_ref(left_mbmi);
+ const int a_comp = has_second_ref(above_mbmi);
+ // l_brf / a_brf: reference in the bwd_ref_sign_idx slot; NONE for a single-reference neighbour.
+ const MV_REFERENCE_FRAME l_brf =
+ l_comp ? left_mbmi->ref_frame[bwd_ref_sign_idx] : NONE;
+ const MV_REFERENCE_FRAME a_brf =
+ a_comp ? above_mbmi->ref_frame[bwd_ref_sign_idx] : NONE;
+ // l_frf / a_frf: ref_frame[0] for a single neighbour, the fwd_ref_sign_idx slot otherwise.
+ const MV_REFERENCE_FRAME l_frf =
+ !l_comp ? left_mbmi->ref_frame[0]
+ : left_mbmi->ref_frame[fwd_ref_sign_idx];
+ const MV_REFERENCE_FRAME a_frf =
+ !a_comp ? above_mbmi->ref_frame[0]
+ : above_mbmi->ref_frame[fwd_ref_sign_idx];
+
+ if (l_comp && a_comp) { // comp/comp
+ if (l_brf == a_brf && l_brf == cm->comp_bwd_ref[1]) {
+ pred_context = 0;
+ } else if (l_brf == cm->comp_bwd_ref[1] ||
+ a_brf == cm->comp_bwd_ref[1]) {
+ pred_context = 1;
+ } else {
+ // NOTE: Backward ref should be either BWDREF or ALTREF.
+ assert(l_brf == a_brf && l_brf != cm->comp_bwd_ref[1]);
+ pred_context = 3;
+ }
+ } else if (!l_comp && !a_comp) { // single/single
+ if (l_frf == a_frf && l_frf == cm->comp_bwd_ref[1]) {
+ pred_context = 0;
+ } else if (l_frf == cm->comp_bwd_ref[1] ||
+ a_frf == cm->comp_bwd_ref[1]) {
+ pred_context = 1;
+ } else if (l_frf == a_frf) {
+ pred_context = 3;
+ } else {
+ assert(l_frf != a_frf && l_frf != cm->comp_bwd_ref[1] &&
+ a_frf != cm->comp_bwd_ref[1]);
+ pred_context = 4;
+ }
+ } else { // comp/single
+ assert((l_comp && !a_comp) || (!l_comp && a_comp));
+ // 1: both neighbours match comp_bwd_ref[1]; 2: exactly one does; 4: neither does.
+ if ((l_comp && l_brf == cm->comp_bwd_ref[1] &&
+ a_frf == cm->comp_bwd_ref[1]) ||
+ (a_comp && a_brf == cm->comp_bwd_ref[1] &&
+ l_frf == cm->comp_bwd_ref[1])) {
+ pred_context = 1;
+ } else if ((l_comp && l_brf == cm->comp_bwd_ref[1]) ||
+ (a_comp && a_brf == cm->comp_bwd_ref[1]) ||
+ (!l_comp && l_frf == cm->comp_bwd_ref[1]) ||
+ (!a_comp && a_frf == cm->comp_bwd_ref[1])) {
+ pred_context = 2;
+ } else {
+ pred_context = 4;
+ }
+ }
+ }
+ } else if (above_in_image || left_in_image) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi)) {
+ pred_context = 2;
+ } else {
+ if (has_second_ref(edge_mbmi)) {  // compound edge: 0 or 4
+ pred_context =
+ 4 * (edge_mbmi->ref_frame[bwd_ref_sign_idx] != cm->comp_bwd_ref[1]);
+ } else {  // single edge: 0 or 3
+ pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_bwd_ref[1]);
+ }
+ }
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+ return pred_context;
+}
+
+#else // CONFIG_EXT_REFS
+
+// Returns a context number for the given MB prediction signal
+int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int above_in_image = xd->up_available;
+ const int left_in_image = xd->left_available;
+ // Result is in [0, REF_CONTEXTS); neighbour references are compared against cm->comp_var_ref[1].
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+ const int var_ref_idx = !fix_ref_idx;  // ref_frame[] slot read for compound neighbours below
+
+ if (above_in_image && left_in_image) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra (2)
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;  // the inter-coded neighbour
+
+ if (!has_second_ref(edge_mbmi)) // single pred (1/3)
+ pred_context = 1 + 2 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
+ else // comp pred (1/3)
+ pred_context =
+ 1 + 2 * (edge_mbmi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]);
+ } else { // inter/inter
+ const int l_sg = !has_second_ref(left_mbmi);
+ const int a_sg = !has_second_ref(above_mbmi);
+ const MV_REFERENCE_FRAME vrfa =
+ a_sg ? above_mbmi->ref_frame[0] : above_mbmi->ref_frame[var_ref_idx];
+ const MV_REFERENCE_FRAME vrfl =
+ l_sg ? left_mbmi->ref_frame[0] : left_mbmi->ref_frame[var_ref_idx];
+ // vrfa / vrfl: the reference taken from the above / left neighbour.
+ if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) {
+ pred_context = 0;
+ } else if (l_sg && a_sg) { // single/single
+ if ((vrfa == cm->comp_fixed_ref && vrfl == cm->comp_var_ref[0]) ||
+ (vrfl == cm->comp_fixed_ref && vrfa == cm->comp_var_ref[0]))
+ pred_context = 4;
+ else if (vrfa == vrfl)
+ pred_context = 3;
+ else
+ pred_context = 1;
+ } else if (l_sg || a_sg) { // single/comp
+ const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl;  // reference of the compound neighbour
+ const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl;  // reference of the single neighbour
+ if (vrfc == cm->comp_var_ref[1] && rfs != cm->comp_var_ref[1])
+ pred_context = 1;
+ else if (rfs == cm->comp_var_ref[1] && vrfc != cm->comp_var_ref[1])
+ pred_context = 2;
+ else
+ pred_context = 4;
+ } else if (vrfa == vrfl) { // comp/comp
+ pred_context = 4;
+ } else {  // comp/comp with different references
+ pred_context = 2;
+ }
+ }
+ } else if (above_in_image || left_in_image) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = above_in_image ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi)) {
+ pred_context = 2;
+ } else {
+ if (has_second_ref(edge_mbmi))  // compound edge: 0 or 4
+ pred_context =
+ 4 * (edge_mbmi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]);
+ else  // single edge: 0 or 3
+ pred_context = 3 * (edge_mbmi->ref_frame[0] != cm->comp_var_ref[1]);
+ }
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+
+ return pred_context;
+}
+
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+
+// For the bit that signals whether the single reference is an ALTREF_FRAME
+// or a BWDREF_FRAME.
+//
+// NOTE(zoeliu): The probability of ref_frame[0] is ALTREF/BWDREF.
+int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+ // Result is in [0, REF_CONTEXTS); it grows with the number of neighbour references outside BWDREF/ALTREF.
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialised to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;  // the inter-coded neighbour
+
+ if (!has_second_ref(edge_mbmi))  // single reference: 0 or 4
+ pred_context = 4 * (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]));
+ else  // compound: 1 or 2
+ pred_context = 1 + (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]) ||
+ !CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[1]));
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {  // comp/comp: 1 or 2
+ pred_context = 1 + (!CHECK_BWDREF_OR_ALTREF(above0) ||
+ !CHECK_BWDREF_OR_ALTREF(above1) ||
+ !CHECK_BWDREF_OR_ALTREF(left0) ||
+ !CHECK_BWDREF_OR_ALTREF(left1));
+ } else if (above_has_second || left_has_second) {  // comp/single
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;  // the single neighbour's reference
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;  // compound neighbour, slot 0
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;  // compound neighbour, slot 1
+
+ if (!CHECK_BWDREF_OR_ALTREF(rfs))  // 3 or 4
+ pred_context = 3 + (!CHECK_BWDREF_OR_ALTREF(crf1) ||
+ !CHECK_BWDREF_OR_ALTREF(crf2));
+ else  // 0 or 1
+ pred_context =
+ !CHECK_BWDREF_OR_ALTREF(crf1) || !CHECK_BWDREF_OR_ALTREF(crf2);
+ } else {  // single/single: 0, 2 or 4
+ pred_context = 2 * (!CHECK_BWDREF_OR_ALTREF(above0)) +
+ 2 * (!CHECK_BWDREF_OR_ALTREF(left0));
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+ if (!is_inter_block(edge_mbmi)) { // intra
+ pred_context = 2;
+ } else { // inter
+ if (!has_second_ref(edge_mbmi))  // single reference: 0 or 4
+ pred_context = 4 * (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]));
+ else  // compound: 1 or 2
+ pred_context = 1 + (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]) ||
+ !CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[1]));
+ }
+ } else { // no edges available
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
+
+// Returns the context for the bit that signals whether the single reference
+// is ALTREF_FRAME or BWDREF_FRAME, knowing that it shall be either of these 2
+// choices. The context is derived from the above and left neighbors.
+// NOTE(zoeliu): The probability of ref_frame[0] is ALTREF_FRAME, conditioning
+// on it is either ALTREF_FRAME/BWDREF_FRAME.
+int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+ if (!has_second_ref(edge_mbmi)) {
+ if (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]))
+ pred_context = 3;
+ else
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME);
+ } else {  // the inter neighbor has two references
+ pred_context = 1 +
+ 2 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME ||
+ edge_mbmi->ref_frame[1] == BWDREF_FRAME);
+ }
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {  // two refs on both sides
+ if (above0 == left0 && above1 == left1)
+ pred_context =
+ 3 * (above0 == BWDREF_FRAME || above1 == BWDREF_FRAME ||
+ left0 == BWDREF_FRAME || left1 == BWDREF_FRAME);
+ else
+ pred_context = 2;
+ } else if (above_has_second || left_has_second) {
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+ // rfs: ref of the one-ref neighbor; crf1/crf2: refs of the two-ref one.
+ if (rfs == BWDREF_FRAME)
+ pred_context = 3 + (crf1 == BWDREF_FRAME || crf2 == BWDREF_FRAME);
+ else if (rfs == ALTREF_FRAME)
+ pred_context = (crf1 == BWDREF_FRAME || crf2 == BWDREF_FRAME);
+ else
+ pred_context = 1 + 2 * (crf1 == BWDREF_FRAME || crf2 == BWDREF_FRAME);
+ } else {  // one reference on each side
+ if (!CHECK_BWDREF_OR_ALTREF(above0) && !CHECK_BWDREF_OR_ALTREF(left0)) {
+ pred_context = 2 + (above0 == left0);
+ } else if (!CHECK_BWDREF_OR_ALTREF(above0) ||
+ !CHECK_BWDREF_OR_ALTREF(left0)) {
+ const MV_REFERENCE_FRAME edge0 =
+ !CHECK_BWDREF_OR_ALTREF(above0) ? left0 : above0;
+ pred_context = 4 * (edge0 == BWDREF_FRAME);
+ } else {
+ pred_context =
+ 2 * (above0 == BWDREF_FRAME) + 2 * (left0 == BWDREF_FRAME);
+ }
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi) ||
+ (!CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]) &&
+ !has_second_ref(edge_mbmi)))
+ pred_context = 2;
+ else if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME);
+ else
+ pred_context = 3 * (edge_mbmi->ref_frame[0] == BWDREF_FRAME ||
+ edge_mbmi->ref_frame[1] == BWDREF_FRAME);
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
+
+// Returns the context for the bit that signals whether the single reference
+// is LAST3/GOLDEN or LAST2/LAST, knowing that it shall be either of these 2
+// choices. The context is derived from the above and left neighbors.
+// NOTE(zoeliu): The probability of ref_frame[0] is LAST3/GOLDEN, conditioning
+// on it is either LAST3/GOLDEN/LAST2/LAST.
+int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+ if (!has_second_ref(edge_mbmi)) {
+ if (CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]))
+ pred_context = 3;
+ else
+ pred_context = 4 * CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]);
+ } else {  // the inter neighbor has two references
+ pred_context = 1 +
+ 2 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) ||
+ CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[1]));
+ }
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {  // two refs on both sides
+ if (above0 == left0 && above1 == left1)
+ pred_context =
+ 3 * (CHECK_LAST_OR_LAST2(above0) || CHECK_LAST_OR_LAST2(above1) ||
+ CHECK_LAST_OR_LAST2(left0) || CHECK_LAST_OR_LAST2(left1));
+ else
+ pred_context = 2;
+ } else if (above_has_second || left_has_second) {
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+ // rfs: ref of the one-ref neighbor; crf1/crf2: refs of the two-ref one.
+ if (CHECK_LAST_OR_LAST2(rfs))
+ pred_context =
+ 3 + (CHECK_LAST_OR_LAST2(crf1) || CHECK_LAST_OR_LAST2(crf2));
+ else if (CHECK_GOLDEN_OR_LAST3(rfs))
+ pred_context =
+ (CHECK_LAST_OR_LAST2(crf1) || CHECK_LAST_OR_LAST2(crf2));
+ else
+ pred_context =
+ 1 + 2 * (CHECK_LAST_OR_LAST2(crf1) || CHECK_LAST_OR_LAST2(crf2));
+ } else {  // one reference on each side
+ if (CHECK_BWDREF_OR_ALTREF(above0) && CHECK_BWDREF_OR_ALTREF(left0)) {
+ pred_context = 2 + (above0 == left0);
+ } else if (CHECK_BWDREF_OR_ALTREF(above0) ||
+ CHECK_BWDREF_OR_ALTREF(left0)) {
+ const MV_REFERENCE_FRAME edge0 =
+ CHECK_BWDREF_OR_ALTREF(above0) ? left0 : above0;
+ pred_context = 4 * CHECK_LAST_OR_LAST2(edge0);
+ } else {
+ pred_context =
+ 2 * CHECK_LAST_OR_LAST2(above0) + 2 * CHECK_LAST_OR_LAST2(left0);
+ }
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi) ||
+ (CHECK_BWDREF_OR_ALTREF(edge_mbmi->ref_frame[0]) &&
+ !has_second_ref(edge_mbmi)))
+ pred_context = 2;
+ else if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]));
+ else
+ pred_context = 3 * (CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) ||
+ CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[1]));
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
+
+// Returns the context for the bit that signals whether the single reference
+// is LAST2_FRAME or LAST_FRAME, knowing that it shall be either of these 2
+// choices. The context is derived from the above and left neighbors.
+// NOTE(zoeliu): The probability of ref_frame[0] is LAST2_FRAME, conditioning
+// on it is either LAST2_FRAME/LAST_FRAME.
+int vp10_get_pred_context_single_ref_p4(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+ if (!has_second_ref(edge_mbmi)) {
+ if (!CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]))
+ pred_context = 3;
+ else
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+ } else {  // the inter neighbor has two references
+ pred_context = 1 +
+ 2 * (edge_mbmi->ref_frame[0] == LAST_FRAME ||
+ edge_mbmi->ref_frame[1] == LAST_FRAME);
+ }
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {  // two refs on both sides
+ if (above0 == left0 && above1 == left1)
+ pred_context = 3 * (above0 == LAST_FRAME || above1 == LAST_FRAME ||
+ left0 == LAST_FRAME || left1 == LAST_FRAME);
+ else
+ pred_context = 2;
+ } else if (above_has_second || left_has_second) {
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+ // rfs: ref of the one-ref neighbor; crf1/crf2: refs of the two-ref one.
+ if (rfs == LAST_FRAME)
+ pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+ else if (rfs == LAST2_FRAME)
+ pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+ else
+ pred_context = 1 + 2 * (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+ } else {  // one reference on each side
+ if (!CHECK_LAST_OR_LAST2(above0) && !CHECK_LAST_OR_LAST2(left0)) {
+ pred_context = 2 + (above0 == left0);
+ } else if (!CHECK_LAST_OR_LAST2(above0) ||
+ !CHECK_LAST_OR_LAST2(left0)) {
+ const MV_REFERENCE_FRAME edge0 =
+ !CHECK_LAST_OR_LAST2(above0) ? left0 : above0;
+ pred_context = 4 * (edge0 == LAST_FRAME);
+ } else {
+ pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME);
+ }
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi) ||
+ (!CHECK_LAST_OR_LAST2(edge_mbmi->ref_frame[0]) &&
+ !has_second_ref(edge_mbmi)))
+ pred_context = 2;
+ else if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+ else
+ pred_context = 3 * (edge_mbmi->ref_frame[0] == LAST_FRAME ||
+ edge_mbmi->ref_frame[1] == LAST_FRAME);
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
+
+// Returns the context for the bit that signals whether the single reference
+// is GOLDEN_FRAME or LAST3_FRAME, knowing that it shall be either of these 2
+// choices. The context is derived from the above and left neighbors.
+// NOTE(zoeliu): The probability of ref_frame[0] is GOLDEN_FRAME, conditioning
+// on it is either GOLDEN_FRAME/LAST3_FRAME.
+int vp10_get_pred_context_single_ref_p5(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+
+ // Note:
+ // The mode info data structure has a one element border above and to the
+ // left of the entries corresponding to real macroblocks.
+ // The prediction flags in these dummy entries are initialized to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+ if (!has_second_ref(edge_mbmi)) {
+ if (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]))
+ pred_context = 3;
+ else
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST3_FRAME);
+ } else {  // the inter neighbor has two references
+ pred_context = 1 +
+ 2 * (edge_mbmi->ref_frame[0] == LAST3_FRAME ||
+ edge_mbmi->ref_frame[1] == LAST3_FRAME);
+ }
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {  // two refs on both sides
+ if (above0 == left0 && above1 == left1)
+ pred_context = 3 * (above0 == LAST3_FRAME || above1 == LAST3_FRAME ||
+ left0 == LAST3_FRAME || left1 == LAST3_FRAME);
+ else
+ pred_context = 2;
+ } else if (above_has_second || left_has_second) {
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+ // rfs: ref of the one-ref neighbor; crf1/crf2: refs of the two-ref one.
+ if (rfs == LAST3_FRAME)
+ pred_context = 3 + (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME);
+ else if (rfs == GOLDEN_FRAME)
+ pred_context = (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME);
+ else
+ pred_context = 1 + 2 * (crf1 == LAST3_FRAME || crf2 == LAST3_FRAME);
+ } else {  // one reference on each side
+ if (!CHECK_GOLDEN_OR_LAST3(above0) && !CHECK_GOLDEN_OR_LAST3(left0)) {
+ pred_context = 2 + (above0 == left0);
+ } else if (!CHECK_GOLDEN_OR_LAST3(above0) ||
+ !CHECK_GOLDEN_OR_LAST3(left0)) {
+ const MV_REFERENCE_FRAME edge0 =
+ !CHECK_GOLDEN_OR_LAST3(above0) ? left0 : above0;
+ pred_context = 4 * (edge0 == LAST3_FRAME);
+ } else {
+ pred_context =
+ 2 * (above0 == LAST3_FRAME) + 2 * (left0 == LAST3_FRAME);
+ }
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi) ||
+ (!CHECK_GOLDEN_OR_LAST3(edge_mbmi->ref_frame[0]) &&
+ !has_second_ref(edge_mbmi)))
+ pred_context = 2;
+ else if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST3_FRAME);
+ else
+ pred_context = 3 * (edge_mbmi->ref_frame[0] == LAST3_FRAME ||
+ edge_mbmi->ref_frame[1] == LAST3_FRAME);
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
+
+#else // CONFIG_EXT_REFS
+
+int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+ // Derives the context from whether the above/left neighbors reference
+ // LAST_FRAME. Note: the mode info data structure has a one element border
+ // above and to the left of the entries corresponding to real macroblocks;
+ // the prediction flags in these dummy entries are initialized to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+ if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+ else  // the inter neighbor has two references
+ pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
+ edge_mbmi->ref_frame[1] == LAST_FRAME);
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {  // two refs on both sides
+ pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME ||
+ left0 == LAST_FRAME || left1 == LAST_FRAME);
+ } else if (above_has_second || left_has_second) {
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+ // rfs: ref of the one-ref neighbor; crf1/crf2: refs of the two-ref one.
+ if (rfs == LAST_FRAME)
+ pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+ else
+ pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME);
+ } else {  // one reference on each side
+ pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME);
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+ if (!is_inter_block(edge_mbmi)) { // intra
+ pred_context = 2;
+ } else { // inter
+ if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == LAST_FRAME);
+ else
+ pred_context = 1 + (edge_mbmi->ref_frame[0] == LAST_FRAME ||
+ edge_mbmi->ref_frame[1] == LAST_FRAME);
+ }
+ } else { // no edges available
+ pred_context = 2;
+ }
+
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
+
+int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
+ int pred_context;
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+
+ // Derives the context from how the above/left neighbors reference
+ // GOLDEN_FRAME and LAST_FRAME. Note: the mode info data structure has a one
+ // element border above and to the left of the entries corresponding to real
+ // macroblocks; the prediction flags in these dummy entries are set to 0.
+ if (has_above && has_left) { // both edges available
+ const int above_intra = !is_inter_block(above_mbmi);
+ const int left_intra = !is_inter_block(left_mbmi);
+
+ if (above_intra && left_intra) { // intra/intra
+ pred_context = 2;
+ } else if (above_intra || left_intra) { // intra/inter or inter/intra
+ const MB_MODE_INFO *edge_mbmi = above_intra ? left_mbmi : above_mbmi;
+ if (!has_second_ref(edge_mbmi)) {
+ if (edge_mbmi->ref_frame[0] == LAST_FRAME)
+ pred_context = 3;
+ else
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
+ } else {  // the inter neighbor has two references
+ pred_context = 1 +
+ 2 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
+ edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
+ }
+ } else { // inter/inter
+ const int above_has_second = has_second_ref(above_mbmi);
+ const int left_has_second = has_second_ref(left_mbmi);
+ const MV_REFERENCE_FRAME above0 = above_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME above1 = above_mbmi->ref_frame[1];
+ const MV_REFERENCE_FRAME left0 = left_mbmi->ref_frame[0];
+ const MV_REFERENCE_FRAME left1 = left_mbmi->ref_frame[1];
+
+ if (above_has_second && left_has_second) {  // two refs on both sides
+ if (above0 == left0 && above1 == left1)
+ pred_context =
+ 3 * (above0 == GOLDEN_FRAME || above1 == GOLDEN_FRAME ||
+ left0 == GOLDEN_FRAME || left1 == GOLDEN_FRAME);
+ else
+ pred_context = 2;
+ } else if (above_has_second || left_has_second) {
+ const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0;
+ const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1;
+ // rfs: ref of the one-ref neighbor; crf1/crf2: refs of the two-ref one.
+ if (rfs == GOLDEN_FRAME)
+ pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
+ else if (rfs != GOLDEN_FRAME && rfs != LAST_FRAME)
+ pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME;
+ else
+ pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME);
+ } else {  // one reference on each side
+ if (above0 == LAST_FRAME && left0 == LAST_FRAME) {
+ pred_context = 3;
+ } else if (above0 == LAST_FRAME || left0 == LAST_FRAME) {
+ const MV_REFERENCE_FRAME edge0 =
+ (above0 == LAST_FRAME) ? left0 : above0;
+ pred_context = 4 * (edge0 == GOLDEN_FRAME);
+ } else {
+ pred_context =
+ 2 * (above0 == GOLDEN_FRAME) + 2 * (left0 == GOLDEN_FRAME);
+ }
+ }
+ }
+ } else if (has_above || has_left) { // one edge available
+ const MB_MODE_INFO *edge_mbmi = has_above ? above_mbmi : left_mbmi;
+
+ if (!is_inter_block(edge_mbmi) ||
+ (edge_mbmi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mbmi)))
+ pred_context = 2;
+ else if (!has_second_ref(edge_mbmi))
+ pred_context = 4 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME);
+ else
+ pred_context = 3 * (edge_mbmi->ref_frame[0] == GOLDEN_FRAME ||
+ edge_mbmi->ref_frame[1] == GOLDEN_FRAME);
+ } else { // no edges available (2)
+ pred_context = 2;
+ }
+ assert(pred_context >= 0 && pred_context < REF_CONTEXTS);
+ return pred_context;
+}
+
+#endif // CONFIG_EXT_REFS
diff --git a/av1/common/pred_common.h b/av1/common/pred_common.h
new file mode 100644
index 0000000..9a3e3f1
--- /dev/null
+++ b/av1/common/pred_common.h
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_PRED_COMMON_H_
+#define VP10_COMMON_PRED_COMMON_H_
+
+#include "av1/common/blockd.h"
+#include "av1/common/onyxc_int.h"
+#include "aom_dsp/vpx_dsp_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static INLINE int get_segment_id(const VP10_COMMON *cm,
+ const uint8_t *segment_ids, BLOCK_SIZE bsize,
+ int mi_row, int mi_col) {  // smallest id in block
+ const int mi_offset = mi_row * cm->mi_cols + mi_col;  // stride is mi_cols
+ const int bw = num_8x8_blocks_wide_lookup[bsize];
+ const int bh = num_8x8_blocks_high_lookup[bsize];
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);  // clip to frame width
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);  // clip to frame height
+ int x, y, segment_id = MAX_SEGMENTS;  // running minimum
+
+ for (y = 0; y < ymis; ++y)
+ for (x = 0; x < xmis; ++x)
+ segment_id =
+ VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]);
+
+ assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
+ return segment_id;
+}
+
+static INLINE int vp10_get_pred_context_seg_id(const MACROBLOCKD *xd) {
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const int above_sip =
+ (above_mi != NULL) ? above_mi->mbmi.seg_id_predicted : 0;
+ const int left_sip = (left_mi != NULL) ? left_mi->mbmi.seg_id_predicted : 0;
+
+ return above_sip + left_sip;  // a NULL neighbor contributes 0
+}
+
+static INLINE vpx_prob vp10_get_pred_prob_seg_id(
+ const struct segmentation_probs *segp, const MACROBLOCKD *xd) {
+ return segp->pred_probs[vp10_get_pred_context_seg_id(xd)];  // by context
+}
+
+static INLINE int vp10_get_skip_context(const MACROBLOCKD *xd) {
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const int above_skip = (above_mi != NULL) ? above_mi->mbmi.skip : 0;
+ const int left_skip = (left_mi != NULL) ? left_mi->mbmi.skip : 0;
+ return above_skip + left_skip;  // a NULL neighbor contributes 0
+}
+
+static INLINE vpx_prob vp10_get_skip_prob(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ return cm->fc->skip_probs[vp10_get_skip_context(xd)];  // by skip context
+}
+
+#if CONFIG_DUAL_FILTER
+int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd, int dir);
+#else
+int vp10_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
+#endif
+
+#if CONFIG_EXT_INTRA
+int vp10_get_pred_context_intra_interp(const MACROBLOCKD *xd);
+#endif // CONFIG_EXT_INTRA
+
+int vp10_get_intra_inter_context(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_intra_inter_prob(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {  // looked up by context
+ return cm->fc->intra_inter_prob[vp10_get_intra_inter_context(xd)];
+}
+
+int vp10_get_reference_mode_context(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_reference_mode_prob(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {  // looked up by context
+ return cm->fc->comp_inter_prob[vp10_get_reference_mode_context(cm, xd)];
+}
+
+int vp10_get_pred_context_comp_ref_p(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ const int pred_context = vp10_get_pred_context_comp_ref_p(cm, xd);
+ return cm->fc->comp_ref_prob[pred_context][0];  // slot 0 of the row
+}
+
+#if CONFIG_EXT_REFS
+int vp10_get_pred_context_comp_ref_p1(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p1(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ const int pred_context = vp10_get_pred_context_comp_ref_p1(cm, xd);
+ return cm->fc->comp_ref_prob[pred_context][1];  // slot 1 of the row
+}
+
+int vp10_get_pred_context_comp_ref_p2(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_comp_ref_p2(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ const int pred_context = vp10_get_pred_context_comp_ref_p2(cm, xd);
+ return cm->fc->comp_ref_prob[pred_context][2];  // slot 2 of the row
+}
+
+int vp10_get_pred_context_comp_bwdref_p(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_comp_bwdref_p(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {
+ const int pred_context = vp10_get_pred_context_comp_bwdref_p(cm, xd);
+ return cm->fc->comp_bwdref_prob[pred_context][0];  // slot 0 of the row
+}
+
+#endif // CONFIG_EXT_REFS
+
+int vp10_get_pred_context_single_ref_p1(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p1(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {  // slot 0, row = p1 context
+ return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p1(xd)][0];
+}
+
+int vp10_get_pred_context_single_ref_p2(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p2(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {  // slot 1, row = p2 context
+ return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p2(xd)][1];
+}
+
+#if CONFIG_EXT_REFS
+int vp10_get_pred_context_single_ref_p3(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p3(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {  // slot 2, row = p3 context
+ return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p3(xd)][2];
+}
+
+int vp10_get_pred_context_single_ref_p4(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p4(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {  // slot 3, row = p4 context
+ return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p4(xd)][3];
+}
+
+int vp10_get_pred_context_single_ref_p5(const MACROBLOCKD *xd);
+
+static INLINE vpx_prob vp10_get_pred_prob_single_ref_p5(const VP10_COMMON *cm,
+ const MACROBLOCKD *xd) {  // slot 4, row = p5 context
+ return cm->fc->single_ref_prob[vp10_get_pred_context_single_ref_p5(xd)][4];
+}
+#endif // CONFIG_EXT_REFS
+
+// Returns a context number (0 or 1) for the transform size of the current
+// block, computed from the above and left neighbors' transform sizes.
+// The mode info data structure has a one element border above and to the
+// left of the entries corresponding to real blocks (dummy flags are 0).
+static INLINE int get_tx_size_context(const MACROBLOCKD *xd) {
+ const int max_tx_size = max_txsize_lookup[xd->mi[0]->mbmi.sb_type];
+ const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+ const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+ const int has_above = xd->up_available;
+ const int has_left = xd->left_available;
+ int above_ctx = (has_above && !above_mbmi->skip)
+ ? (int)txsize_sqr_map[above_mbmi->tx_size]
+ : max_tx_size;  // unavailable or skipped neighbor
+ int left_ctx = (has_left && !left_mbmi->skip)
+ ? (int)txsize_sqr_map[left_mbmi->tx_size]
+ : max_tx_size;  // unavailable or skipped neighbor
+ assert(xd->mi[0]->mbmi.sb_type >= BLOCK_8X8);
+ if (!has_left) left_ctx = above_ctx;  // reuse the other side's value
+
+ if (!has_above) above_ctx = left_ctx;  // reuse the other side's value
+
+ return (above_ctx + left_ctx) > max_tx_size;
+}
+
+#if CONFIG_VAR_TX
+static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd,
+ MB_MODE_INFO *mbmi, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, int blk_row, int blk_col,
+ TX_SIZE max_tx_size, int ctx) {  // recursive tx-size counter
+ const struct macroblockd_plane *const pd = &xd->plane[0];
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)  // negative value shrinks the row limit
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)  // negative value shrinks the column limit
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+
+ if (tx_size == plane_tx_size) {  // sizes match: count it and stop
+ ++xd->counts->tx_size[max_tx_size - TX_8X8][ctx][tx_size];
+ mbmi->tx_size = tx_size;
+ } else {  // recurse into four sub-blocks with tx_size - 1
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;  // shift used for the sub-block row/column offsets below
+
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
+ update_tx_counts(cm, xd, mbmi, plane_bsize, tx_size - 1, offsetr, offsetc,
+ max_tx_size, ctx);  // cm is only forwarded here
+ }
+ }
+}
+
+static INLINE void inter_block_tx_count_update(VP10_COMMON *cm, MACROBLOCKD *xd,
+ MB_MODE_INFO *mbmi,
+ BLOCK_SIZE plane_bsize,
+ int ctx) {  // runs update_tx_counts over the whole block
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ // NOTE(review): named bh but read from the *wide* table; steps both loops.
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+
+ for (idy = 0; idy < mi_height; idy += bh)
+ for (idx = 0; idx < mi_width; idx += bh)
+ update_tx_counts(cm, xd, mbmi, plane_bsize, max_tx_size, idy, idx,
+ max_tx_size, ctx);
+}
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_PRED_COMMON_H_
diff --git a/av1/common/quant_common.c b/av1/common/quant_common.c
new file mode 100644
index 0000000..79d8fb8
--- /dev/null
+++ b/av1/common/quant_common.c
@@ -0,0 +1,11325 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/common.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/entropy.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/seg_common.h"
+#include "av1/common/blockd.h"
+
+#if CONFIG_AOM_QM
+// Forward declaration of a file-local helper that operates on two tables of
+// qm_val_t pointers, each dimensioned [NUM_QM_LEVELS][2][2][TX_SIZES].
+// NOTE(review): the definition is expected later in this file; it is not
+// visible from this part of the source.
+static void make_qmatrices(qm_val_t *wmatrix[NUM_QM_LEVELS][2][2][TX_SIZES],
+ qm_val_t *iwmatrix[NUM_QM_LEVELS][2][2][TX_SIZES]);
+#endif
+#if CONFIG_NEW_QUANT
+
+// Bin widths are expressed as a fraction over 128 of the quant stepsize,
+// for the quantization bins 0-4.
+// So a value x indicates that the bin width is actually x/128 of the
+// nominal quantization step. For the zero bin, the width covers only
+// one side of zero, so the actual width is twice that.
+//
+// Functions with "nuq" in their name implement non-uniform quantization.
+// TODO(sarahparker, debargha): Optimize these tables
+
+// Non-uniform quantizer profile for a single coefficient band.
+// `knots` holds the per-bin widths that get_cuml_bins_nuq() accumulates and
+// scales by q / 128; `doff` is the offset read through quant_to_doff_fixed()
+// when vp10_get_dequant_val_nuq() builds the dequantization values.
+typedef struct {
+ uint8_t knots[NUQ_KNOTS]; // offsets
+ uint8_t doff; // dequantization
+} qprofile_type;
+
+// Per-band profiles selected by get_nuq_knots() and quant_to_doff_fixed()
+// when qindex is 0. Indexed by coefficient band.
+static const qprofile_type nuq_lossless[COEF_BANDS] = {
+ { { 64, 128, 128 }, 0 }, // dc, band 0
+ { { 64, 128, 128 }, 0 }, // band 1
+ { { 64, 128, 128 }, 0 }, // band 2
+ { { 64, 128, 128 }, 0 }, // band 3
+ { { 64, 128, 128 }, 0 }, // band 4
+ { { 64, 128, 128 }, 0 }, // band 5
+};
+
+// Per-band profiles used for every non-zero qindex. Indexed as
+// nuq[q_profile][qrange_from_qindex(qindex)][band] by get_nuq_knots() and
+// quant_to_doff_fixed(). Entries for profiles 1 and 2 are only compiled in
+// when QUANT_PROFILES is large enough.
+static const qprofile_type nuq[QUANT_PROFILES][QUANT_RANGES][COEF_BANDS] = {
+ { {
+ { { 64, 128, 128 }, 0 }, // dc, band 0
+ { { 64, 128, 128 }, 0 }, // band 1
+ { { 64, 128, 128 }, 0 }, // band 2
+ { { 64, 128, 128 }, 0 }, // band 3
+ { { 64, 128, 128 }, 0 }, // band 4
+ { { 64, 128, 128 }, 0 } // band 5
+ },
+ {
+ { { 64, 128, 128 }, 0 }, // dc, band 0
+ { { 64, 128, 128 }, 0 }, // band 1
+ { { 64, 128, 128 }, 0 }, // band 2
+ { { 64, 128, 128 }, 0 }, // band 3
+ { { 64, 128, 128 }, 0 }, // band 4
+ { { 64, 128, 128 }, 0 } // band 5
+ } },
+#if QUANT_PROFILES > 1
+ { {
+ { { 64, 128, 128 }, 0 }, // dc, band 0
+ { { 64, 128, 128 }, 0 }, // band 1
+ { { 64, 128, 128 }, 0 }, // band 2
+ { { 64, 128, 128 }, 0 }, // band 3
+ { { 64, 128, 128 }, 0 }, // band 4
+ { { 64, 128, 128 }, 0 } // band 5
+ },
+ {
+ { { 64, 128, 128 }, 0 }, // dc, band 0
+ { { 64, 128, 128 }, 0 }, // band 1
+ { { 64, 128, 128 }, 0 }, // band 2
+ { { 64, 128, 128 }, 0 }, // band 3
+ { { 64, 128, 128 }, 0 }, // band 4
+ { { 64, 128, 128 }, 0 } // band 5
+ } },
+#if QUANT_PROFILES > 2
+ { {
+ { { 64, 128, 128 }, 0 }, // dc, band 0
+ { { 64, 128, 128 }, 0 }, // band 1
+ { { 64, 128, 128 }, 0 }, // band 2
+ { { 64, 128, 128 }, 0 }, // band 3
+ { { 64, 128, 128 }, 0 }, // band 4
+ { { 64, 128, 128 }, 0 }, // band 5
+ },
+ {
+ { { 64, 128, 128 }, 0 }, // dc, band 0
+ { { 64, 128, 128 }, 0 }, // band 1
+ { { 64, 128, 128 }, 0 }, // band 2
+ { { 64, 128, 128 }, 0 }, // band 3
+ { { 64, 128, 128 }, 0 }, // band 4
+ { { 64, 128, 128 }, 0 }, // band 5
+ } }
+#endif // QUANT_PROFILES > 2
+#endif // QUANT_PROFILES > 1
+};
+
+// Maps a quantizer index to a quality range: 1 (high quality) when qindex is
+// below 140, otherwise 0 (low quality).
+static INLINE int qrange_from_qindex(int qindex) {
+  if (qindex < 140) return 1;
+  return 0;
+}
+
+// Returns the knot array for `band`: taken from nuq_lossless when qindex is 0,
+// otherwise from nuq[q_profile] at the quality range derived from qindex.
+static const uint8_t *get_nuq_knots(int qindex, int band, int q_profile) {
+  const qprofile_type *const profile =
+      qindex ? &nuq[q_profile][qrange_from_qindex(qindex)][band]
+             : &nuq_lossless[band];
+  return profile->knots;
+}
+
+// Returns the `doff` field for `band`: taken from nuq_lossless when qindex is
+// 0, otherwise from nuq[q_profile] at the quality range derived from qindex.
+static INLINE int16_t quant_to_doff_fixed(int qindex, int band, int q_profile) {
+  const qprofile_type *const profile =
+      qindex ? &nuq[q_profile][qrange_from_qindex(qindex)][band]
+             : &nuq_lossless[band];
+  return profile->doff;
+}
+
+// Fills cuml_bins[0..NUQ_KNOTS-1] with the cumulative bin boundaries: the
+// running sum of the knots for (qindex, band, q_profile), multiplied by q and
+// rounded down by 7 bits (i.e. divided by 128 with rounding).
+static INLINE void get_cuml_bins_nuq(int q, int qindex, int band,
+                                     tran_low_t *cuml_bins, int q_profile) {
+  const uint8_t *const knots = get_nuq_knots(qindex, band, q_profile);
+  int16_t running = 0;  // cumulative knot sum up to and including knot k
+  int k;
+
+  for (k = 0; k < NUQ_KNOTS; ++k) {
+    running += knots[k];
+    cuml_bins[k] = ROUND_POWER_OF_TWO(running * q, 7);
+  }
+}
+
+// Computes the dequantization values dq[0..NUQ_KNOTS] for one band.
+//   q, qindex, band, q_profile: select the quantizer step and the profile.
+//   dq:        output array with NUQ_KNOTS + 1 entries; dq[0] is always 0.
+//   cuml_bins: optional output for the cumulative bin boundaries; when it is
+//              null a local scratch array is used instead.
+void vp10_get_dequant_val_nuq(int q, int qindex, int band, tran_low_t *dq,
+                              tran_low_t *cuml_bins, int q_profile) {
+  const uint8_t *const knots = get_nuq_knots(qindex, band, q_profile);
+  // The profile offset does not depend on the knot index, so read it once.
+  const tran_low_t base_doff = quant_to_doff_fixed(qindex, band, q_profile);
+  tran_low_t scratch_bins[NUQ_KNOTS];
+  tran_low_t *const bins = cuml_bins ? cuml_bins : scratch_bins;
+  int k;
+
+  get_cuml_bins_nuq(q, qindex, band, bins, q_profile);
+
+  dq[0] = 0;
+  for (k = 1; k < NUQ_KNOTS; ++k) {
+    // Offset scaled by the width of this bin (knots[k] / 128).
+    const tran_low_t scaled_doff = ROUND_POWER_OF_TWO(base_doff * knots[k], 7);
+    dq[k] = bins[k - 1] +
+            ROUND_POWER_OF_TWO((knots[k] - scaled_doff * 2) * q, 8);
+  }
+  // Last entry: boundary of the final bin plus an offset-adjusted half step.
+  dq[NUQ_KNOTS] =
+      bins[NUQ_KNOTS - 1] + ROUND_POWER_OF_TWO((64 - base_doff) * q, 7);
+}
+
+// Dequantizes a non-negative quantized level `v`: levels up to NUQ_KNOTS are
+// read from dq[], larger levels extend dq[NUQ_KNOTS] linearly in steps of q.
+tran_low_t vp10_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq) {
+  if (v > NUQ_KNOTS) return dq[NUQ_KNOTS] + (v - NUQ_KNOTS) * q;
+  return dq[v];
+}
+
+// Dequantizes a signed quantized level: the magnitude comes from
+// vp10_dequant_abscoeff_nuq() and the sign of `v` is re-applied.
+tran_low_t vp10_dequant_coeff_nuq(int v, int q, const tran_low_t *dq) {
+  const tran_low_t magnitude = vp10_dequant_abscoeff_nuq(abs(v), q, dq);
+  if (v < 0) return -magnitude;
+  return magnitude;
+}
+#endif // CONFIG_NEW_QUANT
+
+// Values returned by vp10_dc_quant() for VPX_BITS_8 (and for every bit depth
+// when CONFIG_VP9_HIGHBITDEPTH is off). Indexed by qindex + delta clamped to
+// [0, MAXQ].
+static const int16_t dc_qlookup[QINDEX_RANGE] = {
+ 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18,
+ 19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30,
+ 31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42,
+ 43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53,
+ 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65,
+ 66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76,
+ 77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88,
+ 90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110,
+ 111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134,
+ 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164,
+ 166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202,
+ 205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247,
+ 250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300,
+ 304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364,
+ 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441,
+ 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549,
+ 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736,
+ 755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139,
+ 1184, 1232, 1282, 1336,
+};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Values returned by vp10_dc_quant() for VPX_BITS_10. Indexed by
+// qindex + delta clamped to [0, MAXQ].
+static const int16_t dc_qlookup_10[QINDEX_RANGE] = {
+ 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37,
+ 40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82,
+ 86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132,
+ 136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182,
+ 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230,
+ 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276,
+ 280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321,
+ 324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387,
+ 394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466,
+ 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567,
+ 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687,
+ 698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831,
+ 844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001,
+ 1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202,
+ 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436,
+ 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717,
+ 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088,
+ 2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675,
+ 2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823,
+ 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
+};
+
+// Values returned by vp10_dc_quant() for VPX_BITS_12. Indexed by
+// qindex + delta clamped to [0, MAXQ].
+static const int16_t dc_qlookup_12[QINDEX_RANGE] = {
+ 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91,
+ 103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237,
+ 251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405,
+ 421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580,
+ 596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752,
+ 768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919,
+ 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080,
+ 1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234,
+ 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419,
+ 1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692,
+ 1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957,
+ 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334,
+ 2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746,
+ 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226,
+ 3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788,
+ 3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420,
+ 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153,
+ 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984,
+ 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966,
+ 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214,
+ 8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031,
+ 10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118,
+ 13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949,
+ 19718, 20521, 21387,
+};
+#endif
+
+// Values returned by vp10_ac_quant() for VPX_BITS_8 (and for every bit depth
+// when CONFIG_VP9_HIGHBITDEPTH is off). Indexed by qindex + delta clamped to
+// [0, MAXQ].
+static const int16_t ac_qlookup[QINDEX_RANGE] = {
+ 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
+ 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
+ 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
+ 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
+ 98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
+ 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144,
+ 146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179,
+ 182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223,
+ 227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280,
+ 285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353,
+ 359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448,
+ 456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571,
+ 582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729,
+ 743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933,
+ 951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196,
+ 1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537,
+ 1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
+};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Values returned by vp10_ac_quant() for VPX_BITS_10. Indexed by
+// qindex + delta clamped to [0, MAXQ].
+static const int16_t ac_qlookup_10[QINDEX_RANGE] = {
+ 4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40,
+ 44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92,
+ 96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149,
+ 154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208,
+ 213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267,
+ 271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324,
+ 328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379,
+ 384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466,
+ 474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571,
+ 579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713,
+ 725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889,
+ 905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118,
+ 1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411,
+ 1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791,
+ 1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283,
+ 2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915,
+ 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731,
+ 3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784,
+ 4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148,
+ 6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312,
+};
+
+// Values returned by vp10_ac_quant() for VPX_BITS_12. Indexed by
+// qindex + delta clamped to [0, MAXQ].
+static const int16_t ac_qlookup_12[QINDEX_RANGE] = {
+ 4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99,
+ 112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263,
+ 280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456,
+ 475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660,
+ 679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865,
+ 884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067,
+ 1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264,
+ 1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457,
+ 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693,
+ 1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052,
+ 2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411,
+ 2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943,
+ 2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555,
+ 3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310,
+ 4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256,
+ 5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410,
+ 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867,
+ 8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660,
+ 9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885,
+ 12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637,
+ 14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062,
+ 18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334,
+ 22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599,
+ 28143, 28687, 29247,
+};
+#endif
+
+// Returns the DC quantizer value for `qindex + delta` (clamped to [0, MAXQ])
+// from the lookup table matching `bit_depth`. Without
+// CONFIG_VP9_HIGHBITDEPTH the 8-bit table is always used. An unsupported
+// bit depth trips the assert and returns -1.
+int16_t vp10_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int16_t *table;
+
+  if (bit_depth == VPX_BITS_8) {
+    table = dc_qlookup;
+  } else if (bit_depth == VPX_BITS_10) {
+    table = dc_qlookup_10;
+  } else if (bit_depth == VPX_BITS_12) {
+    table = dc_qlookup_12;
+  } else {
+    assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+    return -1;
+  }
+  return table[clamp(qindex + delta, 0, MAXQ)];
+#else
+  (void)bit_depth;
+  return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
+#endif
+}
+
+// Returns the AC quantizer value for `qindex + delta` (clamped to [0, MAXQ])
+// from the lookup table matching `bit_depth`. Without
+// CONFIG_VP9_HIGHBITDEPTH the 8-bit table is always used. An unsupported
+// bit depth trips the assert and returns -1.
+int16_t vp10_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int16_t *table;
+
+  if (bit_depth == VPX_BITS_8) {
+    table = ac_qlookup;
+  } else if (bit_depth == VPX_BITS_10) {
+    table = ac_qlookup_10;
+  } else if (bit_depth == VPX_BITS_12) {
+    table = ac_qlookup_12;
+  } else {
+    assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+    return -1;
+  }
+  return table[clamp(qindex + delta, 0, MAXQ)];
+#else
+  (void)bit_depth;
+  return ac_qlookup[clamp(qindex + delta, 0, MAXQ)];
+#endif
+}
+
+// Returns the quantizer index to use for `segment_id`. When the segment has
+// no SEG_LVL_ALT_Q feature, base_qindex is returned unchanged. Otherwise the
+// segment data is used either as an absolute index (SEGMENT_ABSDATA) or as a
+// delta added to base_qindex, and the result is clamped to [0, MAXQ].
+int vp10_get_qindex(const struct segmentation *seg, int segment_id,
+                    int base_qindex) {
+  int seg_qindex;
+
+  if (!segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) return base_qindex;
+
+  seg_qindex = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
+  if (seg->abs_delta != SEGMENT_ABSDATA) seg_qindex += base_qindex;
+  return clamp(seg_qindex, 0, MAXQ);
+}
+
+#if CONFIG_AOM_QM
+// Returns the start of the matrix stored in cm->giqmatrix for the given
+// level, plane type (is_chroma, normalized to 0/1), prediction type
+// (is_intra, normalized to 0/1) and transform-size index log2sizem2.
+qm_val_t *aom_iqmatrix(VP10_COMMON *cm, int qmlevel, int is_chroma,
+                       int log2sizem2, int is_intra) {
+  const int plane = is_chroma ? 1 : 0;
+  const int intra = is_intra ? 1 : 0;
+  return cm->giqmatrix[qmlevel][plane][intra][log2sizem2];
+}
+// Returns the start of the matrix stored in cm->gqmatrix for the given
+// level, plane type (is_chroma, normalized to 0/1), prediction type
+// (is_intra, normalized to 0/1) and transform-size index log2sizem2.
+qm_val_t *aom_qmatrix(VP10_COMMON *cm, int qmlevel, int is_chroma,
+                      int log2sizem2, int is_intra) {
+  const int plane = is_chroma ? 1 : 0;
+  const int intra = is_intra ? 1 : 0;
+  return cm->gqmatrix[qmlevel][plane][intra][log2sizem2];
+}
+
+// Tentative definitions of the two reference matrix tables, placed here so
+// that aom_qm_init() can take addresses into them before the initialized
+// definition of iwt_matrix_ref that follows it (wt_matrix_ref is presumably
+// initialized further down as well -- not visible from here).
+// Each [level][chroma][intra] row is sized to hold a 4x4, 8x8, 16x16 and
+// 32x32 matrix stored back to back.
+static uint16_t
+ iwt_matrix_ref[NUM_QM_LEVELS][2][2][4 * 4 + 8 * 8 + 16 * 16 + 32 * 32];
+static uint16_t
+ wt_matrix_ref[NUM_QM_LEVELS][2][2][4 * 4 + 8 * 8 + 16 * 16 + 32 * 32];
+
+// Points every cm->gqmatrix / cm->giqmatrix entry at the matching matrix
+// inside wt_matrix_ref / iwt_matrix_ref. Within one [level][plane][intra]
+// row the matrices are stored back to back, so the offset of transform size
+// index `tx` is the sum of the squared dimensions (4 << tx) before it.
+void aom_qm_init(VP10_COMMON *cm) {
+  int level, plane, intra, tx;
+
+  for (level = 0; level < NUM_QM_LEVELS; ++level) {
+    for (plane = 0; plane < 2; ++plane) {
+      for (intra = 0; intra < 2; ++intra) {
+        int offset = 0;  // start of the current matrix within the row
+        for (tx = 0; tx < TX_SIZES; ++tx) {
+          const int dim = 1 << (tx + 2);
+          cm->gqmatrix[level][plane][intra][tx] =
+              &wt_matrix_ref[level][plane][intra][offset];
+          cm->giqmatrix[level][plane][intra][tx] =
+              &iwt_matrix_ref[level][plane][intra][offset];
+          offset += dim * dim;
+        }
+      }
+    }
+  }
+}
+
+static uint16_t iwt_matrix_ref[NUM_QM_LEVELS][2][2][4 * 4 + 8 * 8 + 16 * 16 +
+ 32 * 32] = {
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 71, 124, 214, 71, 112, 165, 241, 124, 165, 254, 331, 214, 241, 331,
+ 414,
+ /* Size 8 */
+ 64, 47, 51, 69, 97, 132, 173, 218, 47, 54, 52, 62, 81, 109, 142, 181,
+ 51, 52, 75, 90, 108, 133, 165, 201, 69, 62, 90, 119, 144, 169, 198, 232,
+ 97, 81, 108, 144, 178, 208, 238, 268, 132, 109, 133, 169, 208, 244, 276,
+ 305, 173, 142, 165, 198, 238, 276, 309, 338, 218, 181, 201, 232, 268,
+ 305, 338, 367,
+ /* Size 16 */
+ 64, 54, 47, 49, 51, 59, 69, 81, 97, 111, 132, 150, 173, 193, 218, 218,
+ 54, 52, 50, 51, 51, 58, 65, 75, 88, 101, 119, 135, 156, 175, 198, 198,
+ 47, 50, 54, 53, 52, 56, 62, 70, 81, 93, 109, 123, 142, 159, 181, 181,
+ 49, 51, 53, 57, 61, 67, 73, 82, 93, 104, 120, 134, 153, 170, 191, 191,
+ 51, 51, 52, 61, 75, 82, 90, 98, 108, 119, 133, 147, 165, 181, 201, 201,
+ 59, 58, 56, 67, 82, 91, 102, 112, 123, 135, 149, 163, 180, 196, 215,
+ 215, 69, 65, 62, 73, 90, 102, 119, 130, 144, 155, 169, 182, 198, 214,
+ 232, 232, 81, 75, 70, 82, 98, 112, 130, 143, 159, 172, 186, 200, 216,
+ 231, 249, 249, 97, 88, 81, 93, 108, 123, 144, 159, 178, 192, 208, 222,
+ 238, 252, 268, 268, 111, 101, 93, 104, 119, 135, 155, 172, 192, 207,
+ 225, 239, 255, 269, 285, 285, 132, 119, 109, 120, 133, 149, 169, 186,
+ 208, 225, 244, 259, 276, 290, 305, 305, 150, 135, 123, 134, 147, 163,
+ 182, 200, 222, 239, 259, 274, 291, 305, 321, 321, 173, 156, 142, 153,
+ 165, 180, 198, 216, 238, 255, 276, 291, 309, 323, 338, 338, 193, 175,
+ 159, 170, 181, 196, 214, 231, 252, 269, 290, 305, 323, 337, 352, 352,
+ 218, 198, 181, 191, 201, 215, 232, 249, 268, 285, 305, 321, 338, 352,
+ 367, 367, 218, 198, 181, 191, 201, 215, 232, 249, 268, 285, 305, 321,
+ 338, 352, 367, 367,
+ /* Size 32 */
+ 64, 59, 54, 50, 47, 48, 49, 50, 51, 55, 59, 63, 69, 74, 81, 88, 97, 104,
+ 111, 121, 132, 140, 150, 161, 173, 183, 193, 205, 218, 218, 218, 218,
+ 59, 56, 53, 51, 49, 49, 50, 51, 51, 54, 58, 62, 67, 72, 78, 84, 92, 99,
+ 106, 115, 125, 133, 142, 152, 164, 173, 183, 195, 208, 208, 208, 208,
+ 54, 53, 52, 51, 50, 51, 51, 51, 51, 54, 58, 61, 65, 70, 75, 81, 88, 94,
+ 101, 110, 119, 127, 135, 145, 156, 165, 175, 186, 198, 198, 198, 198,
+ 50, 51, 51, 52, 52, 52, 52, 52, 52, 54, 57, 60, 63, 68, 72, 78, 85, 90,
+ 97, 105, 114, 121, 129, 138, 149, 157, 167, 177, 189, 189, 189, 189, 47,
+ 49, 50, 52, 54, 54, 53, 52, 52, 54, 56, 59, 62, 66, 70, 75, 81, 87, 93,
+ 100, 109, 115, 123, 132, 142, 150, 159, 170, 181, 181, 181, 181, 48, 49,
+ 51, 52, 54, 54, 55, 56, 56, 59, 61, 64, 67, 71, 76, 81, 87, 92, 98, 105,
+ 114, 121, 128, 137, 147, 155, 164, 174, 186, 186, 186, 186, 49, 50, 51,
+ 52, 53, 55, 57, 59, 61, 64, 67, 70, 73, 77, 82, 87, 93, 98, 104, 111,
+ 120, 126, 134, 143, 153, 161, 170, 179, 191, 191, 191, 191, 50, 51, 51,
+ 52, 52, 56, 59, 63, 68, 71, 74, 77, 81, 85, 89, 94, 100, 105, 111, 118,
+ 126, 133, 140, 149, 158, 166, 175, 185, 196, 196, 196, 196, 51, 51, 51,
+ 52, 52, 56, 61, 68, 75, 79, 82, 86, 90, 94, 98, 103, 108, 113, 119, 126,
+ 133, 140, 147, 155, 165, 172, 181, 191, 201, 201, 201, 201, 55, 54, 54,
+ 54, 54, 59, 64, 71, 79, 82, 86, 91, 96, 100, 105, 110, 115, 120, 126,
+ 133, 140, 147, 155, 163, 172, 180, 188, 198, 208, 208, 208, 208, 59, 58,
+ 58, 57, 56, 61, 67, 74, 82, 86, 91, 96, 102, 107, 112, 117, 123, 129,
+ 135, 141, 149, 156, 163, 171, 180, 188, 196, 205, 215, 215, 215, 215,
+ 63, 62, 61, 60, 59, 64, 70, 77, 86, 91, 96, 103, 110, 115, 120, 126,
+ 133, 138, 144, 151, 158, 165, 172, 180, 189, 196, 204, 213, 223, 223,
+ 223, 223, 69, 67, 65, 63, 62, 67, 73, 81, 90, 96, 102, 110, 119, 124,
+ 130, 137, 144, 149, 155, 162, 169, 175, 182, 190, 198, 206, 214, 222,
+ 232, 232, 232, 232, 74, 72, 70, 68, 66, 71, 77, 85, 94, 100, 107, 115,
+ 124, 130, 136, 143, 151, 157, 163, 170, 177, 184, 191, 199, 207, 214,
+ 222, 231, 240, 240, 240, 240, 81, 78, 75, 72, 70, 76, 82, 89, 98, 105,
+ 112, 120, 130, 136, 143, 151, 159, 165, 172, 179, 186, 193, 200, 208,
+ 216, 223, 231, 240, 249, 249, 249, 249, 88, 84, 81, 78, 75, 81, 87, 94,
+ 103, 110, 117, 126, 137, 143, 151, 159, 168, 174, 181, 189, 197, 203,
+ 211, 218, 226, 234, 241, 249, 258, 258, 258, 258, 97, 92, 88, 85, 81,
+ 87, 93, 100, 108, 115, 123, 133, 144, 151, 159, 168, 178, 184, 192, 200,
+ 208, 215, 222, 229, 238, 245, 252, 260, 268, 268, 268, 268, 104, 99, 94,
+ 90, 87, 92, 98, 105, 113, 120, 129, 138, 149, 157, 165, 174, 184, 191,
+ 199, 207, 216, 223, 230, 238, 246, 253, 260, 268, 276, 276, 276, 276,
+ 111, 106, 101, 97, 93, 98, 104, 111, 119, 126, 135, 144, 155, 163, 172,
+ 181, 192, 199, 207, 215, 225, 232, 239, 247, 255, 262, 269, 277, 285,
+ 285, 285, 285, 121, 115, 110, 105, 100, 105, 111, 118, 126, 133, 141,
+ 151, 162, 170, 179, 189, 200, 207, 215, 224, 234, 241, 248, 256, 265,
+ 272, 279, 287, 295, 295, 295, 295, 132, 125, 119, 114, 109, 114, 120,
+ 126, 133, 140, 149, 158, 169, 177, 186, 197, 208, 216, 225, 234, 244,
+ 251, 259, 267, 276, 282, 290, 297, 305, 305, 305, 305, 140, 133, 127,
+ 121, 115, 121, 126, 133, 140, 147, 156, 165, 175, 184, 193, 203, 215,
+ 223, 232, 241, 251, 258, 266, 275, 283, 290, 297, 305, 313, 313, 313,
+ 313, 150, 142, 135, 129, 123, 128, 134, 140, 147, 155, 163, 172, 182,
+ 191, 200, 211, 222, 230, 239, 248, 259, 266, 274, 283, 291, 298, 305,
+ 313, 321, 321, 321, 321, 161, 152, 145, 138, 132, 137, 143, 149, 155,
+ 163, 171, 180, 190, 199, 208, 218, 229, 238, 247, 256, 267, 275, 283,
+ 291, 300, 307, 314, 322, 329, 329, 329, 329, 173, 164, 156, 149, 142,
+ 147, 153, 158, 165, 172, 180, 189, 198, 207, 216, 226, 238, 246, 255,
+ 265, 276, 283, 291, 300, 309, 316, 323, 331, 338, 338, 338, 338, 183,
+ 173, 165, 157, 150, 155, 161, 166, 172, 180, 188, 196, 206, 214, 223,
+ 234, 245, 253, 262, 272, 282, 290, 298, 307, 316, 323, 330, 337, 345,
+ 345, 345, 345, 193, 183, 175, 167, 159, 164, 170, 175, 181, 188, 196,
+ 204, 214, 222, 231, 241, 252, 260, 269, 279, 290, 297, 305, 314, 323,
+ 330, 337, 345, 352, 352, 352, 352, 205, 195, 186, 177, 170, 174, 179,
+ 185, 191, 198, 205, 213, 222, 231, 240, 249, 260, 268, 277, 287, 297,
+ 305, 313, 322, 331, 337, 345, 352, 360, 360, 360, 360, 218, 208, 198,
+ 189, 181, 186, 191, 196, 201, 208, 215, 223, 232, 240, 249, 258, 268,
+ 276, 285, 295, 305, 313, 321, 329, 338, 345, 352, 360, 367, 367, 367,
+ 367, 218, 208, 198, 189, 181, 186, 191, 196, 201, 208, 215, 223, 232,
+ 240, 249, 258, 268, 276, 285, 295, 305, 313, 321, 329, 338, 345, 352,
+ 360, 367, 367, 367, 367, 218, 208, 198, 189, 181, 186, 191, 196, 201,
+ 208, 215, 223, 232, 240, 249, 258, 268, 276, 285, 295, 305, 313, 321,
+ 329, 338, 345, 352, 360, 367, 367, 367, 367, 218, 208, 198, 189, 181,
+ 186, 191, 196, 201, 208, 215, 223, 232, 240, 249, 258, 268, 276, 285,
+ 295, 305, 313, 321, 329, 338, 345, 352, 360, 367, 367, 367, 367 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 16, 18, 33, 60, 18, 29, 45, 68, 33, 45, 72, 98, 60, 68, 98, 129,
+ /* Size 8 */
+ 20, 14, 16, 21, 31, 43, 58, 75, 14, 17, 16, 19, 25, 35, 46, 61, 16, 16,
+ 24, 28, 34, 43, 54, 68, 21, 19, 28, 38, 47, 56, 67, 80, 31, 25, 34, 47,
+ 59, 71, 83, 95, 43, 35, 43, 56, 71, 85, 99, 112, 58, 46, 54, 67, 83, 99,
+ 113, 127, 75, 61, 68, 80, 95, 112, 127, 141,
+ /* Size 16 */
+ 19, 16, 14, 14, 15, 17, 20, 24, 29, 34, 41, 47, 55, 62, 71, 71, 16, 15,
+ 15, 15, 15, 17, 19, 22, 26, 31, 36, 42, 49, 55, 64, 64, 14, 15, 16, 16,
+ 15, 17, 18, 21, 24, 28, 33, 38, 44, 50, 58, 58, 14, 15, 16, 17, 18, 20,
+ 22, 24, 28, 32, 37, 41, 48, 54, 61, 61, 15, 15, 15, 18, 22, 24, 27, 30,
+ 33, 36, 41, 46, 52, 58, 65, 65, 17, 17, 17, 20, 24, 27, 31, 34, 38, 42,
+ 46, 51, 57, 63, 70, 70, 20, 19, 18, 22, 27, 31, 36, 40, 45, 49, 53, 58,
+ 64, 70, 76, 76, 24, 22, 21, 24, 30, 34, 40, 44, 50, 54, 60, 65, 71, 76,
+ 83, 83, 29, 26, 24, 28, 33, 38, 45, 50, 56, 61, 67, 73, 79, 84, 91, 91,
+ 34, 31, 28, 32, 36, 42, 49, 54, 61, 67, 74, 79, 86, 91, 98, 98, 41, 36,
+ 33, 37, 41, 46, 53, 60, 67, 74, 81, 87, 94, 100, 106, 106, 47, 42, 38,
+ 41, 46, 51, 58, 65, 73, 79, 87, 93, 100, 106, 113, 113, 55, 49, 44, 48,
+ 52, 57, 64, 71, 79, 86, 94, 100, 108, 114, 121, 121, 62, 55, 50, 54, 58,
+ 63, 70, 76, 84, 91, 100, 106, 114, 120, 127, 127, 71, 64, 58, 61, 65,
+ 70, 76, 83, 91, 98, 106, 113, 121, 127, 134, 134, 71, 64, 58, 61, 65,
+ 70, 76, 83, 91, 98, 106, 113, 121, 127, 134, 134,
+ /* Size 32 */
+ 18, 17, 15, 14, 13, 14, 14, 14, 15, 16, 17, 18, 20, 22, 23, 26, 28, 30,
+ 33, 36, 40, 42, 45, 49, 53, 57, 60, 65, 69, 69, 69, 69, 17, 16, 15, 14,
+ 14, 14, 14, 14, 15, 16, 17, 18, 19, 21, 23, 25, 27, 29, 31, 34, 37, 40,
+ 43, 46, 50, 53, 57, 61, 66, 66, 66, 66, 15, 15, 15, 15, 14, 14, 14, 15,
+ 15, 15, 16, 18, 19, 20, 22, 24, 26, 28, 30, 32, 35, 38, 41, 44, 48, 51,
+ 54, 58, 62, 62, 62, 62, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 17,
+ 18, 19, 21, 23, 25, 26, 28, 31, 34, 36, 39, 42, 45, 48, 51, 55, 59, 59,
+ 59, 59, 13, 14, 14, 15, 16, 15, 15, 15, 15, 15, 16, 17, 18, 19, 20, 22,
+ 24, 25, 27, 29, 32, 34, 37, 40, 43, 46, 49, 52, 56, 56, 56, 56, 14, 14,
+ 14, 15, 15, 16, 16, 16, 16, 17, 18, 18, 19, 20, 22, 23, 25, 27, 29, 31,
+ 34, 36, 38, 41, 45, 47, 50, 54, 58, 58, 58, 58, 14, 14, 14, 15, 15, 16,
+ 16, 17, 18, 18, 19, 20, 21, 22, 24, 25, 27, 29, 31, 33, 36, 38, 40, 43,
+ 46, 49, 52, 56, 60, 60, 60, 60, 14, 14, 15, 15, 15, 16, 17, 18, 19, 20,
+ 21, 22, 23, 25, 26, 28, 29, 31, 33, 35, 38, 40, 42, 45, 48, 51, 54, 57,
+ 61, 61, 61, 61, 15, 15, 15, 15, 15, 16, 18, 19, 22, 23, 24, 25, 26, 27,
+ 29, 30, 32, 34, 35, 38, 40, 42, 45, 47, 50, 53, 56, 59, 63, 63, 63, 63,
+ 16, 16, 15, 15, 15, 17, 18, 20, 23, 24, 25, 27, 28, 29, 31, 32, 34, 36,
+ 38, 40, 42, 45, 47, 50, 53, 56, 59, 62, 66, 66, 66, 66, 17, 17, 16, 16,
+ 16, 18, 19, 21, 24, 25, 27, 28, 30, 32, 33, 35, 37, 39, 41, 43, 45, 47,
+ 50, 53, 56, 58, 61, 65, 68, 68, 68, 68, 18, 18, 18, 17, 17, 18, 20, 22,
+ 25, 27, 28, 30, 33, 34, 36, 38, 40, 42, 44, 46, 48, 51, 53, 56, 59, 62,
+ 64, 68, 71, 71, 71, 71, 20, 19, 19, 18, 18, 19, 21, 23, 26, 28, 30, 33,
+ 35, 37, 39, 41, 43, 45, 47, 50, 52, 54, 57, 59, 62, 65, 68, 71, 74, 74,
+ 74, 74, 22, 21, 20, 19, 19, 20, 22, 25, 27, 29, 32, 34, 37, 39, 41, 43,
+ 46, 48, 50, 52, 55, 57, 60, 62, 65, 68, 71, 74, 77, 77, 77, 77, 23, 23,
+ 22, 21, 20, 22, 24, 26, 29, 31, 33, 36, 39, 41, 43, 46, 49, 51, 53, 55,
+ 58, 60, 63, 66, 69, 71, 74, 77, 81, 81, 81, 81, 26, 25, 24, 23, 22, 23,
+ 25, 28, 30, 32, 35, 38, 41, 43, 46, 48, 52, 54, 56, 59, 62, 64, 67, 69,
+ 72, 75, 78, 81, 84, 84, 84, 84, 28, 27, 26, 25, 24, 25, 27, 29, 32, 34,
+ 37, 40, 43, 46, 49, 52, 55, 57, 60, 63, 66, 68, 71, 74, 77, 79, 82, 85,
+ 88, 88, 88, 88, 30, 29, 28, 26, 25, 27, 29, 31, 34, 36, 39, 42, 45, 48,
+ 51, 54, 57, 60, 62, 65, 69, 71, 74, 77, 80, 83, 85, 88, 92, 92, 92, 92,
+ 33, 31, 30, 28, 27, 29, 31, 33, 35, 38, 41, 44, 47, 50, 53, 56, 60, 62,
+ 65, 68, 72, 74, 77, 80, 83, 86, 89, 92, 95, 95, 95, 95, 36, 34, 32, 31,
+ 29, 31, 33, 35, 38, 40, 43, 46, 50, 52, 55, 59, 63, 65, 68, 72, 75, 78,
+ 81, 84, 87, 90, 93, 96, 99, 99, 99, 99, 40, 37, 35, 34, 32, 34, 36, 38,
+ 40, 42, 45, 48, 52, 55, 58, 62, 66, 69, 72, 75, 79, 82, 85, 88, 91, 94,
+ 97, 100, 103, 103, 103, 103, 42, 40, 38, 36, 34, 36, 38, 40, 42, 45, 47,
+ 51, 54, 57, 60, 64, 68, 71, 74, 78, 82, 85, 88, 91, 94, 97, 100, 103,
+ 107, 107, 107, 107, 45, 43, 41, 39, 37, 38, 40, 42, 45, 47, 50, 53, 57,
+ 60, 63, 67, 71, 74, 77, 81, 85, 88, 91, 94, 98, 101, 104, 107, 110, 110,
+ 110, 110, 49, 46, 44, 42, 40, 41, 43, 45, 47, 50, 53, 56, 59, 62, 66,
+ 69, 74, 77, 80, 84, 88, 91, 94, 98, 101, 104, 107, 110, 114, 114, 114,
+ 114, 53, 50, 48, 45, 43, 45, 46, 48, 50, 53, 56, 59, 62, 65, 69, 72, 77,
+ 80, 83, 87, 91, 94, 98, 101, 105, 108, 111, 114, 118, 118, 118, 118, 57,
+ 53, 51, 48, 46, 47, 49, 51, 53, 56, 58, 62, 65, 68, 71, 75, 79, 83, 86,
+ 90, 94, 97, 101, 104, 108, 111, 114, 117, 121, 121, 121, 121, 60, 57,
+ 54, 51, 49, 50, 52, 54, 56, 59, 61, 64, 68, 71, 74, 78, 82, 85, 89, 93,
+ 97, 100, 104, 107, 111, 114, 117, 120, 124, 124, 124, 124, 65, 61, 58,
+ 55, 52, 54, 56, 57, 59, 62, 65, 68, 71, 74, 77, 81, 85, 88, 92, 96, 100,
+ 103, 107, 110, 114, 117, 120, 124, 127, 127, 127, 127, 69, 66, 62, 59,
+ 56, 58, 60, 61, 63, 66, 68, 71, 74, 77, 81, 84, 88, 92, 95, 99, 103,
+ 107, 110, 114, 118, 121, 124, 127, 130, 130, 130, 130, 69, 66, 62, 59,
+ 56, 58, 60, 61, 63, 66, 68, 71, 74, 77, 81, 84, 88, 92, 95, 99, 103,
+ 107, 110, 114, 118, 121, 124, 127, 130, 130, 130, 130, 69, 66, 62, 59,
+ 56, 58, 60, 61, 63, 66, 68, 71, 74, 77, 81, 84, 88, 92, 95, 99, 103,
+ 107, 110, 114, 118, 121, 124, 127, 130, 130, 130, 130, 69, 66, 62, 59,
+ 56, 58, 60, 61, 63, 66, 68, 71, 74, 77, 81, 84, 88, 92, 95, 99, 103,
+ 107, 110, 114, 118, 121, 124, 127, 130, 130, 130, 130 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 106, 117, 154, 106, 131, 141, 167, 117, 141, 191, 225, 154, 167,
+ 225, 279,
+ /* Size 8 */
+ 64, 51, 98, 104, 113, 128, 148, 172, 51, 76, 100, 89, 92, 103, 118, 136,
+ 98, 100, 119, 115, 114, 121, 134, 151, 104, 89, 115, 132, 140, 147, 158,
+ 173, 113, 92, 114, 140, 160, 174, 186, 201, 128, 103, 121, 147, 174,
+ 195, 213, 229, 148, 118, 134, 158, 186, 213, 236, 256, 172, 136, 151,
+ 173, 201, 229, 256, 280,
+ /* Size 16 */
+ 64, 57, 51, 67, 98, 101, 104, 108, 113, 120, 128, 137, 148, 159, 172,
+ 172, 57, 59, 61, 75, 99, 97, 96, 99, 101, 107, 114, 122, 131, 141, 152,
+ 152, 51, 61, 76, 86, 100, 94, 89, 91, 92, 97, 103, 110, 118, 126, 136,
+ 136, 67, 75, 86, 96, 109, 104, 100, 101, 102, 106, 111, 118, 125, 134,
+ 143, 143, 98, 99, 100, 109, 119, 117, 115, 115, 114, 118, 121, 127, 134,
+ 142, 151, 151, 101, 97, 94, 104, 117, 120, 123, 124, 126, 129, 133, 139,
+ 145, 153, 161, 161, 104, 96, 89, 100, 115, 123, 132, 136, 140, 144, 147,
+ 153, 158, 165, 173, 173, 108, 99, 91, 101, 115, 124, 136, 142, 149, 154,
+ 160, 165, 171, 178, 186, 186, 113, 101, 92, 102, 114, 126, 140, 149,
+ 160, 167, 174, 180, 186, 193, 201, 201, 120, 107, 97, 106, 118, 129,
+ 144, 154, 167, 175, 184, 191, 199, 206, 214, 214, 128, 114, 103, 111,
+ 121, 133, 147, 160, 174, 184, 195, 204, 213, 221, 229, 229, 137, 122,
+ 110, 118, 127, 139, 153, 165, 180, 191, 204, 213, 224, 233, 242, 242,
+ 148, 131, 118, 125, 134, 145, 158, 171, 186, 199, 213, 224, 236, 246,
+ 256, 256, 159, 141, 126, 134, 142, 153, 165, 178, 193, 206, 221, 233,
+ 246, 256, 267, 267, 172, 152, 136, 143, 151, 161, 173, 186, 201, 214,
+ 229, 242, 256, 267, 280, 280, 172, 152, 136, 143, 151, 161, 173, 186,
+ 201, 214, 229, 242, 256, 267, 280, 280,
+ /* Size 32 */
+ 64, 60, 57, 54, 51, 58, 67, 79, 98, 99, 101, 103, 104, 106, 108, 110,
+ 113, 116, 120, 124, 128, 133, 137, 143, 148, 153, 159, 165, 172, 172,
+ 172, 172, 60, 59, 58, 56, 55, 62, 71, 82, 98, 99, 99, 100, 100, 102,
+ 103, 105, 107, 110, 113, 117, 121, 125, 129, 134, 139, 144, 149, 155,
+ 161, 161, 161, 161, 57, 58, 59, 60, 61, 67, 75, 85, 99, 98, 97, 97, 96,
+ 97, 99, 100, 101, 104, 107, 110, 114, 118, 122, 126, 131, 136, 141, 146,
+ 152, 152, 152, 152, 54, 56, 60, 63, 67, 73, 80, 89, 99, 97, 96, 94, 93,
+ 94, 94, 95, 97, 99, 102, 105, 108, 112, 115, 120, 124, 128, 133, 138,
+ 144, 144, 144, 144, 51, 55, 61, 67, 76, 81, 86, 92, 100, 97, 94, 92, 89,
+ 90, 91, 91, 92, 95, 97, 100, 103, 106, 110, 113, 118, 122, 126, 131,
+ 136, 136, 136, 136, 58, 62, 67, 73, 81, 85, 91, 97, 104, 101, 99, 97,
+ 94, 95, 96, 96, 97, 99, 101, 104, 107, 110, 113, 117, 121, 125, 130,
+ 134, 140, 140, 140, 140, 67, 71, 75, 80, 86, 91, 96, 102, 109, 106, 104,
+ 102, 100, 101, 101, 102, 102, 104, 106, 109, 111, 114, 118, 121, 125,
+ 129, 134, 138, 143, 143, 143, 143, 79, 82, 85, 89, 92, 97, 102, 108,
+ 114, 112, 110, 109, 107, 107, 107, 108, 108, 110, 112, 114, 116, 119,
+ 122, 126, 129, 133, 138, 142, 147, 147, 147, 147, 98, 98, 99, 99, 100,
+ 104, 109, 114, 119, 118, 117, 116, 115, 115, 115, 114, 114, 116, 118,
+ 119, 121, 124, 127, 130, 134, 138, 142, 146, 151, 151, 151, 151, 99, 99,
+ 98, 97, 97, 101, 106, 112, 118, 118, 118, 119, 119, 119, 119, 120, 120,
+ 122, 123, 125, 127, 130, 133, 136, 139, 143, 147, 151, 156, 156, 156,
+ 156, 101, 99, 97, 96, 94, 99, 104, 110, 117, 118, 120, 121, 123, 124,
+ 124, 125, 126, 128, 129, 131, 133, 136, 139, 142, 145, 149, 153, 157,
+ 161, 161, 161, 161, 103, 100, 97, 94, 92, 97, 102, 109, 116, 119, 121,
+ 124, 127, 129, 130, 131, 133, 134, 136, 138, 140, 143, 145, 148, 151,
+ 155, 159, 163, 167, 167, 167, 167, 104, 100, 96, 93, 89, 94, 100, 107,
+ 115, 119, 123, 127, 132, 134, 136, 138, 140, 142, 144, 146, 147, 150,
+ 153, 155, 158, 162, 165, 169, 173, 173, 173, 173, 106, 102, 97, 94, 90,
+ 95, 101, 107, 115, 119, 124, 129, 134, 137, 139, 142, 145, 147, 149,
+ 151, 153, 156, 159, 162, 164, 168, 172, 175, 179, 179, 179, 179, 108,
+ 103, 99, 94, 91, 96, 101, 107, 115, 119, 124, 130, 136, 139, 142, 146,
+ 149, 152, 154, 157, 160, 162, 165, 168, 171, 175, 178, 182, 186, 186,
+ 186, 186, 110, 105, 100, 95, 91, 96, 102, 108, 114, 120, 125, 131, 138,
+ 142, 146, 150, 154, 157, 160, 163, 166, 169, 172, 175, 178, 182, 185,
+ 189, 193, 193, 193, 193, 113, 107, 101, 97, 92, 97, 102, 108, 114, 120,
+ 126, 133, 140, 145, 149, 154, 160, 163, 167, 170, 174, 177, 180, 183,
+ 186, 190, 193, 197, 201, 201, 201, 201, 116, 110, 104, 99, 95, 99, 104,
+ 110, 116, 122, 128, 134, 142, 147, 152, 157, 163, 167, 171, 175, 179,
+ 182, 185, 189, 192, 196, 199, 203, 207, 207, 207, 207, 120, 113, 107,
+ 102, 97, 101, 106, 112, 118, 123, 129, 136, 144, 149, 154, 160, 167,
+ 171, 175, 179, 184, 187, 191, 195, 199, 202, 206, 210, 214, 214, 214,
+ 214, 124, 117, 110, 105, 100, 104, 109, 114, 119, 125, 131, 138, 146,
+ 151, 157, 163, 170, 175, 179, 184, 190, 193, 197, 201, 206, 209, 213,
+ 217, 221, 221, 221, 221, 128, 121, 114, 108, 103, 107, 111, 116, 121,
+ 127, 133, 140, 147, 153, 160, 166, 174, 179, 184, 190, 195, 200, 204,
+ 208, 213, 217, 221, 225, 229, 229, 229, 229, 133, 125, 118, 112, 106,
+ 110, 114, 119, 124, 130, 136, 143, 150, 156, 162, 169, 177, 182, 187,
+ 193, 200, 204, 209, 213, 218, 222, 227, 231, 235, 235, 235, 235, 137,
+ 129, 122, 115, 110, 113, 118, 122, 127, 133, 139, 145, 153, 159, 165,
+ 172, 180, 185, 191, 197, 204, 209, 213, 219, 224, 228, 233, 237, 242,
+ 242, 242, 242, 143, 134, 126, 120, 113, 117, 121, 126, 130, 136, 142,
+ 148, 155, 162, 168, 175, 183, 189, 195, 201, 208, 213, 219, 224, 230,
+ 234, 239, 244, 249, 249, 249, 249, 148, 139, 131, 124, 118, 121, 125,
+ 129, 134, 139, 145, 151, 158, 164, 171, 178, 186, 192, 199, 206, 213,
+ 218, 224, 230, 236, 241, 246, 251, 256, 256, 256, 256, 153, 144, 136,
+ 128, 122, 125, 129, 133, 138, 143, 149, 155, 162, 168, 175, 182, 190,
+ 196, 202, 209, 217, 222, 228, 234, 241, 246, 251, 256, 262, 262, 262,
+ 262, 159, 149, 141, 133, 126, 130, 134, 138, 142, 147, 153, 159, 165,
+ 172, 178, 185, 193, 199, 206, 213, 221, 227, 233, 239, 246, 251, 256,
+ 262, 267, 267, 267, 267, 165, 155, 146, 138, 131, 134, 138, 142, 146,
+ 151, 157, 163, 169, 175, 182, 189, 197, 203, 210, 217, 225, 231, 237,
+ 244, 251, 256, 262, 267, 273, 273, 273, 273, 172, 161, 152, 144, 136,
+ 140, 143, 147, 151, 156, 161, 167, 173, 179, 186, 193, 201, 207, 214,
+ 221, 229, 235, 242, 249, 256, 262, 267, 273, 280, 280, 280, 280, 172,
+ 161, 152, 144, 136, 140, 143, 147, 151, 156, 161, 167, 173, 179, 186,
+ 193, 201, 207, 214, 221, 229, 235, 242, 249, 256, 262, 267, 273, 280,
+ 280, 280, 280, 172, 161, 152, 144, 136, 140, 143, 147, 151, 156, 161,
+ 167, 173, 179, 186, 193, 201, 207, 214, 221, 229, 235, 242, 249, 256,
+ 262, 267, 273, 280, 280, 280, 280, 172, 161, 152, 144, 136, 140, 143,
+ 147, 151, 156, 161, 167, 173, 179, 186, 193, 201, 207, 214, 221, 229,
+ 235, 242, 249, 256, 262, 267, 273, 280, 280, 280, 280 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 23, 40, 44, 59, 40, 50, 54, 64, 44, 54, 74, 89, 59, 64, 89, 114,
+ /* Size 8 */
+ 25, 20, 39, 42, 46, 52, 61, 72, 20, 30, 40, 36, 37, 41, 48, 56, 39, 40,
+ 49, 47, 46, 49, 55, 62, 42, 36, 47, 54, 58, 61, 66, 73, 46, 37, 46, 58,
+ 67, 73, 79, 85, 52, 41, 49, 61, 73, 83, 91, 99, 61, 48, 55, 66, 79, 91,
+ 103, 113, 72, 56, 62, 73, 85, 99, 113, 125,
+ /* Size 16 */
+ 24, 22, 19, 26, 38, 39, 41, 42, 44, 47, 51, 55, 59, 64, 69, 69, 22, 22,
+ 23, 29, 38, 38, 37, 38, 39, 42, 45, 48, 52, 56, 61, 61, 19, 23, 29, 33,
+ 39, 37, 34, 35, 36, 38, 40, 43, 46, 50, 54, 54, 26, 29, 33, 37, 42, 41,
+ 39, 39, 40, 42, 43, 46, 49, 53, 57, 57, 38, 38, 39, 42, 47, 46, 45, 45,
+ 45, 46, 48, 50, 53, 56, 60, 60, 39, 38, 37, 41, 46, 47, 48, 49, 50, 51,
+ 53, 55, 58, 61, 65, 65, 41, 37, 34, 39, 45, 48, 52, 54, 56, 57, 59, 61,
+ 64, 67, 70, 70, 42, 38, 35, 39, 45, 49, 54, 57, 60, 62, 64, 67, 69, 72,
+ 76, 76, 44, 39, 36, 40, 45, 50, 56, 60, 64, 67, 70, 73, 76, 79, 82, 82,
+ 47, 42, 38, 42, 46, 51, 57, 62, 67, 71, 75, 78, 82, 85, 89, 89, 51, 45,
+ 40, 43, 48, 53, 59, 64, 70, 75, 80, 84, 88, 92, 96, 96, 55, 48, 43, 46,
+ 50, 55, 61, 67, 73, 78, 84, 88, 93, 97, 102, 102, 59, 52, 46, 49, 53,
+ 58, 64, 69, 76, 82, 88, 93, 99, 104, 109, 109, 64, 56, 50, 53, 56, 61,
+ 67, 72, 79, 85, 92, 97, 104, 109, 114, 114, 69, 61, 54, 57, 60, 65, 70,
+ 76, 82, 89, 96, 102, 109, 114, 120, 120, 69, 61, 54, 57, 60, 65, 70, 76,
+ 82, 89, 96, 102, 109, 114, 120, 120,
+ /* Size 32 */
+ 24, 22, 21, 20, 19, 22, 25, 30, 37, 38, 39, 39, 40, 41, 42, 42, 43, 45,
+ 46, 48, 50, 51, 54, 56, 58, 60, 63, 65, 68, 68, 68, 68, 22, 22, 22, 21,
+ 21, 23, 27, 31, 37, 38, 38, 38, 38, 39, 40, 40, 41, 42, 44, 45, 47, 48,
+ 50, 52, 54, 56, 59, 61, 64, 64, 64, 64, 21, 22, 22, 22, 23, 25, 28, 32,
+ 38, 37, 37, 37, 37, 37, 38, 38, 39, 40, 41, 42, 44, 45, 47, 49, 51, 53,
+ 55, 57, 60, 60, 60, 60, 20, 21, 22, 24, 25, 28, 30, 34, 38, 37, 36, 36,
+ 35, 36, 36, 36, 37, 38, 39, 40, 41, 43, 44, 46, 48, 50, 52, 54, 56, 56,
+ 56, 56, 19, 21, 23, 25, 29, 30, 33, 35, 38, 37, 36, 35, 34, 34, 34, 35,
+ 35, 36, 37, 38, 39, 41, 42, 44, 45, 47, 49, 51, 53, 53, 53, 53, 22, 23,
+ 25, 28, 30, 32, 34, 37, 40, 39, 38, 37, 36, 36, 36, 37, 37, 38, 39, 40,
+ 41, 42, 44, 45, 47, 49, 50, 52, 54, 54, 54, 54, 25, 27, 28, 30, 33, 34,
+ 37, 39, 42, 41, 40, 39, 38, 38, 39, 39, 39, 40, 41, 42, 43, 44, 45, 47,
+ 48, 50, 52, 54, 56, 56, 56, 56, 30, 31, 32, 34, 35, 37, 39, 41, 44, 43,
+ 42, 42, 41, 41, 41, 41, 41, 42, 43, 44, 45, 46, 47, 49, 50, 52, 54, 55,
+ 57, 57, 57, 57, 37, 37, 38, 38, 38, 40, 42, 44, 46, 46, 45, 45, 44, 44,
+ 44, 44, 44, 45, 45, 46, 47, 48, 49, 51, 52, 54, 55, 57, 59, 59, 59, 59,
+ 38, 38, 37, 37, 37, 39, 41, 43, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47,
+ 48, 48, 49, 50, 52, 53, 54, 56, 58, 59, 61, 61, 61, 61, 39, 38, 37, 36,
+ 36, 38, 40, 42, 45, 46, 46, 47, 48, 48, 48, 48, 49, 49, 50, 51, 52, 53,
+ 54, 55, 57, 58, 60, 62, 64, 64, 64, 64, 39, 38, 37, 36, 35, 37, 39, 42,
+ 45, 46, 47, 48, 49, 50, 50, 51, 52, 52, 53, 54, 55, 56, 57, 58, 59, 61,
+ 63, 64, 66, 66, 66, 66, 40, 38, 37, 35, 34, 36, 38, 41, 44, 46, 48, 49,
+ 51, 52, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61, 62, 64, 65, 67, 69, 69,
+ 69, 69, 41, 39, 37, 36, 34, 36, 38, 41, 44, 46, 48, 50, 52, 53, 54, 55,
+ 57, 57, 58, 59, 60, 61, 63, 64, 65, 67, 68, 70, 71, 71, 71, 71, 42, 40,
+ 38, 36, 34, 36, 39, 41, 44, 46, 48, 50, 53, 54, 56, 57, 59, 60, 61, 62,
+ 63, 64, 65, 67, 68, 69, 71, 73, 74, 74, 74, 74, 42, 40, 38, 36, 35, 37,
+ 39, 41, 44, 46, 48, 51, 54, 55, 57, 59, 61, 62, 63, 64, 66, 67, 68, 70,
+ 71, 73, 74, 76, 78, 78, 78, 78, 43, 41, 39, 37, 35, 37, 39, 41, 44, 46,
+ 49, 52, 55, 57, 59, 61, 63, 64, 66, 67, 69, 70, 72, 73, 75, 76, 78, 79,
+ 81, 81, 81, 81, 45, 42, 40, 38, 36, 38, 40, 42, 45, 47, 49, 52, 55, 57,
+ 60, 62, 64, 66, 68, 69, 71, 73, 74, 76, 77, 79, 80, 82, 84, 84, 84, 84,
+ 46, 44, 41, 39, 37, 39, 41, 43, 45, 48, 50, 53, 56, 58, 61, 63, 66, 68,
+ 70, 71, 74, 75, 77, 78, 80, 82, 83, 85, 87, 87, 87, 87, 48, 45, 42, 40,
+ 38, 40, 42, 44, 46, 48, 51, 54, 57, 59, 62, 64, 67, 69, 71, 74, 76, 78,
+ 79, 81, 83, 85, 87, 89, 90, 90, 90, 90, 50, 47, 44, 41, 39, 41, 43, 45,
+ 47, 49, 52, 55, 58, 60, 63, 66, 69, 71, 74, 76, 79, 80, 82, 84, 87, 88,
+ 90, 92, 94, 94, 94, 94, 51, 48, 45, 43, 41, 42, 44, 46, 48, 50, 53, 56,
+ 59, 61, 64, 67, 70, 73, 75, 78, 80, 82, 85, 87, 89, 91, 93, 95, 97, 97,
+ 97, 97, 54, 50, 47, 44, 42, 44, 45, 47, 49, 52, 54, 57, 60, 63, 65, 68,
+ 72, 74, 77, 79, 82, 85, 87, 89, 92, 94, 96, 98, 100, 100, 100, 100, 56,
+ 52, 49, 46, 44, 45, 47, 49, 51, 53, 55, 58, 61, 64, 67, 70, 73, 76, 78,
+ 81, 84, 87, 89, 92, 94, 96, 99, 101, 103, 103, 103, 103, 58, 54, 51, 48,
+ 45, 47, 48, 50, 52, 54, 57, 59, 62, 65, 68, 71, 75, 77, 80, 83, 87, 89,
+ 92, 94, 97, 99, 102, 104, 107, 107, 107, 107, 60, 56, 53, 50, 47, 49,
+ 50, 52, 54, 56, 58, 61, 64, 67, 69, 73, 76, 79, 82, 85, 88, 91, 94, 96,
+ 99, 102, 104, 107, 109, 109, 109, 109, 63, 59, 55, 52, 49, 50, 52, 54,
+ 55, 58, 60, 63, 65, 68, 71, 74, 78, 80, 83, 87, 90, 93, 96, 99, 102,
+ 104, 107, 109, 112, 112, 112, 112, 65, 61, 57, 54, 51, 52, 54, 55, 57,
+ 59, 62, 64, 67, 70, 73, 76, 79, 82, 85, 89, 92, 95, 98, 101, 104, 107,
+ 109, 112, 115, 115, 115, 115, 68, 64, 60, 56, 53, 54, 56, 57, 59, 61,
+ 64, 66, 69, 71, 74, 78, 81, 84, 87, 90, 94, 97, 100, 103, 107, 109, 112,
+ 115, 118, 118, 118, 118, 68, 64, 60, 56, 53, 54, 56, 57, 59, 61, 64, 66,
+ 69, 71, 74, 78, 81, 84, 87, 90, 94, 97, 100, 103, 107, 109, 112, 115,
+ 118, 118, 118, 118, 68, 64, 60, 56, 53, 54, 56, 57, 59, 61, 64, 66, 69,
+ 71, 74, 78, 81, 84, 87, 90, 94, 97, 100, 103, 107, 109, 112, 115, 118,
+ 118, 118, 118, 68, 64, 60, 56, 53, 54, 56, 57, 59, 61, 64, 66, 69, 71,
+ 74, 78, 81, 84, 87, 90, 94, 97, 100, 103, 107, 109, 112, 115, 118, 118,
+ 118, 118 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 70, 120, 197, 70, 109, 156, 218, 120, 156, 229, 287, 197, 218, 287,
+ 344,
+ /* Size 8 */
+ 64, 47, 51, 69, 94, 126, 161, 197, 47, 55, 52, 62, 80, 105, 135, 167,
+ 51, 52, 75, 88, 105, 127, 154, 183, 69, 62, 88, 115, 136, 157, 181, 207,
+ 94, 80, 105, 136, 165, 189, 212, 234, 126, 105, 127, 157, 189, 216, 240,
+ 261, 161, 135, 154, 181, 212, 240, 264, 284, 197, 167, 183, 207, 234,
+ 261, 284, 303,
+ /* Size 16 */
+ 64, 54, 47, 49, 51, 59, 69, 80, 94, 108, 126, 141, 161, 177, 197, 197,
+ 54, 53, 51, 51, 52, 58, 65, 74, 87, 99, 115, 129, 147, 162, 181, 181,
+ 47, 51, 55, 53, 52, 57, 62, 70, 80, 91, 105, 118, 135, 149, 167, 167,
+ 49, 51, 53, 57, 62, 67, 73, 81, 91, 102, 115, 128, 144, 158, 175, 175,
+ 51, 52, 52, 62, 75, 81, 88, 96, 105, 115, 127, 139, 154, 167, 183, 183,
+ 59, 58, 57, 67, 81, 89, 100, 108, 118, 128, 140, 152, 166, 179, 195,
+ 195, 69, 65, 62, 73, 88, 100, 115, 124, 136, 146, 157, 168, 181, 193,
+ 207, 207, 80, 74, 70, 81, 96, 108, 124, 136, 149, 159, 172, 183, 195,
+ 207, 220, 220, 94, 87, 80, 91, 105, 118, 136, 149, 165, 176, 189, 200,
+ 212, 222, 234, 234, 108, 99, 91, 102, 115, 128, 146, 159, 176, 188, 202,
+ 213, 225, 235, 247, 247, 126, 115, 105, 115, 127, 140, 157, 172, 189,
+ 202, 216, 228, 240, 250, 261, 261, 141, 129, 118, 128, 139, 152, 168,
+ 183, 200, 213, 228, 239, 251, 261, 272, 272, 161, 147, 135, 144, 154,
+ 166, 181, 195, 212, 225, 240, 251, 264, 273, 284, 284, 177, 162, 149,
+ 158, 167, 179, 193, 207, 222, 235, 250, 261, 273, 283, 293, 293, 197,
+ 181, 167, 175, 183, 195, 207, 220, 234, 247, 261, 272, 284, 293, 303,
+ 303, 197, 181, 167, 175, 183, 195, 207, 220, 234, 247, 261, 272, 284,
+ 293, 303, 303,
+ /* Size 32 */
+ 64, 59, 54, 51, 47, 48, 49, 50, 51, 55, 59, 64, 69, 74, 80, 86, 94, 101,
+ 108, 116, 126, 133, 141, 150, 161, 169, 177, 186, 197, 197, 197, 197,
+ 59, 56, 54, 51, 49, 50, 50, 51, 52, 55, 58, 62, 67, 72, 77, 83, 90, 96,
+ 103, 111, 120, 127, 135, 143, 153, 161, 169, 178, 189, 189, 189, 189,
+ 54, 54, 53, 52, 51, 51, 51, 52, 52, 55, 58, 61, 65, 69, 74, 80, 87, 92,
+ 99, 106, 115, 121, 129, 137, 147, 154, 162, 171, 181, 181, 181, 181, 51,
+ 51, 52, 52, 53, 53, 52, 52, 52, 55, 57, 60, 63, 67, 72, 77, 83, 89, 95,
+ 102, 110, 116, 123, 131, 141, 148, 155, 164, 174, 174, 174, 174, 47, 49,
+ 51, 53, 55, 54, 53, 53, 52, 54, 57, 59, 62, 66, 70, 75, 80, 85, 91, 98,
+ 105, 111, 118, 126, 135, 142, 149, 158, 167, 167, 167, 167, 48, 50, 51,
+ 53, 54, 55, 55, 56, 57, 59, 61, 64, 67, 71, 75, 80, 85, 90, 96, 102,
+ 110, 116, 123, 130, 139, 146, 153, 162, 171, 171, 171, 171, 49, 50, 51,
+ 52, 53, 55, 57, 59, 62, 64, 67, 70, 73, 77, 81, 86, 91, 96, 102, 108,
+ 115, 121, 128, 135, 144, 150, 158, 166, 175, 175, 175, 175, 50, 51, 52,
+ 52, 53, 56, 59, 63, 68, 70, 73, 76, 80, 84, 88, 92, 97, 102, 108, 114,
+ 121, 127, 133, 140, 148, 155, 162, 170, 179, 179, 179, 179, 51, 52, 52,
+ 52, 52, 57, 62, 68, 75, 78, 81, 84, 88, 92, 96, 100, 105, 109, 115, 120,
+ 127, 133, 139, 146, 154, 160, 167, 175, 183, 183, 183, 183, 55, 55, 55,
+ 55, 54, 59, 64, 70, 78, 81, 85, 89, 94, 98, 102, 106, 111, 116, 121,
+ 127, 133, 139, 145, 152, 160, 166, 173, 181, 189, 189, 189, 189, 59, 58,
+ 58, 57, 57, 61, 67, 73, 81, 85, 89, 94, 100, 104, 108, 113, 118, 123,
+ 128, 134, 140, 146, 152, 159, 166, 173, 179, 187, 195, 195, 195, 195,
+ 64, 62, 61, 60, 59, 64, 70, 76, 84, 89, 94, 100, 107, 111, 116, 121,
+ 126, 131, 137, 142, 148, 154, 160, 166, 173, 180, 186, 193, 201, 201,
+ 201, 201, 69, 67, 65, 63, 62, 67, 73, 80, 88, 94, 100, 107, 115, 119,
+ 124, 130, 136, 141, 146, 151, 157, 163, 168, 175, 181, 187, 193, 200,
+ 207, 207, 207, 207, 74, 72, 69, 67, 66, 71, 77, 84, 92, 98, 104, 111,
+ 119, 124, 130, 136, 142, 147, 152, 158, 164, 170, 175, 181, 188, 194,
+ 200, 206, 213, 213, 213, 213, 80, 77, 74, 72, 70, 75, 81, 88, 96, 102,
+ 108, 116, 124, 130, 136, 142, 149, 154, 159, 165, 172, 177, 183, 189,
+ 195, 201, 207, 213, 220, 220, 220, 220, 86, 83, 80, 77, 75, 80, 86, 92,
+ 100, 106, 113, 121, 130, 136, 142, 149, 156, 162, 167, 173, 180, 185,
+ 191, 197, 203, 209, 214, 220, 227, 227, 227, 227, 94, 90, 87, 83, 80,
+ 85, 91, 97, 105, 111, 118, 126, 136, 142, 149, 156, 165, 170, 176, 182,
+ 189, 194, 200, 205, 212, 217, 222, 228, 234, 234, 234, 234, 101, 96, 92,
+ 89, 85, 90, 96, 102, 109, 116, 123, 131, 141, 147, 154, 162, 170, 176,
+ 182, 188, 195, 200, 206, 212, 218, 223, 229, 234, 240, 240, 240, 240,
+ 108, 103, 99, 95, 91, 96, 102, 108, 115, 121, 128, 137, 146, 152, 159,
+ 167, 176, 182, 188, 195, 202, 207, 213, 219, 225, 230, 235, 241, 247,
+ 247, 247, 247, 116, 111, 106, 102, 98, 102, 108, 114, 120, 127, 134,
+ 142, 151, 158, 165, 173, 182, 188, 195, 201, 209, 214, 220, 226, 232,
+ 237, 242, 248, 254, 254, 254, 254, 126, 120, 115, 110, 105, 110, 115,
+ 121, 127, 133, 140, 148, 157, 164, 172, 180, 189, 195, 202, 209, 216,
+ 222, 228, 234, 240, 245, 250, 255, 261, 261, 261, 261, 133, 127, 121,
+ 116, 111, 116, 121, 127, 133, 139, 146, 154, 163, 170, 177, 185, 194,
+ 200, 207, 214, 222, 227, 233, 239, 245, 250, 255, 261, 266, 266, 266,
+ 266, 141, 135, 129, 123, 118, 123, 128, 133, 139, 145, 152, 160, 168,
+ 175, 183, 191, 200, 206, 213, 220, 228, 233, 239, 245, 251, 256, 261,
+ 266, 272, 272, 272, 272, 150, 143, 137, 131, 126, 130, 135, 140, 146,
+ 152, 159, 166, 175, 181, 189, 197, 205, 212, 219, 226, 234, 239, 245,
+ 251, 257, 262, 267, 272, 278, 278, 278, 278, 161, 153, 147, 141, 135,
+ 139, 144, 148, 154, 160, 166, 173, 181, 188, 195, 203, 212, 218, 225,
+ 232, 240, 245, 251, 257, 264, 268, 273, 278, 284, 284, 284, 284, 169,
+ 161, 154, 148, 142, 146, 150, 155, 160, 166, 173, 180, 187, 194, 201,
+ 209, 217, 223, 230, 237, 245, 250, 256, 262, 268, 273, 278, 283, 288,
+ 288, 288, 288, 177, 169, 162, 155, 149, 153, 158, 162, 167, 173, 179,
+ 186, 193, 200, 207, 214, 222, 229, 235, 242, 250, 255, 261, 267, 273,
+ 278, 283, 288, 293, 293, 293, 293, 186, 178, 171, 164, 158, 162, 166,
+ 170, 175, 181, 187, 193, 200, 206, 213, 220, 228, 234, 241, 248, 255,
+ 261, 266, 272, 278, 283, 288, 293, 298, 298, 298, 298, 197, 189, 181,
+ 174, 167, 171, 175, 179, 183, 189, 195, 201, 207, 213, 220, 227, 234,
+ 240, 247, 254, 261, 266, 272, 278, 284, 288, 293, 298, 303, 303, 303,
+ 303, 197, 189, 181, 174, 167, 171, 175, 179, 183, 189, 195, 201, 207,
+ 213, 220, 227, 234, 240, 247, 254, 261, 266, 272, 278, 284, 288, 293,
+ 298, 303, 303, 303, 303, 197, 189, 181, 174, 167, 171, 175, 179, 183,
+ 189, 195, 201, 207, 213, 220, 227, 234, 240, 247, 254, 261, 266, 272,
+ 278, 284, 288, 293, 298, 303, 303, 303, 303, 197, 189, 181, 174, 167,
+ 171, 175, 179, 183, 189, 195, 201, 207, 213, 220, 227, 234, 240, 247,
+ 254, 261, 266, 272, 278, 284, 288, 293, 298, 303, 303, 303, 303 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 19, 21, 37, 63, 21, 33, 49, 70, 37, 49, 74, 96, 63, 70, 96, 119,
+ /* Size 8 */
+ 23, 17, 18, 25, 34, 47, 61, 77, 17, 19, 18, 22, 29, 38, 50, 64, 18, 18,
+ 27, 32, 38, 47, 58, 71, 25, 22, 32, 42, 51, 60, 70, 81, 34, 29, 38, 51,
+ 63, 73, 83, 94, 47, 38, 47, 60, 73, 85, 96, 106, 61, 50, 58, 70, 83, 96,
+ 108, 118, 77, 64, 71, 81, 94, 106, 118, 127,
+ /* Size 16 */
+ 22, 18, 16, 17, 17, 20, 23, 27, 33, 38, 45, 51, 58, 65, 73, 73, 18, 18,
+ 17, 17, 18, 20, 22, 25, 30, 34, 40, 46, 53, 59, 67, 67, 16, 17, 19, 18,
+ 18, 19, 21, 24, 28, 32, 37, 42, 48, 54, 61, 61, 17, 17, 18, 19, 21, 23,
+ 25, 28, 31, 35, 40, 45, 52, 57, 64, 64, 17, 18, 18, 21, 26, 28, 31, 33,
+ 37, 40, 45, 50, 55, 61, 68, 68, 20, 20, 19, 23, 28, 31, 35, 38, 42, 46,
+ 50, 55, 61, 66, 72, 72, 23, 22, 21, 25, 31, 35, 40, 44, 48, 52, 57, 61,
+ 67, 72, 78, 78, 27, 25, 24, 28, 33, 38, 44, 48, 54, 58, 63, 67, 73, 78,
+ 83, 83, 33, 30, 28, 31, 37, 42, 48, 54, 60, 64, 70, 74, 80, 84, 90, 90,
+ 38, 34, 32, 35, 40, 46, 52, 58, 64, 69, 75, 80, 85, 90, 95, 95, 45, 40,
+ 37, 40, 45, 50, 57, 63, 70, 75, 82, 87, 92, 97, 102, 102, 51, 46, 42,
+ 45, 50, 55, 61, 67, 74, 80, 87, 92, 97, 102, 107, 107, 58, 53, 48, 52,
+ 55, 61, 67, 73, 80, 85, 92, 97, 103, 107, 112, 112, 65, 59, 54, 57, 61,
+ 66, 72, 78, 84, 90, 97, 102, 107, 112, 117, 117, 73, 67, 61, 64, 68, 72,
+ 78, 83, 90, 95, 102, 107, 112, 117, 122, 122, 73, 67, 61, 64, 68, 72,
+ 78, 83, 90, 95, 102, 107, 112, 117, 122, 122,
+ /* Size 32 */
+ 21, 19, 18, 17, 16, 16, 16, 17, 17, 18, 20, 21, 23, 25, 27, 29, 32, 34,
+ 37, 40, 44, 46, 49, 53, 57, 60, 63, 67, 72, 72, 72, 72, 19, 19, 18, 17,
+ 16, 16, 17, 17, 17, 18, 19, 21, 22, 24, 26, 28, 31, 33, 35, 38, 41, 44,
+ 47, 50, 54, 57, 60, 64, 68, 68, 68, 68, 18, 18, 17, 17, 17, 17, 17, 17,
+ 17, 18, 19, 20, 22, 23, 25, 27, 29, 31, 34, 36, 39, 42, 45, 48, 52, 54,
+ 57, 61, 65, 65, 65, 65, 17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 19, 20,
+ 21, 22, 24, 26, 28, 30, 32, 35, 38, 40, 43, 46, 49, 52, 55, 58, 62, 62,
+ 62, 62, 16, 16, 17, 17, 18, 18, 18, 17, 17, 18, 19, 20, 21, 22, 23, 25,
+ 27, 29, 31, 33, 36, 38, 41, 44, 47, 50, 53, 56, 60, 60, 60, 60, 16, 16,
+ 17, 17, 18, 18, 18, 18, 19, 19, 20, 21, 22, 24, 25, 27, 29, 31, 33, 35,
+ 38, 40, 42, 45, 49, 51, 54, 57, 61, 61, 61, 61, 16, 17, 17, 17, 18, 18,
+ 19, 20, 20, 21, 22, 23, 24, 26, 27, 29, 31, 33, 35, 37, 40, 42, 44, 47,
+ 50, 53, 56, 59, 63, 63, 63, 63, 17, 17, 17, 17, 17, 18, 20, 21, 22, 23,
+ 24, 26, 27, 28, 30, 31, 33, 35, 37, 39, 42, 44, 46, 49, 52, 55, 58, 61,
+ 64, 64, 64, 64, 17, 17, 17, 17, 17, 19, 20, 22, 25, 26, 27, 28, 30, 31,
+ 32, 34, 36, 37, 39, 42, 44, 46, 49, 51, 54, 57, 60, 63, 66, 66, 66, 66,
+ 18, 18, 18, 18, 18, 19, 21, 23, 26, 27, 29, 30, 32, 33, 35, 36, 38, 40,
+ 42, 44, 46, 49, 51, 54, 57, 59, 62, 65, 68, 68, 68, 68, 20, 19, 19, 19,
+ 19, 20, 22, 24, 27, 29, 30, 32, 34, 35, 37, 39, 41, 43, 45, 47, 49, 51,
+ 54, 56, 59, 62, 64, 67, 71, 71, 71, 71, 21, 21, 20, 20, 20, 21, 23, 26,
+ 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 57, 59, 62, 64,
+ 67, 70, 73, 73, 73, 73, 23, 22, 22, 21, 21, 22, 24, 27, 30, 32, 34, 36,
+ 39, 41, 43, 45, 47, 49, 51, 53, 56, 58, 60, 62, 65, 68, 70, 73, 76, 76,
+ 76, 76, 25, 24, 23, 22, 22, 24, 26, 28, 31, 33, 35, 38, 41, 43, 45, 47,
+ 50, 52, 54, 56, 58, 60, 63, 65, 68, 70, 73, 76, 78, 78, 78, 78, 27, 26,
+ 25, 24, 23, 25, 27, 30, 32, 35, 37, 40, 43, 45, 47, 50, 52, 54, 56, 59,
+ 61, 63, 66, 68, 71, 73, 76, 78, 81, 81, 81, 81, 29, 28, 27, 26, 25, 27,
+ 29, 31, 34, 36, 39, 42, 45, 47, 50, 52, 55, 57, 60, 62, 65, 67, 69, 72,
+ 74, 76, 79, 82, 84, 84, 84, 84, 32, 31, 29, 28, 27, 29, 31, 33, 36, 38,
+ 41, 44, 47, 50, 52, 55, 58, 61, 63, 66, 68, 70, 73, 75, 78, 80, 82, 85,
+ 88, 88, 88, 88, 34, 33, 31, 30, 29, 31, 33, 35, 37, 40, 43, 46, 49, 52,
+ 54, 57, 61, 63, 65, 68, 71, 73, 75, 78, 80, 83, 85, 88, 90, 90, 90, 90,
+ 37, 35, 34, 32, 31, 33, 35, 37, 39, 42, 45, 48, 51, 54, 56, 60, 63, 65,
+ 68, 71, 74, 76, 78, 81, 83, 86, 88, 90, 93, 93, 93, 93, 40, 38, 36, 35,
+ 33, 35, 37, 39, 42, 44, 47, 50, 53, 56, 59, 62, 66, 68, 71, 73, 77, 79,
+ 81, 84, 87, 89, 91, 93, 96, 96, 96, 96, 44, 41, 39, 38, 36, 38, 40, 42,
+ 44, 46, 49, 52, 56, 58, 61, 65, 68, 71, 74, 77, 80, 82, 85, 87, 90, 92,
+ 94, 97, 99, 99, 99, 99, 46, 44, 42, 40, 38, 40, 42, 44, 46, 49, 51, 54,
+ 58, 60, 63, 67, 70, 73, 76, 79, 82, 84, 87, 90, 92, 95, 97, 99, 102,
+ 102, 102, 102, 49, 47, 45, 43, 41, 42, 44, 46, 49, 51, 54, 57, 60, 63,
+ 66, 69, 73, 75, 78, 81, 85, 87, 89, 92, 95, 97, 99, 102, 104, 104, 104,
+ 104, 53, 50, 48, 46, 44, 45, 47, 49, 51, 54, 56, 59, 62, 65, 68, 72, 75,
+ 78, 81, 84, 87, 90, 92, 95, 98, 100, 102, 105, 107, 107, 107, 107, 57,
+ 54, 52, 49, 47, 49, 50, 52, 54, 57, 59, 62, 65, 68, 71, 74, 78, 80, 83,
+ 87, 90, 92, 95, 98, 101, 103, 105, 107, 110, 110, 110, 110, 60, 57, 54,
+ 52, 50, 51, 53, 55, 57, 59, 62, 64, 68, 70, 73, 76, 80, 83, 86, 89, 92,
+ 95, 97, 100, 103, 105, 107, 110, 112, 112, 112, 112, 63, 60, 57, 55, 53,
+ 54, 56, 58, 60, 62, 64, 67, 70, 73, 76, 79, 82, 85, 88, 91, 94, 97, 99,
+ 102, 105, 107, 109, 112, 114, 114, 114, 114, 67, 64, 61, 58, 56, 57, 59,
+ 61, 63, 65, 67, 70, 73, 76, 78, 82, 85, 88, 90, 93, 97, 99, 102, 105,
+ 107, 110, 112, 114, 117, 117, 117, 117, 72, 68, 65, 62, 60, 61, 63, 64,
+ 66, 68, 71, 73, 76, 78, 81, 84, 88, 90, 93, 96, 99, 102, 104, 107, 110,
+ 112, 114, 117, 119, 119, 119, 119, 72, 68, 65, 62, 60, 61, 63, 64, 66,
+ 68, 71, 73, 76, 78, 81, 84, 88, 90, 93, 96, 99, 102, 104, 107, 110, 112,
+ 114, 117, 119, 119, 119, 119, 72, 68, 65, 62, 60, 61, 63, 64, 66, 68,
+ 71, 73, 76, 78, 81, 84, 88, 90, 93, 96, 99, 102, 104, 107, 110, 112,
+ 114, 117, 119, 119, 119, 119, 72, 68, 65, 62, 60, 61, 63, 64, 66, 68,
+ 71, 73, 76, 78, 81, 84, 88, 90, 93, 96, 99, 102, 104, 107, 110, 112,
+ 114, 117, 119, 119, 119, 119 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 104, 114, 146, 104, 126, 136, 158, 114, 136, 178, 207, 146, 158,
+ 207, 250,
+ /* Size 8 */
+ 64, 51, 96, 102, 109, 123, 141, 161, 51, 75, 98, 88, 91, 100, 114, 131,
+ 96, 98, 116, 111, 111, 117, 128, 143, 102, 88, 111, 127, 134, 140, 150,
+ 163, 109, 91, 111, 134, 151, 163, 174, 186, 123, 100, 117, 140, 163,
+ 181, 196, 209, 141, 114, 128, 150, 174, 196, 214, 230, 161, 131, 143,
+ 163, 186, 209, 230, 248,
+ /* Size 16 */
+ 64, 57, 51, 67, 96, 99, 102, 106, 109, 116, 123, 132, 141, 151, 161,
+ 161, 57, 59, 61, 75, 97, 96, 94, 97, 99, 105, 111, 118, 126, 135, 144,
+ 144, 51, 61, 75, 85, 98, 92, 88, 89, 91, 95, 100, 107, 114, 122, 131,
+ 131, 67, 75, 85, 94, 106, 102, 98, 99, 100, 104, 108, 114, 121, 128,
+ 137, 137, 96, 97, 98, 106, 116, 113, 111, 111, 111, 114, 117, 123, 128,
+ 135, 143, 143, 99, 96, 92, 102, 113, 116, 119, 120, 121, 125, 128, 133,
+ 138, 145, 152, 152, 102, 94, 88, 98, 111, 119, 127, 131, 134, 137, 140,
+ 145, 150, 156, 163, 163, 106, 97, 89, 99, 111, 120, 131, 136, 142, 146,
+ 151, 156, 161, 167, 173, 173, 109, 99, 91, 100, 111, 121, 134, 142, 151,
+ 157, 163, 168, 174, 179, 186, 186, 116, 105, 95, 104, 114, 125, 137,
+ 146, 157, 164, 172, 178, 184, 190, 196, 196, 123, 111, 100, 108, 117,
+ 128, 140, 151, 163, 172, 181, 188, 196, 202, 209, 209, 132, 118, 107,
+ 114, 123, 133, 145, 156, 168, 178, 188, 196, 205, 211, 219, 219, 141,
+ 126, 114, 121, 128, 138, 150, 161, 174, 184, 196, 205, 214, 222, 230,
+ 230, 151, 135, 122, 128, 135, 145, 156, 167, 179, 190, 202, 211, 222,
+ 230, 238, 238, 161, 144, 131, 137, 143, 152, 163, 173, 186, 196, 209,
+ 219, 230, 238, 248, 248, 161, 144, 131, 137, 143, 152, 163, 173, 186,
+ 196, 209, 219, 230, 238, 248, 248,
+ /* Size 32 */
+ 64, 60, 57, 54, 51, 58, 67, 79, 96, 97, 99, 100, 102, 104, 106, 107,
+ 109, 113, 116, 120, 123, 127, 132, 136, 141, 146, 151, 156, 161, 161,
+ 161, 161, 60, 59, 58, 57, 56, 62, 70, 81, 96, 97, 97, 98, 98, 99, 101,
+ 102, 104, 107, 110, 113, 117, 120, 124, 129, 133, 137, 142, 147, 152,
+ 152, 152, 152, 57, 58, 59, 60, 61, 67, 75, 84, 97, 96, 96, 95, 94, 96,
+ 97, 98, 99, 102, 105, 107, 111, 114, 118, 122, 126, 130, 135, 139, 144,
+ 144, 144, 144, 54, 57, 60, 63, 67, 73, 79, 87, 97, 96, 94, 92, 91, 92,
+ 93, 94, 95, 97, 100, 102, 105, 108, 112, 116, 120, 124, 128, 132, 137,
+ 137, 137, 137, 51, 56, 61, 67, 75, 80, 85, 91, 98, 95, 92, 90, 88, 89,
+ 89, 90, 91, 93, 95, 98, 100, 103, 107, 110, 114, 118, 122, 126, 131,
+ 131, 131, 131, 58, 62, 67, 73, 80, 84, 89, 95, 102, 99, 97, 95, 93, 93,
+ 94, 94, 95, 97, 99, 102, 104, 107, 110, 114, 117, 121, 125, 129, 134,
+ 134, 134, 134, 67, 70, 75, 79, 85, 89, 94, 100, 106, 104, 102, 100, 98,
+ 99, 99, 99, 100, 102, 104, 106, 108, 111, 114, 117, 121, 124, 128, 132,
+ 137, 137, 137, 137, 79, 81, 84, 87, 91, 95, 100, 105, 110, 109, 107,
+ 106, 104, 105, 105, 105, 105, 107, 109, 111, 112, 115, 118, 121, 125,
+ 128, 132, 136, 140, 140, 140, 140, 96, 96, 97, 97, 98, 102, 106, 110,
+ 116, 114, 113, 112, 111, 111, 111, 111, 111, 112, 114, 116, 117, 120,
+ 123, 125, 128, 132, 135, 139, 143, 143, 143, 143, 97, 97, 96, 96, 95,
+ 99, 104, 109, 114, 115, 115, 115, 115, 115, 115, 116, 116, 117, 119,
+ 121, 122, 125, 128, 130, 133, 137, 140, 144, 148, 148, 148, 148, 99, 97,
+ 96, 94, 92, 97, 102, 107, 113, 115, 116, 117, 119, 119, 120, 121, 121,
+ 123, 125, 126, 128, 130, 133, 136, 138, 142, 145, 149, 152, 152, 152,
+ 152, 100, 98, 95, 92, 90, 95, 100, 106, 112, 115, 117, 120, 123, 124,
+ 125, 126, 127, 129, 131, 132, 134, 136, 139, 141, 144, 147, 150, 154,
+ 157, 157, 157, 157, 102, 98, 94, 91, 88, 93, 98, 104, 111, 115, 119,
+ 123, 127, 129, 131, 132, 134, 136, 137, 139, 140, 143, 145, 147, 150,
+ 153, 156, 159, 163, 163, 163, 163, 104, 99, 96, 92, 89, 93, 99, 105,
+ 111, 115, 119, 124, 129, 131, 133, 136, 138, 140, 142, 144, 146, 148,
+ 150, 153, 155, 158, 161, 164, 168, 168, 168, 168, 106, 101, 97, 93, 89,
+ 94, 99, 105, 111, 115, 120, 125, 131, 133, 136, 139, 142, 144, 146, 149,
+ 151, 153, 156, 158, 161, 164, 167, 170, 173, 173, 173, 173, 107, 102,
+ 98, 94, 90, 94, 99, 105, 111, 116, 121, 126, 132, 136, 139, 143, 147,
+ 149, 151, 154, 157, 159, 162, 164, 167, 170, 173, 176, 179, 179, 179,
+ 179, 109, 104, 99, 95, 91, 95, 100, 105, 111, 116, 121, 127, 134, 138,
+ 142, 147, 151, 154, 157, 160, 163, 166, 168, 171, 174, 177, 179, 182,
+ 186, 186, 186, 186, 113, 107, 102, 97, 93, 97, 102, 107, 112, 117, 123,
+ 129, 136, 140, 144, 149, 154, 157, 160, 164, 167, 170, 173, 176, 179,
+ 182, 185, 188, 191, 191, 191, 191, 116, 110, 105, 100, 95, 99, 104, 109,
+ 114, 119, 125, 131, 137, 142, 146, 151, 157, 160, 164, 168, 172, 175,
+ 178, 181, 184, 187, 190, 193, 196, 196, 196, 196, 120, 113, 107, 102,
+ 98, 102, 106, 111, 116, 121, 126, 132, 139, 144, 149, 154, 160, 164,
+ 168, 172, 176, 180, 183, 186, 190, 193, 196, 199, 202, 202, 202, 202,
+ 123, 117, 111, 105, 100, 104, 108, 112, 117, 122, 128, 134, 140, 146,
+ 151, 157, 163, 167, 172, 176, 181, 185, 188, 192, 196, 199, 202, 205,
+ 209, 209, 209, 209, 127, 120, 114, 108, 103, 107, 111, 115, 120, 125,
+ 130, 136, 143, 148, 153, 159, 166, 170, 175, 180, 185, 188, 192, 196,
+ 200, 203, 207, 210, 214, 214, 214, 214, 132, 124, 118, 112, 107, 110,
+ 114, 118, 123, 128, 133, 139, 145, 150, 156, 162, 168, 173, 178, 183,
+ 188, 192, 196, 200, 205, 208, 211, 215, 219, 219, 219, 219, 136, 129,
+ 122, 116, 110, 114, 117, 121, 125, 130, 136, 141, 147, 153, 158, 164,
+ 171, 176, 181, 186, 192, 196, 200, 205, 209, 213, 216, 220, 224, 224,
+ 224, 224, 141, 133, 126, 120, 114, 117, 121, 125, 128, 133, 138, 144,
+ 150, 155, 161, 167, 174, 179, 184, 190, 196, 200, 205, 209, 214, 218,
+ 222, 226, 230, 230, 230, 230, 146, 137, 130, 124, 118, 121, 124, 128,
+ 132, 137, 142, 147, 153, 158, 164, 170, 177, 182, 187, 193, 199, 203,
+ 208, 213, 218, 222, 226, 230, 234, 234, 234, 234, 151, 142, 135, 128,
+ 122, 125, 128, 132, 135, 140, 145, 150, 156, 161, 167, 173, 179, 185,
+ 190, 196, 202, 207, 211, 216, 222, 226, 230, 234, 238, 238, 238, 238,
+ 156, 147, 139, 132, 126, 129, 132, 136, 139, 144, 149, 154, 159, 164,
+ 170, 176, 182, 188, 193, 199, 205, 210, 215, 220, 226, 230, 234, 238,
+ 243, 243, 243, 243, 161, 152, 144, 137, 131, 134, 137, 140, 143, 148,
+ 152, 157, 163, 168, 173, 179, 186, 191, 196, 202, 209, 214, 219, 224,
+ 230, 234, 238, 243, 248, 248, 248, 248, 161, 152, 144, 137, 131, 134,
+ 137, 140, 143, 148, 152, 157, 163, 168, 173, 179, 186, 191, 196, 202,
+ 209, 214, 219, 224, 230, 234, 238, 243, 248, 248, 248, 248, 161, 152,
+ 144, 137, 131, 134, 137, 140, 143, 148, 152, 157, 163, 168, 173, 179,
+ 186, 191, 196, 202, 209, 214, 219, 224, 230, 234, 238, 243, 248, 248,
+ 248, 248, 161, 152, 144, 137, 131, 134, 137, 140, 143, 148, 152, 157,
+ 163, 168, 173, 179, 186, 191, 196, 202, 209, 214, 219, 224, 230, 234,
+ 238, 243, 248, 248, 248, 248 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 25, 42, 46, 60, 42, 51, 55, 65, 46, 55, 75, 88, 60, 65, 88, 109,
+ /* Size 8 */
+ 27, 22, 41, 44, 48, 54, 63, 72, 22, 32, 42, 38, 39, 43, 50, 58, 41, 42,
+ 50, 49, 48, 51, 57, 64, 44, 38, 49, 56, 59, 62, 67, 73, 48, 39, 48, 59,
+ 67, 73, 78, 84, 54, 43, 51, 62, 73, 82, 90, 96, 63, 50, 57, 67, 78, 90,
+ 99, 107, 72, 58, 64, 73, 84, 96, 107, 117,
+ /* Size 16 */
+ 26, 23, 21, 27, 40, 41, 43, 44, 46, 49, 52, 56, 61, 65, 70, 70, 23, 24,
+ 25, 31, 40, 40, 39, 40, 42, 44, 47, 50, 54, 58, 62, 62, 21, 25, 31, 35,
+ 41, 39, 37, 37, 38, 40, 42, 45, 48, 52, 56, 56, 27, 31, 35, 39, 45, 43,
+ 41, 41, 42, 44, 46, 48, 51, 55, 58, 58, 40, 40, 41, 45, 49, 48, 47, 47,
+ 47, 48, 50, 52, 55, 58, 62, 62, 41, 40, 39, 43, 48, 49, 50, 51, 52, 53,
+ 54, 57, 59, 62, 66, 66, 43, 39, 37, 41, 47, 50, 54, 56, 57, 59, 60, 62,
+ 65, 68, 71, 71, 44, 40, 37, 41, 47, 51, 56, 58, 61, 63, 65, 67, 70, 73,
+ 76, 76, 46, 42, 38, 42, 47, 52, 57, 61, 65, 68, 71, 73, 76, 79, 82, 82,
+ 49, 44, 40, 44, 48, 53, 59, 63, 68, 71, 75, 78, 81, 84, 87, 87, 52, 47,
+ 42, 46, 50, 54, 60, 65, 71, 75, 80, 83, 87, 90, 93, 93, 56, 50, 45, 48,
+ 52, 57, 62, 67, 73, 78, 83, 87, 91, 95, 98, 98, 61, 54, 48, 51, 55, 59,
+ 65, 70, 76, 81, 87, 91, 96, 100, 104, 104, 65, 58, 52, 55, 58, 62, 68,
+ 73, 79, 84, 90, 95, 100, 104, 108, 108, 70, 62, 56, 58, 62, 66, 71, 76,
+ 82, 87, 93, 98, 104, 108, 113, 113, 70, 62, 56, 58, 62, 66, 71, 76, 82,
+ 87, 93, 98, 104, 108, 113, 113,
+ /* Size 32 */
+ 26, 24, 23, 22, 21, 23, 27, 32, 39, 40, 41, 41, 42, 43, 44, 44, 45, 47,
+ 48, 50, 52, 53, 55, 57, 60, 62, 64, 66, 69, 69, 69, 69, 24, 24, 23, 23,
+ 22, 25, 29, 33, 40, 40, 40, 40, 40, 41, 42, 42, 43, 44, 46, 47, 49, 50,
+ 52, 54, 56, 58, 60, 62, 65, 65, 65, 65, 23, 23, 24, 24, 25, 27, 30, 34,
+ 40, 40, 39, 39, 39, 39, 40, 40, 41, 42, 43, 45, 46, 47, 49, 51, 53, 55,
+ 57, 59, 61, 61, 61, 61, 22, 23, 24, 26, 27, 30, 32, 36, 40, 39, 39, 38,
+ 37, 38, 38, 39, 39, 40, 41, 42, 43, 45, 46, 48, 50, 52, 54, 56, 58, 58,
+ 58, 58, 21, 22, 25, 27, 31, 33, 35, 37, 40, 39, 38, 37, 36, 36, 37, 37,
+ 37, 38, 39, 40, 41, 43, 44, 46, 47, 49, 51, 53, 55, 55, 55, 55, 23, 25,
+ 27, 30, 33, 34, 37, 39, 42, 41, 40, 39, 38, 38, 39, 39, 39, 40, 41, 42,
+ 43, 44, 46, 47, 49, 50, 52, 54, 56, 56, 56, 56, 27, 29, 30, 32, 35, 37,
+ 39, 41, 44, 43, 42, 41, 40, 41, 41, 41, 41, 42, 43, 44, 45, 46, 47, 49,
+ 50, 52, 54, 56, 58, 58, 58, 58, 32, 33, 34, 36, 37, 39, 41, 43, 46, 45,
+ 44, 44, 43, 43, 43, 43, 43, 44, 45, 46, 47, 48, 49, 51, 52, 54, 55, 57,
+ 59, 59, 59, 59, 39, 40, 40, 40, 40, 42, 44, 46, 48, 48, 47, 47, 46, 46,
+ 46, 46, 46, 47, 47, 48, 49, 50, 51, 52, 54, 55, 57, 59, 61, 61, 61, 61,
+ 40, 40, 40, 39, 39, 41, 43, 45, 48, 48, 48, 48, 48, 48, 48, 48, 48, 49,
+ 50, 50, 51, 52, 53, 55, 56, 57, 59, 61, 63, 63, 63, 63, 41, 40, 39, 39,
+ 38, 40, 42, 44, 47, 48, 48, 49, 50, 50, 50, 50, 51, 51, 52, 53, 54, 55,
+ 56, 57, 58, 60, 61, 63, 65, 65, 65, 65, 41, 40, 39, 38, 37, 39, 41, 44,
+ 47, 48, 49, 50, 51, 52, 52, 53, 53, 54, 55, 56, 56, 57, 58, 60, 61, 62,
+ 64, 65, 67, 67, 67, 67, 42, 40, 39, 37, 36, 38, 40, 43, 46, 48, 50, 51,
+ 53, 54, 55, 56, 56, 57, 58, 59, 59, 60, 61, 62, 64, 65, 66, 68, 69, 69,
+ 69, 69, 43, 41, 39, 38, 36, 38, 41, 43, 46, 48, 50, 52, 54, 55, 56, 57,
+ 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 72, 72, 72, 72, 44, 42,
+ 40, 38, 37, 39, 41, 43, 46, 48, 50, 52, 55, 56, 57, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 69, 70, 71, 73, 74, 74, 74, 74, 44, 42, 40, 39, 37, 39,
+ 41, 43, 46, 48, 50, 53, 56, 57, 59, 60, 62, 63, 64, 66, 67, 68, 69, 70,
+ 72, 73, 74, 76, 77, 77, 77, 77, 45, 43, 41, 39, 37, 39, 41, 43, 46, 48,
+ 51, 53, 56, 58, 60, 62, 64, 65, 67, 68, 70, 71, 72, 73, 75, 76, 77, 79,
+ 80, 80, 80, 80, 47, 44, 42, 40, 38, 40, 42, 44, 47, 49, 51, 54, 57, 59,
+ 61, 63, 65, 67, 68, 70, 72, 73, 74, 76, 77, 78, 80, 81, 83, 83, 83, 83,
+ 48, 46, 43, 41, 39, 41, 43, 45, 47, 50, 52, 55, 58, 60, 62, 64, 67, 68,
+ 70, 72, 74, 75, 77, 78, 80, 81, 83, 84, 86, 86, 86, 86, 50, 47, 45, 42,
+ 40, 42, 44, 46, 48, 50, 53, 56, 59, 61, 63, 66, 68, 70, 72, 74, 76, 77,
+ 79, 81, 82, 84, 85, 87, 89, 89, 89, 89, 52, 49, 46, 43, 41, 43, 45, 47,
+ 49, 51, 54, 56, 59, 62, 64, 67, 70, 72, 74, 76, 78, 80, 82, 83, 85, 87,
+ 88, 90, 92, 92, 92, 92, 53, 50, 47, 45, 43, 44, 46, 48, 50, 52, 55, 57,
+ 60, 63, 65, 68, 71, 73, 75, 77, 80, 82, 83, 85, 87, 89, 91, 92, 94, 94,
+ 94, 94, 55, 52, 49, 46, 44, 46, 47, 49, 51, 53, 56, 58, 61, 64, 66, 69,
+ 72, 74, 77, 79, 82, 83, 85, 87, 90, 91, 93, 95, 97, 97, 97, 97, 57, 54,
+ 51, 48, 46, 47, 49, 51, 52, 55, 57, 60, 62, 65, 67, 70, 73, 76, 78, 81,
+ 83, 85, 87, 90, 92, 94, 95, 97, 99, 99, 99, 99, 60, 56, 53, 50, 47, 49,
+ 50, 52, 54, 56, 58, 61, 64, 66, 69, 72, 75, 77, 80, 82, 85, 87, 90, 92,
+ 94, 96, 98, 100, 102, 102, 102, 102, 62, 58, 55, 52, 49, 50, 52, 54, 55,
+ 57, 60, 62, 65, 67, 70, 73, 76, 78, 81, 84, 87, 89, 91, 94, 96, 98, 100,
+ 102, 104, 104, 104, 104, 64, 60, 57, 54, 51, 52, 54, 55, 57, 59, 61, 64,
+ 66, 69, 71, 74, 77, 80, 83, 85, 88, 91, 93, 95, 98, 100, 102, 104, 107,
+ 107, 107, 107, 66, 62, 59, 56, 53, 54, 56, 57, 59, 61, 63, 65, 68, 70,
+ 73, 76, 79, 81, 84, 87, 90, 92, 95, 97, 100, 102, 104, 107, 109, 109,
+ 109, 109, 69, 65, 61, 58, 55, 56, 58, 59, 61, 63, 65, 67, 69, 72, 74,
+ 77, 80, 83, 86, 89, 92, 94, 97, 99, 102, 104, 107, 109, 111, 111, 111,
+ 111, 69, 65, 61, 58, 55, 56, 58, 59, 61, 63, 65, 67, 69, 72, 74, 77, 80,
+ 83, 86, 89, 92, 94, 97, 99, 102, 104, 107, 109, 111, 111, 111, 111, 69,
+ 65, 61, 58, 55, 56, 58, 59, 61, 63, 65, 67, 69, 72, 74, 77, 80, 83, 86,
+ 89, 92, 94, 97, 99, 102, 104, 107, 109, 111, 111, 111, 111, 69, 65, 61,
+ 58, 55, 56, 58, 59, 61, 63, 65, 67, 69, 72, 74, 77, 80, 83, 86, 89, 92,
+ 94, 97, 99, 102, 104, 107, 109, 111, 111, 111, 111 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 70, 116, 182, 70, 106, 148, 199, 116, 148, 207, 251, 182, 199, 251,
+ 292,
+ /* Size 8 */
+ 64, 48, 52, 69, 92, 120, 150, 179, 48, 55, 53, 62, 79, 102, 128, 155,
+ 52, 53, 74, 87, 101, 121, 144, 168, 69, 62, 87, 110, 129, 147, 166, 186,
+ 92, 79, 101, 129, 153, 172, 190, 207, 120, 102, 121, 147, 172, 194, 211,
+ 226, 150, 128, 144, 166, 190, 211, 228, 242, 179, 155, 168, 186, 207,
+ 226, 242, 255,
+ /* Size 16 */
+ 64, 55, 48, 50, 52, 59, 69, 79, 92, 104, 120, 133, 150, 163, 179, 179,
+ 55, 53, 51, 52, 52, 58, 65, 74, 85, 96, 110, 123, 138, 151, 166, 166,
+ 48, 51, 55, 54, 53, 57, 62, 70, 79, 89, 102, 113, 128, 140, 155, 155,
+ 50, 52, 54, 58, 62, 67, 72, 80, 89, 99, 111, 122, 135, 147, 161, 161,
+ 52, 52, 53, 62, 74, 80, 87, 93, 101, 110, 121, 131, 144, 155, 168, 168,
+ 59, 58, 57, 67, 80, 88, 97, 105, 113, 122, 133, 143, 154, 165, 177, 177,
+ 69, 65, 62, 72, 87, 97, 110, 119, 129, 137, 147, 156, 166, 176, 186,
+ 186, 79, 74, 70, 80, 93, 105, 119, 128, 140, 149, 158, 167, 177, 186,
+ 196, 196, 92, 85, 79, 89, 101, 113, 129, 140, 153, 162, 172, 181, 190,
+ 198, 207, 207, 104, 96, 89, 99, 110, 122, 137, 149, 162, 171, 182, 191,
+ 200, 208, 216, 216, 120, 110, 102, 111, 121, 133, 147, 158, 172, 182,
+ 194, 202, 211, 218, 226, 226, 133, 123, 113, 122, 131, 143, 156, 167,
+ 181, 191, 202, 210, 219, 227, 234, 234, 150, 138, 128, 135, 144, 154,
+ 166, 177, 190, 200, 211, 219, 228, 235, 242, 242, 163, 151, 140, 147,
+ 155, 165, 176, 186, 198, 208, 218, 227, 235, 242, 249, 249, 179, 166,
+ 155, 161, 168, 177, 186, 196, 207, 216, 226, 234, 242, 249, 255, 255,
+ 179, 166, 155, 161, 168, 177, 186, 196, 207, 216, 226, 234, 242, 249,
+ 255, 255,
+ /* Size 32 */
+ 64, 59, 55, 51, 48, 49, 50, 51, 52, 55, 59, 64, 69, 73, 79, 85, 92, 98,
+ 104, 112, 120, 126, 133, 141, 150, 156, 163, 170, 179, 179, 179, 179,
+ 59, 56, 54, 52, 50, 50, 51, 52, 52, 55, 59, 62, 67, 71, 76, 82, 89, 94,
+ 100, 107, 115, 121, 128, 135, 144, 150, 156, 164, 172, 172, 172, 172,
+ 55, 54, 53, 52, 51, 52, 52, 52, 52, 55, 58, 61, 65, 69, 74, 79, 85, 90,
+ 96, 103, 110, 116, 123, 130, 138, 144, 151, 158, 166, 166, 166, 166, 51,
+ 52, 52, 53, 53, 53, 53, 53, 53, 55, 58, 60, 63, 67, 72, 77, 82, 87, 93,
+ 99, 106, 112, 118, 125, 133, 139, 145, 152, 160, 160, 160, 160, 48, 50,
+ 51, 53, 55, 55, 54, 53, 53, 55, 57, 59, 62, 66, 70, 74, 79, 84, 89, 95,
+ 102, 107, 113, 120, 128, 134, 140, 147, 155, 155, 155, 155, 49, 50, 52,
+ 53, 55, 55, 56, 56, 57, 59, 61, 64, 67, 70, 74, 79, 84, 89, 94, 100,
+ 106, 112, 117, 124, 131, 137, 144, 150, 158, 158, 158, 158, 50, 51, 52,
+ 53, 54, 56, 58, 60, 62, 64, 67, 69, 72, 76, 80, 84, 89, 94, 99, 104,
+ 111, 116, 122, 128, 135, 141, 147, 154, 161, 161, 161, 161, 51, 52, 52,
+ 53, 53, 56, 60, 63, 67, 70, 73, 76, 79, 82, 86, 90, 95, 99, 104, 110,
+ 116, 121, 126, 133, 139, 145, 151, 157, 164, 164, 164, 164, 52, 52, 52,
+ 53, 53, 57, 62, 67, 74, 77, 80, 83, 87, 90, 93, 97, 101, 106, 110, 115,
+ 121, 126, 131, 137, 144, 149, 155, 161, 168, 168, 168, 168, 55, 55, 55,
+ 55, 55, 59, 64, 70, 77, 80, 84, 87, 92, 95, 99, 103, 107, 111, 116, 121,
+ 126, 131, 137, 142, 149, 154, 160, 166, 172, 172, 172, 172, 59, 59, 58,
+ 58, 57, 61, 67, 73, 80, 84, 88, 92, 97, 101, 105, 109, 113, 118, 122,
+ 127, 133, 137, 143, 148, 154, 159, 165, 170, 177, 177, 177, 177, 64, 62,
+ 61, 60, 59, 64, 69, 76, 83, 87, 92, 97, 103, 107, 111, 116, 121, 125,
+ 129, 134, 139, 144, 149, 154, 160, 165, 170, 176, 181, 181, 181, 181,
+ 69, 67, 65, 63, 62, 67, 72, 79, 87, 92, 97, 103, 110, 114, 119, 124,
+ 129, 133, 137, 142, 147, 151, 156, 161, 166, 171, 176, 181, 186, 186,
+ 186, 186, 73, 71, 69, 67, 66, 70, 76, 82, 90, 95, 101, 107, 114, 119,
+ 123, 129, 134, 138, 143, 147, 152, 157, 161, 166, 172, 176, 181, 186,
+ 191, 191, 191, 191, 79, 76, 74, 72, 70, 74, 80, 86, 93, 99, 105, 111,
+ 119, 123, 128, 134, 140, 144, 149, 153, 158, 163, 167, 172, 177, 182,
+ 186, 191, 196, 196, 196, 196, 85, 82, 79, 77, 74, 79, 84, 90, 97, 103,
+ 109, 116, 124, 129, 134, 140, 146, 150, 155, 160, 165, 169, 174, 178,
+ 183, 188, 192, 197, 202, 202, 202, 202, 92, 89, 85, 82, 79, 84, 89, 95,
+ 101, 107, 113, 121, 129, 134, 140, 146, 153, 157, 162, 167, 172, 176,
+ 181, 185, 190, 194, 198, 203, 207, 207, 207, 207, 98, 94, 90, 87, 84,
+ 89, 94, 99, 106, 111, 118, 125, 133, 138, 144, 150, 157, 162, 167, 172,
+ 177, 181, 186, 190, 195, 199, 203, 207, 212, 212, 212, 212, 104, 100,
+ 96, 93, 89, 94, 99, 104, 110, 116, 122, 129, 137, 143, 149, 155, 162,
+ 167, 171, 177, 182, 186, 191, 195, 200, 204, 208, 212, 216, 216, 216,
+ 216, 112, 107, 103, 99, 95, 100, 104, 110, 115, 121, 127, 134, 142, 147,
+ 153, 160, 167, 172, 177, 182, 188, 192, 196, 201, 205, 209, 213, 217,
+ 221, 221, 221, 221, 120, 115, 110, 106, 102, 106, 111, 116, 121, 126,
+ 133, 139, 147, 152, 158, 165, 172, 177, 182, 188, 194, 198, 202, 206,
+ 211, 215, 218, 222, 226, 226, 226, 226, 126, 121, 116, 112, 107, 112,
+ 116, 121, 126, 131, 137, 144, 151, 157, 163, 169, 176, 181, 186, 192,
+ 198, 202, 206, 211, 215, 219, 222, 226, 230, 230, 230, 230, 133, 128,
+ 123, 118, 113, 117, 122, 126, 131, 137, 143, 149, 156, 161, 167, 174,
+ 181, 186, 191, 196, 202, 206, 210, 215, 219, 223, 227, 230, 234, 234,
+ 234, 234, 141, 135, 130, 125, 120, 124, 128, 133, 137, 142, 148, 154,
+ 161, 166, 172, 178, 185, 190, 195, 201, 206, 211, 215, 219, 224, 227,
+ 231, 234, 238, 238, 238, 238, 150, 144, 138, 133, 128, 131, 135, 139,
+ 144, 149, 154, 160, 166, 172, 177, 183, 190, 195, 200, 205, 211, 215,
+ 219, 224, 228, 232, 235, 239, 242, 242, 242, 242, 156, 150, 144, 139,
+ 134, 137, 141, 145, 149, 154, 159, 165, 171, 176, 182, 188, 194, 199,
+ 204, 209, 215, 219, 223, 227, 232, 235, 238, 242, 246, 246, 246, 246,
+ 163, 156, 151, 145, 140, 144, 147, 151, 155, 160, 165, 170, 176, 181,
+ 186, 192, 198, 203, 208, 213, 218, 222, 227, 231, 235, 238, 242, 245,
+ 249, 249, 249, 249, 170, 164, 158, 152, 147, 150, 154, 157, 161, 166,
+ 170, 176, 181, 186, 191, 197, 203, 207, 212, 217, 222, 226, 230, 234,
+ 239, 242, 245, 249, 252, 252, 252, 252, 179, 172, 166, 160, 155, 158,
+ 161, 164, 168, 172, 177, 181, 186, 191, 196, 202, 207, 212, 216, 221,
+ 226, 230, 234, 238, 242, 246, 249, 252, 255, 255, 255, 255, 179, 172,
+ 166, 160, 155, 158, 161, 164, 168, 172, 177, 181, 186, 191, 196, 202,
+ 207, 212, 216, 221, 226, 230, 234, 238, 242, 246, 249, 252, 255, 255,
+ 255, 255, 179, 172, 166, 160, 155, 158, 161, 164, 168, 172, 177, 181,
+ 186, 191, 196, 202, 207, 212, 216, 221, 226, 230, 234, 238, 242, 246,
+ 249, 252, 255, 255, 255, 255, 179, 172, 166, 160, 155, 158, 161, 164,
+ 168, 172, 177, 181, 186, 191, 196, 202, 207, 212, 216, 221, 226, 230,
+ 234, 238, 242, 246, 249, 252, 255, 255, 255, 255 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 21, 23, 40, 65, 23, 36, 52, 72, 40, 52, 75, 93, 65, 72, 93, 111,
+ /* Size 8 */
+ 26, 19, 21, 28, 38, 50, 63, 77, 19, 22, 21, 25, 32, 42, 53, 66, 21, 21,
+ 30, 35, 42, 50, 61, 72, 28, 25, 35, 46, 54, 62, 71, 81, 38, 32, 42, 54,
+ 65, 74, 83, 92, 50, 42, 50, 62, 74, 85, 94, 101, 63, 53, 61, 71, 83, 94,
+ 103, 110, 77, 66, 72, 81, 92, 101, 110, 117,
+ /* Size 16 */
+ 25, 21, 18, 19, 20, 23, 26, 31, 36, 41, 48, 54, 61, 67, 74, 74, 21, 20,
+ 20, 20, 20, 22, 25, 29, 33, 38, 44, 49, 56, 61, 68, 68, 18, 20, 21, 21,
+ 20, 22, 24, 27, 31, 35, 40, 45, 51, 57, 63, 63, 19, 20, 21, 22, 24, 26,
+ 28, 31, 35, 39, 44, 49, 55, 60, 66, 66, 20, 20, 20, 24, 29, 31, 34, 37,
+ 40, 44, 48, 53, 58, 63, 69, 69, 23, 22, 22, 26, 31, 34, 38, 41, 45, 49,
+ 53, 58, 63, 68, 73, 73, 26, 25, 24, 28, 34, 38, 44, 47, 52, 55, 60, 64,
+ 68, 73, 78, 78, 31, 29, 27, 31, 37, 41, 47, 52, 57, 60, 65, 69, 74, 78,
+ 83, 83, 36, 33, 31, 35, 40, 45, 52, 57, 62, 67, 71, 75, 80, 84, 88, 88,
+ 41, 38, 35, 39, 44, 49, 55, 60, 67, 71, 76, 80, 84, 88, 92, 92, 48, 44,
+ 40, 44, 48, 53, 60, 65, 71, 76, 81, 85, 90, 94, 97, 97, 54, 49, 45, 49,
+ 53, 58, 64, 69, 75, 80, 85, 90, 94, 98, 101, 101, 61, 56, 51, 55, 58,
+ 63, 68, 74, 80, 84, 90, 94, 99, 102, 106, 106, 67, 61, 57, 60, 63, 68,
+ 73, 78, 84, 88, 94, 98, 102, 105, 109, 109, 74, 68, 63, 66, 69, 73, 78,
+ 83, 88, 92, 97, 101, 106, 109, 112, 112, 74, 68, 63, 66, 69, 73, 78, 83,
+ 88, 92, 97, 101, 106, 109, 112, 112,
+ /* Size 32 */
+ 24, 22, 21, 19, 18, 18, 19, 19, 19, 21, 22, 24, 26, 28, 30, 32, 35, 38,
+ 40, 43, 47, 50, 53, 56, 60, 63, 66, 69, 73, 73, 73, 73, 22, 21, 20, 19,
+ 19, 19, 19, 19, 19, 21, 22, 23, 25, 27, 29, 31, 34, 36, 39, 41, 45, 47,
+ 50, 53, 57, 60, 63, 66, 70, 70, 70, 70, 21, 20, 20, 19, 19, 19, 19, 19,
+ 20, 21, 22, 23, 25, 26, 28, 30, 33, 35, 37, 40, 43, 45, 48, 51, 55, 57,
+ 60, 63, 67, 67, 67, 67, 19, 19, 19, 20, 20, 20, 20, 20, 20, 21, 22, 23,
+ 24, 25, 27, 29, 31, 33, 36, 38, 41, 43, 46, 49, 52, 55, 58, 61, 64, 64,
+ 64, 64, 18, 19, 19, 20, 21, 20, 20, 20, 20, 21, 21, 22, 23, 25, 26, 28,
+ 30, 32, 34, 37, 39, 42, 44, 47, 50, 53, 56, 59, 62, 62, 62, 62, 18, 19,
+ 19, 20, 20, 21, 21, 21, 21, 22, 23, 24, 25, 27, 28, 30, 32, 34, 36, 38,
+ 41, 43, 46, 49, 52, 54, 57, 60, 63, 63, 63, 63, 19, 19, 19, 20, 20, 21,
+ 22, 22, 23, 24, 25, 26, 27, 29, 30, 32, 34, 36, 38, 40, 43, 45, 48, 50,
+ 53, 56, 59, 62, 65, 65, 65, 65, 19, 19, 19, 20, 20, 21, 22, 24, 25, 26,
+ 27, 29, 30, 31, 33, 35, 36, 38, 40, 43, 45, 47, 50, 52, 55, 58, 60, 63,
+ 66, 66, 66, 66, 19, 19, 20, 20, 20, 21, 23, 25, 28, 29, 30, 32, 33, 34,
+ 36, 37, 39, 41, 43, 45, 47, 49, 52, 54, 57, 59, 62, 65, 68, 68, 68, 68,
+ 21, 21, 21, 21, 21, 22, 24, 26, 29, 30, 32, 33, 35, 37, 38, 40, 42, 43,
+ 45, 47, 50, 52, 54, 57, 59, 62, 64, 67, 70, 70, 70, 70, 22, 22, 22, 22,
+ 21, 23, 25, 27, 30, 32, 34, 35, 37, 39, 40, 42, 44, 46, 48, 50, 52, 54,
+ 57, 59, 62, 64, 66, 69, 72, 72, 72, 72, 24, 23, 23, 23, 22, 24, 26, 29,
+ 32, 33, 35, 38, 40, 42, 43, 45, 47, 49, 51, 53, 55, 57, 59, 62, 64, 66,
+ 69, 71, 74, 74, 74, 74, 26, 25, 25, 24, 23, 25, 27, 30, 33, 35, 37, 40,
+ 43, 45, 46, 48, 51, 52, 54, 56, 58, 60, 62, 65, 67, 69, 71, 74, 76, 76,
+ 76, 76, 28, 27, 26, 25, 25, 27, 29, 31, 34, 37, 39, 42, 45, 46, 48, 51,
+ 53, 55, 57, 59, 61, 63, 65, 67, 70, 72, 74, 76, 79, 79, 79, 79, 30, 29,
+ 28, 27, 26, 28, 30, 33, 36, 38, 40, 43, 46, 48, 51, 53, 55, 57, 59, 61,
+ 64, 66, 68, 70, 72, 74, 76, 79, 81, 81, 81, 81, 32, 31, 30, 29, 28, 30,
+ 32, 35, 37, 40, 42, 45, 48, 51, 53, 55, 58, 60, 62, 64, 67, 69, 71, 73,
+ 75, 77, 79, 81, 83, 83, 83, 83, 35, 34, 33, 31, 30, 32, 34, 36, 39, 42,
+ 44, 47, 51, 53, 55, 58, 61, 63, 65, 67, 70, 72, 74, 76, 78, 80, 82, 84,
+ 86, 86, 86, 86, 38, 36, 35, 33, 32, 34, 36, 38, 41, 43, 46, 49, 52, 55,
+ 57, 60, 63, 65, 67, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 88, 88, 88,
+ 40, 39, 37, 36, 34, 36, 38, 40, 43, 45, 48, 51, 54, 57, 59, 62, 65, 67,
+ 70, 72, 74, 76, 78, 81, 83, 85, 86, 88, 91, 91, 91, 91, 43, 41, 40, 38,
+ 37, 38, 40, 43, 45, 47, 50, 53, 56, 59, 61, 64, 67, 70, 72, 74, 77, 79,
+ 81, 83, 85, 87, 89, 91, 93, 93, 93, 93, 47, 45, 43, 41, 39, 41, 43, 45,
+ 47, 50, 52, 55, 58, 61, 64, 67, 70, 72, 74, 77, 80, 82, 84, 86, 88, 90,
+ 92, 94, 95, 95, 95, 95, 50, 47, 45, 43, 42, 43, 45, 47, 49, 52, 54, 57,
+ 60, 63, 66, 69, 72, 74, 76, 79, 82, 84, 86, 88, 90, 92, 94, 95, 97, 97,
+ 97, 97, 53, 50, 48, 46, 44, 46, 48, 50, 52, 54, 57, 59, 62, 65, 68, 71,
+ 74, 76, 78, 81, 84, 86, 88, 90, 92, 94, 96, 97, 99, 99, 99, 99, 56, 53,
+ 51, 49, 47, 49, 50, 52, 54, 57, 59, 62, 65, 67, 70, 73, 76, 78, 81, 83,
+ 86, 88, 90, 92, 94, 96, 98, 100, 101, 101, 101, 101, 60, 57, 55, 52, 50,
+ 52, 53, 55, 57, 59, 62, 64, 67, 70, 72, 75, 78, 80, 83, 85, 88, 90, 92,
+ 94, 97, 98, 100, 102, 104, 104, 104, 104, 63, 60, 57, 55, 53, 54, 56,
+ 58, 59, 62, 64, 66, 69, 72, 74, 77, 80, 82, 85, 87, 90, 92, 94, 96, 98,
+ 100, 102, 103, 105, 105, 105, 105, 66, 63, 60, 58, 56, 57, 59, 60, 62,
+ 64, 66, 69, 71, 74, 76, 79, 82, 84, 86, 89, 92, 94, 96, 98, 100, 102,
+ 103, 105, 107, 107, 107, 107, 69, 66, 63, 61, 59, 60, 62, 63, 65, 67,
+ 69, 71, 74, 76, 79, 81, 84, 86, 88, 91, 94, 95, 97, 100, 102, 103, 105,
+ 107, 108, 108, 108, 108, 73, 70, 67, 64, 62, 63, 65, 66, 68, 70, 72, 74,
+ 76, 79, 81, 83, 86, 88, 91, 93, 95, 97, 99, 101, 104, 105, 107, 108,
+ 110, 110, 110, 110, 73, 70, 67, 64, 62, 63, 65, 66, 68, 70, 72, 74, 76,
+ 79, 81, 83, 86, 88, 91, 93, 95, 97, 99, 101, 104, 105, 107, 108, 110,
+ 110, 110, 110, 73, 70, 67, 64, 62, 63, 65, 66, 68, 70, 72, 74, 76, 79,
+ 81, 83, 86, 88, 91, 93, 95, 97, 99, 101, 104, 105, 107, 108, 110, 110,
+ 110, 110, 73, 70, 67, 64, 62, 63, 65, 66, 68, 70, 72, 74, 76, 79, 81,
+ 83, 86, 88, 91, 93, 95, 97, 99, 101, 104, 105, 107, 108, 110, 110, 110,
+ 110 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 101, 110, 139, 101, 122, 130, 149, 110, 130, 167, 190, 139, 149,
+ 190, 225,
+ /* Size 8 */
+ 64, 52, 94, 99, 106, 119, 134, 152, 52, 75, 96, 87, 89, 98, 110, 125,
+ 94, 96, 112, 108, 108, 113, 123, 136, 99, 87, 108, 122, 128, 134, 142,
+ 153, 106, 89, 108, 128, 143, 153, 162, 172, 119, 98, 113, 134, 153, 168,
+ 180, 191, 134, 110, 123, 142, 162, 180, 195, 207, 152, 125, 136, 153,
+ 172, 191, 207, 221,
+ /* Size 16 */
+ 64, 57, 52, 67, 94, 97, 99, 103, 106, 112, 119, 126, 134, 142, 152, 152,
+ 57, 59, 61, 74, 95, 94, 93, 95, 97, 102, 107, 114, 121, 129, 137, 137,
+ 52, 61, 75, 84, 96, 91, 87, 88, 89, 93, 98, 104, 110, 117, 125, 125, 67,
+ 74, 84, 92, 103, 99, 96, 97, 97, 101, 105, 110, 116, 123, 130, 130, 94,
+ 95, 96, 103, 112, 110, 108, 108, 108, 110, 113, 118, 123, 129, 136, 136,
+ 97, 94, 91, 99, 110, 112, 115, 116, 117, 120, 123, 127, 132, 138, 144,
+ 144, 99, 93, 87, 96, 108, 115, 122, 125, 128, 131, 134, 138, 142, 147,
+ 153, 153, 103, 95, 88, 97, 108, 116, 125, 130, 135, 139, 143, 147, 151,
+ 156, 162, 162, 106, 97, 89, 97, 108, 117, 128, 135, 143, 148, 153, 157,
+ 162, 167, 172, 172, 112, 102, 93, 101, 110, 120, 131, 139, 148, 154,
+ 160, 165, 171, 176, 181, 181, 119, 107, 98, 105, 113, 123, 134, 143,
+ 153, 160, 168, 174, 180, 185, 191, 191, 126, 114, 104, 110, 118, 127,
+ 138, 147, 157, 165, 174, 180, 187, 193, 199, 199, 134, 121, 110, 116,
+ 123, 132, 142, 151, 162, 171, 180, 187, 195, 201, 207, 207, 142, 129,
+ 117, 123, 129, 138, 147, 156, 167, 176, 185, 193, 201, 207, 214, 214,
+ 152, 137, 125, 130, 136, 144, 153, 162, 172, 181, 191, 199, 207, 214,
+ 221, 221, 152, 137, 125, 130, 136, 144, 153, 162, 172, 181, 191, 199,
+ 207, 214, 221, 221,
+ /* Size 32 */
+ 64, 60, 57, 54, 52, 58, 67, 78, 94, 95, 97, 98, 99, 101, 103, 104, 106,
+ 109, 112, 115, 119, 122, 126, 130, 134, 138, 142, 147, 152, 152, 152,
+ 152, 60, 59, 58, 57, 56, 62, 70, 80, 94, 95, 95, 95, 96, 97, 99, 100,
+ 101, 104, 107, 110, 113, 116, 120, 123, 127, 131, 135, 139, 144, 144,
+ 144, 144, 57, 58, 59, 60, 61, 67, 74, 83, 95, 94, 94, 93, 93, 94, 95,
+ 96, 97, 99, 102, 104, 107, 110, 114, 117, 121, 125, 129, 133, 137, 137,
+ 137, 137, 54, 57, 60, 63, 67, 72, 79, 86, 95, 94, 92, 91, 89, 90, 91,
+ 92, 93, 95, 97, 100, 102, 105, 109, 112, 115, 119, 123, 127, 131, 131,
+ 131, 131, 52, 56, 61, 67, 75, 79, 84, 89, 96, 93, 91, 89, 87, 87, 88,
+ 88, 89, 91, 93, 96, 98, 101, 104, 107, 110, 114, 117, 121, 125, 125,
+ 125, 125, 58, 62, 67, 72, 79, 83, 88, 93, 99, 97, 95, 93, 91, 92, 92,
+ 93, 93, 95, 97, 99, 101, 104, 107, 110, 113, 117, 120, 124, 128, 128,
+ 128, 128, 67, 70, 74, 79, 84, 88, 92, 97, 103, 101, 99, 98, 96, 96, 97,
+ 97, 97, 99, 101, 103, 105, 108, 110, 113, 116, 120, 123, 127, 130, 130,
+ 130, 130, 78, 80, 83, 86, 89, 93, 97, 102, 107, 106, 104, 103, 102, 102,
+ 102, 102, 102, 104, 106, 107, 109, 112, 114, 117, 120, 123, 126, 130,
+ 133, 133, 133, 133, 94, 94, 95, 95, 96, 99, 103, 107, 112, 111, 110,
+ 109, 108, 108, 108, 108, 108, 109, 110, 112, 113, 116, 118, 121, 123,
+ 126, 129, 133, 136, 136, 136, 136, 95, 95, 94, 94, 93, 97, 101, 106,
+ 111, 111, 111, 111, 111, 111, 112, 112, 112, 113, 115, 116, 118, 120,
+ 122, 125, 127, 130, 133, 137, 140, 140, 140, 140, 97, 95, 94, 92, 91,
+ 95, 99, 104, 110, 111, 112, 113, 115, 115, 116, 116, 117, 118, 120, 121,
+ 123, 125, 127, 129, 132, 135, 138, 141, 144, 144, 144, 144, 98, 95, 93,
+ 91, 89, 93, 98, 103, 109, 111, 113, 116, 118, 119, 120, 121, 122, 124,
+ 125, 127, 128, 130, 132, 134, 137, 139, 142, 145, 148, 148, 148, 148,
+ 99, 96, 93, 89, 87, 91, 96, 102, 108, 111, 115, 118, 122, 124, 125, 127,
+ 128, 130, 131, 132, 134, 136, 138, 140, 142, 144, 147, 150, 153, 153,
+ 153, 153, 101, 97, 94, 90, 87, 92, 96, 102, 108, 111, 115, 119, 124,
+ 125, 127, 129, 132, 133, 135, 136, 138, 140, 142, 144, 146, 149, 152,
+ 154, 157, 157, 157, 157, 103, 99, 95, 91, 88, 92, 97, 102, 108, 112,
+ 116, 120, 125, 127, 130, 133, 135, 137, 139, 141, 143, 145, 147, 149,
+ 151, 154, 156, 159, 162, 162, 162, 162, 104, 100, 96, 92, 88, 93, 97,
+ 102, 108, 112, 116, 121, 127, 129, 133, 136, 139, 141, 143, 145, 148,
+ 150, 152, 154, 156, 159, 161, 164, 167, 167, 167, 167, 106, 101, 97, 93,
+ 89, 93, 97, 102, 108, 112, 117, 122, 128, 132, 135, 139, 143, 145, 148,
+ 150, 153, 155, 157, 160, 162, 164, 167, 169, 172, 172, 172, 172, 109,
+ 104, 99, 95, 91, 95, 99, 104, 109, 113, 118, 124, 130, 133, 137, 141,
+ 145, 148, 151, 154, 157, 159, 161, 164, 166, 169, 171, 174, 176, 176,
+ 176, 176, 112, 107, 102, 97, 93, 97, 101, 106, 110, 115, 120, 125, 131,
+ 135, 139, 143, 148, 151, 154, 157, 160, 163, 165, 168, 171, 173, 176,
+ 178, 181, 181, 181, 181, 115, 110, 104, 100, 96, 99, 103, 107, 112, 116,
+ 121, 127, 132, 136, 141, 145, 150, 154, 157, 161, 164, 167, 170, 172,
+ 175, 178, 180, 183, 186, 186, 186, 186, 119, 113, 107, 102, 98, 101,
+ 105, 109, 113, 118, 123, 128, 134, 138, 143, 148, 153, 157, 160, 164,
+ 168, 171, 174, 177, 180, 183, 185, 188, 191, 191, 191, 191, 122, 116,
+ 110, 105, 101, 104, 108, 112, 116, 120, 125, 130, 136, 140, 145, 150,
+ 155, 159, 163, 167, 171, 174, 177, 180, 184, 186, 189, 192, 195, 195,
+ 195, 195, 126, 120, 114, 109, 104, 107, 110, 114, 118, 122, 127, 132,
+ 138, 142, 147, 152, 157, 161, 165, 170, 174, 177, 180, 184, 187, 190,
+ 193, 196, 199, 199, 199, 199, 130, 123, 117, 112, 107, 110, 113, 117,
+ 121, 125, 129, 134, 140, 144, 149, 154, 160, 164, 168, 172, 177, 180,
+ 184, 187, 191, 194, 197, 200, 203, 203, 203, 203, 134, 127, 121, 115,
+ 110, 113, 116, 120, 123, 127, 132, 137, 142, 146, 151, 156, 162, 166,
+ 171, 175, 180, 184, 187, 191, 195, 198, 201, 204, 207, 207, 207, 207,
+ 138, 131, 125, 119, 114, 117, 120, 123, 126, 130, 135, 139, 144, 149,
+ 154, 159, 164, 169, 173, 178, 183, 186, 190, 194, 198, 201, 204, 207,
+ 211, 211, 211, 211, 142, 135, 129, 123, 117, 120, 123, 126, 129, 133,
+ 138, 142, 147, 152, 156, 161, 167, 171, 176, 180, 185, 189, 193, 197,
+ 201, 204, 207, 211, 214, 214, 214, 214, 147, 139, 133, 127, 121, 124,
+ 127, 130, 133, 137, 141, 145, 150, 154, 159, 164, 169, 174, 178, 183,
+ 188, 192, 196, 200, 204, 207, 211, 214, 217, 217, 217, 217, 152, 144,
+ 137, 131, 125, 128, 130, 133, 136, 140, 144, 148, 153, 157, 162, 167,
+ 172, 176, 181, 186, 191, 195, 199, 203, 207, 211, 214, 217, 221, 221,
+ 221, 221, 152, 144, 137, 131, 125, 128, 130, 133, 136, 140, 144, 148,
+ 153, 157, 162, 167, 172, 176, 181, 186, 191, 195, 199, 203, 207, 211,
+ 214, 217, 221, 221, 221, 221, 152, 144, 137, 131, 125, 128, 130, 133,
+ 136, 140, 144, 148, 153, 157, 162, 167, 172, 176, 181, 186, 191, 195,
+ 199, 203, 207, 211, 214, 217, 221, 221, 221, 221, 152, 144, 137, 131,
+ 125, 128, 130, 133, 136, 140, 144, 148, 153, 157, 162, 167, 172, 176,
+ 181, 186, 191, 195, 199, 203, 207, 211, 214, 217, 221, 221, 221, 221 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 27, 44, 48, 61, 44, 53, 57, 66, 48, 57, 74, 86, 61, 66, 86, 103,
+ /* Size 8 */
+ 29, 23, 43, 46, 49, 56, 63, 72, 23, 34, 44, 40, 41, 45, 51, 59, 43, 44,
+ 52, 50, 50, 53, 58, 64, 46, 40, 50, 57, 60, 63, 67, 73, 49, 41, 50, 60,
+ 68, 73, 78, 83, 56, 45, 53, 63, 73, 81, 88, 93, 63, 51, 58, 67, 78, 88,
+ 96, 102, 72, 59, 64, 73, 83, 93, 102, 110,
+ /* Size 16 */
+ 28, 25, 23, 29, 42, 43, 45, 46, 48, 51, 54, 58, 62, 66, 70, 70, 25, 26,
+ 27, 33, 42, 42, 42, 43, 44, 46, 49, 52, 55, 59, 63, 63, 23, 27, 33, 37,
+ 43, 41, 39, 39, 40, 42, 44, 47, 50, 53, 57, 57, 29, 33, 37, 41, 46, 45,
+ 43, 44, 44, 46, 47, 50, 53, 56, 60, 60, 42, 42, 43, 46, 51, 50, 49, 49,
+ 49, 50, 51, 54, 56, 59, 63, 63, 43, 42, 41, 45, 50, 51, 52, 53, 53, 55,
+ 56, 58, 61, 63, 67, 67, 45, 42, 39, 43, 49, 52, 56, 57, 59, 60, 61, 63,
+ 65, 68, 71, 71, 46, 43, 39, 44, 49, 53, 57, 60, 62, 64, 66, 68, 70, 73,
+ 75, 75, 48, 44, 40, 44, 49, 53, 59, 62, 66, 69, 71, 73, 76, 78, 81, 81,
+ 51, 46, 42, 46, 50, 55, 60, 64, 69, 72, 75, 77, 80, 83, 85, 85, 54, 49,
+ 44, 47, 51, 56, 61, 66, 71, 75, 79, 82, 85, 88, 91, 91, 58, 52, 47, 50,
+ 54, 58, 63, 68, 73, 77, 82, 85, 89, 92, 95, 95, 62, 55, 50, 53, 56, 61,
+ 65, 70, 76, 80, 85, 89, 93, 96, 99, 99, 66, 59, 53, 56, 59, 63, 68, 73,
+ 78, 83, 88, 92, 96, 99, 103, 103, 70, 63, 57, 60, 63, 67, 71, 75, 81,
+ 85, 91, 95, 99, 103, 107, 107, 70, 63, 57, 60, 63, 67, 71, 75, 81, 85,
+ 91, 95, 99, 103, 107, 107,
+ /* Size 32 */
+ 28, 26, 25, 23, 22, 25, 29, 34, 41, 42, 43, 43, 44, 45, 46, 46, 47, 49,
+ 50, 52, 53, 55, 57, 59, 61, 63, 65, 67, 69, 69, 69, 69, 26, 26, 25, 25,
+ 24, 27, 31, 35, 42, 42, 42, 42, 42, 43, 44, 44, 45, 46, 48, 49, 50, 52,
+ 54, 55, 57, 59, 61, 63, 66, 66, 66, 66, 25, 25, 26, 26, 26, 29, 32, 37,
+ 42, 42, 41, 41, 41, 41, 42, 42, 43, 44, 45, 46, 48, 49, 51, 53, 54, 56,
+ 58, 60, 62, 62, 62, 62, 23, 25, 26, 28, 29, 32, 34, 38, 42, 41, 41, 40,
+ 39, 40, 40, 41, 41, 42, 43, 44, 45, 47, 48, 50, 52, 53, 55, 57, 59, 59,
+ 59, 59, 22, 24, 26, 29, 33, 35, 37, 39, 42, 41, 40, 39, 38, 38, 39, 39,
+ 39, 40, 41, 42, 43, 45, 46, 48, 49, 51, 53, 54, 56, 56, 56, 56, 25, 27,
+ 29, 32, 35, 37, 39, 41, 44, 43, 42, 41, 40, 40, 41, 41, 41, 42, 43, 44,
+ 45, 46, 48, 49, 51, 52, 54, 56, 58, 58, 58, 58, 29, 31, 32, 34, 37, 39,
+ 41, 43, 46, 45, 44, 43, 43, 43, 43, 43, 43, 44, 45, 46, 47, 48, 49, 51,
+ 52, 54, 55, 57, 59, 59, 59, 59, 34, 35, 37, 38, 39, 41, 43, 45, 48, 47,
+ 46, 46, 45, 45, 45, 45, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 57, 58,
+ 60, 60, 60, 60, 41, 42, 42, 42, 42, 44, 46, 48, 50, 49, 49, 49, 48, 48,
+ 48, 48, 48, 49, 49, 50, 51, 52, 53, 54, 55, 57, 58, 60, 62, 62, 62, 62,
+ 42, 42, 42, 41, 41, 43, 45, 47, 49, 50, 50, 50, 50, 50, 50, 50, 50, 51,
+ 51, 52, 53, 54, 55, 56, 57, 59, 60, 62, 64, 64, 64, 64, 43, 42, 41, 41,
+ 40, 42, 44, 46, 49, 50, 50, 51, 51, 52, 52, 52, 52, 53, 54, 54, 55, 56,
+ 57, 58, 60, 61, 62, 64, 65, 65, 65, 65, 43, 42, 41, 40, 39, 41, 43, 46,
+ 49, 50, 51, 52, 53, 54, 54, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63,
+ 65, 66, 68, 68, 68, 68, 44, 42, 41, 39, 38, 40, 43, 45, 48, 50, 51, 53,
+ 55, 56, 56, 57, 58, 58, 59, 60, 61, 61, 62, 63, 64, 66, 67, 68, 70, 70,
+ 70, 70, 45, 43, 41, 40, 38, 40, 43, 45, 48, 50, 52, 54, 56, 57, 57, 58,
+ 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 72, 72, 72, 46, 44,
+ 42, 40, 39, 41, 43, 45, 48, 50, 52, 54, 56, 57, 59, 60, 61, 62, 63, 64,
+ 65, 66, 67, 68, 69, 70, 72, 73, 74, 74, 74, 74, 46, 44, 42, 41, 39, 41,
+ 43, 45, 48, 50, 52, 54, 57, 58, 60, 61, 63, 64, 65, 66, 67, 68, 70, 71,
+ 72, 73, 74, 75, 77, 77, 77, 77, 47, 45, 43, 41, 39, 41, 43, 45, 48, 50,
+ 52, 55, 58, 59, 61, 63, 65, 66, 67, 69, 70, 71, 72, 73, 75, 76, 77, 78,
+ 79, 79, 79, 79, 49, 46, 44, 42, 40, 42, 44, 46, 49, 51, 53, 56, 58, 60,
+ 62, 64, 66, 68, 69, 70, 72, 73, 74, 75, 77, 78, 79, 80, 82, 82, 82, 82,
+ 50, 48, 45, 43, 41, 43, 45, 47, 49, 51, 54, 56, 59, 61, 63, 65, 67, 69,
+ 70, 72, 74, 75, 76, 78, 79, 80, 81, 83, 84, 84, 84, 84, 52, 49, 46, 44,
+ 42, 44, 46, 48, 50, 52, 54, 57, 60, 62, 64, 66, 69, 70, 72, 74, 76, 77,
+ 78, 80, 81, 83, 84, 85, 87, 87, 87, 87, 53, 50, 48, 45, 43, 45, 47, 49,
+ 51, 53, 55, 58, 61, 63, 65, 67, 70, 72, 74, 76, 78, 79, 81, 82, 84, 85,
+ 86, 88, 89, 89, 89, 89, 55, 52, 49, 47, 45, 46, 48, 50, 52, 54, 56, 59,
+ 61, 64, 66, 68, 71, 73, 75, 77, 79, 81, 82, 84, 86, 87, 88, 90, 91, 91,
+ 91, 91, 57, 54, 51, 48, 46, 48, 49, 51, 53, 55, 57, 60, 62, 65, 67, 70,
+ 72, 74, 76, 78, 81, 82, 84, 86, 87, 89, 90, 92, 93, 93, 93, 93, 59, 55,
+ 53, 50, 48, 49, 51, 52, 54, 56, 58, 61, 63, 66, 68, 71, 73, 75, 78, 80,
+ 82, 84, 86, 87, 89, 91, 92, 94, 96, 96, 96, 96, 61, 57, 54, 52, 49, 51,
+ 52, 54, 55, 57, 60, 62, 64, 67, 69, 72, 75, 77, 79, 81, 84, 86, 87, 89,
+ 91, 93, 95, 96, 98, 98, 98, 98, 63, 59, 56, 53, 51, 52, 54, 55, 57, 59,
+ 61, 63, 66, 68, 70, 73, 76, 78, 80, 83, 85, 87, 89, 91, 93, 95, 96, 98,
+ 100, 100, 100, 100, 65, 61, 58, 55, 53, 54, 55, 57, 58, 60, 62, 65, 67,
+ 69, 72, 74, 77, 79, 81, 84, 86, 88, 90, 92, 95, 96, 98, 100, 102, 102,
+ 102, 102, 67, 63, 60, 57, 54, 56, 57, 58, 60, 62, 64, 66, 68, 71, 73,
+ 75, 78, 80, 83, 85, 88, 90, 92, 94, 96, 98, 100, 102, 103, 103, 103,
+ 103, 69, 66, 62, 59, 56, 58, 59, 60, 62, 64, 65, 68, 70, 72, 74, 77, 79,
+ 82, 84, 87, 89, 91, 93, 96, 98, 100, 102, 103, 105, 105, 105, 105, 69,
+ 66, 62, 59, 56, 58, 59, 60, 62, 64, 65, 68, 70, 72, 74, 77, 79, 82, 84,
+ 87, 89, 91, 93, 96, 98, 100, 102, 103, 105, 105, 105, 105, 69, 66, 62,
+ 59, 56, 58, 59, 60, 62, 64, 65, 68, 70, 72, 74, 77, 79, 82, 84, 87, 89,
+ 91, 93, 96, 98, 100, 102, 103, 105, 105, 105, 105, 69, 66, 62, 59, 56,
+ 58, 59, 60, 62, 64, 65, 68, 70, 72, 74, 77, 79, 82, 84, 87, 89, 91, 93,
+ 96, 98, 100, 102, 103, 105, 105, 105, 105 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 70, 112, 168, 70, 103, 139, 181, 112, 139, 188, 221, 168, 181, 221,
+ 251,
+ /* Size 8 */
+ 64, 49, 53, 68, 90, 115, 139, 163, 49, 56, 53, 62, 78, 99, 121, 144, 53,
+ 53, 74, 85, 98, 115, 135, 154, 68, 62, 85, 106, 122, 137, 153, 169, 90,
+ 78, 98, 122, 142, 158, 172, 185, 115, 99, 115, 137, 158, 174, 188, 199,
+ 139, 121, 135, 153, 172, 188, 200, 210, 163, 144, 154, 169, 185, 199,
+ 210, 220,
+ /* Size 16 */
+ 64, 55, 49, 51, 53, 59, 68, 78, 90, 101, 115, 126, 139, 150, 163, 163,
+ 55, 54, 52, 52, 53, 58, 65, 73, 84, 94, 106, 117, 130, 140, 153, 153,
+ 49, 52, 56, 54, 53, 57, 62, 69, 78, 87, 99, 109, 121, 132, 144, 144, 51,
+ 52, 54, 58, 62, 66, 72, 79, 87, 96, 106, 116, 128, 137, 149, 149, 53,
+ 53, 53, 62, 74, 79, 85, 91, 98, 106, 115, 124, 135, 144, 154, 154, 59,
+ 58, 57, 66, 79, 86, 94, 101, 109, 117, 125, 134, 143, 152, 161, 161, 68,
+ 65, 62, 72, 85, 94, 106, 114, 122, 129, 137, 145, 153, 160, 169, 169,
+ 78, 73, 69, 79, 91, 101, 114, 122, 131, 139, 147, 154, 162, 169, 176,
+ 176, 90, 84, 78, 87, 98, 109, 122, 131, 142, 149, 158, 164, 172, 178,
+ 185, 185, 101, 94, 87, 96, 106, 117, 129, 139, 149, 157, 166, 172, 179,
+ 185, 191, 191, 115, 106, 99, 106, 115, 125, 137, 147, 158, 166, 174,
+ 181, 188, 193, 199, 199, 126, 117, 109, 116, 124, 134, 145, 154, 164,
+ 172, 181, 187, 194, 199, 204, 204, 139, 130, 121, 128, 135, 143, 153,
+ 162, 172, 179, 188, 194, 200, 205, 210, 210, 150, 140, 132, 137, 144,
+ 152, 160, 169, 178, 185, 193, 199, 205, 210, 215, 215, 163, 153, 144,
+ 149, 154, 161, 169, 176, 185, 191, 199, 204, 210, 215, 220, 220, 163,
+ 153, 144, 149, 154, 161, 169, 176, 185, 191, 199, 204, 210, 215, 220,
+ 220,
+ /* Size 32 */
+ 64, 59, 55, 52, 49, 50, 51, 52, 53, 56, 59, 64, 68, 73, 78, 83, 90, 95,
+ 101, 107, 115, 120, 126, 132, 139, 145, 150, 156, 163, 163, 163, 163,
+ 59, 57, 54, 52, 50, 51, 51, 52, 53, 56, 59, 63, 67, 71, 75, 81, 87, 92,
+ 97, 103, 110, 115, 121, 127, 134, 140, 145, 151, 158, 158, 158, 158, 55,
+ 54, 54, 53, 52, 52, 52, 53, 53, 55, 58, 62, 65, 69, 73, 78, 84, 88, 94,
+ 99, 106, 111, 117, 123, 130, 135, 140, 146, 153, 153, 153, 153, 52, 52,
+ 53, 53, 54, 54, 53, 53, 53, 55, 58, 61, 63, 67, 71, 76, 81, 85, 90, 96,
+ 102, 107, 113, 119, 125, 130, 136, 142, 148, 148, 148, 148, 49, 50, 52,
+ 54, 56, 55, 54, 54, 53, 55, 57, 60, 62, 65, 69, 73, 78, 83, 87, 93, 99,
+ 104, 109, 115, 121, 126, 132, 137, 144, 144, 144, 144, 50, 51, 52, 54,
+ 55, 56, 56, 57, 57, 59, 62, 64, 67, 70, 74, 78, 82, 87, 91, 97, 102,
+ 107, 112, 118, 124, 129, 134, 140, 146, 146, 146, 146, 51, 51, 52, 53,
+ 54, 56, 58, 60, 62, 64, 66, 69, 72, 75, 79, 83, 87, 91, 96, 101, 106,
+ 111, 116, 122, 128, 132, 137, 143, 149, 149, 149, 149, 52, 52, 53, 53,
+ 54, 57, 60, 63, 67, 70, 72, 75, 78, 81, 84, 88, 92, 96, 101, 105, 111,
+ 115, 120, 125, 131, 136, 140, 146, 151, 151, 151, 151, 53, 53, 53, 53,
+ 53, 57, 62, 67, 74, 76, 79, 82, 85, 88, 91, 95, 98, 102, 106, 111, 115,
+ 120, 124, 129, 135, 139, 144, 149, 154, 154, 154, 154, 56, 56, 55, 55,
+ 55, 59, 64, 70, 76, 79, 82, 86, 89, 93, 96, 99, 103, 107, 111, 115, 120,
+ 124, 129, 134, 139, 143, 148, 152, 158, 158, 158, 158, 59, 59, 58, 58,
+ 57, 62, 66, 72, 79, 82, 86, 90, 94, 98, 101, 105, 109, 113, 117, 121,
+ 125, 129, 134, 138, 143, 147, 152, 156, 161, 161, 161, 161, 64, 63, 62,
+ 61, 60, 64, 69, 75, 82, 86, 90, 95, 100, 103, 107, 111, 115, 119, 122,
+ 127, 131, 135, 139, 143, 148, 152, 156, 160, 165, 165, 165, 165, 68, 67,
+ 65, 63, 62, 67, 72, 78, 85, 89, 94, 100, 106, 110, 114, 118, 122, 125,
+ 129, 133, 137, 141, 145, 149, 153, 157, 160, 165, 169, 169, 169, 169,
+ 73, 71, 69, 67, 65, 70, 75, 81, 88, 93, 98, 103, 110, 113, 118, 122,
+ 126, 130, 134, 138, 142, 145, 149, 153, 157, 161, 164, 168, 173, 173,
+ 173, 173, 78, 75, 73, 71, 69, 74, 79, 84, 91, 96, 101, 107, 114, 118,
+ 122, 126, 131, 135, 139, 142, 147, 150, 154, 158, 162, 165, 169, 172,
+ 176, 176, 176, 176, 83, 81, 78, 76, 73, 78, 83, 88, 95, 99, 105, 111,
+ 118, 122, 126, 131, 136, 140, 144, 148, 152, 155, 159, 163, 166, 170,
+ 173, 177, 180, 180, 180, 180, 90, 87, 84, 81, 78, 82, 87, 92, 98, 103,
+ 109, 115, 122, 126, 131, 136, 142, 146, 149, 153, 158, 161, 164, 168,
+ 172, 175, 178, 181, 185, 185, 185, 185, 95, 92, 88, 85, 83, 87, 91, 96,
+ 102, 107, 113, 119, 125, 130, 135, 140, 146, 149, 153, 157, 162, 165,
+ 168, 172, 175, 178, 181, 185, 188, 188, 188, 188, 101, 97, 94, 90, 87,
+ 91, 96, 101, 106, 111, 117, 122, 129, 134, 139, 144, 149, 153, 157, 161,
+ 166, 169, 172, 176, 179, 182, 185, 188, 191, 191, 191, 191, 107, 103,
+ 99, 96, 93, 97, 101, 105, 111, 115, 121, 127, 133, 138, 142, 148, 153,
+ 157, 161, 165, 170, 173, 176, 180, 183, 186, 189, 192, 195, 195, 195,
+ 195, 115, 110, 106, 102, 99, 102, 106, 111, 115, 120, 125, 131, 137,
+ 142, 147, 152, 158, 162, 166, 170, 174, 177, 181, 184, 188, 190, 193,
+ 196, 199, 199, 199, 199, 120, 115, 111, 107, 104, 107, 111, 115, 120,
+ 124, 129, 135, 141, 145, 150, 155, 161, 165, 169, 173, 177, 181, 184,
+ 187, 191, 193, 196, 199, 202, 202, 202, 202, 126, 121, 117, 113, 109,
+ 112, 116, 120, 124, 129, 134, 139, 145, 149, 154, 159, 164, 168, 172,
+ 176, 181, 184, 187, 190, 194, 196, 199, 202, 204, 204, 204, 204, 132,
+ 127, 123, 119, 115, 118, 122, 125, 129, 134, 138, 143, 149, 153, 158,
+ 163, 168, 172, 176, 180, 184, 187, 190, 194, 197, 199, 202, 205, 207,
+ 207, 207, 207, 139, 134, 130, 125, 121, 124, 128, 131, 135, 139, 143,
+ 148, 153, 157, 162, 166, 172, 175, 179, 183, 188, 191, 194, 197, 200,
+ 203, 205, 208, 210, 210, 210, 210, 145, 140, 135, 130, 126, 129, 132,
+ 136, 139, 143, 147, 152, 157, 161, 165, 170, 175, 178, 182, 186, 190,
+ 193, 196, 199, 203, 205, 208, 210, 213, 213, 213, 213, 150, 145, 140,
+ 136, 132, 134, 137, 140, 144, 148, 152, 156, 160, 164, 169, 173, 178,
+ 181, 185, 189, 193, 196, 199, 202, 205, 208, 210, 212, 215, 215, 215,
+ 215, 156, 151, 146, 142, 137, 140, 143, 146, 149, 152, 156, 160, 165,
+ 168, 172, 177, 181, 185, 188, 192, 196, 199, 202, 205, 208, 210, 212,
+ 215, 217, 217, 217, 217, 163, 158, 153, 148, 144, 146, 149, 151, 154,
+ 158, 161, 165, 169, 173, 176, 180, 185, 188, 191, 195, 199, 202, 204,
+ 207, 210, 213, 215, 217, 220, 220, 220, 220, 163, 158, 153, 148, 144,
+ 146, 149, 151, 154, 158, 161, 165, 169, 173, 176, 180, 185, 188, 191,
+ 195, 199, 202, 204, 207, 210, 213, 215, 217, 220, 220, 220, 220, 163,
+ 158, 153, 148, 144, 146, 149, 151, 154, 158, 161, 165, 169, 173, 176,
+ 180, 185, 188, 191, 195, 199, 202, 204, 207, 210, 213, 215, 217, 220,
+ 220, 220, 220, 163, 158, 153, 148, 144, 146, 149, 151, 154, 158, 161,
+ 165, 169, 173, 176, 180, 185, 188, 191, 195, 199, 202, 204, 207, 210,
+ 213, 215, 217, 220, 220, 220, 220 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 24, 26, 43, 67, 26, 39, 54, 72, 43, 54, 75, 91, 67, 72, 91, 105,
+ /* Size 8 */
+ 28, 21, 23, 30, 41, 53, 65, 77, 21, 25, 23, 27, 35, 45, 56, 68, 23, 23,
+ 33, 38, 45, 53, 63, 73, 30, 27, 38, 49, 56, 64, 72, 81, 41, 35, 45, 56,
+ 67, 75, 82, 89, 53, 45, 53, 64, 75, 84, 91, 97, 65, 56, 63, 72, 82, 91,
+ 98, 104, 77, 68, 73, 81, 89, 97, 104, 109,
+ /* Size 16 */
+ 27, 24, 21, 21, 22, 25, 29, 34, 39, 44, 51, 56, 63, 68, 75, 75, 24, 23,
+ 22, 22, 22, 25, 28, 32, 36, 41, 47, 52, 58, 63, 70, 70, 21, 22, 24, 23,
+ 23, 24, 27, 30, 34, 38, 43, 48, 54, 59, 65, 65, 21, 22, 23, 25, 26, 28,
+ 31, 34, 38, 42, 47, 52, 57, 62, 68, 68, 22, 22, 23, 26, 32, 34, 37, 40,
+ 43, 47, 51, 55, 61, 65, 70, 70, 25, 25, 24, 28, 34, 37, 41, 44, 48, 52,
+ 56, 60, 65, 69, 74, 74, 29, 28, 27, 31, 37, 41, 47, 50, 54, 58, 62, 65,
+ 70, 74, 78, 78, 34, 32, 30, 34, 40, 44, 50, 54, 59, 62, 67, 70, 74, 78,
+ 82, 82, 39, 36, 34, 38, 43, 48, 54, 59, 64, 68, 72, 76, 79, 83, 86, 86,
+ 44, 41, 38, 42, 47, 52, 58, 62, 68, 72, 76, 80, 83, 86, 90, 90, 51, 47,
+ 43, 47, 51, 56, 62, 67, 72, 76, 81, 84, 88, 91, 94, 94, 56, 52, 48, 52,
+ 55, 60, 65, 70, 76, 80, 84, 87, 91, 94, 97, 97, 63, 58, 54, 57, 61, 65,
+ 70, 74, 79, 83, 88, 91, 95, 97, 100, 100, 68, 63, 59, 62, 65, 69, 74,
+ 78, 83, 86, 91, 94, 97, 100, 103, 103, 75, 70, 65, 68, 70, 74, 78, 82,
+ 86, 90, 94, 97, 100, 103, 105, 105, 75, 70, 65, 68, 70, 74, 78, 82, 86,
+ 90, 94, 97, 100, 103, 105, 105,
+ /* Size 32 */
+ 27, 25, 23, 22, 20, 21, 21, 21, 22, 23, 25, 27, 29, 31, 33, 36, 39, 41,
+ 43, 46, 50, 52, 55, 58, 62, 64, 67, 70, 73, 73, 73, 73, 25, 24, 23, 22,
+ 21, 21, 21, 22, 22, 23, 25, 26, 28, 30, 32, 34, 37, 39, 42, 45, 48, 50,
+ 53, 56, 59, 62, 65, 67, 71, 71, 71, 71, 23, 23, 22, 22, 22, 22, 22, 22,
+ 22, 23, 24, 26, 27, 29, 31, 33, 36, 38, 40, 43, 46, 48, 51, 54, 57, 60,
+ 62, 65, 68, 68, 68, 68, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 24, 25,
+ 27, 28, 30, 32, 34, 36, 39, 41, 44, 46, 49, 52, 55, 57, 60, 63, 66, 66,
+ 66, 66, 20, 21, 22, 22, 23, 23, 23, 22, 22, 23, 24, 25, 26, 28, 29, 31,
+ 33, 35, 37, 40, 43, 45, 47, 50, 53, 55, 58, 61, 64, 64, 64, 64, 21, 21,
+ 22, 22, 23, 23, 23, 24, 24, 25, 26, 27, 28, 30, 31, 33, 35, 37, 39, 42,
+ 44, 46, 49, 52, 54, 57, 59, 62, 65, 65, 65, 65, 21, 21, 22, 22, 23, 23,
+ 24, 25, 26, 27, 28, 29, 30, 32, 33, 35, 37, 39, 41, 43, 46, 48, 51, 53,
+ 56, 58, 61, 63, 66, 66, 66, 66, 21, 22, 22, 22, 22, 24, 25, 27, 28, 29,
+ 30, 32, 33, 34, 36, 38, 40, 41, 43, 46, 48, 50, 52, 55, 58, 60, 62, 65,
+ 68, 68, 68, 68, 22, 22, 22, 22, 22, 24, 26, 28, 31, 32, 33, 35, 36, 38,
+ 39, 41, 42, 44, 46, 48, 50, 52, 54, 57, 59, 62, 64, 66, 69, 69, 69, 69,
+ 23, 23, 23, 23, 23, 25, 27, 29, 32, 34, 35, 37, 38, 40, 41, 43, 45, 46,
+ 48, 50, 52, 54, 57, 59, 61, 64, 66, 68, 71, 71, 71, 71, 25, 25, 24, 24,
+ 24, 26, 28, 30, 33, 35, 37, 39, 41, 42, 44, 45, 47, 49, 51, 53, 55, 57,
+ 59, 61, 64, 66, 68, 70, 73, 73, 73, 73, 27, 26, 26, 25, 25, 27, 29, 32,
+ 35, 37, 39, 41, 43, 45, 46, 48, 50, 52, 54, 56, 58, 60, 61, 64, 66, 68,
+ 70, 72, 74, 74, 74, 74, 29, 28, 27, 27, 26, 28, 30, 33, 36, 38, 41, 43,
+ 46, 48, 49, 51, 53, 55, 57, 59, 61, 62, 64, 66, 68, 70, 72, 74, 76, 76,
+ 76, 76, 31, 30, 29, 28, 28, 30, 32, 34, 38, 40, 42, 45, 48, 49, 51, 53,
+ 56, 57, 59, 61, 63, 65, 66, 68, 71, 72, 74, 76, 78, 78, 78, 78, 33, 32,
+ 31, 30, 29, 31, 33, 36, 39, 41, 44, 46, 49, 51, 53, 55, 58, 60, 61, 63,
+ 65, 67, 69, 71, 73, 75, 76, 78, 80, 80, 80, 80, 36, 34, 33, 32, 31, 33,
+ 35, 38, 41, 43, 45, 48, 51, 53, 55, 58, 60, 62, 64, 66, 68, 70, 71, 73,
+ 75, 77, 79, 80, 82, 82, 82, 82, 39, 37, 36, 34, 33, 35, 37, 40, 42, 45,
+ 47, 50, 53, 56, 58, 60, 63, 65, 67, 69, 71, 72, 74, 76, 78, 79, 81, 83,
+ 85, 85, 85, 85, 41, 39, 38, 36, 35, 37, 39, 41, 44, 46, 49, 52, 55, 57,
+ 60, 62, 65, 67, 69, 71, 73, 74, 76, 78, 80, 81, 83, 85, 86, 86, 86, 86,
+ 43, 42, 40, 39, 37, 39, 41, 43, 46, 48, 51, 54, 57, 59, 61, 64, 67, 69,
+ 70, 73, 75, 76, 78, 80, 82, 83, 85, 86, 88, 88, 88, 88, 46, 45, 43, 41,
+ 40, 42, 43, 46, 48, 50, 53, 56, 59, 61, 63, 66, 69, 71, 73, 75, 77, 79,
+ 80, 82, 84, 85, 87, 88, 90, 90, 90, 90, 50, 48, 46, 44, 43, 44, 46, 48,
+ 50, 52, 55, 58, 61, 63, 65, 68, 71, 73, 75, 77, 79, 81, 83, 84, 86, 87,
+ 89, 90, 92, 92, 92, 92, 52, 50, 48, 46, 45, 46, 48, 50, 52, 54, 57, 60,
+ 62, 65, 67, 70, 72, 74, 76, 79, 81, 82, 84, 86, 88, 89, 90, 92, 93, 93,
+ 93, 93, 55, 53, 51, 49, 47, 49, 51, 52, 54, 57, 59, 61, 64, 66, 69, 71,
+ 74, 76, 78, 80, 83, 84, 86, 88, 89, 91, 92, 94, 95, 95, 95, 95, 58, 56,
+ 54, 52, 50, 52, 53, 55, 57, 59, 61, 64, 66, 68, 71, 73, 76, 78, 80, 82,
+ 84, 86, 88, 89, 91, 92, 94, 95, 97, 97, 97, 97, 62, 59, 57, 55, 53, 54,
+ 56, 58, 59, 61, 64, 66, 68, 71, 73, 75, 78, 80, 82, 84, 86, 88, 89, 91,
+ 93, 94, 95, 97, 98, 98, 98, 98, 64, 62, 60, 57, 55, 57, 58, 60, 62, 64,
+ 66, 68, 70, 72, 75, 77, 79, 81, 83, 85, 87, 89, 91, 92, 94, 95, 97, 98,
+ 99, 99, 99, 99, 67, 65, 62, 60, 58, 59, 61, 62, 64, 66, 68, 70, 72, 74,
+ 76, 79, 81, 83, 85, 87, 89, 90, 92, 94, 95, 97, 98, 99, 101, 101, 101,
+ 101, 70, 67, 65, 63, 61, 62, 63, 65, 66, 68, 70, 72, 74, 76, 78, 80, 83,
+ 85, 86, 88, 90, 92, 94, 95, 97, 98, 99, 101, 102, 102, 102, 102, 73, 71,
+ 68, 66, 64, 65, 66, 68, 69, 71, 73, 74, 76, 78, 80, 82, 85, 86, 88, 90,
+ 92, 93, 95, 97, 98, 99, 101, 102, 103, 103, 103, 103, 73, 71, 68, 66,
+ 64, 65, 66, 68, 69, 71, 73, 74, 76, 78, 80, 82, 85, 86, 88, 90, 92, 93,
+ 95, 97, 98, 99, 101, 102, 103, 103, 103, 103, 73, 71, 68, 66, 64, 65,
+ 66, 68, 69, 71, 73, 74, 76, 78, 80, 82, 85, 86, 88, 90, 92, 93, 95, 97,
+ 98, 99, 101, 102, 103, 103, 103, 103, 73, 71, 68, 66, 64, 65, 66, 68,
+ 69, 71, 73, 74, 76, 78, 80, 82, 85, 86, 88, 90, 92, 93, 95, 97, 98, 99,
+ 101, 102, 103, 103, 103, 103 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 99, 107, 133, 99, 117, 124, 141, 107, 124, 156, 175, 133, 141, 175,
+ 203,
+ /* Size 8 */
+ 64, 52, 92, 97, 103, 114, 128, 143, 52, 74, 93, 85, 87, 96, 107, 120,
+ 92, 93, 108, 105, 104, 109, 118, 129, 97, 85, 105, 117, 122, 127, 134,
+ 143, 103, 87, 104, 122, 135, 144, 151, 159, 114, 96, 109, 127, 144, 156,
+ 166, 175, 128, 107, 118, 134, 151, 166, 178, 188, 143, 120, 129, 143,
+ 159, 175, 188, 198,
+ /* Size 16 */
+ 64, 57, 52, 66, 92, 94, 97, 100, 103, 108, 114, 121, 128, 135, 143, 143,
+ 57, 59, 61, 74, 93, 92, 91, 93, 95, 99, 104, 110, 116, 123, 130, 130,
+ 52, 61, 74, 83, 93, 89, 85, 86, 87, 91, 96, 101, 107, 113, 120, 120, 66,
+ 74, 83, 91, 100, 97, 94, 95, 95, 98, 102, 107, 112, 118, 124, 124, 92,
+ 93, 93, 100, 108, 106, 105, 104, 104, 107, 109, 114, 118, 123, 129, 129,
+ 94, 92, 89, 97, 106, 108, 111, 112, 113, 115, 118, 122, 126, 131, 136,
+ 136, 97, 91, 85, 94, 105, 111, 117, 120, 122, 125, 127, 131, 134, 139,
+ 143, 143, 100, 93, 86, 95, 104, 112, 120, 124, 129, 132, 135, 139, 142,
+ 146, 151, 151, 103, 95, 87, 95, 104, 113, 122, 129, 135, 139, 144, 147,
+ 151, 155, 159, 159, 108, 99, 91, 98, 107, 115, 125, 132, 139, 144, 150,
+ 154, 158, 162, 167, 167, 114, 104, 96, 102, 109, 118, 127, 135, 144,
+ 150, 156, 161, 166, 170, 175, 175, 121, 110, 101, 107, 114, 122, 131,
+ 139, 147, 154, 161, 166, 172, 176, 181, 181, 128, 116, 107, 112, 118,
+ 126, 134, 142, 151, 158, 166, 172, 178, 183, 188, 188, 135, 123, 113,
+ 118, 123, 131, 139, 146, 155, 162, 170, 176, 183, 188, 193, 193, 143,
+ 130, 120, 124, 129, 136, 143, 151, 159, 167, 175, 181, 188, 193, 198,
+ 198, 143, 130, 120, 124, 129, 136, 143, 151, 159, 167, 175, 181, 188,
+ 193, 198, 198,
+ /* Size 32 */
+ 64, 61, 57, 55, 52, 58, 66, 77, 92, 93, 94, 96, 97, 98, 100, 101, 103,
+ 106, 108, 111, 114, 117, 121, 124, 128, 131, 135, 139, 143, 143, 143,
+ 143, 61, 59, 58, 57, 56, 62, 70, 79, 92, 93, 93, 93, 94, 95, 96, 97, 99,
+ 101, 104, 106, 109, 112, 115, 118, 122, 125, 129, 132, 136, 136, 136,
+ 136, 57, 58, 59, 60, 61, 67, 74, 82, 93, 92, 92, 91, 91, 92, 93, 94, 95,
+ 97, 99, 101, 104, 107, 110, 113, 116, 119, 123, 126, 130, 130, 130, 130,
+ 55, 57, 60, 63, 67, 72, 78, 85, 93, 92, 90, 89, 88, 89, 89, 90, 91, 93,
+ 95, 97, 100, 102, 105, 108, 111, 114, 118, 121, 125, 125, 125, 125, 52,
+ 56, 61, 67, 74, 78, 83, 88, 93, 91, 89, 87, 85, 86, 86, 87, 87, 89, 91,
+ 93, 96, 98, 101, 104, 107, 110, 113, 116, 120, 120, 120, 120, 58, 62,
+ 67, 72, 78, 82, 86, 91, 97, 95, 93, 91, 89, 90, 90, 91, 91, 93, 95, 97,
+ 99, 101, 104, 106, 109, 112, 115, 119, 122, 122, 122, 122, 66, 70, 74,
+ 78, 83, 86, 91, 95, 100, 99, 97, 95, 94, 94, 95, 95, 95, 97, 98, 100,
+ 102, 104, 107, 109, 112, 115, 118, 121, 124, 124, 124, 124, 77, 79, 82,
+ 85, 88, 91, 95, 99, 104, 103, 101, 100, 99, 99, 99, 99, 100, 101, 102,
+ 104, 106, 108, 110, 113, 115, 118, 121, 124, 127, 127, 127, 127, 92, 92,
+ 93, 93, 93, 97, 100, 104, 108, 107, 106, 105, 105, 105, 104, 104, 104,
+ 106, 107, 108, 109, 111, 114, 116, 118, 121, 123, 126, 129, 129, 129,
+ 129, 93, 93, 92, 92, 91, 95, 99, 103, 107, 107, 107, 107, 108, 108, 108,
+ 108, 108, 110, 111, 112, 113, 115, 117, 120, 122, 124, 127, 130, 133,
+ 133, 133, 133, 94, 93, 92, 90, 89, 93, 97, 101, 106, 107, 108, 109, 111,
+ 111, 112, 112, 113, 114, 115, 116, 118, 120, 122, 124, 126, 128, 131,
+ 133, 136, 136, 136, 136, 96, 93, 91, 89, 87, 91, 95, 100, 105, 107, 109,
+ 112, 114, 115, 116, 116, 117, 119, 120, 121, 122, 124, 126, 128, 130,
+ 132, 135, 137, 140, 140, 140, 140, 97, 94, 91, 88, 85, 89, 94, 99, 105,
+ 108, 111, 114, 117, 118, 120, 121, 122, 124, 125, 126, 127, 129, 131,
+ 132, 134, 136, 139, 141, 143, 143, 143, 143, 98, 95, 92, 89, 86, 90, 94,
+ 99, 105, 108, 111, 115, 118, 120, 122, 124, 125, 127, 128, 130, 131,
+ 133, 134, 136, 138, 140, 142, 145, 147, 147, 147, 147, 100, 96, 93, 89,
+ 86, 90, 95, 99, 104, 108, 112, 116, 120, 122, 124, 126, 129, 130, 132,
+ 133, 135, 137, 139, 140, 142, 144, 146, 149, 151, 151, 151, 151, 101,
+ 97, 94, 90, 87, 91, 95, 99, 104, 108, 112, 116, 121, 124, 126, 129, 132,
+ 134, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 155, 155,
+ 155, 103, 99, 95, 91, 87, 91, 95, 100, 104, 108, 113, 117, 122, 125,
+ 129, 132, 135, 137, 139, 142, 144, 146, 147, 149, 151, 153, 155, 157,
+ 159, 159, 159, 159, 106, 101, 97, 93, 89, 93, 97, 101, 106, 110, 114,
+ 119, 124, 127, 130, 134, 137, 140, 142, 144, 147, 149, 151, 153, 155,
+ 157, 159, 161, 163, 163, 163, 163, 108, 104, 99, 95, 91, 95, 98, 102,
+ 107, 111, 115, 120, 125, 128, 132, 135, 139, 142, 144, 147, 150, 152,
+ 154, 156, 158, 160, 162, 164, 167, 167, 167, 167, 111, 106, 101, 97, 93,
+ 97, 100, 104, 108, 112, 116, 121, 126, 130, 133, 137, 142, 144, 147,
+ 150, 153, 155, 157, 160, 162, 164, 166, 168, 170, 170, 170, 170, 114,
+ 109, 104, 100, 96, 99, 102, 106, 109, 113, 118, 122, 127, 131, 135, 139,
+ 144, 147, 150, 153, 156, 159, 161, 164, 166, 168, 170, 172, 175, 175,
+ 175, 175, 117, 112, 107, 102, 98, 101, 104, 108, 111, 115, 120, 124,
+ 129, 133, 137, 141, 146, 149, 152, 155, 159, 161, 164, 166, 169, 171,
+ 173, 175, 178, 178, 178, 178, 121, 115, 110, 105, 101, 104, 107, 110,
+ 114, 117, 122, 126, 131, 134, 139, 143, 147, 151, 154, 157, 161, 164,
+ 166, 169, 172, 174, 176, 178, 181, 181, 181, 181, 124, 118, 113, 108,
+ 104, 106, 109, 113, 116, 120, 124, 128, 132, 136, 140, 145, 149, 153,
+ 156, 160, 164, 166, 169, 172, 175, 177, 179, 182, 184, 184, 184, 184,
+ 128, 122, 116, 111, 107, 109, 112, 115, 118, 122, 126, 130, 134, 138,
+ 142, 147, 151, 155, 158, 162, 166, 169, 172, 175, 178, 180, 183, 185,
+ 188, 188, 188, 188, 131, 125, 119, 114, 110, 112, 115, 118, 121, 124,
+ 128, 132, 136, 140, 144, 149, 153, 157, 160, 164, 168, 171, 174, 177,
+ 180, 183, 185, 188, 190, 190, 190, 190, 135, 129, 123, 118, 113, 115,
+ 118, 121, 123, 127, 131, 135, 139, 142, 146, 151, 155, 159, 162, 166,
+ 170, 173, 176, 179, 183, 185, 188, 190, 193, 193, 193, 193, 139, 132,
+ 126, 121, 116, 119, 121, 124, 126, 130, 133, 137, 141, 145, 149, 153,
+ 157, 161, 164, 168, 172, 175, 178, 182, 185, 188, 190, 193, 196, 196,
+ 196, 196, 143, 136, 130, 125, 120, 122, 124, 127, 129, 133, 136, 140,
+ 143, 147, 151, 155, 159, 163, 167, 170, 175, 178, 181, 184, 188, 190,
+ 193, 196, 198, 198, 198, 198, 143, 136, 130, 125, 120, 122, 124, 127,
+ 129, 133, 136, 140, 143, 147, 151, 155, 159, 163, 167, 170, 175, 178,
+ 181, 184, 188, 190, 193, 196, 198, 198, 198, 198, 143, 136, 130, 125,
+ 120, 122, 124, 127, 129, 133, 136, 140, 143, 147, 151, 155, 159, 163,
+ 167, 170, 175, 178, 181, 184, 188, 190, 193, 196, 198, 198, 198, 198,
+ 143, 136, 130, 125, 120, 122, 124, 127, 129, 133, 136, 140, 143, 147,
+ 151, 155, 159, 163, 167, 170, 175, 178, 181, 184, 188, 190, 193, 196,
+ 198, 198, 198, 198 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 29, 46, 49, 62, 46, 54, 58, 66, 49, 58, 74, 84, 62, 66, 84, 99,
+ /* Size 8 */
+ 31, 25, 45, 48, 51, 57, 64, 72, 25, 36, 46, 42, 43, 47, 53, 60, 45, 46,
+ 54, 52, 52, 54, 59, 65, 48, 42, 52, 59, 61, 64, 68, 73, 51, 43, 52, 61,
+ 68, 73, 77, 82, 57, 47, 54, 64, 73, 80, 86, 90, 64, 53, 59, 68, 77, 86,
+ 92, 98, 72, 60, 65, 73, 82, 90, 98, 104,
+ /* Size 16 */
+ 30, 27, 24, 31, 44, 45, 47, 48, 50, 53, 56, 59, 63, 66, 71, 71, 27, 28,
+ 29, 35, 44, 44, 43, 44, 45, 48, 50, 53, 57, 60, 64, 64, 24, 29, 35, 39,
+ 45, 43, 41, 41, 42, 44, 46, 49, 52, 55, 58, 58, 31, 35, 39, 43, 48, 47,
+ 45, 45, 46, 47, 49, 52, 54, 58, 61, 61, 44, 44, 45, 48, 52, 51, 51, 51,
+ 50, 52, 53, 55, 58, 60, 63, 63, 45, 44, 43, 47, 51, 53, 54, 54, 55, 56,
+ 57, 59, 62, 64, 67, 67, 47, 43, 41, 45, 51, 54, 57, 58, 60, 61, 62, 64,
+ 66, 68, 71, 71, 48, 44, 41, 45, 51, 54, 58, 61, 63, 65, 67, 68, 70, 73,
+ 75, 75, 50, 45, 42, 46, 50, 55, 60, 63, 67, 69, 71, 73, 75, 77, 80, 80,
+ 53, 48, 44, 47, 52, 56, 61, 65, 69, 72, 74, 77, 79, 81, 84, 84, 56, 50,
+ 46, 49, 53, 57, 62, 67, 71, 74, 78, 81, 83, 86, 88, 88, 59, 53, 49, 52,
+ 55, 59, 64, 68, 73, 77, 81, 83, 87, 89, 92, 92, 63, 57, 52, 54, 58, 62,
+ 66, 70, 75, 79, 83, 87, 90, 93, 95, 95, 66, 60, 55, 58, 60, 64, 68, 73,
+ 77, 81, 86, 89, 93, 95, 98, 98, 71, 64, 58, 61, 63, 67, 71, 75, 80, 84,
+ 88, 92, 95, 98, 102, 102, 71, 64, 58, 61, 63, 67, 71, 75, 80, 84, 88,
+ 92, 95, 98, 102, 102,
+ /* Size 32 */
+ 30, 28, 27, 25, 24, 27, 31, 36, 43, 44, 45, 45, 46, 47, 48, 48, 49, 50,
+ 52, 53, 55, 56, 58, 60, 62, 64, 65, 67, 70, 70, 70, 70, 28, 28, 27, 27,
+ 26, 29, 33, 37, 44, 44, 44, 44, 44, 45, 46, 46, 47, 48, 49, 51, 52, 54,
+ 55, 57, 59, 60, 62, 64, 66, 66, 66, 66, 27, 27, 28, 28, 28, 31, 35, 39,
+ 44, 44, 43, 43, 43, 43, 44, 44, 45, 46, 47, 48, 50, 51, 53, 54, 56, 58,
+ 59, 61, 63, 63, 63, 63, 25, 27, 28, 30, 31, 34, 37, 40, 44, 43, 43, 42,
+ 41, 42, 42, 43, 43, 44, 45, 46, 47, 49, 50, 52, 53, 55, 57, 58, 60, 60,
+ 60, 60, 24, 26, 28, 31, 35, 37, 39, 41, 44, 43, 42, 41, 40, 40, 41, 41,
+ 41, 42, 43, 44, 45, 47, 48, 49, 51, 53, 54, 56, 58, 58, 58, 58, 27, 29,
+ 31, 34, 37, 39, 41, 43, 46, 45, 44, 43, 42, 42, 43, 43, 43, 44, 45, 46,
+ 47, 48, 49, 51, 52, 54, 55, 57, 59, 59, 59, 59, 31, 33, 35, 37, 39, 41,
+ 43, 45, 48, 47, 46, 45, 45, 45, 45, 45, 45, 46, 47, 48, 49, 50, 51, 52,
+ 54, 55, 57, 58, 60, 60, 60, 60, 36, 37, 39, 40, 41, 43, 45, 47, 50, 49,
+ 48, 48, 47, 47, 47, 47, 47, 48, 49, 50, 50, 52, 53, 54, 55, 57, 58, 60,
+ 61, 61, 61, 61, 43, 44, 44, 44, 44, 46, 48, 50, 52, 51, 51, 50, 50, 50,
+ 50, 50, 50, 50, 51, 52, 52, 53, 54, 56, 57, 58, 60, 61, 63, 63, 63, 63,
+ 44, 44, 44, 43, 43, 45, 47, 49, 51, 51, 51, 51, 51, 51, 52, 52, 52, 52,
+ 53, 54, 54, 55, 56, 58, 59, 60, 61, 63, 64, 64, 64, 64, 45, 44, 43, 43,
+ 42, 44, 46, 48, 51, 51, 52, 52, 53, 53, 53, 54, 54, 55, 55, 56, 57, 58,
+ 59, 60, 61, 62, 63, 65, 66, 66, 66, 66, 45, 44, 43, 42, 41, 43, 45, 48,
+ 50, 51, 52, 53, 55, 55, 55, 56, 56, 57, 58, 58, 59, 60, 61, 62, 63, 64,
+ 65, 67, 68, 68, 68, 68, 46, 44, 43, 41, 40, 42, 45, 47, 50, 51, 53, 55,
+ 56, 57, 58, 58, 59, 60, 60, 61, 62, 62, 63, 64, 65, 66, 68, 69, 70, 70,
+ 70, 70, 47, 45, 43, 42, 40, 42, 45, 47, 50, 51, 53, 55, 57, 58, 59, 60,
+ 61, 61, 62, 63, 64, 64, 65, 66, 67, 68, 70, 71, 72, 72, 72, 72, 48, 46,
+ 44, 42, 41, 43, 45, 47, 50, 52, 53, 55, 58, 59, 60, 61, 62, 63, 64, 65,
+ 66, 67, 67, 68, 69, 71, 72, 73, 74, 74, 74, 74, 48, 46, 44, 43, 41, 43,
+ 45, 47, 50, 52, 54, 56, 58, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 76, 76, 76, 76, 49, 47, 45, 43, 41, 43, 45, 47, 50, 52,
+ 54, 56, 59, 61, 62, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
+ 79, 79, 79, 79, 50, 48, 46, 44, 42, 44, 46, 48, 50, 52, 55, 57, 60, 61,
+ 63, 65, 67, 68, 69, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 80, 80, 80,
+ 52, 49, 47, 45, 43, 45, 47, 49, 51, 53, 55, 58, 60, 62, 64, 66, 68, 69,
+ 71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 82, 82, 82, 82, 53, 51, 48, 46,
+ 44, 46, 48, 50, 52, 54, 56, 58, 61, 63, 65, 67, 69, 70, 72, 74, 75, 76,
+ 78, 79, 80, 81, 82, 83, 85, 85, 85, 85, 55, 52, 50, 47, 45, 47, 49, 50,
+ 52, 54, 57, 59, 62, 64, 66, 68, 70, 72, 73, 75, 77, 78, 80, 81, 82, 83,
+ 84, 86, 87, 87, 87, 87, 56, 54, 51, 49, 47, 48, 50, 52, 53, 55, 58, 60,
+ 62, 64, 67, 69, 71, 73, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 89, 89,
+ 89, 89, 58, 55, 53, 50, 48, 49, 51, 53, 54, 56, 59, 61, 63, 65, 67, 70,
+ 72, 74, 76, 78, 80, 81, 82, 84, 85, 87, 88, 89, 90, 90, 90, 90, 60, 57,
+ 54, 52, 49, 51, 52, 54, 56, 58, 60, 62, 64, 66, 68, 71, 73, 75, 77, 79,
+ 81, 82, 84, 85, 87, 88, 90, 91, 92, 92, 92, 92, 62, 59, 56, 53, 51, 52,
+ 54, 55, 57, 59, 61, 63, 65, 67, 69, 72, 74, 76, 78, 80, 82, 84, 85, 87,
+ 89, 90, 91, 93, 94, 94, 94, 94, 64, 60, 58, 55, 53, 54, 55, 57, 58, 60,
+ 62, 64, 66, 68, 71, 73, 75, 77, 79, 81, 83, 85, 87, 88, 90, 91, 93, 94,
+ 96, 96, 96, 96, 65, 62, 59, 57, 54, 55, 57, 58, 60, 61, 63, 65, 68, 70,
+ 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 91, 93, 94, 96, 97, 97, 97, 97,
+ 67, 64, 61, 58, 56, 57, 58, 60, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
+ 81, 83, 86, 87, 89, 91, 93, 94, 96, 97, 99, 99, 99, 99, 70, 66, 63, 60,
+ 58, 59, 60, 61, 63, 64, 66, 68, 70, 72, 74, 76, 79, 80, 82, 85, 87, 89,
+ 90, 92, 94, 96, 97, 99, 100, 100, 100, 100, 70, 66, 63, 60, 58, 59, 60,
+ 61, 63, 64, 66, 68, 70, 72, 74, 76, 79, 80, 82, 85, 87, 89, 90, 92, 94,
+ 96, 97, 99, 100, 100, 100, 100, 70, 66, 63, 60, 58, 59, 60, 61, 63, 64,
+ 66, 68, 70, 72, 74, 76, 79, 80, 82, 85, 87, 89, 90, 92, 94, 96, 97, 99,
+ 100, 100, 100, 100, 70, 66, 63, 60, 58, 59, 60, 61, 63, 64, 66, 68, 70,
+ 72, 74, 76, 79, 80, 82, 85, 87, 89, 90, 92, 94, 96, 97, 99, 100, 100,
+ 100, 100 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 69, 108, 155, 69, 100, 131, 166, 108, 131, 171, 197, 155, 166, 197,
+ 218,
+ /* Size 8 */
+ 64, 49, 53, 68, 88, 109, 130, 149, 49, 56, 54, 62, 77, 96, 115, 134, 53,
+ 54, 73, 83, 95, 110, 126, 142, 68, 62, 83, 102, 116, 128, 141, 154, 88,
+ 77, 95, 116, 132, 145, 156, 166, 109, 96, 110, 128, 145, 158, 168, 176,
+ 130, 115, 126, 141, 156, 168, 177, 185, 149, 134, 142, 154, 166, 176,
+ 185, 191,
+ /* Size 16 */
+ 64, 56, 49, 51, 53, 60, 68, 77, 88, 97, 109, 119, 130, 139, 149, 149,
+ 56, 54, 52, 53, 53, 59, 65, 73, 82, 91, 102, 111, 122, 131, 141, 141,
+ 49, 52, 56, 55, 54, 58, 62, 69, 77, 85, 96, 104, 115, 124, 134, 134, 51,
+ 53, 55, 58, 62, 66, 71, 78, 85, 93, 102, 111, 120, 128, 138, 138, 53,
+ 53, 54, 62, 73, 78, 83, 89, 95, 102, 110, 117, 126, 134, 142, 142, 60,
+ 59, 58, 66, 78, 84, 92, 98, 104, 111, 118, 125, 133, 140, 148, 148, 68,
+ 65, 62, 71, 83, 92, 102, 108, 116, 122, 128, 134, 141, 147, 154, 154,
+ 77, 73, 69, 78, 89, 98, 108, 115, 123, 129, 136, 142, 148, 153, 159,
+ 159, 88, 82, 77, 85, 95, 104, 116, 123, 132, 138, 145, 150, 156, 161,
+ 166, 166, 97, 91, 85, 93, 102, 111, 122, 129, 138, 144, 151, 156, 162,
+ 166, 171, 171, 109, 102, 96, 102, 110, 118, 128, 136, 145, 151, 158,
+ 163, 168, 172, 176, 176, 119, 111, 104, 111, 117, 125, 134, 142, 150,
+ 156, 163, 168, 173, 176, 180, 180, 130, 122, 115, 120, 126, 133, 141,
+ 148, 156, 162, 168, 173, 177, 181, 185, 185, 139, 131, 124, 128, 134,
+ 140, 147, 153, 161, 166, 172, 176, 181, 184, 188, 188, 149, 141, 134,
+ 138, 142, 148, 154, 159, 166, 171, 176, 180, 185, 188, 191, 191, 149,
+ 141, 134, 138, 142, 148, 154, 159, 166, 171, 176, 180, 185, 188, 191,
+ 191,
+ /* Size 32 */
+ 64, 60, 56, 52, 49, 50, 51, 52, 53, 56, 60, 64, 68, 72, 77, 82, 88, 92,
+ 97, 103, 109, 114, 119, 124, 130, 134, 139, 144, 149, 149, 149, 149, 60,
+ 57, 55, 53, 51, 51, 52, 53, 53, 56, 59, 63, 66, 70, 75, 79, 85, 89, 94,
+ 100, 106, 110, 115, 120, 126, 130, 135, 140, 145, 145, 145, 145, 56, 55,
+ 54, 53, 52, 53, 53, 53, 53, 56, 59, 62, 65, 69, 73, 77, 82, 86, 91, 96,
+ 102, 106, 111, 116, 122, 126, 131, 136, 141, 141, 141, 141, 52, 53, 53,
+ 54, 54, 54, 54, 54, 54, 56, 58, 61, 64, 67, 71, 75, 80, 84, 88, 93, 99,
+ 103, 108, 113, 118, 123, 127, 132, 137, 137, 137, 137, 49, 51, 52, 54,
+ 56, 55, 55, 54, 54, 56, 58, 60, 62, 65, 69, 73, 77, 81, 85, 90, 96, 100,
+ 104, 109, 115, 119, 124, 128, 134, 134, 134, 134, 50, 51, 53, 54, 55,
+ 56, 56, 57, 58, 60, 62, 64, 66, 70, 73, 77, 81, 85, 89, 94, 99, 103,
+ 107, 112, 118, 122, 126, 131, 136, 136, 136, 136, 51, 52, 53, 54, 55,
+ 56, 58, 60, 62, 64, 66, 69, 71, 74, 78, 81, 85, 89, 93, 97, 102, 106,
+ 111, 115, 120, 124, 128, 133, 138, 138, 138, 138, 52, 53, 53, 54, 54,
+ 57, 60, 63, 67, 69, 72, 74, 77, 80, 83, 86, 90, 94, 97, 101, 106, 110,
+ 114, 118, 123, 127, 131, 135, 140, 140, 140, 140, 53, 53, 53, 54, 54,
+ 58, 62, 67, 73, 75, 78, 80, 83, 86, 89, 92, 95, 99, 102, 106, 110, 114,
+ 117, 122, 126, 130, 134, 138, 142, 142, 142, 142, 56, 56, 56, 56, 56,
+ 60, 64, 69, 75, 78, 81, 84, 87, 90, 93, 96, 100, 103, 106, 110, 114,
+ 118, 121, 125, 130, 133, 137, 141, 145, 145, 145, 145, 60, 59, 59, 58,
+ 58, 62, 66, 72, 78, 81, 84, 88, 92, 95, 98, 101, 104, 108, 111, 115,
+ 118, 122, 125, 129, 133, 136, 140, 144, 148, 148, 148, 148, 64, 63, 62,
+ 61, 60, 64, 69, 74, 80, 84, 88, 92, 97, 100, 103, 106, 110, 113, 116,
+ 119, 123, 126, 130, 133, 137, 140, 143, 147, 151, 151, 151, 151, 68, 66,
+ 65, 64, 62, 66, 71, 77, 83, 87, 92, 97, 102, 105, 108, 112, 116, 119,
+ 122, 125, 128, 131, 134, 138, 141, 144, 147, 150, 154, 154, 154, 154,
+ 72, 70, 69, 67, 65, 70, 74, 80, 86, 90, 95, 100, 105, 108, 112, 116,
+ 119, 122, 125, 129, 132, 135, 138, 141, 144, 147, 150, 153, 156, 156,
+ 156, 156, 77, 75, 73, 71, 69, 73, 78, 83, 89, 93, 98, 103, 108, 112,
+ 115, 119, 123, 126, 129, 133, 136, 139, 142, 145, 148, 151, 153, 156,
+ 159, 159, 159, 159, 82, 79, 77, 75, 73, 77, 81, 86, 92, 96, 101, 106,
+ 112, 116, 119, 123, 128, 131, 134, 137, 140, 143, 146, 149, 152, 154,
+ 157, 160, 162, 162, 162, 162, 88, 85, 82, 80, 77, 81, 85, 90, 95, 100,
+ 104, 110, 116, 119, 123, 128, 132, 135, 138, 141, 145, 147, 150, 153,
+ 156, 158, 161, 163, 166, 166, 166, 166, 92, 89, 86, 84, 81, 85, 89, 94,
+ 99, 103, 108, 113, 119, 122, 126, 131, 135, 138, 141, 144, 148, 150,
+ 153, 156, 159, 161, 163, 166, 168, 168, 168, 168, 97, 94, 91, 88, 85,
+ 89, 93, 97, 102, 106, 111, 116, 122, 125, 129, 134, 138, 141, 144, 148,
+ 151, 154, 156, 159, 162, 164, 166, 168, 171, 171, 171, 171, 103, 100,
+ 96, 93, 90, 94, 97, 101, 106, 110, 115, 119, 125, 129, 133, 137, 141,
+ 144, 148, 151, 154, 157, 159, 162, 165, 167, 169, 171, 174, 174, 174,
+ 174, 109, 106, 102, 99, 96, 99, 102, 106, 110, 114, 118, 123, 128, 132,
+ 136, 140, 145, 148, 151, 154, 158, 160, 163, 165, 168, 170, 172, 174,
+ 176, 176, 176, 176, 114, 110, 106, 103, 100, 103, 106, 110, 114, 118,
+ 122, 126, 131, 135, 139, 143, 147, 150, 154, 157, 160, 163, 165, 168,
+ 170, 172, 174, 176, 178, 178, 178, 178, 119, 115, 111, 108, 104, 107,
+ 111, 114, 117, 121, 125, 130, 134, 138, 142, 146, 150, 153, 156, 159,
+ 163, 165, 168, 170, 173, 174, 176, 178, 180, 180, 180, 180, 124, 120,
+ 116, 113, 109, 112, 115, 118, 122, 125, 129, 133, 138, 141, 145, 149,
+ 153, 156, 159, 162, 165, 168, 170, 172, 175, 177, 179, 181, 183, 183,
+ 183, 183, 130, 126, 122, 118, 115, 118, 120, 123, 126, 130, 133, 137,
+ 141, 144, 148, 152, 156, 159, 162, 165, 168, 170, 173, 175, 177, 179,
+ 181, 183, 185, 185, 185, 185, 134, 130, 126, 123, 119, 122, 124, 127,
+ 130, 133, 136, 140, 144, 147, 151, 154, 158, 161, 164, 167, 170, 172,
+ 174, 177, 179, 181, 183, 185, 186, 186, 186, 186, 139, 135, 131, 127,
+ 124, 126, 128, 131, 134, 137, 140, 143, 147, 150, 153, 157, 161, 163,
+ 166, 169, 172, 174, 176, 179, 181, 183, 184, 186, 188, 188, 188, 188,
+ 144, 140, 136, 132, 128, 131, 133, 135, 138, 141, 144, 147, 150, 153,
+ 156, 160, 163, 166, 168, 171, 174, 176, 178, 181, 183, 185, 186, 188,
+ 190, 190, 190, 190, 149, 145, 141, 137, 134, 136, 138, 140, 142, 145,
+ 148, 151, 154, 156, 159, 162, 166, 168, 171, 174, 176, 178, 180, 183,
+ 185, 186, 188, 190, 191, 191, 191, 191, 149, 145, 141, 137, 134, 136,
+ 138, 140, 142, 145, 148, 151, 154, 156, 159, 162, 166, 168, 171, 174,
+ 176, 178, 180, 183, 185, 186, 188, 190, 191, 191, 191, 191, 149, 145,
+ 141, 137, 134, 136, 138, 140, 142, 145, 148, 151, 154, 156, 159, 162,
+ 166, 168, 171, 174, 176, 178, 180, 183, 185, 186, 188, 190, 191, 191,
+ 191, 191, 149, 145, 141, 137, 134, 136, 138, 140, 142, 145, 148, 151,
+ 154, 156, 159, 162, 166, 168, 171, 174, 176, 178, 180, 183, 185, 186,
+ 188, 190, 191, 191, 191, 191 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 26, 29, 46, 68, 29, 42, 57, 73, 46, 57, 75, 88, 68, 73, 88, 99,
+ /* Size 8 */
+ 31, 24, 26, 33, 44, 55, 67, 77, 24, 27, 26, 30, 38, 48, 58, 69, 26, 26,
+ 36, 41, 47, 55, 64, 73, 33, 30, 41, 51, 59, 65, 73, 80, 44, 38, 47, 59,
+ 68, 75, 81, 87, 55, 48, 55, 65, 75, 82, 88, 93, 67, 58, 64, 73, 81, 88,
+ 94, 98, 77, 69, 73, 80, 87, 93, 98, 102,
+ /* Size 16 */
+ 30, 26, 23, 24, 25, 28, 32, 37, 42, 47, 53, 58, 64, 69, 75, 75, 26, 25,
+ 25, 25, 25, 28, 31, 34, 39, 44, 49, 54, 60, 65, 70, 70, 23, 25, 26, 26,
+ 25, 27, 29, 33, 37, 41, 46, 51, 56, 61, 66, 66, 24, 25, 26, 27, 29, 31,
+ 34, 37, 41, 45, 50, 54, 59, 63, 69, 69, 25, 25, 25, 29, 35, 37, 40, 43,
+ 46, 49, 54, 58, 62, 66, 71, 71, 28, 28, 27, 31, 37, 40, 44, 47, 51, 54,
+ 58, 62, 66, 70, 74, 74, 32, 31, 29, 34, 40, 44, 50, 53, 57, 60, 63, 67,
+ 70, 74, 77, 77, 37, 34, 33, 37, 43, 47, 53, 57, 61, 64, 68, 71, 74, 77,
+ 81, 81, 42, 39, 37, 41, 46, 51, 57, 61, 66, 69, 72, 75, 79, 81, 84, 84,
+ 47, 44, 41, 45, 49, 54, 60, 64, 69, 72, 76, 79, 82, 84, 87, 87, 53, 49,
+ 46, 50, 54, 58, 63, 68, 72, 76, 80, 83, 85, 88, 90, 90, 58, 54, 51, 54,
+ 58, 62, 67, 71, 75, 79, 83, 85, 88, 90, 93, 93, 64, 60, 56, 59, 62, 66,
+ 70, 74, 79, 82, 85, 88, 91, 93, 95, 95, 69, 65, 61, 63, 66, 70, 74, 77,
+ 81, 84, 88, 90, 93, 95, 97, 97, 75, 70, 66, 69, 71, 74, 77, 81, 84, 87,
+ 90, 93, 95, 97, 99, 99, 75, 70, 66, 69, 71, 74, 77, 81, 84, 87, 90, 93,
+ 95, 97, 99, 99,
+ /* Size 32 */
+ 30, 28, 26, 24, 23, 23, 24, 24, 24, 26, 28, 30, 32, 34, 36, 39, 42, 44,
+ 46, 49, 52, 55, 57, 60, 63, 66, 68, 71, 73, 73, 73, 73, 28, 26, 25, 24,
+ 23, 24, 24, 24, 25, 26, 27, 29, 31, 33, 35, 37, 40, 42, 45, 47, 50, 53,
+ 55, 58, 61, 63, 66, 68, 71, 71, 71, 71, 26, 25, 25, 25, 24, 24, 24, 25,
+ 25, 26, 27, 29, 30, 32, 34, 36, 39, 41, 43, 46, 49, 51, 53, 56, 59, 61,
+ 64, 66, 69, 69, 69, 69, 24, 24, 25, 25, 25, 25, 25, 25, 25, 26, 27, 28,
+ 29, 31, 33, 35, 37, 39, 42, 44, 47, 49, 52, 54, 57, 59, 62, 64, 67, 67,
+ 67, 67, 23, 23, 24, 25, 26, 26, 25, 25, 25, 26, 27, 28, 29, 30, 32, 34,
+ 36, 38, 40, 43, 45, 48, 50, 52, 55, 58, 60, 62, 65, 65, 65, 65, 23, 24,
+ 24, 25, 26, 26, 26, 26, 27, 28, 29, 30, 31, 32, 34, 36, 38, 40, 42, 44,
+ 47, 49, 51, 54, 57, 59, 61, 64, 66, 66, 66, 66, 24, 24, 24, 25, 25, 26,
+ 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 42, 44, 46, 49, 51, 53, 55,
+ 58, 60, 62, 65, 67, 67, 67, 67, 24, 24, 25, 25, 25, 26, 28, 29, 31, 32,
+ 33, 35, 36, 37, 39, 41, 43, 44, 46, 48, 51, 53, 55, 57, 60, 62, 64, 66,
+ 69, 69, 69, 69, 24, 25, 25, 25, 25, 27, 29, 31, 34, 35, 36, 38, 39, 41,
+ 42, 44, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 70, 70, 70, 70,
+ 26, 26, 26, 26, 26, 28, 30, 32, 35, 37, 38, 40, 41, 43, 44, 46, 47, 49,
+ 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 71, 71, 71, 28, 27, 27, 27,
+ 27, 29, 31, 33, 36, 38, 40, 41, 43, 45, 46, 48, 50, 52, 53, 55, 57, 59,
+ 61, 63, 65, 67, 69, 71, 73, 73, 73, 73, 30, 29, 29, 28, 28, 30, 32, 35,
+ 38, 40, 41, 44, 46, 47, 49, 51, 53, 54, 56, 58, 60, 61, 63, 65, 67, 69,
+ 71, 72, 74, 74, 74, 74, 32, 31, 30, 29, 29, 31, 33, 36, 39, 41, 43, 46,
+ 49, 50, 52, 54, 56, 57, 59, 61, 62, 64, 66, 67, 69, 71, 72, 74, 76, 76,
+ 76, 76, 34, 33, 32, 31, 30, 32, 35, 37, 41, 43, 45, 47, 50, 52, 54, 56,
+ 58, 59, 61, 63, 64, 66, 68, 69, 71, 73, 74, 76, 78, 78, 78, 78, 36, 35,
+ 34, 33, 32, 34, 36, 39, 42, 44, 46, 49, 52, 54, 56, 58, 60, 61, 63, 65,
+ 66, 68, 70, 71, 73, 74, 76, 78, 79, 79, 79, 79, 39, 37, 36, 35, 34, 36,
+ 38, 41, 44, 46, 48, 51, 54, 56, 58, 60, 62, 64, 65, 67, 69, 70, 72, 73,
+ 75, 76, 78, 79, 81, 81, 81, 81, 42, 40, 39, 37, 36, 38, 40, 43, 45, 47,
+ 50, 53, 56, 58, 60, 62, 64, 66, 68, 69, 71, 73, 74, 76, 77, 79, 80, 81,
+ 83, 83, 83, 83, 44, 42, 41, 39, 38, 40, 42, 44, 47, 49, 52, 54, 57, 59,
+ 61, 64, 66, 68, 69, 71, 73, 74, 76, 77, 79, 80, 81, 83, 84, 84, 84, 84,
+ 46, 45, 43, 42, 40, 42, 44, 46, 49, 51, 53, 56, 59, 61, 63, 65, 68, 69,
+ 71, 73, 75, 76, 77, 79, 81, 82, 83, 84, 86, 86, 86, 86, 49, 47, 46, 44,
+ 43, 44, 46, 48, 51, 53, 55, 58, 61, 63, 65, 67, 69, 71, 73, 75, 77, 78,
+ 79, 81, 82, 83, 85, 86, 87, 87, 87, 87, 52, 50, 49, 47, 45, 47, 49, 51,
+ 53, 55, 57, 60, 62, 64, 66, 69, 71, 73, 75, 77, 78, 80, 81, 83, 84, 85,
+ 86, 88, 89, 89, 89, 89, 55, 53, 51, 49, 48, 49, 51, 53, 55, 57, 59, 61,
+ 64, 66, 68, 70, 73, 74, 76, 78, 80, 81, 82, 84, 85, 86, 88, 89, 90, 90,
+ 90, 90, 57, 55, 53, 52, 50, 51, 53, 55, 57, 59, 61, 63, 66, 68, 70, 72,
+ 74, 76, 77, 79, 81, 82, 84, 85, 87, 88, 89, 90, 91, 91, 91, 91, 60, 58,
+ 56, 54, 52, 54, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 76, 77, 79, 81,
+ 83, 84, 85, 87, 88, 89, 90, 91, 92, 92, 92, 92, 63, 61, 59, 57, 55, 57,
+ 58, 60, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, 81, 82, 84, 85, 87, 88,
+ 89, 90, 92, 93, 94, 94, 94, 94, 66, 63, 61, 59, 58, 59, 60, 62, 63, 65,
+ 67, 69, 71, 73, 74, 76, 79, 80, 82, 83, 85, 86, 88, 89, 90, 91, 93, 94,
+ 95, 95, 95, 95, 68, 66, 64, 62, 60, 61, 62, 64, 65, 67, 69, 71, 72, 74,
+ 76, 78, 80, 81, 83, 85, 86, 88, 89, 90, 92, 93, 94, 95, 96, 96, 96, 96,
+ 71, 68, 66, 64, 62, 64, 65, 66, 67, 69, 71, 72, 74, 76, 78, 79, 81, 83,
+ 84, 86, 88, 89, 90, 91, 93, 94, 95, 96, 97, 97, 97, 97, 73, 71, 69, 67,
+ 65, 66, 67, 69, 70, 71, 73, 74, 76, 78, 79, 81, 83, 84, 86, 87, 89, 90,
+ 91, 92, 94, 95, 96, 97, 98, 98, 98, 98, 73, 71, 69, 67, 65, 66, 67, 69,
+ 70, 71, 73, 74, 76, 78, 79, 81, 83, 84, 86, 87, 89, 90, 91, 92, 94, 95,
+ 96, 97, 98, 98, 98, 98, 73, 71, 69, 67, 65, 66, 67, 69, 70, 71, 73, 74,
+ 76, 78, 79, 81, 83, 84, 86, 87, 89, 90, 91, 92, 94, 95, 96, 97, 98, 98,
+ 98, 98, 73, 71, 69, 67, 65, 66, 67, 69, 70, 71, 73, 74, 76, 78, 79, 81,
+ 83, 84, 86, 87, 89, 90, 91, 92, 94, 95, 96, 97, 98, 98, 98, 98 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 96, 104, 126, 96, 113, 119, 133, 104, 119, 146, 162, 126, 133, 162,
+ 183,
+ /* Size 8 */
+ 64, 53, 90, 94, 100, 110, 121, 134, 53, 73, 91, 84, 86, 93, 103, 115,
+ 90, 91, 104, 101, 101, 105, 113, 123, 94, 84, 101, 112, 117, 121, 127,
+ 135, 100, 86, 101, 117, 128, 135, 141, 148, 110, 93, 105, 121, 135, 145,
+ 153, 160, 121, 103, 113, 127, 141, 153, 163, 170, 134, 115, 123, 135,
+ 148, 160, 170, 179,
+ /* Size 16 */
+ 64, 58, 53, 66, 90, 92, 94, 97, 100, 105, 110, 115, 121, 127, 134, 134,
+ 58, 59, 61, 73, 90, 90, 89, 90, 92, 96, 101, 106, 112, 117, 123, 123,
+ 53, 61, 73, 81, 91, 87, 84, 85, 86, 89, 93, 98, 103, 109, 115, 115, 66,
+ 73, 81, 89, 97, 94, 92, 92, 93, 96, 99, 103, 108, 113, 119, 119, 90, 90,
+ 91, 97, 104, 103, 101, 101, 101, 103, 105, 109, 113, 118, 123, 123, 92,
+ 90, 87, 94, 103, 105, 107, 107, 108, 110, 113, 116, 120, 124, 128, 128,
+ 94, 89, 84, 92, 101, 107, 112, 115, 117, 119, 121, 124, 127, 131, 135,
+ 135, 97, 90, 85, 92, 101, 107, 115, 118, 122, 125, 128, 131, 134, 137,
+ 141, 141, 100, 92, 86, 93, 101, 108, 117, 122, 128, 131, 135, 138, 141,
+ 144, 148, 148, 105, 96, 89, 96, 103, 110, 119, 125, 131, 135, 140, 143,
+ 147, 150, 154, 154, 110, 101, 93, 99, 105, 113, 121, 128, 135, 140, 145,
+ 149, 153, 157, 160, 160, 115, 106, 98, 103, 109, 116, 124, 131, 138,
+ 143, 149, 153, 158, 161, 165, 165, 121, 112, 103, 108, 113, 120, 127,
+ 134, 141, 147, 153, 158, 163, 166, 170, 170, 127, 117, 109, 113, 118,
+ 124, 131, 137, 144, 150, 157, 161, 166, 170, 174, 174, 134, 123, 115,
+ 119, 123, 128, 135, 141, 148, 154, 160, 165, 170, 174, 179, 179, 134,
+ 123, 115, 119, 123, 128, 135, 141, 148, 154, 160, 165, 170, 174, 179,
+ 179,
+ /* Size 32 */
+ 64, 61, 58, 55, 53, 59, 66, 76, 90, 91, 92, 93, 94, 96, 97, 98, 100,
+ 102, 105, 107, 110, 112, 115, 118, 121, 124, 127, 131, 134, 134, 134,
+ 134, 61, 60, 59, 58, 57, 62, 70, 78, 90, 90, 91, 91, 91, 93, 94, 95, 96,
+ 98, 100, 103, 105, 108, 110, 113, 116, 119, 122, 125, 129, 129, 129,
+ 129, 58, 59, 59, 60, 61, 67, 73, 81, 90, 90, 90, 89, 89, 90, 90, 91, 92,
+ 94, 96, 98, 101, 103, 106, 109, 112, 114, 117, 120, 123, 123, 123, 123,
+ 55, 58, 60, 63, 67, 72, 77, 83, 91, 90, 88, 87, 86, 87, 87, 88, 89, 91,
+ 93, 95, 97, 99, 102, 104, 107, 110, 113, 116, 119, 119, 119, 119, 53,
+ 57, 61, 67, 73, 77, 81, 86, 91, 89, 87, 85, 84, 84, 85, 85, 86, 88, 89,
+ 91, 93, 95, 98, 100, 103, 106, 109, 111, 115, 115, 115, 115, 59, 62, 67,
+ 72, 77, 81, 85, 89, 94, 92, 91, 89, 87, 88, 88, 89, 89, 91, 92, 94, 96,
+ 98, 100, 103, 105, 108, 111, 114, 117, 117, 117, 117, 66, 70, 73, 77,
+ 81, 85, 89, 93, 97, 96, 94, 93, 92, 92, 92, 92, 93, 94, 96, 97, 99, 101,
+ 103, 105, 108, 110, 113, 116, 119, 119, 119, 119, 76, 78, 81, 83, 86,
+ 89, 93, 97, 101, 99, 98, 97, 96, 96, 96, 97, 97, 98, 99, 101, 102, 104,
+ 106, 108, 110, 113, 115, 118, 121, 121, 121, 121, 90, 90, 90, 91, 91,
+ 94, 97, 101, 104, 103, 103, 102, 101, 101, 101, 101, 101, 102, 103, 104,
+ 105, 107, 109, 111, 113, 115, 118, 120, 123, 123, 123, 123, 91, 90, 90,
+ 90, 89, 92, 96, 99, 103, 104, 104, 104, 104, 104, 104, 104, 105, 106,
+ 107, 108, 109, 111, 112, 114, 116, 118, 121, 123, 126, 126, 126, 126,
+ 92, 91, 90, 88, 87, 91, 94, 98, 103, 104, 105, 106, 107, 107, 107, 108,
+ 108, 109, 110, 112, 113, 114, 116, 118, 120, 122, 124, 126, 128, 128,
+ 128, 128, 93, 91, 89, 87, 85, 89, 93, 97, 102, 104, 106, 107, 109, 110,
+ 111, 112, 112, 113, 115, 116, 117, 118, 120, 121, 123, 125, 127, 129,
+ 131, 131, 131, 131, 94, 91, 89, 86, 84, 87, 92, 96, 101, 104, 107, 109,
+ 112, 113, 115, 116, 117, 118, 119, 120, 121, 122, 124, 125, 127, 129,
+ 131, 133, 135, 135, 135, 135, 96, 93, 90, 87, 84, 88, 92, 96, 101, 104,
+ 107, 110, 113, 115, 116, 118, 119, 121, 122, 123, 124, 126, 127, 129,
+ 130, 132, 134, 136, 138, 138, 138, 138, 97, 94, 90, 87, 85, 88, 92, 96,
+ 101, 104, 107, 111, 115, 116, 118, 120, 122, 123, 125, 126, 128, 129,
+ 131, 132, 134, 135, 137, 139, 141, 141, 141, 141, 98, 95, 91, 88, 85,
+ 89, 92, 97, 101, 104, 108, 112, 116, 118, 120, 122, 125, 126, 128, 130,
+ 131, 133, 134, 136, 137, 139, 141, 142, 144, 144, 144, 144, 100, 96, 92,
+ 89, 86, 89, 93, 97, 101, 105, 108, 112, 117, 119, 122, 125, 128, 130,
+ 131, 133, 135, 136, 138, 140, 141, 143, 144, 146, 148, 148, 148, 148,
+ 102, 98, 94, 91, 88, 91, 94, 98, 102, 106, 109, 113, 118, 121, 123, 126,
+ 130, 131, 133, 135, 137, 139, 141, 142, 144, 146, 147, 149, 151, 151,
+ 151, 151, 105, 100, 96, 93, 89, 92, 96, 99, 103, 107, 110, 115, 119,
+ 122, 125, 128, 131, 133, 135, 138, 140, 142, 143, 145, 147, 149, 150,
+ 152, 154, 154, 154, 154, 107, 103, 98, 95, 91, 94, 97, 101, 104, 108,
+ 112, 116, 120, 123, 126, 130, 133, 135, 138, 140, 143, 144, 146, 148,
+ 150, 152, 153, 155, 157, 157, 157, 157, 110, 105, 101, 97, 93, 96, 99,
+ 102, 105, 109, 113, 117, 121, 124, 128, 131, 135, 137, 140, 143, 145,
+ 147, 149, 151, 153, 155, 157, 158, 160, 160, 160, 160, 112, 108, 103,
+ 99, 95, 98, 101, 104, 107, 111, 114, 118, 122, 126, 129, 133, 136, 139,
+ 142, 144, 147, 149, 151, 153, 156, 157, 159, 161, 162, 162, 162, 162,
+ 115, 110, 106, 102, 98, 100, 103, 106, 109, 112, 116, 120, 124, 127,
+ 131, 134, 138, 141, 143, 146, 149, 151, 153, 156, 158, 160, 161, 163,
+ 165, 165, 165, 165, 118, 113, 109, 104, 100, 103, 105, 108, 111, 114,
+ 118, 121, 125, 129, 132, 136, 140, 142, 145, 148, 151, 153, 156, 158,
+ 160, 162, 164, 166, 168, 168, 168, 168, 121, 116, 112, 107, 103, 105,
+ 108, 110, 113, 116, 120, 123, 127, 130, 134, 137, 141, 144, 147, 150,
+ 153, 156, 158, 160, 163, 165, 166, 168, 170, 170, 170, 170, 124, 119,
+ 114, 110, 106, 108, 110, 113, 115, 118, 122, 125, 129, 132, 135, 139,
+ 143, 146, 149, 152, 155, 157, 160, 162, 165, 166, 168, 170, 172, 172,
+ 172, 172, 127, 122, 117, 113, 109, 111, 113, 115, 118, 121, 124, 127,
+ 131, 134, 137, 141, 144, 147, 150, 153, 157, 159, 161, 164, 166, 168,
+ 170, 172, 174, 174, 174, 174, 131, 125, 120, 116, 111, 114, 116, 118,
+ 120, 123, 126, 129, 133, 136, 139, 142, 146, 149, 152, 155, 158, 161,
+ 163, 166, 168, 170, 172, 174, 177, 177, 177, 177, 134, 129, 123, 119,
+ 115, 117, 119, 121, 123, 126, 128, 131, 135, 138, 141, 144, 148, 151,
+ 154, 157, 160, 162, 165, 168, 170, 172, 174, 177, 179, 179, 179, 179,
+ 134, 129, 123, 119, 115, 117, 119, 121, 123, 126, 128, 131, 135, 138,
+ 141, 144, 148, 151, 154, 157, 160, 162, 165, 168, 170, 172, 174, 177,
+ 179, 179, 179, 179, 134, 129, 123, 119, 115, 117, 119, 121, 123, 126,
+ 128, 131, 135, 138, 141, 144, 148, 151, 154, 157, 160, 162, 165, 168,
+ 170, 172, 174, 177, 179, 179, 179, 179, 134, 129, 123, 119, 115, 117,
+ 119, 121, 123, 126, 128, 131, 135, 138, 141, 144, 148, 151, 154, 157,
+ 160, 162, 165, 168, 170, 172, 174, 177, 179, 179, 179, 179 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 31, 47, 51, 63, 47, 56, 59, 67, 51, 59, 74, 82, 63, 67, 82, 95,
+ /* Size 8 */
+ 33, 27, 47, 50, 53, 58, 65, 72, 27, 38, 48, 44, 45, 49, 55, 61, 47, 48,
+ 55, 54, 53, 56, 60, 66, 50, 44, 54, 60, 62, 65, 68, 73, 53, 45, 53, 62,
+ 69, 73, 76, 80, 58, 49, 56, 65, 73, 79, 84, 88, 65, 55, 60, 68, 76, 84,
+ 89, 94, 72, 61, 66, 73, 80, 88, 94, 99,
+ /* Size 16 */
+ 32, 29, 26, 33, 46, 47, 48, 50, 51, 54, 57, 60, 63, 67, 70, 70, 29, 30,
+ 31, 37, 46, 46, 45, 46, 47, 50, 52, 55, 58, 61, 65, 65, 26, 31, 37, 41,
+ 47, 45, 43, 43, 44, 46, 48, 50, 53, 56, 60, 60, 33, 37, 41, 45, 50, 48,
+ 47, 47, 48, 49, 51, 53, 56, 59, 62, 62, 46, 46, 47, 50, 54, 53, 52, 52,
+ 52, 53, 55, 57, 59, 61, 64, 64, 47, 46, 45, 48, 53, 54, 55, 56, 56, 57,
+ 59, 60, 62, 65, 67, 67, 48, 45, 43, 47, 52, 55, 58, 60, 61, 62, 63, 65,
+ 67, 69, 71, 71, 50, 46, 43, 47, 52, 56, 60, 62, 64, 65, 67, 69, 70, 72,
+ 74, 74, 51, 47, 44, 48, 52, 56, 61, 64, 67, 69, 71, 73, 75, 76, 78, 78,
+ 54, 50, 46, 49, 53, 57, 62, 65, 69, 71, 74, 76, 78, 80, 82, 82, 57, 52,
+ 48, 51, 55, 59, 63, 67, 71, 74, 77, 79, 82, 84, 86, 86, 60, 55, 50, 53,
+ 57, 60, 65, 69, 73, 76, 79, 82, 84, 86, 89, 89, 63, 58, 53, 56, 59, 62,
+ 67, 70, 75, 78, 82, 84, 87, 89, 92, 92, 67, 61, 56, 59, 61, 65, 69, 72,
+ 76, 80, 84, 86, 89, 92, 94, 94, 70, 65, 60, 62, 64, 67, 71, 74, 78, 82,
+ 86, 89, 92, 94, 97, 97, 70, 65, 60, 62, 64, 67, 71, 74, 78, 82, 86, 89,
+ 92, 94, 97, 97,
+ /* Size 32 */
+ 32, 30, 29, 27, 26, 29, 33, 38, 45, 46, 47, 47, 48, 49, 49, 50, 51, 52,
+ 53, 55, 56, 58, 59, 61, 63, 64, 66, 68, 70, 70, 70, 70, 30, 30, 29, 29,
+ 28, 31, 35, 39, 46, 46, 46, 46, 46, 47, 47, 48, 49, 50, 51, 52, 54, 55,
+ 57, 58, 60, 61, 63, 65, 67, 67, 67, 67, 29, 29, 30, 30, 30, 33, 37, 41,
+ 46, 46, 45, 45, 45, 45, 46, 46, 47, 48, 49, 50, 51, 53, 54, 56, 57, 59,
+ 60, 62, 64, 64, 64, 64, 27, 29, 30, 32, 33, 36, 39, 42, 46, 45, 45, 44,
+ 43, 44, 44, 45, 45, 46, 47, 48, 49, 50, 52, 53, 55, 56, 58, 59, 61, 61,
+ 61, 61, 26, 28, 30, 33, 37, 39, 41, 43, 46, 45, 44, 43, 42, 42, 43, 43,
+ 43, 44, 45, 46, 47, 48, 50, 51, 53, 54, 56, 57, 59, 59, 59, 59, 29, 31,
+ 33, 36, 39, 41, 43, 45, 48, 47, 46, 45, 44, 44, 45, 45, 45, 46, 47, 48,
+ 49, 50, 51, 52, 54, 55, 57, 58, 60, 60, 60, 60, 33, 35, 37, 39, 41, 43,
+ 45, 47, 49, 49, 48, 47, 46, 47, 47, 47, 47, 48, 49, 49, 50, 51, 53, 54,
+ 55, 57, 58, 59, 61, 61, 61, 61, 38, 39, 41, 42, 43, 45, 47, 49, 51, 51,
+ 50, 49, 49, 49, 49, 49, 49, 50, 51, 51, 52, 53, 54, 55, 57, 58, 59, 61,
+ 62, 62, 62, 62, 45, 46, 46, 46, 46, 48, 49, 51, 53, 53, 52, 52, 52, 52,
+ 52, 51, 51, 52, 53, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 63, 63, 63,
+ 46, 46, 46, 45, 45, 47, 49, 51, 53, 53, 53, 53, 53, 53, 53, 53, 53, 54,
+ 55, 55, 56, 57, 58, 59, 60, 61, 62, 64, 65, 65, 65, 65, 47, 46, 45, 45,
+ 44, 46, 48, 50, 52, 53, 53, 54, 54, 55, 55, 55, 55, 56, 57, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 67, 67, 67, 67, 47, 46, 45, 44, 43, 45, 47, 49,
+ 52, 53, 54, 55, 56, 56, 57, 57, 58, 58, 59, 59, 60, 61, 62, 63, 64, 65,
+ 66, 67, 68, 68, 68, 68, 48, 46, 45, 43, 42, 44, 46, 49, 52, 53, 54, 56,
+ 58, 58, 59, 59, 60, 61, 61, 62, 62, 63, 64, 65, 66, 67, 68, 69, 70, 70,
+ 70, 70, 49, 47, 45, 44, 42, 44, 47, 49, 52, 53, 55, 56, 58, 59, 60, 61,
+ 62, 62, 63, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 72, 72, 72, 49, 47,
+ 46, 44, 43, 45, 47, 49, 52, 53, 55, 57, 59, 60, 61, 62, 63, 64, 64, 65,
+ 66, 67, 68, 69, 69, 70, 71, 72, 74, 74, 74, 74, 50, 48, 46, 45, 43, 45,
+ 47, 49, 51, 53, 55, 57, 59, 61, 62, 63, 65, 65, 66, 67, 68, 69, 70, 71,
+ 72, 72, 73, 74, 75, 75, 75, 75, 51, 49, 47, 45, 43, 45, 47, 49, 51, 53,
+ 55, 58, 60, 62, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 76,
+ 77, 77, 77, 77, 52, 50, 48, 46, 44, 46, 48, 50, 52, 54, 56, 58, 61, 62,
+ 64, 65, 67, 68, 69, 70, 72, 72, 73, 74, 75, 76, 77, 78, 79, 79, 79, 79,
+ 53, 51, 49, 47, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 64, 66, 68, 69,
+ 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 81, 81, 81, 55, 52, 50, 48,
+ 46, 48, 49, 51, 53, 55, 57, 59, 62, 63, 65, 67, 69, 70, 72, 73, 75, 76,
+ 77, 78, 79, 80, 81, 82, 83, 83, 83, 83, 56, 54, 51, 49, 47, 49, 50, 52,
+ 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 73, 75, 76, 77, 78, 79, 81, 82,
+ 83, 84, 85, 85, 85, 85, 58, 55, 53, 50, 48, 50, 51, 53, 55, 57, 59, 61,
+ 63, 65, 67, 69, 71, 72, 74, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86, 86,
+ 86, 86, 59, 57, 54, 52, 50, 51, 53, 54, 56, 58, 60, 62, 64, 66, 68, 70,
+ 72, 73, 75, 77, 78, 80, 81, 82, 83, 84, 85, 86, 87, 87, 87, 87, 61, 58,
+ 56, 53, 51, 52, 54, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 74, 76, 78,
+ 79, 81, 82, 83, 85, 86, 87, 88, 89, 89, 89, 89, 63, 60, 57, 55, 53, 54,
+ 55, 57, 58, 60, 62, 64, 66, 68, 69, 72, 74, 75, 77, 79, 81, 82, 83, 85,
+ 86, 87, 88, 89, 91, 91, 91, 91, 64, 61, 59, 56, 54, 55, 57, 58, 59, 61,
+ 63, 65, 67, 69, 70, 72, 75, 76, 78, 80, 82, 83, 84, 86, 87, 88, 89, 91,
+ 92, 92, 92, 92, 66, 63, 60, 58, 56, 57, 58, 59, 61, 62, 64, 66, 68, 70,
+ 71, 73, 76, 77, 79, 81, 83, 84, 85, 87, 88, 89, 91, 92, 93, 93, 93, 93,
+ 68, 65, 62, 59, 57, 58, 59, 61, 62, 64, 65, 67, 69, 71, 72, 74, 76, 78,
+ 80, 82, 84, 85, 86, 88, 89, 91, 92, 93, 94, 94, 94, 94, 70, 67, 64, 61,
+ 59, 60, 61, 62, 63, 65, 67, 68, 70, 72, 74, 75, 77, 79, 81, 83, 85, 86,
+ 87, 89, 91, 92, 93, 94, 96, 96, 96, 96, 70, 67, 64, 61, 59, 60, 61, 62,
+ 63, 65, 67, 68, 70, 72, 74, 75, 77, 79, 81, 83, 85, 86, 87, 89, 91, 92,
+ 93, 94, 96, 96, 96, 96, 70, 67, 64, 61, 59, 60, 61, 62, 63, 65, 67, 68,
+ 70, 72, 74, 75, 77, 79, 81, 83, 85, 86, 87, 89, 91, 92, 93, 94, 96, 96,
+ 96, 96, 70, 67, 64, 61, 59, 60, 61, 62, 63, 65, 67, 68, 70, 72, 74, 75,
+ 77, 79, 81, 83, 85, 86, 87, 89, 91, 92, 93, 94, 96, 96, 96, 96 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 69, 104, 143, 69, 97, 124, 152, 104, 124, 156, 176, 143, 152, 176,
+ 192,
+ /* Size 8 */
+ 64, 50, 54, 68, 86, 104, 122, 137, 50, 56, 54, 62, 76, 93, 109, 125, 54,
+ 54, 72, 82, 92, 105, 118, 131, 68, 62, 82, 98, 110, 120, 130, 140, 86,
+ 76, 92, 110, 123, 133, 142, 150, 104, 93, 105, 120, 133, 144, 151, 158,
+ 122, 109, 118, 130, 142, 151, 158, 164, 137, 125, 131, 140, 150, 158,
+ 164, 169,
+ /* Size 16 */
+ 64, 56, 50, 52, 54, 60, 68, 76, 86, 94, 104, 112, 122, 129, 137, 137,
+ 56, 55, 53, 54, 54, 59, 65, 72, 81, 89, 98, 106, 115, 122, 130, 130, 50,
+ 53, 56, 55, 54, 58, 62, 69, 76, 84, 93, 100, 109, 116, 125, 125, 52, 54,
+ 55, 59, 62, 66, 71, 77, 83, 90, 98, 105, 114, 120, 128, 128, 54, 54, 54,
+ 62, 72, 77, 82, 87, 92, 98, 105, 111, 118, 124, 131, 131, 60, 59, 58,
+ 66, 77, 82, 89, 94, 100, 106, 112, 118, 124, 130, 136, 136, 68, 65, 62,
+ 71, 82, 89, 98, 104, 110, 115, 120, 125, 130, 135, 140, 140, 76, 72, 69,
+ 77, 87, 94, 104, 109, 116, 121, 126, 131, 136, 140, 145, 145, 86, 81,
+ 76, 83, 92, 100, 110, 116, 123, 128, 133, 138, 142, 146, 150, 150, 94,
+ 89, 84, 90, 98, 106, 115, 121, 128, 133, 138, 142, 146, 150, 153, 153,
+ 104, 98, 93, 98, 105, 112, 120, 126, 133, 138, 144, 147, 151, 154, 158,
+ 158, 112, 106, 100, 105, 111, 118, 125, 131, 138, 142, 147, 151, 155,
+ 158, 161, 161, 122, 115, 109, 114, 118, 124, 130, 136, 142, 146, 151,
+ 155, 158, 161, 164, 164, 129, 122, 116, 120, 124, 130, 135, 140, 146,
+ 150, 154, 158, 161, 164, 166, 166, 137, 130, 125, 128, 131, 136, 140,
+ 145, 150, 153, 158, 161, 164, 166, 169, 169, 137, 130, 125, 128, 131,
+ 136, 140, 145, 150, 153, 158, 161, 164, 166, 169, 169,
+ /* Size 32 */
+ 64, 60, 56, 53, 50, 51, 52, 53, 54, 57, 60, 64, 68, 72, 76, 80, 86, 90,
+ 94, 99, 104, 108, 112, 117, 122, 125, 129, 132, 137, 137, 137, 137, 60,
+ 58, 55, 53, 52, 52, 53, 53, 54, 57, 59, 63, 66, 70, 74, 78, 83, 87, 91,
+ 96, 101, 105, 109, 113, 118, 122, 125, 129, 133, 133, 133, 133, 56, 55,
+ 55, 54, 53, 53, 54, 54, 54, 56, 59, 62, 65, 68, 72, 76, 81, 84, 89, 93,
+ 98, 102, 106, 110, 115, 118, 122, 126, 130, 130, 130, 130, 53, 53, 54,
+ 54, 55, 55, 55, 54, 54, 56, 59, 61, 64, 67, 70, 74, 78, 82, 86, 90, 95,
+ 99, 103, 107, 112, 115, 119, 123, 127, 127, 127, 127, 50, 52, 53, 55,
+ 56, 56, 55, 55, 54, 56, 58, 60, 62, 65, 69, 72, 76, 80, 84, 88, 93, 96,
+ 100, 104, 109, 113, 116, 120, 125, 125, 125, 125, 51, 52, 53, 55, 56,
+ 56, 57, 57, 58, 60, 62, 64, 66, 69, 72, 76, 80, 83, 87, 91, 95, 99, 103,
+ 107, 111, 115, 118, 122, 126, 126, 126, 126, 52, 53, 54, 55, 55, 57, 59,
+ 60, 62, 64, 66, 68, 71, 73, 77, 80, 83, 87, 90, 94, 98, 102, 105, 109,
+ 114, 117, 120, 124, 128, 128, 128, 128, 53, 53, 54, 54, 55, 57, 60, 63,
+ 67, 69, 71, 73, 76, 78, 81, 84, 88, 91, 94, 98, 101, 105, 108, 112, 116,
+ 119, 122, 126, 129, 129, 129, 129, 54, 54, 54, 54, 54, 58, 62, 67, 72,
+ 74, 77, 79, 82, 84, 87, 89, 92, 95, 98, 101, 105, 108, 111, 115, 118,
+ 121, 124, 128, 131, 131, 131, 131, 57, 57, 56, 56, 56, 60, 64, 69, 74,
+ 77, 79, 82, 85, 88, 90, 93, 96, 99, 102, 105, 108, 111, 114, 118, 121,
+ 124, 127, 130, 133, 133, 133, 133, 60, 59, 59, 59, 58, 62, 66, 71, 77,
+ 79, 82, 86, 89, 92, 94, 97, 100, 103, 106, 109, 112, 115, 118, 121, 124,
+ 127, 130, 133, 136, 136, 136, 136, 64, 63, 62, 61, 60, 64, 68, 73, 79,
+ 82, 86, 89, 93, 96, 99, 102, 105, 107, 110, 113, 116, 118, 121, 124,
+ 127, 130, 132, 135, 138, 138, 138, 138, 68, 66, 65, 64, 62, 66, 71, 76,
+ 82, 85, 89, 93, 98, 101, 104, 107, 110, 112, 115, 117, 120, 122, 125,
+ 128, 130, 133, 135, 138, 140, 140, 140, 140, 72, 70, 68, 67, 65, 69, 73,
+ 78, 84, 88, 92, 96, 101, 104, 106, 110, 113, 115, 118, 120, 123, 125,
+ 128, 130, 133, 135, 138, 140, 143, 143, 143, 143, 76, 74, 72, 70, 69,
+ 72, 77, 81, 87, 90, 94, 99, 104, 106, 109, 113, 116, 119, 121, 124, 126,
+ 129, 131, 133, 136, 138, 140, 142, 145, 145, 145, 145, 80, 78, 76, 74,
+ 72, 76, 80, 84, 89, 93, 97, 102, 107, 110, 113, 116, 120, 122, 124, 127,
+ 130, 132, 134, 136, 139, 141, 143, 145, 147, 147, 147, 147, 86, 83, 81,
+ 78, 76, 80, 83, 88, 92, 96, 100, 105, 110, 113, 116, 120, 123, 126, 128,
+ 131, 133, 135, 138, 140, 142, 144, 146, 148, 150, 150, 150, 150, 90, 87,
+ 84, 82, 80, 83, 87, 91, 95, 99, 103, 107, 112, 115, 119, 122, 126, 128,
+ 131, 133, 136, 138, 140, 142, 144, 146, 148, 150, 151, 151, 151, 151,
+ 94, 91, 89, 86, 84, 87, 90, 94, 98, 102, 106, 110, 115, 118, 121, 124,
+ 128, 131, 133, 136, 138, 140, 142, 144, 146, 148, 150, 152, 153, 153,
+ 153, 153, 99, 96, 93, 90, 88, 91, 94, 98, 101, 105, 109, 113, 117, 120,
+ 124, 127, 131, 133, 136, 138, 141, 143, 145, 147, 149, 150, 152, 154,
+ 155, 155, 155, 155, 104, 101, 98, 95, 93, 95, 98, 101, 105, 108, 112,
+ 116, 120, 123, 126, 130, 133, 136, 138, 141, 144, 145, 147, 149, 151,
+ 153, 154, 156, 158, 158, 158, 158, 108, 105, 102, 99, 96, 99, 102, 105,
+ 108, 111, 115, 118, 122, 125, 129, 132, 135, 138, 140, 143, 145, 147,
+ 149, 151, 153, 154, 156, 158, 159, 159, 159, 159, 112, 109, 106, 103,
+ 100, 103, 105, 108, 111, 114, 118, 121, 125, 128, 131, 134, 138, 140,
+ 142, 145, 147, 149, 151, 153, 155, 156, 158, 159, 161, 161, 161, 161,
+ 117, 113, 110, 107, 104, 107, 109, 112, 115, 118, 121, 124, 128, 130,
+ 133, 136, 140, 142, 144, 147, 149, 151, 153, 155, 157, 158, 159, 161,
+ 162, 162, 162, 162, 122, 118, 115, 112, 109, 111, 114, 116, 118, 121,
+ 124, 127, 130, 133, 136, 139, 142, 144, 146, 149, 151, 153, 155, 157,
+ 158, 160, 161, 162, 164, 164, 164, 164, 125, 122, 118, 115, 113, 115,
+ 117, 119, 121, 124, 127, 130, 133, 135, 138, 141, 144, 146, 148, 150,
+ 153, 154, 156, 158, 160, 161, 162, 164, 165, 165, 165, 165, 129, 125,
+ 122, 119, 116, 118, 120, 122, 124, 127, 130, 132, 135, 138, 140, 143,
+ 146, 148, 150, 152, 154, 156, 158, 159, 161, 162, 164, 165, 166, 166,
+ 166, 166, 132, 129, 126, 123, 120, 122, 124, 126, 128, 130, 133, 135,
+ 138, 140, 142, 145, 148, 150, 152, 154, 156, 158, 159, 161, 162, 164,
+ 165, 166, 167, 167, 167, 167, 137, 133, 130, 127, 125, 126, 128, 129,
+ 131, 133, 136, 138, 140, 143, 145, 147, 150, 151, 153, 155, 158, 159,
+ 161, 162, 164, 165, 166, 167, 169, 169, 169, 169, 137, 133, 130, 127,
+ 125, 126, 128, 129, 131, 133, 136, 138, 140, 143, 145, 147, 150, 151,
+ 153, 155, 158, 159, 161, 162, 164, 165, 166, 167, 169, 169, 169, 169,
+ 137, 133, 130, 127, 125, 126, 128, 129, 131, 133, 136, 138, 140, 143,
+ 145, 147, 150, 151, 153, 155, 158, 159, 161, 162, 164, 165, 166, 167,
+ 169, 169, 169, 169, 137, 133, 130, 127, 125, 126, 128, 129, 131, 133,
+ 136, 138, 140, 143, 145, 147, 150, 151, 153, 155, 158, 159, 161, 162,
+ 164, 165, 166, 167, 169, 169, 169, 169 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 29, 32, 48, 68, 32, 45, 58, 73, 48, 58, 75, 86, 68, 73, 86, 94,
+ /* Size 8 */
+ 34, 26, 28, 36, 46, 57, 67, 77, 26, 30, 29, 33, 41, 50, 60, 69, 28, 29,
+ 39, 44, 50, 57, 65, 73, 36, 33, 44, 53, 60, 66, 73, 79, 46, 41, 50, 60,
+ 68, 75, 80, 85, 57, 50, 57, 66, 75, 81, 86, 90, 67, 60, 65, 73, 80, 86,
+ 90, 94, 77, 69, 73, 79, 85, 90, 94, 97,
+ /* Size 16 */
+ 33, 29, 26, 27, 28, 31, 35, 39, 45, 50, 55, 60, 65, 70, 74, 74, 29, 28,
+ 27, 27, 28, 30, 34, 37, 42, 47, 52, 56, 62, 66, 71, 71, 26, 27, 29, 28,
+ 28, 30, 32, 35, 40, 44, 49, 53, 58, 62, 67, 67, 27, 27, 28, 30, 32, 34,
+ 37, 40, 44, 47, 52, 56, 61, 65, 69, 69, 28, 28, 28, 32, 37, 40, 43, 45,
+ 49, 52, 56, 59, 64, 67, 71, 71, 31, 30, 30, 34, 40, 43, 47, 50, 53, 56,
+ 60, 63, 67, 70, 74, 74, 35, 34, 32, 37, 43, 47, 52, 55, 59, 61, 65, 67,
+ 71, 74, 77, 77, 39, 37, 35, 40, 45, 50, 55, 58, 62, 65, 68, 71, 74, 77,
+ 79, 79, 45, 42, 40, 44, 49, 53, 59, 62, 66, 69, 72, 75, 78, 80, 82, 82,
+ 50, 47, 44, 47, 52, 56, 61, 65, 69, 72, 75, 78, 80, 82, 85, 85, 55, 52,
+ 49, 52, 56, 60, 65, 68, 72, 75, 79, 81, 83, 85, 87, 87, 60, 56, 53, 56,
+ 59, 63, 67, 71, 75, 78, 81, 83, 85, 87, 89, 89, 65, 62, 58, 61, 64, 67,
+ 71, 74, 78, 80, 83, 85, 88, 89, 91, 91, 70, 66, 62, 65, 67, 70, 74, 77,
+ 80, 82, 85, 87, 89, 91, 93, 93, 74, 71, 67, 69, 71, 74, 77, 79, 82, 85,
+ 87, 89, 91, 93, 94, 94, 74, 71, 67, 69, 71, 74, 77, 79, 82, 85, 87, 89,
+ 91, 93, 94, 94,
+ /* Size 32 */
+ 33, 30, 28, 27, 25, 26, 26, 27, 27, 29, 30, 32, 35, 37, 39, 41, 44, 47,
+ 49, 52, 55, 57, 59, 62, 65, 67, 69, 71, 73, 73, 73, 73, 30, 29, 28, 27,
+ 26, 26, 27, 27, 27, 29, 30, 32, 34, 36, 38, 40, 43, 45, 47, 50, 53, 55,
+ 57, 60, 63, 65, 67, 69, 71, 71, 71, 71, 28, 28, 28, 27, 27, 27, 27, 27,
+ 27, 29, 30, 31, 33, 35, 37, 39, 42, 44, 46, 48, 51, 53, 56, 58, 61, 63,
+ 65, 67, 70, 70, 70, 70, 27, 27, 27, 27, 28, 28, 28, 27, 27, 28, 30, 31,
+ 32, 34, 36, 38, 40, 42, 44, 47, 50, 52, 54, 56, 59, 61, 63, 65, 68, 68,
+ 68, 68, 25, 26, 27, 28, 29, 28, 28, 28, 27, 28, 29, 30, 32, 33, 35, 37,
+ 39, 41, 43, 45, 48, 50, 52, 55, 57, 59, 61, 64, 66, 66, 66, 66, 26, 26,
+ 27, 28, 28, 29, 29, 29, 29, 30, 31, 33, 34, 35, 37, 39, 41, 43, 45, 47,
+ 50, 52, 54, 56, 59, 61, 63, 65, 67, 67, 67, 67, 26, 27, 27, 28, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 38, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57,
+ 60, 62, 64, 66, 68, 68, 68, 68, 27, 27, 27, 27, 28, 29, 31, 32, 34, 35,
+ 36, 38, 39, 40, 42, 44, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
+ 69, 69, 69, 69, 27, 27, 27, 27, 27, 29, 32, 34, 37, 38, 39, 41, 42, 43,
+ 45, 46, 48, 49, 51, 53, 55, 57, 59, 61, 63, 64, 66, 68, 70, 70, 70, 70,
+ 29, 29, 29, 28, 28, 30, 33, 35, 38, 39, 41, 42, 44, 45, 47, 48, 50, 52,
+ 53, 55, 57, 59, 60, 62, 64, 66, 68, 69, 71, 71, 71, 71, 30, 30, 30, 30,
+ 29, 31, 34, 36, 39, 41, 42, 44, 46, 48, 49, 51, 52, 54, 55, 57, 59, 61,
+ 62, 64, 66, 68, 69, 71, 73, 73, 73, 73, 32, 32, 31, 31, 30, 33, 35, 38,
+ 41, 42, 44, 46, 49, 50, 52, 53, 55, 56, 58, 59, 61, 63, 64, 66, 68, 69,
+ 71, 72, 74, 74, 74, 74, 35, 34, 33, 32, 32, 34, 36, 39, 42, 44, 46, 49,
+ 51, 53, 54, 56, 58, 59, 60, 62, 64, 65, 66, 68, 70, 71, 72, 74, 75, 75,
+ 75, 75, 37, 36, 35, 34, 33, 35, 38, 40, 43, 45, 48, 50, 53, 54, 56, 58,
+ 59, 61, 62, 64, 65, 67, 68, 70, 71, 73, 74, 75, 77, 77, 77, 77, 39, 38,
+ 37, 36, 35, 37, 39, 42, 45, 47, 49, 52, 54, 56, 58, 59, 61, 63, 64, 66,
+ 67, 69, 70, 71, 73, 74, 75, 77, 78, 78, 78, 78, 41, 40, 39, 38, 37, 39,
+ 41, 44, 46, 48, 51, 53, 56, 58, 59, 61, 63, 65, 66, 68, 69, 71, 72, 73,
+ 75, 76, 77, 78, 80, 80, 80, 80, 44, 43, 42, 40, 39, 41, 43, 45, 48, 50,
+ 52, 55, 58, 59, 61, 63, 65, 67, 68, 70, 71, 73, 74, 75, 76, 78, 79, 80,
+ 81, 81, 81, 81, 47, 45, 44, 42, 41, 43, 45, 47, 49, 52, 54, 56, 59, 61,
+ 63, 65, 67, 68, 70, 71, 73, 74, 75, 76, 78, 79, 80, 81, 82, 82, 82, 82,
+ 49, 47, 46, 44, 43, 45, 47, 49, 51, 53, 55, 58, 60, 62, 64, 66, 68, 70,
+ 71, 73, 74, 75, 77, 78, 79, 80, 81, 82, 83, 83, 83, 83, 52, 50, 48, 47,
+ 45, 47, 49, 51, 53, 55, 57, 59, 62, 64, 66, 68, 70, 71, 73, 74, 76, 77,
+ 78, 79, 81, 82, 83, 84, 85, 85, 85, 85, 55, 53, 51, 50, 48, 50, 51, 53,
+ 55, 57, 59, 61, 64, 65, 67, 69, 71, 73, 74, 76, 77, 79, 80, 81, 82, 83,
+ 84, 85, 86, 86, 86, 86, 57, 55, 53, 52, 50, 52, 53, 55, 57, 59, 61, 63,
+ 65, 67, 69, 71, 73, 74, 75, 77, 79, 80, 81, 82, 83, 84, 85, 86, 87, 87,
+ 87, 87, 59, 57, 56, 54, 52, 54, 55, 57, 59, 60, 62, 64, 66, 68, 70, 72,
+ 74, 75, 77, 78, 80, 81, 82, 83, 84, 85, 86, 87, 88, 88, 88, 88, 62, 60,
+ 58, 56, 55, 56, 57, 59, 61, 62, 64, 66, 68, 70, 71, 73, 75, 76, 78, 79,
+ 81, 82, 83, 84, 85, 86, 87, 88, 89, 89, 89, 89, 65, 63, 61, 59, 57, 59,
+ 60, 61, 63, 64, 66, 68, 70, 71, 73, 75, 76, 78, 79, 81, 82, 83, 84, 85,
+ 86, 87, 88, 89, 90, 90, 90, 90, 67, 65, 63, 61, 59, 61, 62, 63, 64, 66,
+ 68, 69, 71, 73, 74, 76, 78, 79, 80, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+ 90, 90, 90, 90, 69, 67, 65, 63, 61, 63, 64, 65, 66, 68, 69, 71, 72, 74,
+ 75, 77, 79, 80, 81, 83, 84, 85, 86, 87, 88, 89, 90, 90, 91, 91, 91, 91,
+ 71, 69, 67, 65, 64, 65, 66, 67, 68, 69, 71, 72, 74, 75, 77, 78, 80, 81,
+ 82, 84, 85, 86, 87, 88, 89, 90, 90, 91, 92, 92, 92, 92, 73, 71, 70, 68,
+ 66, 67, 68, 69, 70, 71, 73, 74, 75, 77, 78, 80, 81, 82, 83, 85, 86, 87,
+ 88, 89, 90, 90, 91, 92, 93, 93, 93, 93, 73, 71, 70, 68, 66, 67, 68, 69,
+ 70, 71, 73, 74, 75, 77, 78, 80, 81, 82, 83, 85, 86, 87, 88, 89, 90, 90,
+ 91, 92, 93, 93, 93, 93, 73, 71, 70, 68, 66, 67, 68, 69, 70, 71, 73, 74,
+ 75, 77, 78, 80, 81, 82, 83, 85, 86, 87, 88, 89, 90, 90, 91, 92, 93, 93,
+ 93, 93, 73, 71, 70, 68, 66, 67, 68, 69, 70, 71, 73, 74, 75, 77, 78, 80,
+ 81, 82, 83, 85, 86, 87, 88, 89, 90, 90, 91, 92, 93, 93, 93, 93 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 94, 100, 120, 94, 108, 114, 126, 100, 114, 136, 149, 120, 126, 149,
+ 166,
+ /* Size 8 */
+ 64, 53, 88, 92, 97, 105, 115, 126, 53, 73, 89, 82, 84, 91, 100, 109, 88,
+ 89, 100, 98, 98, 102, 108, 116, 92, 82, 98, 107, 111, 115, 120, 126, 97,
+ 84, 98, 111, 121, 127, 132, 137, 105, 91, 102, 115, 127, 135, 142, 147,
+ 115, 100, 108, 120, 132, 142, 149, 155, 126, 109, 116, 126, 137, 147,
+ 155, 162,
+ /* Size 16 */
+ 64, 58, 53, 66, 88, 90, 92, 94, 97, 101, 105, 110, 115, 120, 126, 126,
+ 58, 60, 61, 72, 88, 87, 87, 88, 90, 94, 97, 102, 107, 112, 117, 117, 53,
+ 61, 73, 80, 89, 85, 82, 83, 84, 87, 91, 95, 100, 104, 109, 109, 66, 72,
+ 80, 87, 94, 92, 89, 90, 90, 93, 96, 100, 104, 108, 113, 113, 88, 88, 89,
+ 94, 100, 99, 98, 98, 98, 100, 102, 105, 108, 112, 116, 116, 90, 87, 85,
+ 92, 99, 101, 102, 103, 104, 106, 108, 111, 114, 117, 121, 121, 92, 87,
+ 82, 89, 98, 102, 107, 109, 111, 113, 115, 117, 120, 123, 126, 126, 94,
+ 88, 83, 90, 98, 103, 109, 113, 116, 118, 121, 123, 126, 128, 132, 132,
+ 97, 90, 84, 90, 98, 104, 111, 116, 121, 124, 127, 129, 132, 134, 137,
+ 137, 101, 94, 87, 93, 100, 106, 113, 118, 124, 127, 131, 134, 136, 139,
+ 142, 142, 105, 97, 91, 96, 102, 108, 115, 121, 127, 131, 135, 138, 142,
+ 144, 147, 147, 110, 102, 95, 100, 105, 111, 117, 123, 129, 134, 138,
+ 142, 145, 148, 151, 151, 115, 107, 100, 104, 108, 114, 120, 126, 132,
+ 136, 142, 145, 149, 152, 155, 155, 120, 112, 104, 108, 112, 117, 123,
+ 128, 134, 139, 144, 148, 152, 155, 158, 158, 126, 117, 109, 113, 116,
+ 121, 126, 132, 137, 142, 147, 151, 155, 158, 162, 162, 126, 117, 109,
+ 113, 116, 121, 126, 132, 137, 142, 147, 151, 155, 158, 162, 162,
+ /* Size 32 */
+ 64, 61, 58, 56, 53, 59, 66, 75, 88, 89, 90, 91, 92, 93, 94, 95, 97, 99,
+ 101, 103, 105, 108, 110, 113, 115, 118, 120, 123, 126, 126, 126, 126,
+ 61, 60, 59, 58, 57, 63, 69, 77, 88, 88, 89, 89, 89, 90, 91, 92, 93, 95,
+ 97, 99, 101, 103, 106, 108, 111, 113, 116, 118, 121, 121, 121, 121, 58,
+ 59, 60, 61, 61, 67, 72, 80, 88, 88, 87, 87, 87, 88, 88, 89, 90, 92, 94,
+ 95, 97, 100, 102, 104, 107, 109, 112, 114, 117, 117, 117, 117, 56, 58,
+ 61, 63, 67, 71, 76, 82, 89, 88, 86, 85, 84, 85, 86, 86, 87, 89, 90, 92,
+ 94, 96, 98, 101, 103, 105, 108, 110, 113, 113, 113, 113, 53, 57, 61, 67,
+ 73, 76, 80, 84, 89, 87, 85, 84, 82, 83, 83, 84, 84, 86, 87, 89, 91, 93,
+ 95, 97, 100, 102, 104, 107, 109, 109, 109, 109, 59, 63, 67, 71, 76, 80,
+ 83, 87, 92, 90, 88, 87, 86, 86, 86, 87, 87, 89, 90, 92, 93, 95, 97, 99,
+ 102, 104, 106, 109, 111, 111, 111, 111, 66, 69, 72, 76, 80, 83, 87, 90,
+ 94, 93, 92, 91, 89, 90, 90, 90, 90, 92, 93, 94, 96, 98, 100, 102, 104,
+ 106, 108, 110, 113, 113, 113, 113, 75, 77, 80, 82, 84, 87, 90, 94, 97,
+ 96, 95, 94, 93, 94, 94, 94, 94, 95, 96, 97, 99, 100, 102, 104, 106, 108,
+ 110, 112, 115, 115, 115, 115, 88, 88, 88, 89, 89, 92, 94, 97, 100, 100,
+ 99, 99, 98, 98, 98, 98, 98, 99, 100, 101, 102, 103, 105, 106, 108, 110,
+ 112, 114, 116, 116, 116, 116, 89, 88, 88, 88, 87, 90, 93, 96, 100, 100,
+ 100, 100, 100, 100, 100, 101, 101, 102, 103, 104, 105, 106, 108, 109,
+ 111, 113, 115, 117, 119, 119, 119, 119, 90, 89, 87, 86, 85, 88, 92, 95,
+ 99, 100, 101, 102, 102, 103, 103, 104, 104, 105, 106, 107, 108, 109,
+ 111, 112, 114, 116, 117, 119, 121, 121, 121, 121, 91, 89, 87, 85, 84,
+ 87, 91, 94, 99, 100, 102, 103, 105, 106, 106, 107, 108, 109, 109, 110,
+ 111, 113, 114, 115, 117, 118, 120, 122, 124, 124, 124, 124, 92, 89, 87,
+ 84, 82, 86, 89, 93, 98, 100, 102, 105, 107, 108, 109, 110, 111, 112,
+ 113, 114, 115, 116, 117, 119, 120, 122, 123, 125, 126, 126, 126, 126,
+ 93, 90, 88, 85, 83, 86, 90, 94, 98, 100, 103, 106, 108, 110, 111, 112,
+ 114, 115, 116, 117, 118, 119, 120, 121, 123, 124, 126, 127, 129, 129,
+ 129, 129, 94, 91, 88, 86, 83, 86, 90, 94, 98, 100, 103, 106, 109, 111,
+ 113, 114, 116, 117, 118, 119, 121, 122, 123, 124, 126, 127, 128, 130,
+ 132, 132, 132, 132, 95, 92, 89, 86, 84, 87, 90, 94, 98, 101, 104, 107,
+ 110, 112, 114, 116, 118, 119, 121, 122, 124, 125, 126, 127, 129, 130,
+ 131, 133, 134, 134, 134, 134, 97, 93, 90, 87, 84, 87, 90, 94, 98, 101,
+ 104, 108, 111, 114, 116, 118, 121, 122, 124, 125, 127, 128, 129, 130,
+ 132, 133, 134, 136, 137, 137, 137, 137, 99, 95, 92, 89, 86, 89, 92, 95,
+ 99, 102, 105, 109, 112, 115, 117, 119, 122, 124, 125, 127, 129, 130,
+ 131, 133, 134, 135, 137, 138, 139, 139, 139, 139, 101, 97, 94, 90, 87,
+ 90, 93, 96, 100, 103, 106, 109, 113, 116, 118, 121, 124, 125, 127, 129,
+ 131, 132, 134, 135, 136, 138, 139, 140, 142, 142, 142, 142, 103, 99, 95,
+ 92, 89, 92, 94, 97, 101, 104, 107, 110, 114, 117, 119, 122, 125, 127,
+ 129, 131, 133, 134, 136, 137, 139, 140, 142, 143, 144, 144, 144, 144,
+ 105, 101, 97, 94, 91, 93, 96, 99, 102, 105, 108, 111, 115, 118, 121,
+ 124, 127, 129, 131, 133, 135, 137, 138, 140, 142, 143, 144, 146, 147,
+ 147, 147, 147, 108, 103, 100, 96, 93, 95, 98, 100, 103, 106, 109, 113,
+ 116, 119, 122, 125, 128, 130, 132, 134, 137, 138, 140, 142, 143, 145,
+ 146, 148, 149, 149, 149, 149, 110, 106, 102, 98, 95, 97, 100, 102, 105,
+ 108, 111, 114, 117, 120, 123, 126, 129, 131, 134, 136, 138, 140, 142,
+ 143, 145, 147, 148, 149, 151, 151, 151, 151, 113, 108, 104, 101, 97, 99,
+ 102, 104, 106, 109, 112, 115, 119, 121, 124, 127, 130, 133, 135, 137,
+ 140, 142, 143, 145, 147, 149, 150, 151, 153, 153, 153, 153, 115, 111,
+ 107, 103, 100, 102, 104, 106, 108, 111, 114, 117, 120, 123, 126, 129,
+ 132, 134, 136, 139, 142, 143, 145, 147, 149, 151, 152, 154, 155, 155,
+ 155, 155, 118, 113, 109, 105, 102, 104, 106, 108, 110, 113, 116, 118,
+ 122, 124, 127, 130, 133, 135, 138, 140, 143, 145, 147, 149, 151, 152,
+ 154, 155, 157, 157, 157, 157, 120, 116, 112, 108, 104, 106, 108, 110,
+ 112, 115, 117, 120, 123, 126, 128, 131, 134, 137, 139, 142, 144, 146,
+ 148, 150, 152, 154, 155, 157, 158, 158, 158, 158, 123, 118, 114, 110,
+ 107, 109, 110, 112, 114, 117, 119, 122, 125, 127, 130, 133, 136, 138,
+ 140, 143, 146, 148, 149, 151, 154, 155, 157, 158, 160, 160, 160, 160,
+ 126, 121, 117, 113, 109, 111, 113, 115, 116, 119, 121, 124, 126, 129,
+ 132, 134, 137, 139, 142, 144, 147, 149, 151, 153, 155, 157, 158, 160,
+ 162, 162, 162, 162, 126, 121, 117, 113, 109, 111, 113, 115, 116, 119,
+ 121, 124, 126, 129, 132, 134, 137, 139, 142, 144, 147, 149, 151, 153,
+ 155, 157, 158, 160, 162, 162, 162, 162, 126, 121, 117, 113, 109, 111,
+ 113, 115, 116, 119, 121, 124, 126, 129, 132, 134, 137, 139, 142, 144,
+ 147, 149, 151, 153, 155, 157, 158, 160, 162, 162, 162, 162, 126, 121,
+ 117, 113, 109, 111, 113, 115, 116, 119, 121, 124, 126, 129, 132, 134,
+ 137, 139, 142, 144, 147, 149, 151, 153, 155, 157, 158, 160, 162, 162,
+ 162, 162 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 33, 49, 53, 64, 49, 57, 60, 67, 53, 60, 73, 81, 64, 67, 81, 91,
+ /* Size 8 */
+ 35, 29, 49, 51, 54, 59, 65, 72, 29, 40, 50, 46, 47, 51, 56, 62, 49, 50,
+ 56, 55, 55, 57, 61, 66, 51, 46, 55, 61, 63, 65, 68, 72, 54, 47, 55, 63,
+ 69, 72, 76, 79, 59, 51, 57, 65, 72, 78, 82, 85, 65, 56, 61, 68, 76, 82,
+ 86, 90, 72, 62, 66, 72, 79, 85, 90, 94,
+ /* Size 16 */
+ 34, 31, 28, 36, 48, 49, 50, 52, 53, 55, 58, 61, 64, 67, 70, 70, 31, 32,
+ 33, 39, 48, 48, 47, 48, 49, 51, 53, 56, 59, 62, 65, 65, 28, 33, 39, 43,
+ 49, 47, 45, 45, 46, 48, 50, 52, 55, 57, 61, 61, 36, 39, 43, 47, 52, 50,
+ 49, 49, 49, 51, 53, 55, 57, 60, 63, 63, 48, 48, 49, 52, 55, 55, 54, 54,
+ 54, 55, 56, 58, 60, 62, 65, 65, 49, 48, 47, 50, 55, 55, 56, 57, 57, 58,
+ 60, 61, 63, 65, 68, 68, 50, 47, 45, 49, 54, 56, 59, 61, 62, 63, 64, 65,
+ 67, 69, 71, 71, 52, 48, 45, 49, 54, 57, 61, 62, 64, 66, 67, 69, 70, 72,
+ 74, 74, 53, 49, 46, 49, 54, 57, 62, 64, 67, 69, 71, 72, 74, 76, 77, 77,
+ 55, 51, 48, 51, 55, 58, 63, 66, 69, 71, 73, 75, 77, 78, 80, 80, 58, 53,
+ 50, 53, 56, 60, 64, 67, 71, 73, 76, 78, 80, 82, 83, 83, 61, 56, 52, 55,
+ 58, 61, 65, 69, 72, 75, 78, 80, 82, 84, 86, 86, 64, 59, 55, 57, 60, 63,
+ 67, 70, 74, 77, 80, 82, 85, 86, 88, 88, 67, 62, 57, 60, 62, 65, 69, 72,
+ 76, 78, 82, 84, 86, 88, 90, 90, 70, 65, 61, 63, 65, 68, 71, 74, 77, 80,
+ 83, 86, 88, 90, 92, 92, 70, 65, 61, 63, 65, 68, 71, 74, 77, 80, 83, 86,
+ 88, 90, 92, 92,
+ /* Size 32 */
+ 34, 32, 31, 29, 28, 31, 35, 40, 47, 48, 48, 49, 50, 50, 51, 52, 52, 54,
+ 55, 56, 57, 59, 60, 62, 63, 65, 66, 68, 70, 70, 70, 70, 32, 32, 31, 31,
+ 30, 33, 37, 42, 47, 48, 48, 48, 48, 49, 49, 50, 50, 52, 53, 54, 55, 56,
+ 58, 59, 61, 62, 64, 65, 67, 67, 67, 67, 31, 31, 32, 32, 33, 35, 39, 43,
+ 48, 47, 47, 47, 47, 47, 48, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 60,
+ 61, 63, 64, 64, 64, 64, 29, 31, 32, 34, 35, 38, 41, 44, 48, 47, 47, 46,
+ 45, 46, 46, 47, 47, 48, 49, 50, 51, 52, 53, 55, 56, 57, 59, 60, 62, 62,
+ 62, 62, 28, 30, 33, 35, 39, 41, 43, 45, 48, 47, 46, 45, 44, 44, 45, 45,
+ 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 57, 58, 60, 60, 60, 60, 31, 33,
+ 35, 38, 41, 43, 45, 47, 50, 49, 48, 47, 46, 46, 47, 47, 47, 48, 49, 50,
+ 50, 52, 53, 54, 55, 57, 58, 59, 61, 61, 61, 61, 35, 37, 39, 41, 43, 45,
+ 47, 49, 51, 50, 50, 49, 48, 48, 49, 49, 49, 50, 50, 51, 52, 53, 54, 55,
+ 57, 58, 59, 60, 62, 62, 62, 62, 40, 42, 43, 44, 45, 47, 49, 51, 53, 52,
+ 52, 51, 51, 51, 51, 51, 51, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62,
+ 63, 63, 63, 63, 47, 47, 48, 48, 48, 50, 51, 53, 55, 54, 54, 54, 53, 53,
+ 53, 53, 53, 54, 54, 55, 55, 56, 57, 58, 59, 60, 61, 63, 64, 64, 64, 64,
+ 48, 48, 47, 47, 47, 49, 50, 52, 54, 54, 54, 54, 54, 55, 55, 55, 55, 55,
+ 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 65, 65, 65, 48, 48, 47, 47,
+ 46, 48, 50, 52, 54, 54, 55, 55, 56, 56, 56, 56, 57, 57, 58, 58, 59, 60,
+ 61, 61, 62, 63, 65, 66, 67, 67, 67, 67, 49, 48, 47, 46, 45, 47, 49, 51,
+ 54, 54, 55, 56, 57, 58, 58, 58, 59, 59, 60, 60, 61, 62, 63, 63, 64, 65,
+ 66, 67, 68, 68, 68, 68, 50, 48, 47, 45, 44, 46, 48, 51, 53, 54, 56, 57,
+ 59, 59, 60, 60, 61, 62, 62, 63, 63, 64, 65, 65, 66, 67, 68, 69, 70, 70,
+ 70, 70, 50, 49, 47, 46, 44, 46, 48, 51, 53, 55, 56, 58, 59, 60, 61, 62,
+ 62, 63, 63, 64, 65, 65, 66, 67, 68, 69, 69, 70, 71, 71, 71, 71, 51, 49,
+ 48, 46, 45, 47, 49, 51, 53, 55, 56, 58, 60, 61, 62, 63, 64, 64, 65, 66,
+ 66, 67, 68, 69, 69, 70, 71, 72, 73, 73, 73, 73, 52, 50, 48, 47, 45, 47,
+ 49, 51, 53, 55, 56, 58, 60, 62, 63, 64, 65, 66, 67, 67, 68, 69, 70, 70,
+ 71, 72, 73, 74, 75, 75, 75, 75, 52, 50, 49, 47, 45, 47, 49, 51, 53, 55,
+ 57, 59, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 72, 73, 74, 75, 76,
+ 76, 76, 76, 76, 54, 52, 50, 48, 46, 48, 50, 51, 54, 55, 57, 59, 62, 63,
+ 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 78, 78, 78,
+ 55, 53, 51, 49, 47, 49, 50, 52, 54, 56, 58, 60, 62, 63, 65, 67, 68, 69,
+ 70, 71, 73, 73, 74, 75, 76, 77, 78, 78, 79, 79, 79, 79, 56, 54, 52, 50,
+ 48, 50, 51, 53, 55, 56, 58, 60, 63, 64, 66, 67, 69, 70, 71, 73, 74, 75,
+ 76, 77, 77, 78, 79, 80, 81, 81, 81, 81, 57, 55, 53, 51, 49, 50, 52, 54,
+ 55, 57, 59, 61, 63, 65, 66, 68, 70, 71, 73, 74, 75, 76, 77, 78, 79, 80,
+ 81, 82, 82, 82, 82, 82, 59, 56, 54, 52, 50, 52, 53, 55, 56, 58, 60, 62,
+ 64, 65, 67, 69, 71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 84,
+ 84, 84, 60, 58, 55, 53, 51, 53, 54, 56, 57, 59, 61, 63, 65, 66, 68, 70,
+ 72, 73, 74, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 85, 85, 85, 62, 59,
+ 57, 55, 53, 54, 55, 57, 58, 60, 61, 63, 65, 67, 69, 70, 72, 74, 75, 77,
+ 78, 79, 80, 81, 82, 83, 84, 85, 86, 86, 86, 86, 63, 61, 58, 56, 54, 55,
+ 57, 58, 59, 61, 62, 64, 66, 68, 69, 71, 73, 75, 76, 77, 79, 80, 81, 82,
+ 84, 85, 86, 86, 87, 87, 87, 87, 65, 62, 60, 57, 55, 57, 58, 59, 60, 62,
+ 63, 65, 67, 69, 70, 72, 74, 75, 77, 78, 80, 81, 82, 83, 85, 86, 86, 87,
+ 88, 88, 88, 88, 66, 64, 61, 59, 57, 58, 59, 60, 61, 63, 65, 66, 68, 69,
+ 71, 73, 75, 76, 78, 79, 81, 82, 83, 84, 86, 86, 87, 88, 89, 89, 89, 89,
+ 68, 65, 63, 60, 58, 59, 60, 62, 63, 64, 66, 67, 69, 70, 72, 74, 76, 77,
+ 78, 80, 82, 83, 84, 85, 86, 87, 88, 89, 90, 90, 90, 90, 70, 67, 64, 62,
+ 60, 61, 62, 63, 64, 65, 67, 68, 70, 71, 73, 75, 76, 78, 79, 81, 82, 84,
+ 85, 86, 87, 88, 89, 90, 91, 91, 91, 91, 70, 67, 64, 62, 60, 61, 62, 63,
+ 64, 65, 67, 68, 70, 71, 73, 75, 76, 78, 79, 81, 82, 84, 85, 86, 87, 88,
+ 89, 90, 91, 91, 91, 91, 70, 67, 64, 62, 60, 61, 62, 63, 64, 65, 67, 68,
+ 70, 71, 73, 75, 76, 78, 79, 81, 82, 84, 85, 86, 87, 88, 89, 90, 91, 91,
+ 91, 91, 70, 67, 64, 62, 60, 61, 62, 63, 64, 65, 67, 68, 70, 71, 73, 75,
+ 76, 78, 79, 81, 82, 84, 85, 86, 87, 88, 89, 90, 91, 91, 91, 91 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 69, 100, 132, 69, 93, 117, 139, 100, 117, 142, 157, 132, 139, 157,
+ 169,
+ /* Size 8 */
+ 64, 51, 54, 67, 84, 100, 114, 126, 51, 57, 55, 62, 75, 89, 104, 116, 54,
+ 55, 71, 80, 89, 100, 111, 121, 67, 62, 80, 94, 104, 113, 121, 129, 84,
+ 75, 89, 104, 115, 123, 130, 136, 100, 89, 100, 113, 123, 131, 137, 142,
+ 114, 104, 111, 121, 130, 137, 142, 146, 126, 116, 121, 129, 136, 142,
+ 146, 150,
+ /* Size 16 */
+ 64, 57, 51, 53, 54, 60, 67, 75, 84, 91, 100, 106, 114, 119, 126, 126,
+ 57, 55, 54, 54, 55, 59, 65, 71, 79, 86, 94, 101, 108, 114, 121, 121, 51,
+ 54, 57, 56, 55, 58, 62, 68, 75, 82, 89, 96, 104, 109, 116, 116, 53, 54,
+ 56, 59, 62, 66, 70, 75, 82, 88, 94, 100, 107, 113, 119, 119, 54, 55, 55,
+ 62, 71, 76, 80, 84, 89, 94, 100, 105, 111, 116, 121, 121, 60, 59, 58,
+ 66, 76, 81, 87, 91, 96, 101, 106, 111, 116, 120, 125, 125, 67, 65, 62,
+ 70, 80, 87, 94, 99, 104, 108, 113, 116, 121, 125, 129, 129, 75, 71, 68,
+ 75, 84, 91, 99, 104, 109, 113, 118, 121, 125, 128, 132, 132, 84, 79, 75,
+ 82, 89, 96, 104, 109, 115, 119, 123, 126, 130, 133, 136, 136, 91, 86,
+ 82, 88, 94, 101, 108, 113, 119, 123, 127, 130, 133, 136, 139, 139, 100,
+ 94, 89, 94, 100, 106, 113, 118, 123, 127, 131, 134, 137, 139, 142, 142,
+ 106, 101, 96, 100, 105, 111, 116, 121, 126, 130, 134, 137, 140, 142,
+ 144, 144, 114, 108, 104, 107, 111, 116, 121, 125, 130, 133, 137, 140,
+ 142, 144, 146, 146, 119, 114, 109, 113, 116, 120, 125, 128, 133, 136,
+ 139, 142, 144, 146, 148, 148, 126, 121, 116, 119, 121, 125, 129, 132,
+ 136, 139, 142, 144, 146, 148, 150, 150, 126, 121, 116, 119, 121, 125,
+ 129, 132, 136, 139, 142, 144, 146, 148, 150, 150,
+ /* Size 32 */
+ 64, 60, 57, 54, 51, 52, 53, 54, 54, 57, 60, 64, 67, 71, 75, 79, 84, 87,
+ 91, 95, 100, 103, 106, 110, 114, 117, 119, 122, 126, 126, 126, 126, 60,
+ 58, 56, 54, 52, 53, 53, 54, 55, 57, 60, 63, 66, 69, 73, 77, 81, 85, 88,
+ 92, 97, 100, 103, 107, 111, 114, 117, 120, 123, 123, 123, 123, 57, 56,
+ 55, 55, 54, 54, 54, 54, 55, 57, 59, 62, 65, 68, 71, 75, 79, 82, 86, 90,
+ 94, 97, 101, 104, 108, 111, 114, 117, 121, 121, 121, 121, 54, 54, 55,
+ 55, 55, 55, 55, 55, 55, 57, 59, 61, 64, 66, 70, 73, 77, 80, 84, 88, 92,
+ 95, 98, 102, 106, 109, 112, 115, 118, 118, 118, 118, 51, 52, 54, 55, 57,
+ 56, 56, 56, 55, 57, 58, 60, 62, 65, 68, 71, 75, 78, 82, 85, 89, 93, 96,
+ 100, 104, 106, 109, 113, 116, 116, 116, 116, 52, 53, 54, 55, 56, 57, 57,
+ 58, 58, 60, 62, 64, 66, 69, 72, 75, 78, 81, 84, 88, 92, 95, 98, 102,
+ 105, 108, 111, 114, 117, 117, 117, 117, 53, 53, 54, 55, 56, 57, 59, 61,
+ 62, 64, 66, 68, 70, 73, 75, 78, 82, 84, 88, 91, 94, 97, 100, 104, 107,
+ 110, 113, 116, 119, 119, 119, 119, 54, 54, 54, 55, 56, 58, 61, 63, 67,
+ 68, 70, 72, 75, 77, 80, 82, 85, 88, 91, 94, 97, 100, 103, 106, 109, 112,
+ 114, 117, 120, 120, 120, 120, 54, 55, 55, 55, 55, 58, 62, 67, 71, 73,
+ 76, 78, 80, 82, 84, 87, 89, 92, 94, 97, 100, 103, 105, 108, 111, 114,
+ 116, 119, 121, 121, 121, 121, 57, 57, 57, 57, 57, 60, 64, 68, 73, 76,
+ 78, 80, 83, 85, 88, 90, 92, 95, 97, 100, 103, 105, 108, 111, 113, 116,
+ 118, 121, 123, 123, 123, 123, 60, 60, 59, 59, 58, 62, 66, 70, 76, 78,
+ 81, 84, 87, 89, 91, 93, 96, 98, 101, 103, 106, 108, 111, 113, 116, 118,
+ 120, 122, 125, 125, 125, 125, 64, 63, 62, 61, 60, 64, 68, 72, 78, 80,
+ 84, 87, 90, 92, 95, 97, 100, 102, 104, 107, 109, 111, 113, 116, 118,
+ 120, 122, 124, 127, 127, 127, 127, 67, 66, 65, 64, 62, 66, 70, 75, 80,
+ 83, 87, 90, 94, 97, 99, 101, 104, 106, 108, 110, 113, 114, 116, 119,
+ 121, 123, 125, 127, 129, 129, 129, 129, 71, 69, 68, 66, 65, 69, 73, 77,
+ 82, 85, 89, 92, 97, 99, 101, 104, 107, 109, 111, 113, 115, 117, 119,
+ 121, 123, 125, 126, 128, 130, 130, 130, 130, 75, 73, 71, 70, 68, 72, 75,
+ 80, 84, 88, 91, 95, 99, 101, 104, 106, 109, 111, 113, 115, 118, 119,
+ 121, 123, 125, 127, 128, 130, 132, 132, 132, 132, 79, 77, 75, 73, 71,
+ 75, 78, 82, 87, 90, 93, 97, 101, 104, 106, 109, 112, 114, 116, 118, 120,
+ 122, 124, 126, 127, 129, 131, 132, 134, 134, 134, 134, 84, 81, 79, 77,
+ 75, 78, 82, 85, 89, 92, 96, 100, 104, 107, 109, 112, 115, 117, 119, 121,
+ 123, 125, 126, 128, 130, 131, 133, 134, 136, 136, 136, 136, 87, 85, 82,
+ 80, 78, 81, 84, 88, 92, 95, 98, 102, 106, 109, 111, 114, 117, 119, 121,
+ 123, 125, 127, 128, 130, 132, 133, 134, 136, 137, 137, 137, 137, 91, 88,
+ 86, 84, 82, 84, 88, 91, 94, 97, 101, 104, 108, 111, 113, 116, 119, 121,
+ 123, 125, 127, 129, 130, 132, 133, 135, 136, 137, 139, 139, 139, 139,
+ 95, 92, 90, 88, 85, 88, 91, 94, 97, 100, 103, 107, 110, 113, 115, 118,
+ 121, 123, 125, 127, 129, 131, 132, 134, 135, 136, 138, 139, 140, 140,
+ 140, 140, 100, 97, 94, 92, 89, 92, 94, 97, 100, 103, 106, 109, 113, 115,
+ 118, 120, 123, 125, 127, 129, 131, 133, 134, 135, 137, 138, 139, 141,
+ 142, 142, 142, 142, 103, 100, 97, 95, 93, 95, 97, 100, 103, 105, 108,
+ 111, 114, 117, 119, 122, 125, 127, 129, 131, 133, 134, 135, 137, 138,
+ 139, 141, 142, 143, 143, 143, 143, 106, 103, 101, 98, 96, 98, 100, 103,
+ 105, 108, 111, 113, 116, 119, 121, 124, 126, 128, 130, 132, 134, 135,
+ 137, 138, 140, 141, 142, 143, 144, 144, 144, 144, 110, 107, 104, 102,
+ 100, 102, 104, 106, 108, 111, 113, 116, 119, 121, 123, 126, 128, 130,
+ 132, 134, 135, 137, 138, 140, 141, 142, 143, 144, 145, 145, 145, 145,
+ 114, 111, 108, 106, 104, 105, 107, 109, 111, 113, 116, 118, 121, 123,
+ 125, 127, 130, 132, 133, 135, 137, 138, 140, 141, 142, 143, 144, 145,
+ 146, 146, 146, 146, 117, 114, 111, 109, 106, 108, 110, 112, 114, 116,
+ 118, 120, 123, 125, 127, 129, 131, 133, 135, 136, 138, 139, 141, 142,
+ 143, 144, 145, 146, 147, 147, 147, 147, 119, 117, 114, 112, 109, 111,
+ 113, 114, 116, 118, 120, 122, 125, 126, 128, 131, 133, 134, 136, 138,
+ 139, 141, 142, 143, 144, 145, 146, 147, 148, 148, 148, 148, 122, 120,
+ 117, 115, 113, 114, 116, 117, 119, 121, 122, 124, 127, 128, 130, 132,
+ 134, 136, 137, 139, 141, 142, 143, 144, 145, 146, 147, 148, 149, 149,
+ 149, 149, 126, 123, 121, 118, 116, 117, 119, 120, 121, 123, 125, 127,
+ 129, 130, 132, 134, 136, 137, 139, 140, 142, 143, 144, 145, 146, 147,
+ 148, 149, 150, 150, 150, 150, 126, 123, 121, 118, 116, 117, 119, 120,
+ 121, 123, 125, 127, 129, 130, 132, 134, 136, 137, 139, 140, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 150, 150, 150, 126, 123, 121, 118,
+ 116, 117, 119, 120, 121, 123, 125, 127, 129, 130, 132, 134, 136, 137,
+ 139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 150, 150, 150,
+ 126, 123, 121, 118, 116, 117, 119, 120, 121, 123, 125, 127, 129, 130,
+ 132, 134, 136, 137, 139, 140, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 150, 150, 150 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 32, 34, 51, 69, 34, 47, 60, 73, 51, 60, 75, 83, 69, 73, 83, 90,
+ /* Size 8 */
+ 37, 29, 31, 39, 49, 59, 68, 76, 29, 33, 31, 36, 44, 53, 61, 70, 31, 31,
+ 41, 47, 52, 59, 66, 73, 39, 36, 47, 56, 62, 67, 73, 78, 49, 44, 52, 62,
+ 69, 74, 79, 83, 59, 53, 59, 67, 74, 79, 83, 87, 68, 61, 66, 73, 79, 83,
+ 87, 90, 76, 70, 73, 78, 83, 87, 90, 92,
+ /* Size 16 */
+ 36, 32, 28, 29, 30, 34, 38, 42, 48, 52, 57, 61, 66, 70, 74, 74, 32, 31,
+ 30, 30, 30, 33, 36, 40, 45, 49, 54, 58, 63, 67, 71, 71, 28, 30, 32, 31,
+ 31, 33, 35, 38, 42, 46, 51, 55, 60, 64, 68, 68, 29, 30, 31, 33, 35, 37,
+ 40, 43, 46, 50, 54, 58, 62, 66, 69, 69, 30, 30, 31, 35, 40, 43, 45, 48,
+ 51, 54, 58, 61, 65, 68, 71, 71, 34, 33, 33, 37, 43, 46, 49, 52, 55, 58,
+ 61, 64, 68, 70, 73, 73, 38, 36, 35, 40, 45, 49, 54, 57, 60, 63, 65, 68,
+ 71, 73, 76, 76, 42, 40, 38, 43, 48, 52, 57, 60, 63, 66, 69, 71, 74, 76,
+ 78, 78, 48, 45, 42, 46, 51, 55, 60, 63, 67, 70, 72, 74, 77, 78, 80, 80,
+ 52, 49, 46, 50, 54, 58, 63, 66, 70, 72, 75, 77, 79, 81, 82, 82, 57, 54,
+ 51, 54, 58, 61, 65, 69, 72, 75, 77, 79, 81, 83, 84, 84, 61, 58, 55, 58,
+ 61, 64, 68, 71, 74, 77, 79, 81, 83, 84, 86, 86, 66, 63, 60, 62, 65, 68,
+ 71, 74, 77, 79, 81, 83, 85, 86, 87, 87, 70, 67, 64, 66, 68, 70, 73, 76,
+ 78, 81, 83, 84, 86, 87, 89, 89, 74, 71, 68, 69, 71, 73, 76, 78, 80, 82,
+ 84, 86, 87, 89, 90, 90, 74, 71, 68, 69, 71, 73, 76, 78, 80, 82, 84, 86,
+ 87, 89, 90, 90,
+ /* Size 32 */
+ 35, 33, 31, 30, 28, 28, 29, 29, 30, 31, 33, 35, 37, 39, 42, 44, 47, 49,
+ 51, 54, 57, 59, 61, 63, 65, 67, 69, 71, 73, 73, 73, 73, 33, 32, 31, 30,
+ 29, 29, 29, 30, 30, 31, 33, 35, 37, 39, 41, 43, 46, 48, 50, 52, 55, 57,
+ 59, 61, 64, 65, 67, 69, 71, 71, 71, 71, 31, 31, 30, 30, 30, 30, 30, 30,
+ 30, 31, 33, 34, 36, 38, 40, 42, 44, 46, 48, 51, 53, 55, 57, 60, 62, 64,
+ 66, 68, 70, 70, 70, 70, 30, 30, 30, 30, 30, 30, 30, 30, 30, 31, 32, 34,
+ 35, 37, 39, 41, 43, 45, 47, 49, 52, 54, 56, 58, 61, 62, 64, 66, 68, 68,
+ 68, 68, 28, 29, 30, 30, 31, 31, 31, 31, 30, 31, 32, 33, 34, 36, 38, 40,
+ 42, 44, 46, 48, 50, 52, 54, 57, 59, 61, 63, 65, 67, 67, 67, 67, 28, 29,
+ 30, 30, 31, 31, 32, 32, 32, 33, 34, 35, 37, 38, 40, 42, 44, 46, 47, 50,
+ 52, 54, 56, 58, 60, 62, 64, 66, 68, 68, 68, 68, 29, 29, 30, 30, 31, 32,
+ 33, 33, 34, 35, 37, 38, 39, 40, 42, 44, 46, 47, 49, 51, 53, 55, 57, 59,
+ 61, 63, 65, 67, 68, 68, 68, 68, 29, 30, 30, 30, 31, 32, 33, 35, 37, 38,
+ 39, 40, 42, 43, 45, 46, 48, 50, 51, 53, 55, 57, 59, 60, 62, 64, 66, 67,
+ 69, 69, 69, 69, 30, 30, 30, 30, 30, 32, 34, 37, 40, 41, 42, 43, 45, 46,
+ 47, 49, 50, 52, 53, 55, 57, 58, 60, 62, 64, 65, 67, 68, 70, 70, 70, 70,
+ 31, 31, 31, 31, 31, 33, 35, 38, 41, 42, 44, 45, 47, 48, 49, 51, 52, 54,
+ 55, 57, 59, 60, 62, 63, 65, 67, 68, 70, 71, 71, 71, 71, 33, 33, 33, 32,
+ 32, 34, 37, 39, 42, 44, 45, 47, 49, 50, 51, 53, 54, 56, 57, 59, 60, 62,
+ 63, 65, 67, 68, 69, 71, 72, 72, 72, 72, 35, 35, 34, 34, 33, 35, 38, 40,
+ 43, 45, 47, 49, 51, 52, 54, 55, 57, 58, 59, 61, 62, 64, 65, 67, 68, 69,
+ 71, 72, 74, 74, 74, 74, 37, 37, 36, 35, 34, 37, 39, 42, 45, 47, 49, 51,
+ 53, 55, 56, 58, 59, 61, 62, 63, 65, 66, 67, 68, 70, 71, 72, 73, 75, 75,
+ 75, 75, 39, 39, 38, 37, 36, 38, 40, 43, 46, 48, 50, 52, 55, 56, 58, 59,
+ 61, 62, 63, 65, 66, 67, 69, 70, 71, 72, 73, 75, 76, 76, 76, 76, 42, 41,
+ 40, 39, 38, 40, 42, 45, 47, 49, 51, 54, 56, 58, 59, 61, 63, 64, 65, 66,
+ 68, 69, 70, 71, 73, 74, 75, 76, 77, 77, 77, 77, 44, 43, 42, 41, 40, 42,
+ 44, 46, 49, 51, 53, 55, 58, 59, 61, 63, 64, 66, 67, 68, 69, 71, 72, 73,
+ 74, 75, 76, 77, 78, 78, 78, 78, 47, 46, 44, 43, 42, 44, 46, 48, 50, 52,
+ 54, 57, 59, 61, 63, 64, 66, 67, 69, 70, 71, 72, 73, 74, 76, 76, 77, 78,
+ 79, 79, 79, 79, 49, 48, 46, 45, 44, 46, 47, 50, 52, 54, 56, 58, 61, 62,
+ 64, 66, 67, 69, 70, 71, 73, 73, 75, 76, 77, 78, 78, 79, 80, 80, 80, 80,
+ 51, 50, 48, 47, 46, 47, 49, 51, 53, 55, 57, 59, 62, 63, 65, 67, 69, 70,
+ 71, 72, 74, 75, 76, 77, 78, 79, 79, 80, 81, 81, 81, 81, 54, 52, 51, 49,
+ 48, 50, 51, 53, 55, 57, 59, 61, 63, 65, 66, 68, 70, 71, 72, 74, 75, 76,
+ 77, 78, 79, 80, 81, 81, 82, 82, 82, 82, 57, 55, 53, 52, 50, 52, 53, 55,
+ 57, 59, 60, 62, 65, 66, 68, 69, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81,
+ 82, 82, 83, 83, 83, 83, 59, 57, 55, 54, 52, 54, 55, 57, 58, 60, 62, 64,
+ 66, 67, 69, 71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 82, 82, 83, 84, 84,
+ 84, 84, 61, 59, 57, 56, 54, 56, 57, 59, 60, 62, 63, 65, 67, 69, 70, 72,
+ 73, 75, 76, 77, 78, 79, 80, 81, 82, 83, 83, 84, 85, 85, 85, 85, 63, 61,
+ 60, 58, 57, 58, 59, 60, 62, 63, 65, 67, 68, 70, 71, 73, 74, 76, 77, 78,
+ 79, 80, 81, 82, 83, 83, 84, 85, 85, 85, 85, 85, 65, 64, 62, 61, 59, 60,
+ 61, 62, 64, 65, 67, 68, 70, 71, 73, 74, 76, 77, 78, 79, 80, 81, 82, 83,
+ 84, 84, 85, 86, 86, 86, 86, 86, 67, 65, 64, 62, 61, 62, 63, 64, 65, 67,
+ 68, 69, 71, 72, 74, 75, 76, 78, 79, 80, 81, 82, 83, 83, 84, 85, 86, 86,
+ 87, 87, 87, 87, 69, 67, 66, 64, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73,
+ 75, 76, 77, 78, 79, 81, 82, 82, 83, 84, 85, 86, 86, 87, 87, 87, 87, 87,
+ 71, 69, 68, 66, 65, 66, 67, 67, 68, 70, 71, 72, 73, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 86, 87, 87, 88, 88, 88, 88, 73, 71, 70, 68,
+ 67, 68, 68, 69, 70, 71, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
+ 85, 85, 86, 87, 87, 88, 89, 89, 89, 89, 73, 71, 70, 68, 67, 68, 68, 69,
+ 70, 71, 72, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 85, 86, 87,
+ 87, 88, 89, 89, 89, 89, 73, 71, 70, 68, 67, 68, 68, 69, 70, 71, 72, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 85, 86, 87, 87, 88, 89, 89,
+ 89, 89, 73, 71, 70, 68, 67, 68, 68, 69, 70, 71, 72, 74, 75, 76, 77, 78,
+ 79, 80, 81, 82, 83, 84, 85, 85, 86, 87, 87, 88, 89, 89, 89, 89 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 91, 97, 113, 91, 104, 108, 119, 97, 108, 127, 138, 113, 119, 138,
+ 151,
+ /* Size 8 */
+ 64, 54, 86, 89, 93, 101, 109, 118, 54, 72, 87, 81, 82, 88, 96, 104, 86,
+ 87, 97, 95, 94, 98, 103, 110, 89, 81, 95, 103, 106, 109, 113, 119, 93,
+ 82, 94, 106, 114, 119, 123, 127, 101, 88, 98, 109, 119, 126, 131, 135,
+ 109, 96, 103, 113, 123, 131, 137, 142, 118, 104, 110, 119, 127, 135,
+ 142, 147,
+ /* Size 16 */
+ 64, 58, 54, 66, 86, 87, 89, 91, 93, 97, 101, 105, 109, 114, 118, 118,
+ 58, 60, 62, 72, 86, 85, 85, 86, 88, 91, 94, 98, 102, 106, 111, 111, 54,
+ 62, 72, 79, 87, 84, 81, 81, 82, 85, 88, 92, 96, 100, 104, 104, 66, 72,
+ 79, 85, 91, 89, 87, 87, 88, 90, 93, 96, 100, 103, 107, 107, 86, 86, 87,
+ 91, 97, 96, 95, 94, 94, 96, 98, 100, 103, 107, 110, 110, 87, 85, 84, 89,
+ 96, 97, 98, 99, 100, 101, 103, 106, 108, 111, 114, 114, 89, 85, 81, 87,
+ 95, 98, 103, 104, 106, 108, 109, 111, 113, 116, 119, 119, 91, 86, 81,
+ 87, 94, 99, 104, 107, 110, 112, 114, 116, 118, 120, 123, 123, 93, 88,
+ 82, 88, 94, 100, 106, 110, 114, 116, 119, 121, 123, 125, 127, 127, 97,
+ 91, 85, 90, 96, 101, 108, 112, 116, 119, 122, 124, 127, 129, 131, 131,
+ 101, 94, 88, 93, 98, 103, 109, 114, 119, 122, 126, 128, 131, 133, 135,
+ 135, 105, 98, 92, 96, 100, 106, 111, 116, 121, 124, 128, 131, 134, 136,
+ 138, 138, 109, 102, 96, 100, 103, 108, 113, 118, 123, 127, 131, 134,
+ 137, 139, 142, 142, 114, 106, 100, 103, 107, 111, 116, 120, 125, 129,
+ 133, 136, 139, 142, 144, 144, 118, 111, 104, 107, 110, 114, 119, 123,
+ 127, 131, 135, 138, 142, 144, 147, 147, 118, 111, 104, 107, 110, 114,
+ 119, 123, 127, 131, 135, 138, 142, 144, 147, 147,
+ /* Size 32 */
+ 64, 61, 58, 56, 54, 59, 66, 75, 86, 86, 87, 88, 89, 90, 91, 92, 93, 95,
+ 97, 99, 101, 103, 105, 107, 109, 111, 114, 116, 118, 118, 118, 118, 61,
+ 60, 59, 58, 57, 63, 69, 76, 86, 86, 86, 87, 87, 88, 89, 90, 90, 92, 94,
+ 96, 97, 99, 101, 103, 106, 108, 110, 112, 114, 114, 114, 114, 58, 59,
+ 60, 61, 62, 66, 72, 78, 86, 86, 85, 85, 85, 85, 86, 87, 88, 89, 91, 92,
+ 94, 96, 98, 100, 102, 104, 106, 109, 111, 111, 111, 111, 56, 58, 61, 63,
+ 66, 71, 75, 80, 86, 85, 84, 84, 83, 83, 84, 84, 85, 86, 88, 89, 91, 93,
+ 95, 97, 99, 101, 103, 105, 108, 108, 108, 108, 54, 57, 62, 66, 72, 75,
+ 79, 83, 87, 85, 84, 82, 81, 81, 81, 82, 82, 84, 85, 87, 88, 90, 92, 94,
+ 96, 98, 100, 102, 104, 104, 104, 104, 59, 63, 66, 71, 75, 78, 82, 85,
+ 89, 88, 86, 85, 84, 84, 84, 85, 85, 86, 88, 89, 90, 92, 94, 96, 98, 100,
+ 102, 104, 106, 106, 106, 106, 66, 69, 72, 75, 79, 82, 85, 88, 91, 90,
+ 89, 88, 87, 87, 87, 88, 88, 89, 90, 91, 93, 94, 96, 98, 100, 101, 103,
+ 105, 107, 107, 107, 107, 75, 76, 78, 80, 83, 85, 88, 91, 94, 93, 92, 91,
+ 91, 91, 91, 91, 91, 92, 93, 94, 95, 97, 98, 100, 101, 103, 105, 107,
+ 109, 109, 109, 109, 86, 86, 86, 86, 87, 89, 91, 94, 97, 96, 96, 95, 95,
+ 94, 94, 94, 94, 95, 96, 97, 98, 99, 100, 102, 103, 105, 107, 109, 110,
+ 110, 110, 110, 86, 86, 86, 85, 85, 88, 90, 93, 96, 96, 96, 96, 96, 97,
+ 97, 97, 97, 98, 99, 99, 100, 102, 103, 104, 106, 107, 109, 111, 112,
+ 112, 112, 112, 87, 86, 85, 84, 84, 86, 89, 92, 96, 96, 97, 98, 98, 99,
+ 99, 100, 100, 101, 101, 102, 103, 104, 106, 107, 108, 110, 111, 113,
+ 114, 114, 114, 114, 88, 87, 85, 84, 82, 85, 88, 91, 95, 96, 98, 99, 101,
+ 101, 102, 102, 103, 104, 104, 105, 106, 107, 108, 109, 111, 112, 113,
+ 115, 116, 116, 116, 116, 89, 87, 85, 83, 81, 84, 87, 91, 95, 96, 98,
+ 101, 103, 104, 104, 105, 106, 107, 108, 108, 109, 110, 111, 112, 113,
+ 115, 116, 117, 119, 119, 119, 119, 90, 88, 85, 83, 81, 84, 87, 91, 94,
+ 97, 99, 101, 104, 105, 106, 107, 108, 109, 110, 110, 111, 112, 113, 114,
+ 116, 117, 118, 119, 121, 121, 121, 121, 91, 89, 86, 84, 81, 84, 87, 91,
+ 94, 97, 99, 102, 104, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 122, 123, 123, 123, 123, 92, 90, 87, 84, 82, 85, 88,
+ 91, 94, 97, 100, 102, 105, 107, 108, 110, 112, 113, 114, 115, 116, 117,
+ 118, 119, 120, 122, 123, 124, 125, 125, 125, 125, 93, 90, 88, 85, 82,
+ 85, 88, 91, 94, 97, 100, 103, 106, 108, 110, 112, 114, 115, 116, 118,
+ 119, 120, 121, 122, 123, 124, 125, 126, 127, 127, 127, 127, 95, 92, 89,
+ 86, 84, 86, 89, 92, 95, 98, 101, 104, 107, 109, 111, 113, 115, 116, 118,
+ 119, 120, 122, 123, 124, 125, 126, 127, 128, 129, 129, 129, 129, 97, 94,
+ 91, 88, 85, 88, 90, 93, 96, 99, 101, 104, 108, 110, 112, 114, 116, 118,
+ 119, 121, 122, 123, 124, 126, 127, 128, 129, 130, 131, 131, 131, 131,
+ 99, 96, 92, 89, 87, 89, 91, 94, 97, 99, 102, 105, 108, 110, 113, 115,
+ 118, 119, 121, 122, 124, 125, 126, 128, 129, 130, 131, 132, 133, 133,
+ 133, 133, 101, 97, 94, 91, 88, 90, 93, 95, 98, 100, 103, 106, 109, 111,
+ 114, 116, 119, 120, 122, 124, 126, 127, 128, 130, 131, 132, 133, 134,
+ 135, 135, 135, 135, 103, 99, 96, 93, 90, 92, 94, 97, 99, 102, 104, 107,
+ 110, 112, 115, 117, 120, 122, 123, 125, 127, 128, 130, 131, 132, 133,
+ 134, 136, 137, 137, 137, 137, 105, 101, 98, 95, 92, 94, 96, 98, 100,
+ 103, 106, 108, 111, 113, 116, 118, 121, 123, 124, 126, 128, 130, 131,
+ 132, 134, 135, 136, 137, 138, 138, 138, 138, 107, 103, 100, 97, 94, 96,
+ 98, 100, 102, 104, 107, 109, 112, 114, 117, 119, 122, 124, 126, 128,
+ 130, 131, 132, 134, 135, 136, 138, 139, 140, 140, 140, 140, 109, 106,
+ 102, 99, 96, 98, 100, 101, 103, 106, 108, 111, 113, 116, 118, 120, 123,
+ 125, 127, 129, 131, 132, 134, 135, 137, 138, 139, 140, 142, 142, 142,
+ 142, 111, 108, 104, 101, 98, 100, 101, 103, 105, 107, 110, 112, 115,
+ 117, 119, 122, 124, 126, 128, 130, 132, 133, 135, 136, 138, 139, 140,
+ 142, 143, 143, 143, 143, 114, 110, 106, 103, 100, 102, 103, 105, 107,
+ 109, 111, 113, 116, 118, 120, 123, 125, 127, 129, 131, 133, 134, 136,
+ 138, 139, 140, 142, 143, 144, 144, 144, 144, 116, 112, 109, 105, 102,
+ 104, 105, 107, 109, 111, 113, 115, 117, 119, 122, 124, 126, 128, 130,
+ 132, 134, 136, 137, 139, 140, 142, 143, 144, 145, 145, 145, 145, 118,
+ 114, 111, 108, 104, 106, 107, 109, 110, 112, 114, 116, 119, 121, 123,
+ 125, 127, 129, 131, 133, 135, 137, 138, 140, 142, 143, 144, 145, 147,
+ 147, 147, 147, 118, 114, 111, 108, 104, 106, 107, 109, 110, 112, 114,
+ 116, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 138, 140, 142,
+ 143, 144, 145, 147, 147, 147, 147, 118, 114, 111, 108, 104, 106, 107,
+ 109, 110, 112, 114, 116, 119, 121, 123, 125, 127, 129, 131, 133, 135,
+ 137, 138, 140, 142, 143, 144, 145, 147, 147, 147, 147, 118, 114, 111,
+ 108, 104, 106, 107, 109, 110, 112, 114, 116, 119, 121, 123, 125, 127,
+ 129, 131, 133, 135, 137, 138, 140, 142, 143, 144, 145, 147, 147, 147,
+ 147 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 35, 51, 54, 64, 51, 58, 61, 67, 54, 61, 72, 79, 64, 67, 79, 87,
+ /* Size 8 */
+ 37, 31, 51, 53, 56, 60, 66, 71, 31, 42, 51, 48, 49, 52, 57, 63, 51, 51,
+ 58, 56, 56, 58, 62, 66, 53, 48, 56, 61, 64, 66, 68, 72, 56, 49, 56, 64,
+ 69, 72, 75, 77, 60, 52, 58, 66, 72, 76, 80, 83, 66, 57, 62, 68, 75, 80,
+ 84, 87, 71, 63, 66, 72, 77, 83, 87, 90,
+ /* Size 16 */
+ 37, 33, 31, 38, 50, 51, 52, 53, 55, 57, 59, 62, 64, 67, 70, 70, 33, 34,
+ 35, 41, 50, 50, 49, 50, 51, 53, 55, 57, 60, 63, 65, 65, 31, 35, 41, 45,
+ 50, 48, 47, 47, 48, 49, 51, 54, 56, 59, 61, 61, 38, 41, 45, 49, 53, 52,
+ 51, 51, 51, 53, 54, 56, 58, 61, 63, 63, 50, 50, 50, 53, 57, 56, 55, 55,
+ 55, 56, 57, 59, 61, 63, 65, 65, 51, 50, 48, 52, 56, 57, 58, 58, 58, 59,
+ 60, 62, 64, 66, 68, 68, 52, 49, 47, 51, 55, 58, 60, 61, 62, 63, 64, 66,
+ 67, 69, 70, 70, 53, 50, 47, 51, 55, 58, 61, 63, 65, 66, 67, 69, 70, 71,
+ 73, 73, 55, 51, 48, 51, 55, 58, 62, 65, 67, 69, 70, 72, 73, 75, 76, 76,
+ 57, 53, 49, 53, 56, 59, 63, 66, 69, 71, 73, 74, 76, 77, 78, 78, 59, 55,
+ 51, 54, 57, 60, 64, 67, 70, 73, 75, 77, 78, 80, 81, 81, 62, 57, 54, 56,
+ 59, 62, 66, 69, 72, 74, 77, 78, 80, 82, 83, 83, 64, 60, 56, 58, 61, 64,
+ 67, 70, 73, 76, 78, 80, 82, 84, 85, 85, 67, 63, 59, 61, 63, 66, 69, 71,
+ 75, 77, 80, 82, 84, 85, 87, 87, 70, 65, 61, 63, 65, 68, 70, 73, 76, 78,
+ 81, 83, 85, 87, 89, 89, 70, 65, 61, 63, 65, 68, 70, 73, 76, 78, 81, 83,
+ 85, 87, 89, 89,
+ /* Size 32 */
+ 36, 35, 33, 32, 30, 33, 37, 43, 49, 50, 50, 51, 51, 52, 53, 53, 54, 55,
+ 56, 57, 58, 60, 61, 62, 64, 65, 66, 68, 69, 69, 69, 69, 35, 34, 33, 33,
+ 32, 35, 39, 44, 49, 49, 50, 50, 50, 50, 51, 52, 52, 53, 54, 55, 56, 58,
+ 59, 60, 62, 63, 64, 66, 67, 67, 67, 67, 33, 33, 34, 34, 35, 38, 41, 45,
+ 49, 49, 49, 49, 49, 49, 49, 50, 50, 51, 52, 53, 54, 56, 57, 58, 59, 61,
+ 62, 63, 65, 65, 65, 65, 32, 33, 34, 36, 38, 40, 43, 46, 50, 49, 48, 48,
+ 47, 48, 48, 48, 49, 50, 51, 51, 52, 54, 55, 56, 57, 59, 60, 61, 63, 63,
+ 63, 63, 30, 32, 35, 38, 41, 43, 45, 47, 50, 49, 48, 47, 46, 46, 47, 47,
+ 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 61, 61, 61, 61, 33, 35,
+ 38, 40, 43, 45, 47, 49, 51, 50, 50, 49, 48, 48, 48, 49, 49, 50, 50, 51,
+ 52, 53, 54, 55, 57, 58, 59, 60, 62, 62, 62, 62, 37, 39, 41, 43, 45, 47,
+ 49, 51, 53, 52, 51, 51, 50, 50, 50, 50, 51, 51, 52, 53, 54, 54, 56, 57,
+ 58, 59, 60, 61, 63, 63, 63, 63, 43, 44, 45, 46, 47, 49, 51, 52, 54, 54,
+ 53, 53, 52, 52, 52, 52, 52, 53, 54, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 63, 63, 63, 49, 49, 49, 50, 50, 51, 53, 54, 56, 56, 55, 55, 55, 55,
+ 55, 55, 54, 55, 56, 56, 57, 57, 58, 59, 60, 61, 62, 63, 64, 64, 64, 64,
+ 50, 49, 49, 49, 49, 50, 52, 54, 56, 56, 56, 56, 56, 56, 56, 56, 56, 57,
+ 57, 58, 58, 59, 60, 61, 62, 63, 64, 65, 66, 66, 66, 66, 50, 50, 49, 48,
+ 48, 50, 51, 53, 55, 56, 56, 57, 57, 57, 57, 58, 58, 58, 59, 59, 60, 61,
+ 61, 62, 63, 64, 65, 66, 67, 67, 67, 67, 51, 50, 49, 48, 47, 49, 51, 53,
+ 55, 56, 57, 57, 58, 59, 59, 59, 60, 60, 61, 61, 62, 62, 63, 64, 65, 65,
+ 66, 67, 68, 68, 68, 68, 51, 50, 49, 47, 46, 48, 50, 52, 55, 56, 57, 58,
+ 60, 60, 61, 61, 62, 62, 63, 63, 64, 64, 65, 66, 66, 67, 68, 69, 70, 70,
+ 70, 70, 52, 50, 49, 48, 46, 48, 50, 52, 55, 56, 57, 59, 60, 61, 62, 62,
+ 63, 63, 64, 65, 65, 66, 66, 67, 68, 68, 69, 70, 71, 71, 71, 71, 53, 51,
+ 49, 48, 47, 48, 50, 52, 55, 56, 57, 59, 61, 62, 62, 63, 64, 65, 65, 66,
+ 67, 67, 68, 69, 69, 70, 71, 71, 72, 72, 72, 72, 53, 52, 50, 48, 47, 49,
+ 50, 52, 55, 56, 58, 59, 61, 62, 63, 64, 65, 66, 67, 67, 68, 69, 69, 70,
+ 71, 71, 72, 73, 74, 74, 74, 74, 54, 52, 50, 49, 47, 49, 51, 52, 54, 56,
+ 58, 60, 62, 63, 64, 65, 67, 67, 68, 69, 70, 70, 71, 72, 72, 73, 74, 74,
+ 75, 75, 75, 75, 55, 53, 51, 50, 48, 50, 51, 53, 55, 57, 58, 60, 62, 63,
+ 65, 66, 67, 68, 69, 70, 71, 71, 72, 73, 74, 74, 75, 76, 76, 76, 76, 76,
+ 56, 54, 52, 51, 49, 50, 52, 54, 56, 57, 59, 61, 63, 64, 65, 67, 68, 69,
+ 70, 71, 72, 73, 73, 74, 75, 76, 76, 77, 78, 78, 78, 78, 57, 55, 53, 51,
+ 50, 51, 53, 54, 56, 58, 59, 61, 63, 65, 66, 67, 69, 70, 71, 72, 73, 74,
+ 75, 75, 76, 77, 78, 78, 79, 79, 79, 79, 58, 56, 54, 52, 51, 52, 54, 55,
+ 57, 58, 60, 62, 64, 65, 67, 68, 70, 71, 72, 73, 74, 75, 76, 77, 77, 78,
+ 79, 80, 80, 80, 80, 80, 60, 58, 56, 54, 52, 53, 54, 56, 57, 59, 61, 62,
+ 64, 66, 67, 69, 70, 71, 73, 74, 75, 76, 77, 78, 78, 79, 80, 81, 81, 81,
+ 81, 81, 61, 59, 57, 55, 53, 54, 56, 57, 58, 60, 61, 63, 65, 66, 68, 69,
+ 71, 72, 73, 75, 76, 77, 78, 78, 79, 80, 81, 82, 82, 82, 82, 82, 62, 60,
+ 58, 56, 54, 55, 57, 58, 59, 61, 62, 64, 66, 67, 69, 70, 72, 73, 74, 75,
+ 77, 78, 78, 79, 80, 81, 82, 83, 83, 83, 83, 83, 64, 62, 59, 57, 55, 57,
+ 58, 59, 60, 62, 63, 65, 66, 68, 69, 71, 72, 74, 75, 76, 77, 78, 79, 80,
+ 81, 82, 83, 84, 84, 84, 84, 84, 65, 63, 61, 59, 57, 58, 59, 60, 61, 63,
+ 64, 65, 67, 68, 70, 71, 73, 74, 76, 77, 78, 79, 80, 81, 82, 83, 84, 84,
+ 85, 85, 85, 85, 66, 64, 62, 60, 58, 59, 60, 61, 62, 64, 65, 66, 68, 69,
+ 71, 72, 74, 75, 76, 78, 79, 80, 81, 82, 83, 84, 84, 85, 86, 86, 86, 86,
+ 68, 66, 63, 61, 59, 60, 61, 62, 63, 65, 66, 67, 69, 70, 71, 73, 74, 76,
+ 77, 78, 80, 81, 82, 83, 84, 84, 85, 86, 87, 87, 87, 87, 69, 67, 65, 63,
+ 61, 62, 63, 63, 64, 66, 67, 68, 70, 71, 72, 74, 75, 76, 78, 79, 80, 81,
+ 82, 83, 84, 85, 86, 87, 88, 88, 88, 88, 69, 67, 65, 63, 61, 62, 63, 63,
+ 64, 66, 67, 68, 70, 71, 72, 74, 75, 76, 78, 79, 80, 81, 82, 83, 84, 85,
+ 86, 87, 88, 88, 88, 88, 69, 67, 65, 63, 61, 62, 63, 63, 64, 66, 67, 68,
+ 70, 71, 72, 74, 75, 76, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 88,
+ 88, 88, 69, 67, 65, 63, 61, 62, 63, 63, 64, 66, 67, 68, 70, 71, 72, 74,
+ 75, 76, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 88, 88, 88 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 68, 96, 122, 68, 90, 110, 128, 96, 110, 130, 142, 122, 128, 142,
+ 150,
+ /* Size 8 */
+ 64, 52, 55, 67, 81, 95, 107, 116, 52, 58, 56, 63, 74, 86, 98, 108, 55,
+ 56, 71, 78, 86, 95, 104, 113, 67, 63, 78, 91, 99, 106, 112, 118, 81, 74,
+ 86, 99, 108, 114, 119, 124, 95, 86, 95, 106, 114, 120, 125, 128, 107,
+ 98, 104, 112, 119, 125, 129, 132, 116, 108, 113, 118, 124, 128, 132,
+ 134,
+ /* Size 16 */
+ 64, 57, 52, 53, 55, 61, 67, 74, 81, 88, 95, 100, 107, 111, 116, 116, 57,
+ 56, 55, 55, 55, 60, 65, 71, 77, 84, 91, 96, 102, 107, 112, 112, 52, 55,
+ 58, 57, 56, 59, 63, 68, 74, 80, 86, 92, 98, 103, 108, 108, 53, 55, 57,
+ 59, 62, 66, 70, 74, 80, 85, 91, 96, 101, 106, 110, 110, 55, 55, 56, 62,
+ 71, 74, 78, 82, 86, 91, 95, 100, 104, 108, 113, 113, 61, 60, 59, 66, 74,
+ 79, 84, 88, 92, 96, 100, 104, 108, 112, 115, 115, 67, 65, 63, 70, 78,
+ 84, 91, 94, 99, 102, 106, 109, 112, 115, 118, 118, 74, 71, 68, 74, 82,
+ 88, 94, 98, 103, 106, 110, 112, 116, 118, 121, 121, 81, 77, 74, 80, 86,
+ 92, 99, 103, 108, 111, 114, 117, 119, 121, 124, 124, 88, 84, 80, 85, 91,
+ 96, 102, 106, 111, 114, 117, 119, 122, 124, 126, 126, 95, 91, 86, 91,
+ 95, 100, 106, 110, 114, 117, 120, 122, 125, 126, 128, 128, 100, 96, 92,
+ 96, 100, 104, 109, 112, 117, 119, 122, 124, 127, 128, 130, 130, 107,
+ 102, 98, 101, 104, 108, 112, 116, 119, 122, 125, 127, 129, 130, 132,
+ 132, 111, 107, 103, 106, 108, 112, 115, 118, 121, 124, 126, 128, 130,
+ 131, 133, 133, 116, 112, 108, 110, 113, 115, 118, 121, 124, 126, 128,
+ 130, 132, 133, 134, 134, 116, 112, 108, 110, 113, 115, 118, 121, 124,
+ 126, 128, 130, 132, 133, 134, 134,
+ /* Size 32 */
+ 64, 60, 57, 55, 52, 53, 53, 54, 55, 58, 61, 64, 67, 70, 74, 77, 81, 84,
+ 88, 91, 95, 98, 100, 103, 107, 109, 111, 113, 116, 116, 116, 116, 60,
+ 58, 57, 55, 53, 54, 54, 55, 55, 58, 60, 63, 66, 69, 72, 76, 79, 82, 86,
+ 89, 93, 95, 98, 101, 104, 107, 109, 111, 114, 114, 114, 114, 57, 57, 56,
+ 55, 55, 55, 55, 55, 55, 57, 60, 62, 65, 68, 71, 74, 77, 80, 84, 87, 91,
+ 93, 96, 99, 102, 105, 107, 109, 112, 112, 112, 112, 55, 55, 55, 56, 56,
+ 56, 56, 56, 56, 57, 59, 61, 64, 66, 69, 72, 76, 79, 82, 85, 88, 91, 94,
+ 97, 100, 103, 105, 108, 110, 110, 110, 110, 52, 53, 55, 56, 58, 57, 57,
+ 56, 56, 57, 59, 61, 63, 65, 68, 71, 74, 77, 80, 83, 86, 89, 92, 95, 98,
+ 101, 103, 106, 108, 108, 108, 108, 53, 54, 55, 56, 57, 58, 58, 58, 59,
+ 60, 62, 64, 66, 68, 71, 74, 77, 79, 82, 85, 89, 91, 94, 97, 100, 102,
+ 104, 107, 109, 109, 109, 109, 53, 54, 55, 56, 57, 58, 59, 61, 62, 64,
+ 66, 68, 70, 72, 74, 77, 80, 82, 85, 88, 91, 93, 96, 98, 101, 103, 106,
+ 108, 110, 110, 110, 110, 54, 55, 55, 56, 56, 58, 61, 63, 66, 68, 70, 72,
+ 74, 76, 78, 80, 83, 85, 88, 90, 93, 95, 98, 100, 103, 105, 107, 109,
+ 112, 112, 112, 112, 55, 55, 55, 56, 56, 59, 62, 66, 71, 73, 74, 76, 78,
+ 80, 82, 84, 86, 88, 91, 93, 95, 97, 100, 102, 104, 106, 108, 110, 113,
+ 113, 113, 113, 58, 58, 57, 57, 57, 60, 64, 68, 73, 74, 77, 79, 81, 83,
+ 85, 87, 89, 91, 93, 95, 98, 100, 102, 104, 106, 108, 110, 112, 114, 114,
+ 114, 114, 61, 60, 60, 59, 59, 62, 66, 70, 74, 77, 79, 81, 84, 86, 88,
+ 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 113, 115, 115,
+ 115, 115, 64, 63, 62, 61, 61, 64, 68, 72, 76, 79, 81, 84, 87, 89, 91,
+ 93, 95, 97, 99, 101, 103, 105, 106, 108, 110, 112, 113, 115, 117, 117,
+ 117, 117, 67, 66, 65, 64, 63, 66, 70, 74, 78, 81, 84, 87, 91, 92, 94,
+ 96, 99, 100, 102, 104, 106, 107, 109, 110, 112, 114, 115, 117, 118, 118,
+ 118, 118, 70, 69, 68, 66, 65, 68, 72, 76, 80, 83, 86, 89, 92, 94, 96,
+ 99, 101, 102, 104, 106, 108, 109, 111, 112, 114, 115, 117, 118, 119,
+ 119, 119, 119, 74, 72, 71, 69, 68, 71, 74, 78, 82, 85, 88, 91, 94, 96,
+ 98, 101, 103, 105, 106, 108, 110, 111, 112, 114, 116, 117, 118, 119,
+ 121, 121, 121, 121, 77, 76, 74, 72, 71, 74, 77, 80, 84, 87, 90, 93, 96,
+ 99, 101, 103, 105, 107, 108, 110, 112, 113, 114, 116, 117, 118, 120,
+ 121, 122, 122, 122, 122, 81, 79, 77, 76, 74, 77, 80, 83, 86, 89, 92, 95,
+ 99, 101, 103, 105, 108, 109, 111, 112, 114, 115, 117, 118, 119, 120,
+ 121, 122, 124, 124, 124, 124, 84, 82, 80, 79, 77, 79, 82, 85, 88, 91,
+ 94, 97, 100, 102, 105, 107, 109, 111, 112, 114, 115, 117, 118, 119, 120,
+ 121, 123, 124, 125, 125, 125, 125, 88, 86, 84, 82, 80, 82, 85, 88, 91,
+ 93, 96, 99, 102, 104, 106, 108, 111, 112, 114, 115, 117, 118, 119, 121,
+ 122, 123, 124, 125, 126, 126, 126, 126, 91, 89, 87, 85, 83, 85, 88, 90,
+ 93, 95, 98, 101, 104, 106, 108, 110, 112, 114, 115, 117, 119, 120, 121,
+ 122, 123, 124, 125, 126, 127, 127, 127, 127, 95, 93, 91, 88, 86, 89, 91,
+ 93, 95, 98, 100, 103, 106, 108, 110, 112, 114, 115, 117, 119, 120, 121,
+ 122, 123, 125, 125, 126, 127, 128, 128, 128, 128, 98, 95, 93, 91, 89,
+ 91, 93, 95, 97, 100, 102, 105, 107, 109, 111, 113, 115, 117, 118, 120,
+ 121, 122, 123, 124, 126, 126, 127, 128, 129, 129, 129, 129, 100, 98, 96,
+ 94, 92, 94, 96, 98, 100, 102, 104, 106, 109, 111, 112, 114, 117, 118,
+ 119, 121, 122, 123, 124, 125, 127, 127, 128, 129, 130, 130, 130, 130,
+ 103, 101, 99, 97, 95, 97, 98, 100, 102, 104, 106, 108, 110, 112, 114,
+ 116, 118, 119, 121, 122, 123, 124, 125, 127, 128, 128, 129, 130, 131,
+ 131, 131, 131, 107, 104, 102, 100, 98, 100, 101, 103, 104, 106, 108,
+ 110, 112, 114, 116, 117, 119, 120, 122, 123, 125, 126, 127, 128, 129,
+ 129, 130, 131, 132, 132, 132, 132, 109, 107, 105, 103, 101, 102, 103,
+ 105, 106, 108, 110, 112, 114, 115, 117, 118, 120, 121, 123, 124, 125,
+ 126, 127, 128, 129, 130, 131, 131, 132, 132, 132, 132, 111, 109, 107,
+ 105, 103, 104, 106, 107, 108, 110, 112, 113, 115, 117, 118, 120, 121,
+ 123, 124, 125, 126, 127, 128, 129, 130, 131, 131, 132, 133, 133, 133,
+ 133, 113, 111, 109, 108, 106, 107, 108, 109, 110, 112, 113, 115, 117,
+ 118, 119, 121, 122, 124, 125, 126, 127, 128, 129, 130, 131, 131, 132,
+ 133, 134, 134, 134, 134, 116, 114, 112, 110, 108, 109, 110, 112, 113,
+ 114, 115, 117, 118, 119, 121, 122, 124, 125, 126, 127, 128, 129, 130,
+ 131, 132, 132, 133, 134, 134, 134, 134, 134, 116, 114, 112, 110, 108,
+ 109, 110, 112, 113, 114, 115, 117, 118, 119, 121, 122, 124, 125, 126,
+ 127, 128, 129, 130, 131, 132, 132, 133, 134, 134, 134, 134, 134, 116,
+ 114, 112, 110, 108, 109, 110, 112, 113, 114, 115, 117, 118, 119, 121,
+ 122, 124, 125, 126, 127, 128, 129, 130, 131, 132, 132, 133, 134, 134,
+ 134, 134, 134, 116, 114, 112, 110, 108, 109, 110, 112, 113, 114, 115,
+ 117, 118, 119, 121, 122, 124, 125, 126, 127, 128, 129, 130, 131, 132,
+ 132, 133, 134, 134, 134, 134, 134 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 35, 37, 53, 69, 37, 50, 61, 72, 53, 61, 74, 81, 69, 72, 81, 86,
+ /* Size 8 */
+ 40, 32, 34, 42, 51, 60, 68, 75, 32, 36, 34, 39, 46, 55, 63, 70, 34, 34,
+ 44, 49, 54, 61, 67, 73, 42, 39, 49, 57, 63, 68, 72, 76, 51, 46, 54, 63,
+ 69, 74, 77, 80, 60, 55, 61, 68, 74, 78, 81, 84, 68, 63, 67, 72, 77, 81,
+ 84, 86, 75, 70, 73, 76, 80, 84, 86, 88,
+ /* Size 16 */
+ 39, 35, 31, 32, 33, 37, 41, 45, 50, 54, 59, 63, 67, 70, 73, 73, 35, 34,
+ 33, 33, 33, 36, 39, 43, 47, 51, 56, 60, 64, 67, 70, 70, 31, 33, 35, 34,
+ 34, 36, 38, 41, 45, 49, 53, 57, 61, 64, 68, 68, 32, 33, 34, 36, 38, 40,
+ 42, 45, 49, 52, 56, 59, 63, 66, 69, 69, 33, 33, 34, 38, 43, 45, 48, 50,
+ 53, 56, 59, 62, 65, 68, 71, 71, 37, 36, 36, 40, 45, 48, 52, 54, 57, 60,
+ 62, 65, 68, 70, 73, 73, 41, 39, 38, 42, 48, 52, 56, 59, 61, 64, 66, 68,
+ 71, 73, 75, 75, 45, 43, 41, 45, 50, 54, 59, 61, 64, 66, 69, 71, 73, 75,
+ 77, 77, 50, 47, 45, 49, 53, 57, 61, 64, 67, 70, 72, 74, 75, 77, 78, 78,
+ 54, 51, 49, 52, 56, 60, 64, 66, 70, 72, 74, 76, 77, 79, 80, 80, 59, 56,
+ 53, 56, 59, 62, 66, 69, 72, 74, 76, 78, 79, 80, 82, 82, 63, 60, 57, 59,
+ 62, 65, 68, 71, 74, 76, 78, 79, 81, 82, 83, 83, 67, 64, 61, 63, 65, 68,
+ 71, 73, 75, 77, 79, 81, 82, 83, 84, 84, 70, 67, 64, 66, 68, 70, 73, 75,
+ 77, 79, 80, 82, 83, 84, 85, 85, 73, 70, 68, 69, 71, 73, 75, 77, 78, 80,
+ 82, 83, 84, 85, 86, 86, 73, 70, 68, 69, 71, 73, 75, 77, 78, 80, 82, 83,
+ 84, 85, 86, 86,
+ /* Size 32 */
+ 38, 36, 34, 32, 31, 31, 32, 32, 33, 34, 36, 38, 40, 42, 44, 47, 49, 51,
+ 53, 56, 58, 60, 62, 64, 66, 67, 69, 71, 72, 72, 72, 72, 36, 35, 34, 33,
+ 32, 32, 32, 33, 33, 34, 36, 38, 40, 41, 43, 46, 48, 50, 52, 54, 57, 59,
+ 60, 62, 64, 66, 68, 69, 71, 71, 71, 71, 34, 34, 33, 33, 32, 33, 33, 33,
+ 33, 34, 36, 37, 39, 41, 42, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65,
+ 66, 68, 70, 70, 70, 70, 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 35, 37,
+ 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 63, 65, 67, 68, 68,
+ 68, 68, 31, 32, 32, 33, 34, 34, 34, 33, 33, 34, 35, 36, 37, 39, 41, 43,
+ 45, 46, 48, 50, 53, 54, 56, 58, 60, 62, 64, 65, 67, 67, 67, 67, 31, 32,
+ 33, 33, 34, 34, 35, 35, 35, 36, 37, 38, 40, 41, 43, 44, 46, 48, 50, 52,
+ 54, 56, 57, 59, 61, 63, 64, 66, 68, 68, 68, 68, 32, 32, 33, 33, 34, 35,
+ 35, 36, 37, 38, 39, 41, 42, 43, 45, 46, 48, 50, 52, 53, 55, 57, 59, 61,
+ 62, 64, 65, 67, 69, 69, 69, 69, 32, 33, 33, 33, 33, 35, 36, 38, 40, 41,
+ 42, 43, 44, 46, 47, 49, 50, 52, 53, 55, 57, 58, 60, 62, 63, 65, 66, 68,
+ 69, 69, 69, 69, 33, 33, 33, 33, 33, 35, 37, 40, 43, 44, 45, 46, 47, 49,
+ 50, 51, 53, 54, 55, 57, 58, 60, 61, 63, 65, 66, 67, 69, 70, 70, 70, 70,
+ 34, 34, 34, 34, 34, 36, 38, 41, 44, 45, 46, 48, 49, 50, 52, 53, 54, 56,
+ 57, 59, 60, 61, 63, 64, 66, 67, 68, 70, 71, 71, 71, 71, 36, 36, 36, 35,
+ 35, 37, 39, 42, 45, 46, 48, 49, 51, 52, 54, 55, 56, 58, 59, 60, 62, 63,
+ 64, 66, 67, 68, 69, 71, 72, 72, 72, 72, 38, 38, 37, 37, 36, 38, 41, 43,
+ 46, 48, 49, 51, 53, 54, 56, 57, 58, 60, 61, 62, 63, 65, 66, 67, 68, 69,
+ 71, 72, 73, 73, 73, 73, 40, 40, 39, 38, 37, 40, 42, 44, 47, 49, 51, 53,
+ 55, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74, 74,
+ 74, 74, 42, 41, 41, 40, 39, 41, 43, 46, 49, 50, 52, 54, 57, 58, 59, 61,
+ 62, 63, 64, 65, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 75, 75, 44, 43,
+ 42, 42, 41, 43, 45, 47, 50, 52, 54, 56, 58, 59, 61, 62, 64, 65, 66, 67,
+ 68, 69, 70, 71, 72, 73, 74, 75, 76, 76, 76, 76, 47, 46, 45, 44, 43, 44,
+ 46, 49, 51, 53, 55, 57, 59, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72,
+ 73, 74, 75, 76, 77, 77, 77, 77, 49, 48, 47, 46, 45, 46, 48, 50, 53, 54,
+ 56, 58, 61, 62, 64, 65, 67, 68, 69, 70, 71, 72, 73, 74, 74, 75, 76, 77,
+ 78, 78, 78, 78, 51, 50, 49, 48, 46, 48, 50, 52, 54, 56, 58, 60, 62, 63,
+ 65, 66, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 78, 78, 78, 78,
+ 53, 52, 51, 50, 48, 50, 52, 53, 55, 57, 59, 61, 63, 64, 66, 67, 69, 70,
+ 71, 72, 73, 74, 75, 75, 76, 77, 78, 78, 79, 79, 79, 79, 56, 54, 53, 52,
+ 50, 52, 53, 55, 57, 59, 60, 62, 64, 65, 67, 68, 70, 71, 72, 73, 74, 75,
+ 76, 76, 77, 78, 79, 79, 80, 80, 80, 80, 58, 57, 55, 54, 53, 54, 55, 57,
+ 58, 60, 62, 63, 65, 67, 68, 69, 71, 72, 73, 74, 75, 76, 77, 77, 78, 79,
+ 79, 80, 81, 81, 81, 81, 60, 59, 57, 56, 54, 56, 57, 58, 60, 61, 63, 65,
+ 66, 68, 69, 70, 72, 73, 74, 75, 76, 77, 77, 78, 79, 79, 80, 81, 81, 81,
+ 81, 81, 62, 60, 59, 58, 56, 57, 59, 60, 61, 63, 64, 66, 67, 69, 70, 71,
+ 73, 74, 75, 76, 77, 77, 78, 79, 80, 80, 81, 81, 82, 82, 82, 82, 64, 62,
+ 61, 60, 58, 59, 61, 62, 63, 64, 66, 67, 69, 70, 71, 72, 74, 75, 75, 76,
+ 77, 78, 79, 80, 80, 81, 81, 82, 82, 82, 82, 82, 66, 64, 63, 62, 60, 61,
+ 62, 63, 65, 66, 67, 68, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 80,
+ 81, 82, 82, 83, 83, 83, 83, 83, 67, 66, 65, 63, 62, 63, 64, 65, 66, 67,
+ 68, 69, 71, 72, 73, 74, 75, 76, 77, 78, 79, 79, 80, 81, 82, 82, 83, 83,
+ 84, 84, 84, 84, 69, 68, 66, 65, 64, 64, 65, 66, 67, 68, 69, 71, 72, 73,
+ 74, 75, 76, 77, 78, 79, 79, 80, 81, 81, 82, 83, 83, 84, 84, 84, 84, 84,
+ 71, 69, 68, 67, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78,
+ 78, 79, 80, 81, 81, 82, 83, 83, 84, 84, 84, 84, 84, 84, 72, 71, 70, 68,
+ 67, 68, 69, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 78, 79, 80, 81, 81,
+ 82, 82, 83, 84, 84, 84, 85, 85, 85, 85, 72, 71, 70, 68, 67, 68, 69, 69,
+ 70, 71, 72, 73, 74, 75, 76, 77, 78, 78, 79, 80, 81, 81, 82, 82, 83, 84,
+ 84, 84, 85, 85, 85, 85, 72, 71, 70, 68, 67, 68, 69, 69, 70, 71, 72, 73,
+ 74, 75, 76, 77, 78, 78, 79, 80, 81, 81, 82, 82, 83, 84, 84, 84, 85, 85,
+ 85, 85, 72, 71, 70, 68, 67, 68, 69, 69, 70, 71, 72, 73, 74, 75, 76, 77,
+ 78, 78, 79, 80, 81, 81, 82, 82, 83, 84, 84, 84, 85, 85, 85, 85 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 88, 93, 107, 88, 99, 103, 112, 93, 103, 119, 127, 107, 112, 127,
+ 137,
+ /* Size 8 */
+ 64, 54, 83, 87, 90, 97, 104, 111, 54, 71, 84, 79, 81, 86, 92, 100, 83,
+ 84, 93, 91, 91, 94, 99, 104, 87, 79, 91, 98, 101, 103, 107, 111, 90, 81,
+ 91, 101, 107, 111, 115, 118, 97, 86, 94, 103, 111, 117, 121, 124, 104,
+ 92, 99, 107, 115, 121, 126, 129, 111, 100, 104, 111, 118, 124, 129, 133,
+ /* Size 16 */
+ 64, 59, 54, 66, 83, 85, 87, 88, 90, 93, 97, 100, 104, 107, 111, 111, 59,
+ 60, 62, 71, 84, 83, 83, 84, 85, 88, 91, 94, 98, 101, 105, 105, 54, 62,
+ 71, 77, 84, 82, 79, 80, 81, 83, 86, 89, 92, 96, 100, 100, 66, 71, 77,
+ 83, 88, 87, 85, 85, 85, 87, 90, 92, 95, 99, 102, 102, 83, 84, 84, 88,
+ 93, 92, 91, 91, 91, 92, 94, 96, 99, 101, 104, 104, 85, 83, 82, 87, 92,
+ 93, 95, 95, 96, 97, 98, 100, 103, 105, 108, 108, 87, 83, 79, 85, 91, 95,
+ 98, 100, 101, 102, 103, 105, 107, 109, 111, 111, 88, 84, 80, 85, 91, 95,
+ 100, 102, 104, 106, 107, 109, 111, 113, 115, 115, 90, 85, 81, 85, 91,
+ 96, 101, 104, 107, 109, 111, 113, 115, 116, 118, 118, 93, 88, 83, 87,
+ 92, 97, 102, 106, 109, 112, 114, 116, 118, 120, 121, 121, 97, 91, 86,
+ 90, 94, 98, 103, 107, 111, 114, 117, 119, 121, 123, 124, 124, 100, 94,
+ 89, 92, 96, 100, 105, 109, 113, 116, 119, 121, 123, 125, 127, 127, 104,
+ 98, 92, 95, 99, 103, 107, 111, 115, 118, 121, 123, 126, 128, 129, 129,
+ 107, 101, 96, 99, 101, 105, 109, 113, 116, 120, 123, 125, 128, 129, 131,
+ 131, 111, 105, 100, 102, 104, 108, 111, 115, 118, 121, 124, 127, 129,
+ 131, 133, 133, 111, 105, 100, 102, 104, 108, 111, 115, 118, 121, 124,
+ 127, 129, 131, 133, 133,
+ /* Size 32 */
+ 64, 61, 59, 57, 54, 60, 66, 74, 83, 84, 85, 86, 87, 87, 88, 89, 90, 92,
+ 93, 95, 97, 98, 100, 102, 104, 105, 107, 109, 111, 111, 111, 111, 61,
+ 60, 60, 59, 58, 63, 68, 75, 84, 84, 84, 84, 85, 85, 86, 87, 88, 89, 90,
+ 92, 94, 95, 97, 99, 101, 102, 104, 106, 108, 108, 108, 108, 59, 60, 60,
+ 61, 62, 66, 71, 77, 84, 84, 83, 83, 83, 83, 84, 84, 85, 86, 88, 89, 91,
+ 92, 94, 96, 98, 99, 101, 103, 105, 105, 105, 105, 57, 59, 61, 64, 66,
+ 70, 74, 79, 84, 83, 82, 82, 81, 81, 82, 82, 83, 84, 85, 87, 88, 90, 91,
+ 93, 95, 97, 98, 100, 102, 102, 102, 102, 54, 58, 62, 66, 71, 74, 77, 81,
+ 84, 83, 82, 80, 79, 79, 80, 80, 81, 82, 83, 84, 86, 87, 89, 91, 92, 94,
+ 96, 98, 100, 100, 100, 100, 60, 63, 66, 70, 74, 77, 80, 83, 86, 85, 84,
+ 83, 82, 82, 82, 83, 83, 84, 85, 86, 88, 89, 91, 92, 94, 95, 97, 99, 101,
+ 101, 101, 101, 66, 68, 71, 74, 77, 80, 83, 85, 88, 88, 87, 86, 85, 85,
+ 85, 85, 85, 86, 87, 89, 90, 91, 92, 94, 95, 97, 99, 100, 102, 102, 102,
+ 102, 74, 75, 77, 79, 81, 83, 85, 88, 91, 90, 89, 89, 88, 88, 88, 88, 88,
+ 89, 90, 91, 92, 93, 94, 96, 97, 98, 100, 102, 103, 103, 103, 103, 83,
+ 84, 84, 84, 84, 86, 88, 91, 93, 93, 92, 92, 91, 91, 91, 91, 91, 92, 92,
+ 93, 94, 95, 96, 97, 99, 100, 101, 103, 104, 104, 104, 104, 84, 84, 84,
+ 83, 83, 85, 88, 90, 93, 93, 93, 93, 93, 93, 93, 93, 93, 94, 95, 95, 96,
+ 97, 98, 99, 101, 102, 103, 105, 106, 106, 106, 106, 85, 84, 83, 82, 82,
+ 84, 87, 89, 92, 93, 93, 94, 95, 95, 95, 95, 96, 96, 97, 98, 98, 99, 100,
+ 102, 103, 104, 105, 106, 108, 108, 108, 108, 86, 84, 83, 82, 80, 83, 86,
+ 89, 92, 93, 94, 95, 96, 97, 97, 98, 98, 99, 100, 100, 101, 102, 103,
+ 104, 105, 106, 107, 108, 109, 109, 109, 109, 87, 85, 83, 81, 79, 82, 85,
+ 88, 91, 93, 95, 96, 98, 99, 100, 100, 101, 102, 102, 103, 103, 104, 105,
+ 106, 107, 108, 109, 110, 111, 111, 111, 111, 87, 85, 83, 81, 79, 82, 85,
+ 88, 91, 93, 95, 97, 99, 100, 101, 102, 102, 103, 104, 105, 105, 106,
+ 107, 108, 109, 110, 111, 112, 113, 113, 113, 113, 88, 86, 84, 82, 80,
+ 82, 85, 88, 91, 93, 95, 97, 100, 101, 102, 103, 104, 105, 106, 106, 107,
+ 108, 109, 110, 111, 112, 113, 114, 115, 115, 115, 115, 89, 87, 84, 82,
+ 80, 83, 85, 88, 91, 93, 95, 98, 100, 102, 103, 104, 106, 107, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 115, 116, 116, 116, 116, 90, 88, 85,
+ 83, 81, 83, 85, 88, 91, 93, 96, 98, 101, 102, 104, 106, 107, 108, 109,
+ 110, 111, 112, 113, 114, 115, 116, 116, 117, 118, 118, 118, 118, 92, 89,
+ 86, 84, 82, 84, 86, 89, 92, 94, 96, 99, 102, 103, 105, 107, 108, 109,
+ 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 120, 120, 120,
+ 93, 90, 88, 85, 83, 85, 87, 90, 92, 95, 97, 100, 102, 104, 106, 107,
+ 109, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 120, 121, 121,
+ 121, 121, 95, 92, 89, 87, 84, 86, 89, 91, 93, 95, 98, 100, 103, 105,
+ 106, 108, 110, 112, 113, 114, 116, 116, 117, 118, 119, 120, 121, 122,
+ 123, 123, 123, 123, 97, 94, 91, 88, 86, 88, 90, 92, 94, 96, 98, 101,
+ 103, 105, 107, 109, 111, 113, 114, 116, 117, 118, 119, 120, 121, 122,
+ 123, 124, 124, 124, 124, 124, 98, 95, 92, 90, 87, 89, 91, 93, 95, 97,
+ 99, 102, 104, 106, 108, 110, 112, 114, 115, 116, 118, 119, 120, 121,
+ 122, 123, 124, 125, 126, 126, 126, 126, 100, 97, 94, 91, 89, 91, 92, 94,
+ 96, 98, 100, 103, 105, 107, 109, 111, 113, 114, 116, 117, 119, 120, 121,
+ 122, 123, 124, 125, 126, 127, 127, 127, 127, 102, 99, 96, 93, 91, 92,
+ 94, 96, 97, 99, 102, 104, 106, 108, 110, 112, 114, 115, 117, 118, 120,
+ 121, 122, 123, 125, 125, 126, 127, 128, 128, 128, 128, 104, 101, 98, 95,
+ 92, 94, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115, 116, 118,
+ 119, 121, 122, 123, 125, 126, 127, 128, 128, 129, 129, 129, 129, 105,
+ 102, 99, 97, 94, 95, 97, 98, 100, 102, 104, 106, 108, 110, 112, 114,
+ 116, 117, 119, 120, 122, 123, 124, 125, 127, 128, 128, 129, 130, 130,
+ 130, 130, 107, 104, 101, 98, 96, 97, 99, 100, 101, 103, 105, 107, 109,
+ 111, 113, 115, 116, 118, 120, 121, 123, 124, 125, 126, 128, 128, 129,
+ 130, 131, 131, 131, 131, 109, 106, 103, 100, 98, 99, 100, 102, 103, 105,
+ 106, 108, 110, 112, 114, 115, 117, 119, 120, 122, 124, 125, 126, 127,
+ 128, 129, 130, 131, 132, 132, 132, 132, 111, 108, 105, 102, 100, 101,
+ 102, 103, 104, 106, 108, 109, 111, 113, 115, 116, 118, 120, 121, 123,
+ 124, 126, 127, 128, 129, 130, 131, 132, 133, 133, 133, 133, 111, 108,
+ 105, 102, 100, 101, 102, 103, 104, 106, 108, 109, 111, 113, 115, 116,
+ 118, 120, 121, 123, 124, 126, 127, 128, 129, 130, 131, 132, 133, 133,
+ 133, 133, 111, 108, 105, 102, 100, 101, 102, 103, 104, 106, 108, 109,
+ 111, 113, 115, 116, 118, 120, 121, 123, 124, 126, 127, 128, 129, 130,
+ 131, 132, 133, 133, 133, 133, 111, 108, 105, 102, 100, 101, 102, 103,
+ 104, 106, 108, 109, 111, 113, 115, 116, 118, 120, 121, 123, 124, 126,
+ 127, 128, 129, 130, 131, 132, 133, 133, 133, 133 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 37, 52, 55, 64, 52, 59, 62, 67, 55, 62, 72, 77, 64, 67, 77, 84,
+ /* Size 8 */
+ 40, 33, 52, 54, 57, 61, 66, 71, 33, 44, 53, 49, 50, 54, 58, 63, 52, 53,
+ 59, 57, 57, 59, 63, 66, 54, 49, 57, 62, 64, 66, 68, 71, 57, 50, 57, 64,
+ 68, 71, 74, 76, 61, 54, 59, 66, 71, 75, 78, 80, 66, 58, 63, 68, 74, 78,
+ 81, 84, 71, 63, 66, 71, 76, 80, 84, 86,
+ /* Size 16 */
+ 39, 36, 33, 40, 51, 52, 53, 55, 56, 58, 60, 62, 65, 67, 70, 70, 36, 37,
+ 38, 43, 52, 51, 51, 52, 53, 54, 56, 58, 61, 63, 66, 66, 33, 38, 44, 47,
+ 52, 50, 49, 49, 50, 51, 53, 55, 57, 60, 62, 62, 40, 43, 47, 51, 55, 53,
+ 52, 52, 53, 54, 55, 57, 59, 61, 64, 64, 51, 52, 52, 55, 58, 57, 56, 56,
+ 56, 57, 58, 60, 61, 63, 65, 65, 52, 51, 50, 53, 57, 58, 59, 59, 59, 60,
+ 61, 63, 64, 66, 68, 68, 53, 51, 49, 52, 56, 59, 61, 62, 63, 64, 65, 66,
+ 67, 68, 70, 70, 55, 52, 49, 52, 56, 59, 62, 64, 65, 66, 67, 68, 70, 71,
+ 72, 72, 56, 53, 50, 53, 56, 59, 63, 65, 67, 69, 70, 71, 72, 73, 75, 75,
+ 58, 54, 51, 54, 57, 60, 64, 66, 69, 70, 72, 73, 74, 76, 77, 77, 60, 56,
+ 53, 55, 58, 61, 65, 67, 70, 72, 74, 75, 77, 78, 79, 79, 62, 58, 55, 57,
+ 60, 63, 66, 68, 71, 73, 75, 77, 78, 79, 81, 81, 65, 61, 57, 59, 61, 64,
+ 67, 70, 72, 74, 77, 78, 80, 81, 82, 82, 67, 63, 60, 61, 63, 66, 68, 71,
+ 73, 76, 78, 79, 81, 82, 84, 84, 70, 66, 62, 64, 65, 68, 70, 72, 75, 77,
+ 79, 81, 82, 84, 85, 85, 70, 66, 62, 64, 65, 68, 70, 72, 75, 77, 79, 81,
+ 82, 84, 85, 85,
+ /* Size 32 */
+ 39, 37, 35, 34, 33, 36, 40, 45, 51, 51, 52, 52, 53, 54, 54, 55, 55, 56,
+ 57, 58, 59, 61, 62, 63, 64, 65, 67, 68, 69, 69, 69, 69, 37, 36, 36, 35,
+ 35, 38, 41, 46, 51, 51, 51, 52, 52, 52, 53, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 66, 67, 67, 67, 67, 35, 36, 36, 37, 37, 40, 43, 47,
+ 51, 51, 51, 51, 50, 51, 51, 52, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+ 63, 64, 65, 65, 65, 65, 34, 35, 37, 38, 40, 42, 45, 48, 51, 51, 50, 50,
+ 49, 50, 50, 50, 51, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, 62, 63, 63,
+ 63, 63, 33, 35, 37, 40, 43, 45, 47, 49, 52, 51, 50, 49, 48, 48, 49, 49,
+ 49, 50, 51, 52, 52, 53, 55, 56, 57, 58, 59, 60, 62, 62, 62, 62, 36, 38,
+ 40, 42, 45, 47, 49, 51, 53, 52, 51, 51, 50, 50, 50, 50, 51, 51, 52, 53,
+ 54, 55, 56, 57, 58, 59, 60, 61, 62, 62, 62, 62, 40, 41, 43, 45, 47, 49,
+ 50, 52, 54, 54, 53, 52, 52, 52, 52, 52, 52, 53, 54, 54, 55, 56, 57, 58,
+ 59, 60, 61, 62, 63, 63, 63, 63, 45, 46, 47, 48, 49, 51, 52, 54, 56, 55,
+ 55, 54, 54, 54, 54, 54, 54, 55, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 64, 64, 64, 51, 51, 51, 51, 52, 53, 54, 56, 57, 57, 57, 56, 56, 56,
+ 56, 56, 56, 56, 57, 57, 58, 59, 59, 60, 61, 62, 63, 64, 65, 65, 65, 65,
+ 51, 51, 51, 51, 51, 52, 54, 55, 57, 57, 57, 57, 57, 57, 57, 57, 57, 58,
+ 58, 59, 59, 60, 61, 61, 62, 63, 64, 65, 66, 66, 66, 66, 52, 51, 51, 50,
+ 50, 51, 53, 55, 57, 57, 57, 58, 58, 58, 59, 59, 59, 59, 60, 60, 61, 61,
+ 62, 63, 64, 64, 65, 66, 67, 67, 67, 67, 52, 52, 51, 50, 49, 51, 52, 54,
+ 56, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62, 62, 63, 64, 64, 65, 66,
+ 66, 67, 68, 68, 68, 68, 53, 52, 50, 49, 48, 50, 52, 54, 56, 57, 58, 59,
+ 61, 61, 61, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67, 68, 69, 69, 69,
+ 69, 69, 54, 52, 51, 50, 48, 50, 52, 54, 56, 57, 58, 60, 61, 62, 62, 63,
+ 63, 64, 64, 65, 65, 66, 66, 67, 68, 68, 69, 70, 70, 70, 70, 70, 54, 53,
+ 51, 50, 49, 50, 52, 54, 56, 57, 59, 60, 61, 62, 63, 64, 64, 65, 66, 66,
+ 67, 67, 68, 68, 69, 70, 70, 71, 72, 72, 72, 72, 55, 53, 52, 50, 49, 50,
+ 52, 54, 56, 57, 59, 60, 62, 63, 64, 65, 66, 66, 67, 67, 68, 69, 69, 70,
+ 70, 71, 71, 72, 73, 73, 73, 73, 55, 54, 52, 51, 49, 51, 52, 54, 56, 57,
+ 59, 61, 62, 63, 64, 66, 67, 67, 68, 69, 69, 70, 70, 71, 72, 72, 73, 73,
+ 74, 74, 74, 74, 56, 55, 53, 51, 50, 51, 53, 55, 56, 58, 59, 61, 63, 64,
+ 65, 66, 67, 68, 69, 70, 70, 71, 71, 72, 73, 73, 74, 74, 75, 75, 75, 75,
+ 57, 56, 54, 52, 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 66, 67, 68, 69,
+ 70, 70, 71, 72, 72, 73, 74, 74, 75, 75, 76, 76, 76, 76, 58, 57, 55, 53,
+ 52, 53, 54, 56, 57, 59, 60, 62, 64, 65, 66, 67, 69, 70, 70, 71, 72, 73,
+ 73, 74, 75, 75, 76, 77, 77, 77, 77, 77, 59, 58, 56, 54, 52, 54, 55, 56,
+ 58, 59, 61, 62, 64, 65, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 76,
+ 77, 78, 78, 78, 78, 78, 61, 59, 57, 55, 53, 55, 56, 57, 59, 60, 61, 63,
+ 65, 66, 67, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 79, 79,
+ 79, 79, 62, 60, 58, 56, 55, 56, 57, 58, 59, 61, 62, 64, 65, 66, 68, 69,
+ 70, 71, 72, 73, 75, 75, 76, 77, 77, 78, 79, 79, 80, 80, 80, 80, 63, 61,
+ 59, 57, 56, 57, 58, 59, 60, 61, 63, 64, 66, 67, 68, 70, 71, 72, 73, 74,
+ 75, 76, 77, 77, 78, 79, 79, 80, 81, 81, 81, 81, 64, 62, 60, 58, 57, 58,
+ 59, 60, 61, 62, 64, 65, 66, 68, 69, 70, 72, 73, 74, 75, 76, 77, 77, 78,
+ 79, 80, 80, 81, 82, 82, 82, 82, 65, 63, 61, 60, 58, 59, 60, 61, 62, 63,
+ 64, 66, 67, 68, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 80, 81, 82,
+ 82, 82, 82, 82, 67, 64, 63, 61, 59, 60, 61, 62, 63, 64, 65, 66, 68, 69,
+ 70, 71, 73, 74, 75, 76, 77, 78, 79, 79, 80, 81, 82, 82, 83, 83, 83, 83,
+ 68, 66, 64, 62, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74,
+ 75, 77, 78, 78, 79, 80, 81, 82, 82, 83, 84, 84, 84, 84, 69, 67, 65, 63,
+ 62, 62, 63, 64, 65, 66, 67, 68, 69, 70, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 82, 83, 84, 84, 84, 84, 84, 69, 67, 65, 63, 62, 62, 63, 64,
+ 65, 66, 67, 68, 69, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 82,
+ 83, 84, 84, 84, 84, 84, 69, 67, 65, 63, 62, 62, 63, 64, 65, 66, 67, 68,
+ 69, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 82, 83, 84, 84, 84,
+ 84, 84, 69, 67, 65, 63, 62, 62, 63, 64, 65, 66, 67, 68, 69, 70, 72, 73,
+ 74, 75, 76, 77, 78, 79, 80, 81, 82, 82, 83, 84, 84, 84, 84, 84 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 68, 92, 113, 68, 87, 103, 117, 92, 103, 119, 128, 113, 117, 128,
+ 134,
+ /* Size 8 */
+ 64, 53, 56, 67, 79, 91, 100, 107, 53, 58, 56, 63, 73, 84, 93, 101, 56,
+ 56, 70, 77, 83, 91, 98, 105, 67, 63, 77, 87, 94, 99, 104, 109, 79, 73,
+ 83, 94, 101, 106, 110, 113, 91, 84, 91, 99, 106, 110, 114, 116, 100, 93,
+ 98, 104, 110, 114, 117, 119, 107, 101, 105, 109, 113, 116, 119, 121,
+ /* Size 16 */
+ 64, 58, 53, 54, 56, 61, 67, 72, 79, 84, 91, 95, 100, 103, 107, 107, 58,
+ 57, 55, 56, 56, 60, 65, 70, 76, 81, 87, 91, 96, 100, 104, 104, 53, 55,
+ 58, 57, 56, 59, 63, 67, 73, 78, 84, 88, 93, 97, 101, 101, 54, 56, 57,
+ 60, 63, 66, 69, 73, 78, 82, 87, 91, 96, 99, 103, 103, 56, 56, 56, 63,
+ 70, 73, 77, 80, 83, 87, 91, 94, 98, 101, 105, 105, 61, 60, 59, 66, 73,
+ 77, 81, 85, 88, 91, 95, 98, 101, 104, 107, 107, 67, 65, 63, 69, 77, 81,
+ 87, 90, 94, 96, 99, 102, 104, 106, 109, 109, 72, 70, 67, 73, 80, 85, 90,
+ 93, 97, 100, 102, 104, 107, 109, 111, 111, 79, 76, 73, 78, 83, 88, 94,
+ 97, 101, 103, 106, 108, 110, 111, 113, 113, 84, 81, 78, 82, 87, 91, 96,
+ 100, 103, 105, 108, 110, 112, 113, 115, 115, 91, 87, 84, 87, 91, 95, 99,
+ 102, 106, 108, 110, 112, 114, 115, 116, 116, 95, 91, 88, 91, 94, 98,
+ 102, 104, 108, 110, 112, 114, 115, 116, 118, 118, 100, 96, 93, 96, 98,
+ 101, 104, 107, 110, 112, 114, 115, 117, 118, 119, 119, 103, 100, 97, 99,
+ 101, 104, 106, 109, 111, 113, 115, 116, 118, 119, 120, 120, 107, 104,
+ 101, 103, 105, 107, 109, 111, 113, 115, 116, 118, 119, 120, 121, 121,
+ 107, 104, 101, 103, 105, 107, 109, 111, 113, 115, 116, 118, 119, 120,
+ 121, 121,
+ /* Size 32 */
+ 64, 61, 58, 55, 53, 54, 54, 55, 56, 58, 61, 64, 67, 70, 72, 76, 79, 82,
+ 84, 87, 91, 93, 95, 97, 100, 102, 103, 105, 107, 107, 107, 107, 61, 59,
+ 57, 56, 54, 55, 55, 56, 56, 58, 60, 63, 66, 68, 71, 74, 78, 80, 83, 86,
+ 89, 91, 93, 96, 98, 100, 102, 104, 106, 106, 106, 106, 58, 57, 57, 56,
+ 55, 56, 56, 56, 56, 58, 60, 62, 65, 67, 70, 73, 76, 78, 81, 84, 87, 89,
+ 91, 94, 96, 98, 100, 102, 104, 104, 104, 104, 55, 56, 56, 56, 57, 57,
+ 57, 56, 56, 58, 60, 62, 64, 66, 69, 71, 74, 77, 79, 82, 85, 87, 90, 92,
+ 95, 97, 99, 101, 103, 103, 103, 103, 53, 54, 55, 57, 58, 58, 57, 57, 56,
+ 58, 59, 61, 63, 65, 67, 70, 73, 75, 78, 81, 84, 86, 88, 91, 93, 95, 97,
+ 99, 101, 101, 101, 101, 54, 55, 56, 57, 58, 58, 59, 59, 59, 61, 62, 64,
+ 66, 68, 70, 73, 75, 77, 80, 82, 85, 87, 90, 92, 94, 96, 98, 100, 102,
+ 102, 102, 102, 54, 55, 56, 57, 57, 59, 60, 61, 63, 64, 66, 67, 69, 71,
+ 73, 75, 78, 80, 82, 84, 87, 89, 91, 93, 96, 97, 99, 101, 103, 103, 103,
+ 103, 55, 56, 56, 56, 57, 59, 61, 63, 66, 68, 69, 71, 73, 74, 76, 78, 80,
+ 82, 84, 87, 89, 91, 93, 95, 97, 99, 100, 102, 104, 104, 104, 104, 56,
+ 56, 56, 56, 56, 59, 63, 66, 70, 72, 73, 75, 77, 78, 80, 81, 83, 85, 87,
+ 89, 91, 93, 94, 96, 98, 100, 101, 103, 105, 105, 105, 105, 58, 58, 58,
+ 58, 58, 61, 64, 68, 72, 73, 75, 77, 79, 80, 82, 84, 86, 87, 89, 91, 93,
+ 94, 96, 98, 100, 101, 103, 104, 106, 106, 106, 106, 61, 60, 60, 60, 59,
+ 62, 66, 69, 73, 75, 77, 79, 81, 83, 85, 86, 88, 90, 91, 93, 95, 96, 98,
+ 99, 101, 102, 104, 105, 107, 107, 107, 107, 64, 63, 62, 62, 61, 64, 67,
+ 71, 75, 77, 79, 82, 84, 86, 87, 89, 91, 92, 94, 95, 97, 98, 100, 101,
+ 103, 104, 105, 106, 108, 108, 108, 108, 67, 66, 65, 64, 63, 66, 69, 73,
+ 77, 79, 81, 84, 87, 88, 90, 92, 94, 95, 96, 98, 99, 100, 102, 103, 104,
+ 105, 106, 108, 109, 109, 109, 109, 70, 68, 67, 66, 65, 68, 71, 74, 78,
+ 80, 83, 86, 88, 90, 92, 93, 95, 97, 98, 99, 101, 102, 103, 104, 106,
+ 107, 108, 109, 110, 110, 110, 110, 72, 71, 70, 69, 67, 70, 73, 76, 80,
+ 82, 85, 87, 90, 92, 93, 95, 97, 98, 100, 101, 102, 103, 104, 106, 107,
+ 108, 109, 110, 111, 111, 111, 111, 76, 74, 73, 71, 70, 73, 75, 78, 81,
+ 84, 86, 89, 92, 93, 95, 97, 99, 100, 101, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 112, 112, 112, 79, 78, 76, 74, 73, 75, 78, 80, 83,
+ 86, 88, 91, 94, 95, 97, 99, 101, 102, 103, 104, 106, 107, 108, 109, 110,
+ 110, 111, 112, 113, 113, 113, 113, 82, 80, 78, 77, 75, 77, 80, 82, 85,
+ 87, 90, 92, 95, 97, 98, 100, 102, 103, 104, 106, 107, 108, 109, 110,
+ 111, 111, 112, 113, 114, 114, 114, 114, 84, 83, 81, 79, 78, 80, 82, 84,
+ 87, 89, 91, 94, 96, 98, 100, 101, 103, 104, 105, 107, 108, 109, 110,
+ 111, 112, 112, 113, 114, 115, 115, 115, 115, 87, 86, 84, 82, 81, 82, 84,
+ 87, 89, 91, 93, 95, 98, 99, 101, 103, 104, 106, 107, 108, 109, 110, 111,
+ 112, 113, 113, 114, 115, 116, 116, 116, 116, 91, 89, 87, 85, 84, 85, 87,
+ 89, 91, 93, 95, 97, 99, 101, 102, 104, 106, 107, 108, 109, 110, 111,
+ 112, 113, 114, 114, 115, 116, 116, 116, 116, 116, 93, 91, 89, 87, 86,
+ 87, 89, 91, 93, 94, 96, 98, 100, 102, 103, 105, 107, 108, 109, 110, 111,
+ 112, 113, 114, 114, 115, 116, 116, 117, 117, 117, 117, 95, 93, 91, 90,
+ 88, 90, 91, 93, 94, 96, 98, 100, 102, 103, 104, 106, 108, 109, 110, 111,
+ 112, 113, 114, 114, 115, 116, 116, 117, 118, 118, 118, 118, 97, 96, 94,
+ 92, 91, 92, 93, 95, 96, 98, 99, 101, 103, 104, 106, 107, 109, 110, 111,
+ 112, 113, 114, 114, 115, 116, 117, 117, 118, 118, 118, 118, 118, 100,
+ 98, 96, 95, 93, 94, 96, 97, 98, 100, 101, 103, 104, 106, 107, 108, 110,
+ 111, 112, 113, 114, 114, 115, 116, 117, 117, 118, 118, 119, 119, 119,
+ 119, 102, 100, 98, 97, 95, 96, 97, 99, 100, 101, 102, 104, 105, 107,
+ 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 117, 118, 118, 119,
+ 119, 119, 119, 119, 103, 102, 100, 99, 97, 98, 99, 100, 101, 103, 104,
+ 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 116, 117, 118,
+ 118, 119, 119, 120, 120, 120, 120, 105, 104, 102, 101, 99, 100, 101,
+ 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116,
+ 116, 117, 118, 118, 119, 119, 120, 120, 120, 120, 120, 107, 106, 104,
+ 103, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
+ 114, 115, 116, 116, 117, 118, 118, 119, 119, 120, 120, 121, 121, 121,
+ 121, 107, 106, 104, 103, 101, 102, 103, 104, 105, 106, 107, 108, 109,
+ 110, 111, 112, 113, 114, 115, 116, 116, 117, 118, 118, 119, 119, 120,
+ 120, 121, 121, 121, 121, 107, 106, 104, 103, 101, 102, 103, 104, 105,
+ 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 116, 117, 118,
+ 118, 119, 119, 120, 120, 121, 121, 121, 121, 107, 106, 104, 103, 101,
+ 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
+ 116, 116, 117, 118, 118, 119, 119, 120, 120, 121, 121, 121, 121 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 38, 40, 55, 69, 40, 52, 62, 72, 55, 62, 73, 79, 69, 72, 79, 83,
+ /* Size 8 */
+ 43, 35, 37, 45, 53, 62, 68, 74, 35, 39, 37, 42, 49, 56, 64, 70, 37, 37,
+ 47, 52, 56, 62, 67, 72, 45, 42, 52, 59, 64, 68, 72, 75, 53, 49, 56, 64,
+ 69, 73, 76, 78, 62, 56, 62, 68, 73, 76, 79, 81, 68, 64, 67, 72, 76, 79,
+ 81, 83, 74, 70, 72, 75, 78, 81, 83, 84,
+ /* Size 16 */
+ 42, 38, 34, 35, 36, 40, 44, 48, 52, 56, 60, 63, 67, 70, 72, 72, 38, 37,
+ 36, 36, 36, 39, 42, 46, 50, 54, 58, 61, 65, 67, 70, 70, 34, 36, 38, 37,
+ 37, 39, 41, 44, 48, 51, 55, 59, 62, 65, 68, 68, 35, 36, 37, 39, 41, 43,
+ 45, 48, 51, 54, 58, 61, 64, 67, 69, 69, 36, 36, 37, 41, 46, 48, 50, 53,
+ 55, 58, 61, 63, 66, 68, 70, 70, 40, 39, 39, 43, 48, 51, 54, 56, 59, 61,
+ 63, 66, 68, 70, 72, 72, 44, 42, 41, 45, 50, 54, 58, 60, 62, 64, 66, 68,
+ 70, 72, 74, 74, 48, 46, 44, 48, 53, 56, 60, 62, 65, 67, 69, 70, 72, 74,
+ 75, 75, 52, 50, 48, 51, 55, 59, 62, 65, 68, 69, 71, 73, 74, 75, 77, 77,
+ 56, 54, 51, 54, 58, 61, 64, 67, 69, 71, 73, 74, 76, 77, 78, 78, 60, 58,
+ 55, 58, 61, 63, 66, 69, 71, 73, 75, 76, 77, 78, 79, 79, 63, 61, 59, 61,
+ 63, 66, 68, 70, 73, 74, 76, 77, 78, 79, 80, 80, 67, 65, 62, 64, 66, 68,
+ 70, 72, 74, 76, 77, 78, 79, 80, 81, 81, 70, 67, 65, 67, 68, 70, 72, 74,
+ 75, 77, 78, 79, 80, 81, 82, 82, 72, 70, 68, 69, 70, 72, 74, 75, 77, 78,
+ 79, 80, 81, 82, 82, 82, 72, 70, 68, 69, 70, 72, 74, 75, 77, 78, 79, 80,
+ 81, 82, 82, 82,
+ /* Size 32 */
+ 41, 39, 37, 35, 34, 34, 35, 35, 36, 37, 39, 41, 43, 45, 47, 49, 52, 54,
+ 55, 57, 60, 61, 63, 65, 66, 68, 69, 70, 72, 72, 72, 72, 39, 38, 37, 36,
+ 35, 35, 35, 36, 36, 37, 39, 41, 43, 44, 46, 48, 51, 52, 54, 56, 58, 60,
+ 62, 63, 65, 66, 68, 69, 70, 70, 70, 70, 37, 37, 36, 36, 36, 36, 36, 36,
+ 36, 37, 39, 40, 42, 43, 45, 47, 49, 51, 53, 55, 57, 59, 60, 62, 64, 65,
+ 67, 68, 69, 69, 69, 69, 35, 36, 36, 36, 36, 36, 36, 36, 36, 37, 38, 40,
+ 41, 43, 44, 46, 48, 50, 52, 54, 56, 57, 59, 61, 63, 64, 65, 67, 68, 68,
+ 68, 68, 34, 35, 36, 36, 37, 37, 37, 37, 36, 37, 38, 39, 40, 42, 44, 45,
+ 47, 49, 51, 53, 55, 56, 58, 60, 62, 63, 64, 66, 67, 67, 67, 67, 34, 35,
+ 36, 36, 37, 37, 38, 38, 38, 39, 40, 41, 42, 44, 45, 47, 49, 51, 52, 54,
+ 56, 57, 59, 61, 62, 64, 65, 66, 68, 68, 68, 68, 35, 35, 36, 36, 37, 38,
+ 38, 39, 40, 41, 42, 44, 45, 46, 48, 49, 51, 52, 54, 55, 57, 59, 60, 62,
+ 63, 65, 66, 67, 69, 69, 69, 69, 35, 36, 36, 36, 37, 38, 39, 41, 43, 44,
+ 45, 46, 47, 48, 50, 51, 53, 54, 55, 57, 59, 60, 61, 63, 64, 65, 67, 68,
+ 69, 69, 69, 69, 36, 36, 36, 36, 36, 38, 40, 43, 45, 46, 48, 49, 50, 51,
+ 52, 53, 55, 56, 57, 58, 60, 61, 62, 64, 65, 66, 67, 69, 70, 70, 70, 70,
+ 37, 37, 37, 37, 37, 39, 41, 44, 46, 48, 49, 50, 52, 53, 54, 55, 56, 57,
+ 59, 60, 61, 62, 64, 65, 66, 67, 68, 69, 71, 71, 71, 71, 39, 39, 39, 38,
+ 38, 40, 42, 45, 48, 49, 50, 52, 53, 54, 56, 57, 58, 59, 60, 61, 63, 64,
+ 65, 66, 67, 68, 69, 70, 71, 71, 71, 71, 41, 41, 40, 40, 39, 41, 44, 46,
+ 49, 50, 52, 53, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
+ 70, 71, 72, 72, 72, 72, 43, 43, 42, 41, 40, 42, 45, 47, 50, 52, 53, 55,
+ 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 68, 69, 70, 71, 72, 73, 73,
+ 73, 73, 45, 44, 43, 43, 42, 44, 46, 48, 51, 53, 54, 56, 58, 59, 61, 62,
+ 63, 64, 65, 66, 67, 68, 69, 69, 70, 71, 72, 73, 74, 74, 74, 74, 47, 46,
+ 45, 44, 44, 45, 48, 50, 52, 54, 56, 57, 59, 61, 62, 63, 64, 65, 66, 67,
+ 68, 69, 70, 71, 71, 72, 73, 74, 74, 74, 74, 74, 49, 48, 47, 46, 45, 47,
+ 49, 51, 53, 55, 57, 59, 61, 62, 63, 64, 66, 66, 67, 68, 69, 70, 71, 72,
+ 72, 73, 74, 74, 75, 75, 75, 75, 52, 51, 49, 48, 47, 49, 51, 53, 55, 56,
+ 58, 60, 62, 63, 64, 66, 67, 68, 69, 70, 71, 71, 72, 73, 73, 74, 75, 75,
+ 76, 76, 76, 76, 54, 52, 51, 50, 49, 51, 52, 54, 56, 57, 59, 61, 63, 64,
+ 65, 66, 68, 69, 70, 70, 71, 72, 73, 73, 74, 75, 75, 76, 76, 76, 76, 76,
+ 55, 54, 53, 52, 51, 52, 54, 55, 57, 59, 60, 62, 64, 65, 66, 67, 69, 70,
+ 70, 71, 72, 73, 73, 74, 75, 75, 76, 77, 77, 77, 77, 77, 57, 56, 55, 54,
+ 53, 54, 55, 57, 58, 60, 61, 63, 65, 66, 67, 68, 70, 70, 71, 72, 73, 74,
+ 74, 75, 76, 76, 77, 77, 78, 78, 78, 78, 60, 58, 57, 56, 55, 56, 57, 59,
+ 60, 61, 63, 64, 66, 67, 68, 69, 71, 71, 72, 73, 74, 75, 75, 76, 76, 77,
+ 77, 78, 78, 78, 78, 78, 61, 60, 59, 57, 56, 57, 59, 60, 61, 62, 64, 65,
+ 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
+ 79, 79, 63, 62, 60, 59, 58, 59, 60, 61, 62, 64, 65, 66, 68, 69, 70, 71,
+ 72, 73, 73, 74, 75, 76, 76, 77, 77, 78, 78, 79, 79, 79, 79, 79, 65, 63,
+ 62, 61, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73, 73, 74, 75,
+ 76, 76, 77, 77, 78, 78, 79, 79, 80, 80, 80, 80, 66, 65, 64, 63, 62, 62,
+ 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 76, 77, 77, 78,
+ 79, 79, 79, 80, 80, 80, 80, 80, 68, 66, 65, 64, 63, 64, 65, 65, 66, 67,
+ 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 79, 79, 80, 80,
+ 81, 81, 81, 81, 69, 68, 67, 65, 64, 65, 66, 67, 67, 68, 69, 70, 71, 72,
+ 73, 74, 75, 75, 76, 77, 77, 78, 78, 79, 79, 80, 80, 81, 81, 81, 81, 81,
+ 70, 69, 68, 67, 66, 66, 67, 68, 69, 69, 70, 71, 72, 73, 74, 74, 75, 76,
+ 77, 77, 78, 78, 79, 79, 80, 80, 81, 81, 81, 81, 81, 81, 72, 70, 69, 68,
+ 67, 68, 69, 69, 70, 71, 71, 72, 73, 74, 74, 75, 76, 76, 77, 78, 78, 79,
+ 79, 80, 80, 81, 81, 81, 82, 82, 82, 82, 72, 70, 69, 68, 67, 68, 69, 69,
+ 70, 71, 71, 72, 73, 74, 74, 75, 76, 76, 77, 78, 78, 79, 79, 80, 80, 81,
+ 81, 81, 82, 82, 82, 82, 72, 70, 69, 68, 67, 68, 69, 69, 70, 71, 71, 72,
+ 73, 74, 74, 75, 76, 76, 77, 78, 78, 79, 79, 80, 80, 81, 81, 81, 82, 82,
+ 82, 82, 72, 70, 69, 68, 67, 68, 69, 69, 70, 71, 71, 72, 73, 74, 74, 75,
+ 76, 76, 77, 78, 78, 79, 79, 80, 80, 81, 81, 81, 82, 82, 82, 82 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 86, 90, 101, 86, 95, 98, 105, 90, 98, 110, 117, 101, 105, 117, 125,
+ /* Size 8 */
+ 64, 55, 81, 84, 87, 92, 98, 104, 55, 71, 82, 77, 79, 83, 89, 95, 81, 82,
+ 89, 88, 88, 90, 94, 99, 84, 77, 88, 94, 96, 98, 101, 104, 87, 79, 88,
+ 96, 101, 104, 107, 110, 92, 83, 90, 98, 104, 109, 112, 115, 98, 89, 94,
+ 101, 107, 112, 116, 118, 104, 95, 99, 104, 110, 115, 118, 121,
+ /* Size 16 */
+ 64, 59, 55, 66, 81, 82, 84, 85, 87, 90, 92, 95, 98, 101, 104, 104, 59,
+ 61, 62, 70, 82, 81, 80, 82, 83, 85, 87, 90, 93, 96, 99, 99, 55, 62, 71,
+ 76, 82, 80, 77, 78, 79, 81, 83, 86, 89, 92, 95, 95, 66, 70, 76, 80, 86,
+ 84, 82, 83, 83, 85, 86, 89, 91, 94, 97, 97, 81, 82, 82, 86, 89, 89, 88,
+ 88, 88, 89, 90, 92, 94, 96, 99, 99, 82, 81, 80, 84, 89, 90, 91, 91, 92,
+ 93, 94, 96, 97, 99, 101, 101, 84, 80, 77, 82, 88, 91, 94, 95, 96, 97,
+ 98, 99, 101, 102, 104, 104, 85, 82, 78, 83, 88, 91, 95, 97, 98, 100,
+ 101, 102, 104, 105, 107, 107, 87, 83, 79, 83, 88, 92, 96, 98, 101, 103,
+ 104, 106, 107, 108, 110, 110, 90, 85, 81, 85, 89, 93, 97, 100, 103, 105,
+ 107, 108, 109, 111, 112, 112, 92, 87, 83, 86, 90, 94, 98, 101, 104, 107,
+ 109, 110, 112, 113, 115, 115, 95, 90, 86, 89, 92, 96, 99, 102, 106, 108,
+ 110, 112, 114, 115, 116, 116, 98, 93, 89, 91, 94, 97, 101, 104, 107,
+ 109, 112, 114, 116, 117, 118, 118, 101, 96, 92, 94, 96, 99, 102, 105,
+ 108, 111, 113, 115, 117, 118, 120, 120, 104, 99, 95, 97, 99, 101, 104,
+ 107, 110, 112, 115, 116, 118, 120, 121, 121, 104, 99, 95, 97, 99, 101,
+ 104, 107, 110, 112, 115, 116, 118, 120, 121, 121,
+ /* Size 32 */
+ 64, 62, 59, 57, 55, 60, 66, 73, 81, 82, 82, 83, 84, 85, 85, 86, 87, 88,
+ 90, 91, 92, 94, 95, 97, 98, 100, 101, 102, 104, 104, 104, 104, 62, 61,
+ 60, 59, 58, 63, 68, 74, 81, 82, 82, 82, 82, 83, 83, 84, 85, 86, 87, 88,
+ 90, 91, 93, 94, 96, 97, 98, 100, 101, 101, 101, 101, 59, 60, 61, 61, 62,
+ 66, 70, 76, 82, 81, 81, 81, 80, 81, 82, 82, 83, 84, 85, 86, 87, 89, 90,
+ 92, 93, 95, 96, 98, 99, 99, 99, 99, 57, 59, 61, 64, 66, 69, 73, 77, 82,
+ 81, 80, 80, 79, 79, 80, 80, 81, 82, 83, 84, 85, 87, 88, 89, 91, 92, 94,
+ 95, 97, 97, 97, 97, 55, 58, 62, 66, 71, 73, 76, 79, 82, 81, 80, 78, 77,
+ 78, 78, 78, 79, 80, 81, 82, 83, 84, 86, 87, 89, 90, 92, 93, 95, 95, 95,
+ 95, 60, 63, 66, 69, 73, 76, 78, 81, 84, 83, 82, 81, 80, 80, 80, 81, 81,
+ 82, 83, 84, 85, 86, 87, 89, 90, 91, 93, 94, 96, 96, 96, 96, 66, 68, 70,
+ 73, 76, 78, 80, 83, 86, 85, 84, 83, 82, 82, 83, 83, 83, 84, 85, 86, 86,
+ 88, 89, 90, 91, 93, 94, 95, 97, 97, 97, 97, 73, 74, 76, 77, 79, 81, 83,
+ 85, 87, 87, 86, 86, 85, 85, 85, 85, 85, 86, 87, 87, 88, 89, 90, 92, 93,
+ 94, 95, 96, 98, 98, 98, 98, 81, 81, 82, 82, 82, 84, 86, 87, 89, 89, 89,
+ 88, 88, 88, 88, 88, 88, 88, 89, 89, 90, 91, 92, 93, 94, 95, 96, 98, 99,
+ 99, 99, 99, 82, 82, 81, 81, 81, 83, 85, 87, 89, 89, 89, 89, 89, 89, 89,
+ 89, 90, 90, 91, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 100, 100, 100,
+ 82, 82, 81, 80, 80, 82, 84, 86, 89, 89, 90, 90, 91, 91, 91, 91, 92, 92,
+ 93, 93, 94, 95, 96, 96, 97, 98, 99, 100, 101, 101, 101, 101, 83, 82, 81,
+ 80, 78, 81, 83, 86, 88, 89, 90, 91, 92, 92, 93, 93, 94, 94, 95, 95, 96,
+ 97, 97, 98, 99, 100, 101, 102, 103, 103, 103, 103, 84, 82, 80, 79, 77,
+ 80, 82, 85, 88, 89, 91, 92, 94, 94, 95, 95, 96, 96, 97, 97, 98, 99, 99,
+ 100, 101, 102, 102, 103, 104, 104, 104, 104, 85, 83, 81, 79, 78, 80, 82,
+ 85, 88, 89, 91, 92, 94, 95, 96, 96, 97, 98, 98, 99, 99, 100, 101, 102,
+ 102, 103, 104, 105, 106, 106, 106, 106, 85, 83, 82, 80, 78, 80, 83, 85,
+ 88, 89, 91, 93, 95, 96, 97, 98, 98, 99, 100, 100, 101, 102, 102, 103,
+ 104, 105, 105, 106, 107, 107, 107, 107, 86, 84, 82, 80, 78, 81, 83, 85,
+ 88, 89, 91, 93, 95, 96, 98, 99, 100, 100, 101, 102, 103, 103, 104, 105,
+ 105, 106, 107, 108, 108, 108, 108, 108, 87, 85, 83, 81, 79, 81, 83, 85,
+ 88, 90, 92, 94, 96, 97, 98, 100, 101, 102, 103, 104, 104, 105, 106, 106,
+ 107, 108, 108, 109, 110, 110, 110, 110, 88, 86, 84, 82, 80, 82, 84, 86,
+ 88, 90, 92, 94, 96, 98, 99, 100, 102, 103, 104, 105, 105, 106, 107, 108,
+ 108, 109, 110, 110, 111, 111, 111, 111, 90, 87, 85, 83, 81, 83, 85, 87,
+ 89, 91, 93, 95, 97, 98, 100, 101, 103, 104, 105, 106, 107, 107, 108,
+ 109, 109, 110, 111, 111, 112, 112, 112, 112, 91, 88, 86, 84, 82, 84, 86,
+ 87, 89, 91, 93, 95, 97, 99, 100, 102, 104, 105, 106, 107, 108, 108, 109,
+ 110, 111, 111, 112, 113, 113, 113, 113, 113, 92, 90, 87, 85, 83, 85, 86,
+ 88, 90, 92, 94, 96, 98, 99, 101, 103, 104, 105, 107, 108, 109, 110, 110,
+ 111, 112, 113, 113, 114, 115, 115, 115, 115, 94, 91, 89, 87, 84, 86, 88,
+ 89, 91, 93, 95, 97, 99, 100, 102, 103, 105, 106, 107, 108, 110, 110,
+ 111, 112, 113, 114, 114, 115, 116, 116, 116, 116, 95, 93, 90, 88, 86,
+ 87, 89, 90, 92, 94, 96, 97, 99, 101, 102, 104, 106, 107, 108, 109, 110,
+ 111, 112, 113, 114, 114, 115, 116, 116, 116, 116, 116, 97, 94, 92, 89,
+ 87, 89, 90, 92, 93, 95, 96, 98, 100, 102, 103, 105, 106, 108, 109, 110,
+ 111, 112, 113, 114, 115, 115, 116, 117, 117, 117, 117, 117, 98, 96, 93,
+ 91, 89, 90, 91, 93, 94, 96, 97, 99, 101, 102, 104, 105, 107, 108, 109,
+ 111, 112, 113, 114, 115, 116, 116, 117, 118, 118, 118, 118, 118, 100,
+ 97, 95, 92, 90, 91, 93, 94, 95, 97, 98, 100, 102, 103, 105, 106, 108,
+ 109, 110, 111, 113, 114, 114, 115, 116, 117, 118, 118, 119, 119, 119,
+ 119, 101, 98, 96, 94, 92, 93, 94, 95, 96, 98, 99, 101, 102, 104, 105,
+ 107, 108, 110, 111, 112, 113, 114, 115, 116, 117, 118, 118, 119, 120,
+ 120, 120, 120, 102, 100, 98, 95, 93, 94, 95, 96, 98, 99, 100, 102, 103,
+ 105, 106, 108, 109, 110, 111, 113, 114, 115, 116, 117, 118, 118, 119,
+ 120, 121, 121, 121, 121, 104, 101, 99, 97, 95, 96, 97, 98, 99, 100, 101,
+ 103, 104, 106, 107, 108, 110, 111, 112, 113, 115, 116, 116, 117, 118,
+ 119, 120, 121, 121, 121, 121, 121, 104, 101, 99, 97, 95, 96, 97, 98, 99,
+ 100, 101, 103, 104, 106, 107, 108, 110, 111, 112, 113, 115, 116, 116,
+ 117, 118, 119, 120, 121, 121, 121, 121, 121, 104, 101, 99, 97, 95, 96,
+ 97, 98, 99, 100, 101, 103, 104, 106, 107, 108, 110, 111, 112, 113, 115,
+ 116, 116, 117, 118, 119, 120, 121, 121, 121, 121, 121, 104, 101, 99, 97,
+ 95, 96, 97, 98, 99, 100, 101, 103, 104, 106, 107, 108, 110, 111, 112,
+ 113, 115, 116, 116, 117, 118, 119, 120, 121, 121, 121, 121, 121 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 40, 54, 57, 65, 54, 60, 62, 67, 57, 62, 71, 75, 65, 67, 75, 81,
+ /* Size 8 */
+ 42, 36, 54, 56, 58, 62, 66, 70, 36, 47, 54, 51, 52, 55, 59, 64, 54, 54,
+ 60, 59, 58, 60, 63, 66, 56, 51, 59, 63, 64, 66, 68, 70, 58, 52, 58, 64,
+ 68, 70, 72, 74, 62, 55, 60, 66, 70, 74, 76, 78, 66, 59, 63, 68, 72, 76,
+ 79, 81, 70, 64, 66, 70, 74, 78, 81, 83,
+ /* Size 16 */
+ 41, 38, 35, 43, 53, 54, 55, 56, 57, 59, 61, 63, 65, 67, 69, 69, 38, 39,
+ 40, 46, 53, 53, 53, 53, 54, 56, 57, 59, 62, 64, 66, 66, 35, 40, 46, 49,
+ 54, 52, 51, 51, 51, 53, 54, 56, 58, 60, 63, 63, 43, 46, 49, 53, 56, 55,
+ 54, 54, 54, 56, 57, 58, 60, 62, 64, 64, 53, 53, 54, 56, 59, 58, 58, 58,
+ 58, 58, 59, 61, 62, 64, 65, 65, 54, 53, 52, 55, 58, 59, 60, 60, 60, 61,
+ 62, 63, 64, 66, 67, 67, 55, 53, 51, 54, 58, 60, 62, 63, 63, 64, 65, 66,
+ 67, 68, 69, 69, 56, 53, 51, 54, 58, 60, 63, 64, 65, 66, 67, 68, 69, 70,
+ 71, 71, 57, 54, 51, 54, 58, 60, 63, 65, 67, 68, 69, 70, 71, 72, 73, 73,
+ 59, 56, 53, 56, 58, 61, 64, 66, 68, 70, 71, 72, 73, 74, 75, 75, 61, 57,
+ 54, 57, 59, 62, 65, 67, 69, 71, 73, 74, 75, 76, 77, 77, 63, 59, 56, 58,
+ 61, 63, 66, 68, 70, 72, 74, 75, 76, 77, 78, 78, 65, 62, 58, 60, 62, 64,
+ 67, 69, 71, 73, 75, 76, 78, 79, 80, 80, 67, 64, 60, 62, 64, 66, 68, 70,
+ 72, 74, 76, 77, 79, 80, 81, 81, 69, 66, 63, 64, 65, 67, 69, 71, 73, 75,
+ 77, 78, 80, 81, 82, 82, 69, 66, 63, 64, 65, 67, 69, 71, 73, 75, 77, 78,
+ 80, 81, 82, 82,
+ /* Size 32 */
+ 41, 39, 38, 37, 35, 38, 42, 47, 53, 53, 54, 54, 55, 55, 56, 56, 57, 58,
+ 59, 59, 60, 61, 62, 63, 65, 65, 66, 68, 69, 69, 69, 69, 39, 39, 38, 38,
+ 37, 40, 44, 48, 53, 53, 53, 53, 53, 54, 54, 55, 55, 56, 57, 58, 59, 60,
+ 61, 62, 63, 64, 65, 66, 67, 67, 67, 67, 38, 38, 39, 39, 40, 42, 45, 49,
+ 53, 53, 53, 52, 52, 53, 53, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 64, 65, 65, 65, 65, 37, 38, 39, 41, 42, 45, 47, 50, 53, 53, 52, 52,
+ 51, 51, 52, 52, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 63, 64, 64,
+ 64, 64, 35, 37, 40, 42, 46, 47, 49, 51, 53, 52, 52, 51, 50, 50, 51, 51,
+ 51, 52, 53, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 62, 62, 62, 38, 40,
+ 42, 45, 47, 49, 51, 52, 54, 54, 53, 52, 52, 52, 52, 52, 52, 53, 54, 54,
+ 55, 56, 57, 58, 59, 60, 61, 62, 63, 63, 63, 63, 42, 44, 45, 47, 49, 51,
+ 52, 54, 56, 55, 55, 54, 53, 54, 54, 54, 54, 55, 55, 56, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 64, 64, 64, 47, 48, 49, 50, 51, 52, 54, 55, 57, 57,
+ 56, 56, 55, 55, 55, 55, 55, 56, 57, 57, 58, 58, 59, 60, 61, 62, 62, 63,
+ 64, 64, 64, 64, 53, 53, 53, 53, 53, 54, 56, 57, 58, 58, 58, 58, 57, 57,
+ 57, 57, 57, 58, 58, 58, 59, 60, 60, 61, 62, 62, 63, 64, 65, 65, 65, 65,
+ 53, 53, 53, 53, 52, 54, 55, 57, 58, 58, 58, 58, 58, 58, 58, 58, 59, 59,
+ 59, 60, 60, 61, 61, 62, 63, 64, 64, 65, 66, 66, 66, 66, 54, 53, 53, 52,
+ 52, 53, 55, 56, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 61, 61, 61, 62,
+ 63, 63, 64, 65, 65, 66, 67, 67, 67, 67, 54, 53, 52, 52, 51, 52, 54, 56,
+ 58, 58, 59, 60, 60, 61, 61, 61, 61, 62, 62, 63, 63, 63, 64, 65, 65, 66,
+ 66, 67, 68, 68, 68, 68, 55, 53, 52, 51, 50, 52, 53, 55, 57, 58, 59, 60,
+ 61, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 66, 66, 67, 68, 68, 69, 69,
+ 69, 69, 55, 54, 53, 51, 50, 52, 54, 55, 57, 58, 59, 61, 62, 62, 63, 63,
+ 64, 64, 65, 65, 65, 66, 66, 67, 67, 68, 69, 69, 70, 70, 70, 70, 56, 54,
+ 53, 52, 51, 52, 54, 55, 57, 58, 60, 61, 62, 63, 63, 64, 65, 65, 66, 66,
+ 67, 67, 68, 68, 69, 69, 70, 70, 71, 71, 71, 71, 56, 55, 53, 52, 51, 52,
+ 54, 55, 57, 58, 60, 61, 63, 63, 64, 65, 66, 66, 67, 67, 68, 68, 69, 69,
+ 70, 70, 71, 71, 72, 72, 72, 72, 57, 55, 54, 52, 51, 52, 54, 55, 57, 59,
+ 60, 61, 63, 64, 65, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71, 72, 72,
+ 73, 73, 73, 73, 58, 56, 55, 53, 52, 53, 55, 56, 58, 59, 60, 62, 63, 64,
+ 65, 66, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 73, 73, 74, 74, 74, 74,
+ 59, 57, 55, 54, 53, 54, 55, 57, 58, 59, 61, 62, 64, 65, 66, 67, 68, 68,
+ 69, 70, 70, 71, 71, 72, 73, 73, 73, 74, 74, 74, 74, 74, 59, 58, 56, 55,
+ 53, 54, 56, 57, 58, 60, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 71, 72,
+ 72, 73, 73, 74, 74, 75, 75, 75, 75, 75, 60, 59, 57, 56, 54, 55, 56, 58,
+ 59, 60, 61, 63, 64, 65, 67, 68, 69, 70, 70, 71, 72, 73, 73, 74, 74, 75,
+ 75, 76, 76, 76, 76, 76, 61, 60, 58, 56, 55, 56, 57, 58, 60, 61, 62, 63,
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77,
+ 77, 77, 62, 61, 59, 57, 56, 57, 58, 59, 60, 61, 63, 64, 65, 66, 68, 69,
+ 70, 71, 71, 72, 73, 74, 74, 75, 76, 76, 77, 77, 78, 78, 78, 78, 63, 62,
+ 60, 58, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73,
+ 74, 74, 75, 76, 76, 77, 77, 78, 78, 78, 78, 78, 65, 63, 61, 59, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 73, 74, 75, 76, 76,
+ 77, 77, 78, 78, 79, 79, 79, 79, 65, 64, 62, 60, 59, 60, 61, 62, 62, 64,
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 79,
+ 79, 79, 79, 79, 66, 65, 63, 61, 60, 61, 62, 62, 63, 64, 65, 66, 68, 69,
+ 70, 71, 72, 73, 73, 74, 75, 76, 77, 77, 78, 78, 79, 79, 80, 80, 80, 80,
+ 68, 66, 64, 63, 61, 62, 63, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
+ 74, 75, 76, 76, 77, 78, 78, 79, 79, 80, 81, 81, 81, 81, 69, 67, 65, 64,
+ 62, 63, 64, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 74, 75, 76, 77,
+ 78, 78, 79, 79, 80, 81, 81, 81, 81, 81, 69, 67, 65, 64, 62, 63, 64, 64,
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 74, 75, 76, 77, 78, 78, 79, 79,
+ 80, 81, 81, 81, 81, 81, 69, 67, 65, 64, 62, 63, 64, 64, 65, 66, 67, 68,
+ 69, 70, 71, 72, 73, 74, 74, 75, 76, 77, 78, 78, 79, 79, 80, 81, 81, 81,
+ 81, 81, 69, 67, 65, 64, 62, 63, 64, 64, 65, 66, 67, 68, 69, 70, 71, 72,
+ 73, 74, 74, 75, 76, 77, 78, 78, 79, 79, 80, 81, 81, 81, 81, 81 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 67, 88, 105, 67, 84, 97, 108, 88, 97, 109, 116, 105, 108, 116, 120,
+ /* Size 8 */
+ 64, 54, 57, 66, 77, 86, 94, 99, 54, 59, 57, 63, 72, 81, 88, 95, 57, 57,
+ 69, 75, 80, 87, 92, 97, 66, 63, 75, 83, 89, 93, 97, 101, 77, 72, 80, 89,
+ 94, 98, 101, 104, 86, 81, 87, 93, 98, 102, 104, 106, 94, 88, 92, 97,
+ 101, 104, 106, 108, 99, 95, 97, 101, 104, 106, 108, 109,
+ /* Size 16 */
+ 64, 59, 54, 55, 57, 61, 66, 71, 77, 81, 86, 90, 94, 96, 99, 99, 59, 57,
+ 56, 57, 57, 61, 65, 69, 74, 79, 83, 87, 91, 94, 97, 97, 54, 56, 59, 58,
+ 57, 60, 63, 67, 72, 76, 81, 84, 88, 91, 95, 95, 55, 57, 58, 60, 63, 65,
+ 68, 72, 76, 79, 83, 87, 90, 93, 96, 96, 57, 57, 57, 63, 69, 72, 75, 78,
+ 80, 83, 87, 89, 92, 95, 97, 97, 61, 61, 60, 65, 72, 75, 79, 82, 84, 87,
+ 90, 92, 95, 97, 99, 99, 66, 65, 63, 68, 75, 79, 83, 86, 89, 91, 93, 95,
+ 97, 99, 101, 101, 71, 69, 67, 72, 78, 82, 86, 89, 91, 93, 95, 97, 99,
+ 100, 102, 102, 77, 74, 72, 76, 80, 84, 89, 91, 94, 96, 98, 100, 101,
+ 102, 104, 104, 81, 79, 76, 79, 83, 87, 91, 93, 96, 98, 100, 101, 103,
+ 104, 105, 105, 86, 83, 81, 83, 87, 90, 93, 95, 98, 100, 102, 103, 104,
+ 105, 106, 106, 90, 87, 84, 87, 89, 92, 95, 97, 100, 101, 103, 104, 105,
+ 106, 107, 107, 94, 91, 88, 90, 92, 95, 97, 99, 101, 103, 104, 105, 106,
+ 107, 108, 108, 96, 94, 91, 93, 95, 97, 99, 100, 102, 104, 105, 106, 107,
+ 108, 109, 109, 99, 97, 95, 96, 97, 99, 101, 102, 104, 105, 106, 107,
+ 108, 109, 109, 109, 99, 97, 95, 96, 97, 99, 101, 102, 104, 105, 106,
+ 107, 108, 109, 109, 109,
+ /* Size 32 */
+ 64, 61, 59, 56, 54, 55, 55, 56, 57, 59, 61, 64, 66, 69, 71, 74, 77, 79,
+ 81, 84, 86, 88, 90, 92, 94, 95, 96, 98, 99, 99, 99, 99, 61, 60, 58, 57,
+ 55, 56, 56, 56, 57, 59, 61, 63, 66, 68, 70, 73, 76, 78, 80, 82, 85, 87,
+ 88, 90, 92, 94, 95, 97, 98, 98, 98, 98, 59, 58, 57, 57, 56, 56, 57, 57,
+ 57, 59, 61, 63, 65, 67, 69, 72, 74, 76, 79, 81, 83, 85, 87, 89, 91, 92,
+ 94, 95, 97, 97, 97, 97, 56, 57, 57, 57, 58, 57, 57, 57, 57, 59, 60, 62,
+ 64, 66, 68, 70, 73, 75, 77, 79, 82, 84, 86, 88, 90, 91, 93, 94, 96, 96,
+ 96, 96, 54, 55, 56, 58, 59, 58, 58, 58, 57, 59, 60, 61, 63, 65, 67, 69,
+ 72, 74, 76, 78, 81, 82, 84, 86, 88, 90, 91, 93, 95, 95, 95, 95, 55, 56,
+ 56, 57, 58, 59, 59, 59, 60, 61, 63, 64, 65, 67, 69, 71, 74, 76, 78, 80,
+ 82, 84, 86, 87, 89, 91, 92, 94, 95, 95, 95, 95, 55, 56, 57, 57, 58, 59,
+ 60, 61, 63, 64, 65, 67, 68, 70, 72, 74, 76, 78, 79, 81, 83, 85, 87, 89,
+ 90, 92, 93, 95, 96, 96, 96, 96, 56, 56, 57, 57, 58, 59, 61, 64, 66, 67,
+ 68, 70, 71, 73, 75, 76, 78, 80, 81, 83, 85, 86, 88, 90, 91, 93, 94, 95,
+ 97, 97, 97, 97, 57, 57, 57, 57, 57, 60, 63, 66, 69, 71, 72, 73, 75, 76,
+ 78, 79, 80, 82, 83, 85, 87, 88, 89, 91, 92, 94, 95, 96, 97, 97, 97, 97,
+ 59, 59, 59, 59, 59, 61, 64, 67, 71, 72, 74, 75, 77, 78, 79, 81, 82, 84,
+ 85, 87, 88, 89, 91, 92, 93, 95, 96, 97, 98, 98, 98, 98, 61, 61, 61, 60,
+ 60, 63, 65, 68, 72, 74, 75, 77, 79, 80, 82, 83, 84, 86, 87, 88, 90, 91,
+ 92, 93, 95, 96, 97, 98, 99, 99, 99, 99, 64, 63, 63, 62, 61, 64, 67, 70,
+ 73, 75, 77, 79, 81, 82, 84, 85, 86, 88, 89, 90, 91, 92, 93, 95, 96, 97,
+ 98, 99, 100, 100, 100, 100, 66, 66, 65, 64, 63, 65, 68, 71, 75, 77, 79,
+ 81, 83, 85, 86, 87, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
+ 101, 101, 101, 101, 69, 68, 67, 66, 65, 67, 70, 73, 76, 78, 80, 82, 85,
+ 86, 87, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 100, 101, 101,
+ 101, 101, 71, 70, 69, 68, 67, 69, 72, 75, 78, 79, 82, 84, 86, 87, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 100, 101, 102, 102, 102,
+ 102, 74, 73, 72, 70, 69, 71, 74, 76, 79, 81, 83, 85, 87, 89, 90, 91, 93,
+ 94, 95, 96, 97, 98, 98, 99, 100, 101, 101, 102, 103, 103, 103, 103, 77,
+ 76, 74, 73, 72, 74, 76, 78, 80, 82, 84, 86, 89, 90, 91, 93, 94, 95, 96,
+ 97, 98, 99, 100, 100, 101, 102, 102, 103, 104, 104, 104, 104, 79, 78,
+ 76, 75, 74, 76, 78, 80, 82, 84, 86, 88, 90, 91, 92, 94, 95, 96, 97, 98,
+ 99, 100, 100, 101, 102, 102, 103, 104, 104, 104, 104, 104, 81, 80, 79,
+ 77, 76, 78, 79, 81, 83, 85, 87, 89, 91, 92, 93, 95, 96, 97, 98, 99, 100,
+ 101, 101, 102, 103, 103, 104, 104, 105, 105, 105, 105, 84, 82, 81, 79,
+ 78, 80, 81, 83, 85, 87, 88, 90, 92, 93, 94, 96, 97, 98, 99, 100, 101,
+ 101, 102, 103, 103, 104, 104, 105, 105, 105, 105, 105, 86, 85, 83, 82,
+ 81, 82, 83, 85, 87, 88, 90, 91, 93, 94, 95, 97, 98, 99, 100, 101, 102,
+ 102, 103, 103, 104, 105, 105, 106, 106, 106, 106, 106, 88, 87, 85, 84,
+ 82, 84, 85, 86, 88, 89, 91, 92, 94, 95, 96, 98, 99, 100, 101, 101, 102,
+ 103, 103, 104, 105, 105, 106, 106, 107, 107, 107, 107, 90, 88, 87, 86,
+ 84, 86, 87, 88, 89, 91, 92, 93, 95, 96, 97, 98, 100, 100, 101, 102, 103,
+ 103, 104, 105, 105, 106, 106, 107, 107, 107, 107, 107, 92, 90, 89, 88,
+ 86, 87, 89, 90, 91, 92, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 103,
+ 104, 105, 105, 106, 106, 107, 107, 107, 107, 107, 107, 94, 92, 91, 90,
+ 88, 89, 90, 91, 92, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 103,
+ 104, 105, 105, 106, 106, 107, 107, 108, 108, 108, 108, 108, 95, 94, 92,
+ 91, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 102, 103,
+ 104, 105, 105, 106, 106, 107, 107, 107, 108, 108, 108, 108, 108, 96, 95,
+ 94, 93, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 100, 101, 102, 103,
+ 104, 104, 105, 106, 106, 107, 107, 107, 108, 108, 109, 109, 109, 109,
+ 98, 97, 95, 94, 93, 94, 95, 95, 96, 97, 98, 99, 100, 100, 101, 102, 103,
+ 104, 104, 105, 106, 106, 107, 107, 108, 108, 108, 109, 109, 109, 109,
+ 109, 99, 98, 97, 96, 95, 95, 96, 97, 97, 98, 99, 100, 101, 101, 102,
+ 103, 104, 104, 105, 105, 106, 107, 107, 107, 108, 108, 109, 109, 109,
+ 109, 109, 109, 99, 98, 97, 96, 95, 95, 96, 97, 97, 98, 99, 100, 101,
+ 101, 102, 103, 104, 104, 105, 105, 106, 107, 107, 107, 108, 108, 109,
+ 109, 109, 109, 109, 109, 99, 98, 97, 96, 95, 95, 96, 97, 97, 98, 99,
+ 100, 101, 101, 102, 103, 104, 104, 105, 105, 106, 107, 107, 107, 108,
+ 108, 109, 109, 109, 109, 109, 109, 99, 98, 97, 96, 95, 95, 96, 97, 97,
+ 98, 99, 100, 101, 101, 102, 103, 104, 104, 105, 105, 106, 107, 107, 107,
+ 108, 108, 109, 109, 109, 109, 109, 109 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 41, 43, 57, 69, 43, 54, 63, 71, 57, 63, 72, 77, 69, 71, 77, 80,
+ /* Size 8 */
+ 46, 38, 40, 47, 55, 63, 68, 73, 38, 42, 41, 45, 51, 58, 64, 69, 40, 41,
+ 50, 54, 58, 63, 67, 71, 47, 45, 54, 60, 64, 68, 71, 74, 55, 51, 58, 64,
+ 69, 72, 74, 76, 63, 58, 63, 68, 72, 75, 77, 78, 68, 64, 67, 71, 74, 77,
+ 78, 80, 73, 69, 71, 74, 76, 78, 80, 81,
+ /* Size 16 */
+ 45, 41, 38, 38, 39, 43, 47, 50, 54, 58, 61, 64, 67, 69, 71, 71, 41, 40,
+ 39, 39, 40, 42, 45, 49, 52, 56, 59, 62, 65, 67, 70, 70, 38, 39, 41, 40,
+ 40, 42, 44, 47, 50, 54, 57, 60, 63, 65, 68, 68, 38, 39, 40, 42, 44, 46,
+ 48, 51, 54, 56, 59, 62, 65, 67, 69, 69, 39, 40, 40, 44, 49, 51, 53, 55,
+ 57, 59, 62, 64, 66, 68, 70, 70, 43, 42, 42, 46, 51, 53, 56, 58, 60, 62,
+ 64, 66, 68, 69, 71, 71, 47, 45, 44, 48, 53, 56, 59, 61, 63, 65, 67, 68,
+ 70, 71, 72, 72, 50, 49, 47, 51, 55, 58, 61, 63, 65, 67, 69, 70, 71, 72,
+ 74, 74, 54, 52, 50, 54, 57, 60, 63, 65, 68, 69, 71, 72, 73, 74, 75, 75,
+ 58, 56, 54, 56, 59, 62, 65, 67, 69, 70, 72, 73, 74, 75, 76, 76, 61, 59,
+ 57, 59, 62, 64, 67, 69, 71, 72, 73, 74, 75, 76, 77, 77, 64, 62, 60, 62,
+ 64, 66, 68, 70, 72, 73, 74, 75, 76, 77, 78, 78, 67, 65, 63, 65, 66, 68,
+ 70, 71, 73, 74, 75, 76, 77, 78, 78, 78, 69, 67, 65, 67, 68, 69, 71, 72,
+ 74, 75, 76, 77, 78, 78, 79, 79, 71, 70, 68, 69, 70, 71, 72, 74, 75, 76,
+ 77, 78, 78, 79, 79, 79, 71, 70, 68, 69, 70, 71, 72, 74, 75, 76, 77, 78,
+ 78, 79, 79, 79,
+ /* Size 32 */
+ 44, 42, 40, 39, 37, 38, 38, 39, 39, 41, 42, 44, 46, 48, 50, 52, 54, 56,
+ 57, 59, 61, 62, 64, 65, 66, 68, 69, 70, 71, 71, 71, 71, 42, 41, 40, 39,
+ 38, 38, 39, 39, 39, 41, 42, 44, 45, 47, 49, 51, 53, 54, 56, 58, 60, 61,
+ 62, 64, 65, 67, 68, 69, 70, 70, 70, 70, 40, 40, 40, 39, 39, 39, 39, 39,
+ 39, 41, 42, 43, 45, 46, 48, 50, 52, 53, 55, 57, 59, 60, 61, 63, 64, 66,
+ 67, 68, 69, 69, 69, 69, 39, 39, 39, 39, 40, 40, 40, 39, 39, 40, 42, 43,
+ 44, 46, 47, 49, 51, 52, 54, 56, 58, 59, 60, 62, 63, 65, 66, 67, 68, 68,
+ 68, 68, 37, 38, 39, 40, 41, 40, 40, 40, 39, 40, 41, 42, 44, 45, 47, 48,
+ 50, 51, 53, 55, 57, 58, 59, 61, 63, 64, 65, 66, 67, 67, 67, 67, 38, 38,
+ 39, 40, 40, 41, 41, 41, 41, 42, 43, 44, 45, 47, 48, 50, 51, 53, 54, 56,
+ 58, 59, 60, 62, 63, 64, 65, 67, 68, 68, 68, 68, 38, 39, 39, 40, 40, 41,
+ 42, 43, 43, 44, 45, 46, 48, 49, 50, 52, 53, 54, 56, 57, 59, 60, 61, 63,
+ 64, 65, 66, 67, 68, 68, 68, 68, 39, 39, 39, 39, 40, 41, 43, 44, 46, 47,
+ 48, 49, 50, 51, 52, 53, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 68,
+ 69, 69, 69, 69, 39, 39, 39, 39, 39, 41, 43, 46, 48, 49, 50, 51, 52, 53,
+ 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 69, 69, 69,
+ 41, 41, 41, 40, 40, 42, 44, 47, 49, 50, 51, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 70, 70, 70, 42, 42, 42, 42,
+ 41, 43, 45, 48, 50, 51, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
+ 65, 66, 67, 68, 69, 70, 71, 71, 71, 71, 44, 44, 43, 43, 42, 44, 46, 49,
+ 51, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 66, 67, 68, 69,
+ 70, 70, 71, 71, 71, 71, 46, 45, 45, 44, 44, 45, 48, 50, 52, 54, 55, 57,
+ 59, 60, 61, 62, 63, 63, 64, 65, 66, 67, 67, 68, 69, 70, 70, 71, 72, 72,
+ 72, 72, 48, 47, 46, 46, 45, 47, 49, 51, 53, 55, 56, 58, 60, 61, 62, 63,
+ 64, 64, 65, 66, 67, 68, 68, 69, 70, 70, 71, 72, 72, 72, 72, 72, 50, 49,
+ 48, 47, 47, 48, 50, 52, 54, 56, 57, 59, 61, 62, 63, 64, 65, 66, 66, 67,
+ 68, 69, 69, 70, 71, 71, 72, 72, 73, 73, 73, 73, 52, 51, 50, 49, 48, 50,
+ 52, 53, 55, 57, 58, 60, 62, 63, 64, 65, 66, 67, 67, 68, 69, 69, 70, 71,
+ 71, 72, 72, 73, 74, 74, 74, 74, 54, 53, 52, 51, 50, 51, 53, 55, 56, 58,
+ 59, 61, 63, 64, 65, 66, 67, 68, 68, 69, 70, 70, 71, 72, 72, 73, 73, 74,
+ 74, 74, 74, 74, 56, 54, 53, 52, 51, 53, 54, 56, 58, 59, 60, 62, 63, 64,
+ 66, 67, 68, 68, 69, 70, 71, 71, 72, 72, 73, 73, 74, 74, 75, 75, 75, 75,
+ 57, 56, 55, 54, 53, 54, 56, 57, 59, 60, 61, 63, 64, 65, 66, 67, 68, 69,
+ 70, 71, 71, 72, 72, 73, 73, 74, 74, 75, 75, 75, 75, 75, 59, 58, 57, 56,
+ 55, 56, 57, 59, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 71, 72, 72,
+ 73, 73, 74, 74, 75, 75, 76, 76, 76, 76, 61, 60, 59, 58, 57, 58, 59, 60,
+ 61, 62, 63, 65, 66, 67, 68, 69, 70, 71, 71, 72, 73, 73, 74, 74, 75, 75,
+ 75, 76, 76, 76, 76, 76, 62, 61, 60, 59, 58, 59, 60, 61, 62, 63, 64, 66,
+ 67, 68, 69, 69, 70, 71, 72, 72, 73, 74, 74, 75, 75, 75, 76, 76, 76, 76,
+ 76, 76, 64, 62, 61, 60, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
+ 71, 72, 72, 73, 74, 74, 75, 75, 75, 76, 76, 76, 77, 77, 77, 77, 65, 64,
+ 63, 62, 61, 62, 63, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 72, 73, 73,
+ 74, 75, 75, 75, 76, 76, 77, 77, 77, 77, 77, 77, 66, 65, 64, 63, 63, 63,
+ 64, 65, 65, 66, 67, 68, 69, 70, 71, 71, 72, 73, 73, 74, 75, 75, 75, 76,
+ 76, 77, 77, 77, 78, 78, 78, 78, 68, 67, 66, 65, 64, 64, 65, 66, 66, 67,
+ 68, 69, 70, 70, 71, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 77, 78,
+ 78, 78, 78, 78, 69, 68, 67, 66, 65, 65, 66, 67, 67, 68, 69, 70, 70, 71,
+ 72, 72, 73, 74, 74, 75, 75, 76, 76, 77, 77, 77, 77, 78, 78, 78, 78, 78,
+ 70, 69, 68, 67, 66, 67, 67, 68, 68, 69, 70, 70, 71, 72, 72, 73, 74, 74,
+ 75, 75, 76, 76, 76, 77, 77, 78, 78, 78, 78, 78, 78, 78, 71, 70, 69, 68,
+ 67, 68, 68, 69, 69, 70, 71, 71, 72, 72, 73, 74, 74, 75, 75, 76, 76, 76,
+ 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, 71, 70, 69, 68, 67, 68, 68, 69,
+ 69, 70, 71, 71, 72, 72, 73, 74, 74, 75, 75, 76, 76, 76, 77, 77, 78, 78,
+ 78, 78, 79, 79, 79, 79, 71, 70, 69, 68, 67, 68, 68, 69, 69, 70, 71, 71,
+ 72, 72, 73, 74, 74, 75, 75, 76, 76, 76, 77, 77, 78, 78, 78, 78, 79, 79,
+ 79, 79, 71, 70, 69, 68, 67, 68, 68, 69, 69, 70, 71, 71, 72, 72, 73, 74,
+ 74, 75, 75, 76, 76, 76, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 83, 86, 96, 83, 90, 93, 98, 86, 93, 103, 108, 96, 98, 108, 114,
+ /* Size 8 */
+ 64, 56, 79, 81, 84, 88, 93, 97, 56, 70, 80, 76, 77, 81, 85, 90, 79, 80,
+ 86, 84, 84, 86, 90, 93, 81, 76, 84, 89, 91, 93, 95, 98, 84, 77, 84, 91,
+ 95, 98, 100, 102, 88, 81, 86, 93, 98, 101, 104, 106, 93, 85, 90, 95,
+ 100, 104, 106, 108, 97, 90, 93, 98, 102, 106, 108, 111,
+ /* Size 16 */
+ 64, 60, 56, 66, 79, 80, 81, 82, 84, 86, 88, 90, 93, 95, 97, 97, 60, 61,
+ 62, 70, 79, 79, 78, 79, 80, 82, 84, 86, 89, 91, 94, 94, 56, 62, 70, 74,
+ 80, 78, 76, 76, 77, 79, 81, 83, 85, 88, 90, 90, 66, 70, 74, 78, 83, 81,
+ 80, 80, 80, 82, 83, 85, 87, 89, 92, 92, 79, 79, 80, 83, 86, 85, 84, 84,
+ 84, 85, 86, 88, 90, 91, 93, 93, 80, 79, 78, 81, 85, 86, 87, 87, 88, 88,
+ 89, 91, 92, 94, 95, 95, 81, 78, 76, 80, 84, 87, 89, 90, 91, 92, 93, 94,
+ 95, 96, 98, 98, 82, 79, 76, 80, 84, 87, 90, 92, 93, 94, 95, 96, 97, 98,
+ 100, 100, 84, 80, 77, 80, 84, 88, 91, 93, 95, 96, 98, 99, 100, 101, 102,
+ 102, 86, 82, 79, 82, 85, 88, 92, 94, 96, 98, 99, 101, 102, 103, 104,
+ 104, 88, 84, 81, 83, 86, 89, 93, 95, 98, 99, 101, 102, 104, 105, 106,
+ 106, 90, 86, 83, 85, 88, 91, 94, 96, 99, 101, 102, 104, 105, 106, 107,
+ 107, 93, 89, 85, 87, 90, 92, 95, 97, 100, 102, 104, 105, 106, 107, 108,
+ 108, 95, 91, 88, 89, 91, 94, 96, 98, 101, 103, 105, 106, 107, 108, 109,
+ 109, 97, 94, 90, 92, 93, 95, 98, 100, 102, 104, 106, 107, 108, 109, 111,
+ 111, 97, 94, 90, 92, 93, 95, 98, 100, 102, 104, 106, 107, 108, 109, 111,
+ 111,
+ /* Size 32 */
+ 64, 62, 60, 58, 56, 60, 66, 72, 79, 79, 80, 81, 81, 82, 82, 83, 84, 85,
+ 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 97, 97, 97, 62, 61, 60, 60,
+ 59, 63, 68, 73, 79, 79, 79, 80, 80, 80, 81, 81, 82, 83, 84, 85, 86, 87,
+ 88, 90, 91, 92, 93, 94, 95, 95, 95, 95, 60, 60, 61, 62, 62, 66, 70, 74,
+ 79, 79, 79, 79, 78, 79, 79, 80, 80, 81, 82, 83, 84, 85, 86, 88, 89, 90,
+ 91, 92, 94, 94, 94, 94, 58, 60, 62, 64, 66, 69, 72, 75, 79, 79, 78, 78,
+ 77, 77, 78, 78, 78, 79, 80, 81, 82, 83, 85, 86, 87, 88, 89, 91, 92, 92,
+ 92, 92, 56, 59, 62, 66, 70, 72, 74, 77, 80, 79, 78, 77, 76, 76, 76, 77,
+ 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 88, 89, 90, 90, 90, 90, 60, 63,
+ 66, 69, 72, 74, 76, 79, 81, 80, 79, 78, 78, 78, 78, 78, 79, 79, 80, 81,
+ 82, 83, 84, 85, 86, 87, 89, 90, 91, 91, 91, 91, 66, 68, 70, 72, 74, 76,
+ 78, 80, 83, 82, 81, 80, 80, 80, 80, 80, 80, 81, 82, 83, 83, 84, 85, 86,
+ 87, 88, 89, 91, 92, 92, 92, 92, 72, 73, 74, 75, 77, 79, 80, 82, 84, 84,
+ 83, 83, 82, 82, 82, 82, 82, 83, 83, 84, 85, 86, 87, 87, 88, 89, 90, 91,
+ 92, 92, 92, 92, 79, 79, 79, 79, 80, 81, 83, 84, 86, 85, 85, 85, 84, 84,
+ 84, 84, 84, 85, 85, 86, 86, 87, 88, 89, 90, 90, 91, 92, 93, 93, 93, 93,
+ 79, 79, 79, 79, 79, 80, 82, 84, 85, 85, 85, 85, 86, 86, 86, 86, 86, 86,
+ 87, 87, 88, 88, 89, 90, 91, 92, 93, 93, 94, 94, 94, 94, 80, 79, 79, 78,
+ 78, 79, 81, 83, 85, 85, 86, 86, 87, 87, 87, 87, 88, 88, 88, 89, 89, 90,
+ 91, 91, 92, 93, 94, 95, 95, 95, 95, 95, 81, 80, 79, 78, 77, 78, 80, 83,
+ 85, 85, 86, 87, 88, 88, 89, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94,
+ 95, 96, 96, 96, 96, 96, 81, 80, 78, 77, 76, 78, 80, 82, 84, 86, 87, 88,
+ 89, 90, 90, 91, 91, 91, 92, 92, 93, 93, 94, 94, 95, 96, 96, 97, 98, 98,
+ 98, 98, 82, 80, 79, 77, 76, 78, 80, 82, 84, 86, 87, 88, 90, 90, 91, 91,
+ 92, 92, 93, 93, 94, 94, 95, 95, 96, 97, 97, 98, 99, 99, 99, 99, 82, 81,
+ 79, 78, 76, 78, 80, 82, 84, 86, 87, 89, 90, 91, 92, 92, 93, 94, 94, 95,
+ 95, 96, 96, 97, 97, 98, 98, 99, 100, 100, 100, 100, 83, 81, 80, 78, 77,
+ 78, 80, 82, 84, 86, 87, 89, 91, 91, 92, 93, 94, 95, 95, 96, 96, 97, 97,
+ 98, 99, 99, 100, 100, 101, 101, 101, 101, 84, 82, 80, 78, 77, 79, 80,
+ 82, 84, 86, 88, 89, 91, 92, 93, 94, 95, 96, 96, 97, 98, 98, 99, 99, 100,
+ 100, 101, 101, 102, 102, 102, 102, 85, 83, 81, 79, 78, 79, 81, 83, 85,
+ 86, 88, 90, 91, 92, 94, 95, 96, 96, 97, 98, 99, 99, 100, 100, 101, 101,
+ 102, 102, 103, 103, 103, 103, 86, 84, 82, 80, 79, 80, 82, 83, 85, 87,
+ 88, 90, 92, 93, 94, 95, 96, 97, 98, 99, 99, 100, 101, 101, 102, 102,
+ 103, 103, 104, 104, 104, 104, 87, 85, 83, 81, 80, 81, 83, 84, 86, 87,
+ 89, 91, 92, 93, 95, 96, 97, 98, 99, 99, 100, 101, 101, 102, 103, 103,
+ 104, 104, 105, 105, 105, 105, 88, 86, 84, 82, 81, 82, 83, 85, 86, 88,
+ 89, 91, 93, 94, 95, 96, 98, 99, 99, 100, 101, 102, 102, 103, 104, 104,
+ 105, 105, 106, 106, 106, 106, 89, 87, 85, 83, 82, 83, 84, 86, 87, 88,
+ 90, 92, 93, 94, 96, 97, 98, 99, 100, 101, 102, 102, 103, 104, 104, 105,
+ 105, 106, 106, 106, 106, 106, 90, 88, 86, 85, 83, 84, 85, 87, 88, 89,
+ 91, 92, 94, 95, 96, 97, 99, 100, 101, 101, 102, 103, 104, 104, 105, 105,
+ 106, 106, 107, 107, 107, 107, 92, 90, 88, 86, 84, 85, 86, 87, 89, 90,
+ 91, 93, 94, 95, 97, 98, 99, 100, 101, 102, 103, 104, 104, 105, 106, 106,
+ 107, 107, 108, 108, 108, 108, 93, 91, 89, 87, 85, 86, 87, 88, 90, 91,
+ 92, 93, 95, 96, 97, 99, 100, 101, 102, 103, 104, 104, 105, 106, 106,
+ 107, 107, 108, 108, 108, 108, 108, 94, 92, 90, 88, 86, 87, 88, 89, 90,
+ 92, 93, 94, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 105, 106, 107,
+ 107, 108, 108, 109, 109, 109, 109, 95, 93, 91, 89, 88, 89, 89, 90, 91,
+ 93, 94, 95, 96, 97, 98, 100, 101, 102, 103, 104, 105, 105, 106, 107,
+ 107, 108, 108, 109, 109, 109, 109, 109, 96, 94, 92, 91, 89, 90, 91, 91,
+ 92, 93, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 106, 107,
+ 108, 108, 109, 109, 110, 110, 110, 110, 97, 95, 94, 92, 90, 91, 92, 92,
+ 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 106, 106, 107,
+ 108, 108, 109, 109, 110, 111, 111, 111, 111, 97, 95, 94, 92, 90, 91, 92,
+ 92, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 106, 106, 107,
+ 108, 108, 109, 109, 110, 111, 111, 111, 111, 97, 95, 94, 92, 90, 91, 92,
+ 92, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 106, 106, 107,
+ 108, 108, 109, 109, 110, 111, 111, 111, 111, 97, 95, 94, 92, 90, 91, 92,
+ 92, 93, 94, 95, 96, 98, 99, 100, 101, 102, 103, 104, 105, 106, 106, 107,
+ 108, 108, 109, 109, 110, 111, 111, 111, 111 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 42, 56, 58, 65, 56, 61, 63, 67, 58, 63, 70, 74, 65, 67, 74, 78,
+ /* Size 8 */
+ 45, 39, 55, 57, 59, 62, 66, 69, 39, 49, 56, 53, 54, 57, 60, 64, 55, 56,
+ 61, 60, 60, 61, 63, 66, 57, 53, 60, 63, 65, 66, 68, 70, 59, 54, 60, 65,
+ 68, 70, 71, 73, 62, 57, 61, 66, 70, 72, 74, 76, 66, 60, 63, 68, 71, 74,
+ 76, 78, 69, 64, 66, 70, 73, 76, 78, 80,
+ /* Size 16 */
+ 44, 41, 38, 45, 55, 56, 56, 57, 58, 60, 62, 63, 65, 67, 69, 69, 41, 42,
+ 43, 48, 55, 55, 54, 55, 56, 57, 59, 60, 62, 64, 66, 66, 38, 43, 48, 52,
+ 55, 54, 52, 53, 53, 55, 56, 58, 59, 61, 63, 63, 45, 48, 52, 54, 57, 56,
+ 55, 56, 56, 57, 58, 60, 61, 63, 64, 64, 55, 55, 55, 57, 60, 59, 59, 59,
+ 59, 59, 60, 61, 63, 64, 66, 66, 56, 55, 54, 56, 59, 60, 61, 61, 61, 62,
+ 63, 64, 65, 66, 67, 67, 56, 54, 52, 55, 59, 61, 62, 63, 64, 64, 65, 66,
+ 67, 68, 69, 69, 57, 55, 53, 56, 59, 61, 63, 64, 65, 66, 67, 68, 69, 69,
+ 70, 70, 58, 56, 53, 56, 59, 61, 64, 65, 67, 68, 69, 70, 70, 71, 72, 72,
+ 60, 57, 55, 57, 59, 62, 64, 66, 68, 69, 70, 71, 72, 73, 73, 73, 62, 59,
+ 56, 58, 60, 63, 65, 67, 69, 70, 71, 72, 73, 74, 75, 75, 63, 60, 58, 60,
+ 61, 64, 66, 68, 70, 71, 72, 73, 74, 75, 76, 76, 65, 62, 59, 61, 63, 65,
+ 67, 69, 70, 72, 73, 74, 75, 76, 77, 77, 67, 64, 61, 63, 64, 66, 68, 69,
+ 71, 73, 74, 75, 76, 77, 78, 78, 69, 66, 63, 64, 66, 67, 69, 70, 72, 73,
+ 75, 76, 77, 78, 79, 79, 69, 66, 63, 64, 66, 67, 69, 70, 72, 73, 75, 76,
+ 77, 78, 79, 79,
+ /* Size 32 */
+ 44, 42, 41, 39, 38, 41, 45, 49, 54, 55, 55, 56, 56, 57, 57, 58, 58, 59,
+ 60, 60, 61, 62, 63, 64, 65, 66, 66, 67, 68, 68, 68, 68, 42, 42, 41, 41,
+ 40, 43, 46, 50, 55, 55, 55, 55, 55, 55, 56, 56, 57, 57, 58, 59, 60, 61,
+ 61, 62, 63, 64, 65, 66, 67, 67, 67, 67, 41, 41, 42, 42, 42, 45, 48, 51,
+ 55, 55, 54, 54, 54, 54, 55, 55, 55, 56, 57, 58, 58, 59, 60, 61, 62, 63,
+ 63, 64, 65, 65, 65, 65, 39, 41, 42, 43, 45, 47, 49, 52, 55, 54, 54, 53,
+ 53, 53, 54, 54, 54, 55, 55, 56, 57, 58, 59, 59, 60, 61, 62, 63, 64, 64,
+ 64, 64, 38, 40, 42, 45, 48, 49, 51, 53, 55, 54, 53, 53, 52, 52, 53, 53,
+ 53, 54, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 63, 63, 63, 41, 43,
+ 45, 47, 49, 51, 53, 54, 56, 55, 55, 54, 54, 54, 54, 54, 54, 55, 55, 56,
+ 57, 57, 58, 59, 60, 61, 62, 62, 63, 63, 63, 63, 45, 46, 48, 49, 51, 53,
+ 54, 56, 57, 57, 56, 56, 55, 55, 55, 55, 56, 56, 57, 57, 58, 58, 59, 60,
+ 61, 61, 62, 63, 64, 64, 64, 64, 49, 50, 51, 52, 53, 54, 56, 57, 58, 58,
+ 57, 57, 57, 57, 57, 57, 57, 57, 58, 58, 59, 59, 60, 61, 61, 62, 63, 64,
+ 64, 64, 64, 64, 54, 55, 55, 55, 55, 56, 57, 58, 59, 59, 59, 59, 58, 58,
+ 58, 58, 58, 59, 59, 59, 60, 60, 61, 62, 62, 63, 64, 64, 65, 65, 65, 65,
+ 55, 55, 55, 54, 54, 55, 57, 58, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60,
+ 60, 61, 61, 62, 62, 63, 63, 64, 65, 65, 66, 66, 66, 66, 55, 55, 54, 54,
+ 53, 55, 56, 57, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 63,
+ 63, 64, 64, 65, 65, 66, 67, 67, 67, 67, 56, 55, 54, 53, 53, 54, 56, 57,
+ 59, 59, 60, 60, 61, 61, 62, 62, 62, 62, 63, 63, 63, 64, 64, 65, 65, 66,
+ 66, 67, 67, 67, 67, 67, 56, 55, 54, 53, 52, 54, 55, 57, 58, 59, 60, 61,
+ 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 66, 66, 67, 67, 68, 68, 68,
+ 68, 68, 57, 55, 54, 53, 52, 54, 55, 57, 58, 59, 60, 61, 62, 63, 63, 64,
+ 64, 64, 65, 65, 66, 66, 66, 67, 67, 68, 68, 69, 69, 69, 69, 69, 57, 56,
+ 55, 54, 53, 54, 55, 57, 58, 59, 60, 62, 63, 63, 64, 64, 65, 65, 66, 66,
+ 66, 67, 67, 68, 68, 69, 69, 69, 70, 70, 70, 70, 58, 56, 55, 54, 53, 54,
+ 55, 57, 58, 59, 61, 62, 63, 64, 64, 65, 66, 66, 67, 67, 67, 68, 68, 69,
+ 69, 69, 70, 70, 71, 71, 71, 71, 58, 57, 55, 54, 53, 54, 56, 57, 58, 60,
+ 61, 62, 63, 64, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70, 71, 71,
+ 72, 72, 72, 72, 59, 57, 56, 55, 54, 55, 56, 57, 59, 60, 61, 62, 64, 64,
+ 65, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 72, 72,
+ 60, 58, 57, 55, 54, 55, 57, 58, 59, 60, 61, 63, 64, 65, 66, 67, 67, 68,
+ 69, 69, 70, 70, 71, 71, 71, 72, 72, 73, 73, 73, 73, 73, 60, 59, 58, 56,
+ 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 68, 69, 70, 70, 71,
+ 71, 72, 72, 72, 73, 73, 74, 74, 74, 74, 61, 60, 58, 57, 56, 57, 58, 59,
+ 60, 61, 62, 63, 65, 66, 66, 67, 68, 69, 70, 70, 71, 71, 72, 72, 73, 73,
+ 74, 74, 74, 74, 74, 74, 62, 61, 59, 58, 56, 57, 58, 59, 60, 62, 63, 64,
+ 65, 66, 67, 68, 69, 69, 70, 71, 71, 72, 72, 73, 73, 74, 74, 74, 75, 75,
+ 75, 75, 63, 61, 60, 59, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
+ 69, 70, 71, 71, 72, 72, 73, 73, 74, 74, 75, 75, 75, 75, 75, 75, 64, 62,
+ 61, 59, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 70, 71, 72,
+ 72, 73, 73, 74, 74, 75, 75, 76, 76, 76, 76, 76, 65, 63, 62, 60, 59, 60,
+ 61, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 71, 72, 73, 73, 74, 74,
+ 75, 75, 76, 76, 76, 76, 76, 76, 66, 64, 63, 61, 60, 61, 61, 62, 63, 64,
+ 65, 66, 67, 68, 69, 69, 70, 71, 72, 72, 73, 74, 74, 75, 75, 76, 76, 76,
+ 77, 77, 77, 77, 66, 65, 63, 62, 61, 62, 62, 63, 64, 65, 65, 66, 67, 68,
+ 69, 70, 71, 71, 72, 73, 74, 74, 75, 75, 76, 76, 76, 77, 77, 77, 77, 77,
+ 67, 66, 64, 63, 62, 62, 63, 64, 64, 65, 66, 67, 68, 69, 69, 70, 71, 72,
+ 73, 73, 74, 74, 75, 76, 76, 76, 77, 77, 78, 78, 78, 78, 68, 67, 65, 64,
+ 63, 63, 64, 64, 65, 66, 67, 67, 68, 69, 70, 71, 72, 72, 73, 74, 74, 75,
+ 75, 76, 76, 77, 77, 78, 78, 78, 78, 78, 68, 67, 65, 64, 63, 63, 64, 64,
+ 65, 66, 67, 67, 68, 69, 70, 71, 72, 72, 73, 74, 74, 75, 75, 76, 76, 77,
+ 77, 78, 78, 78, 78, 78, 68, 67, 65, 64, 63, 63, 64, 64, 65, 66, 67, 67,
+ 68, 69, 70, 71, 72, 72, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 78,
+ 78, 78, 68, 67, 65, 64, 63, 63, 64, 64, 65, 66, 67, 67, 68, 69, 70, 71,
+ 72, 72, 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 78, 78, 78 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 67, 84, 97, 67, 81, 91, 99, 84, 91, 100, 105, 97, 99, 105, 108,
+ /* Size 8 */
+ 64, 55, 58, 66, 75, 82, 88, 92, 55, 59, 58, 63, 70, 78, 84, 89, 58, 58,
+ 68, 73, 78, 82, 87, 91, 66, 63, 73, 80, 84, 87, 90, 93, 75, 70, 78, 84,
+ 88, 91, 93, 95, 82, 78, 82, 87, 91, 94, 96, 97, 88, 84, 87, 90, 93, 96,
+ 97, 98, 92, 89, 91, 93, 95, 97, 98, 99,
+ /* Size 16 */
+ 64, 59, 55, 56, 58, 62, 66, 70, 75, 78, 82, 85, 88, 90, 92, 92, 59, 58,
+ 57, 58, 58, 61, 65, 68, 73, 76, 80, 83, 86, 88, 90, 90, 55, 57, 59, 59,
+ 58, 60, 63, 67, 70, 74, 78, 81, 84, 86, 89, 89, 56, 58, 59, 61, 63, 65,
+ 68, 71, 74, 77, 80, 83, 85, 87, 90, 90, 58, 58, 58, 63, 68, 71, 73, 75,
+ 78, 80, 82, 85, 87, 89, 91, 91, 62, 61, 60, 65, 71, 73, 76, 78, 81, 83,
+ 85, 87, 89, 90, 92, 92, 66, 65, 63, 68, 73, 76, 80, 82, 84, 86, 87, 89,
+ 90, 92, 93, 93, 70, 68, 67, 71, 75, 78, 82, 84, 86, 88, 89, 91, 92, 93,
+ 94, 94, 75, 73, 70, 74, 78, 81, 84, 86, 88, 90, 91, 92, 93, 94, 95, 95,
+ 78, 76, 74, 77, 80, 83, 86, 88, 90, 91, 92, 93, 94, 95, 96, 96, 82, 80,
+ 78, 80, 82, 85, 87, 89, 91, 92, 94, 95, 96, 96, 97, 97, 85, 83, 81, 83,
+ 85, 87, 89, 91, 92, 93, 95, 96, 96, 97, 98, 98, 88, 86, 84, 85, 87, 89,
+ 90, 92, 93, 94, 96, 96, 97, 98, 98, 98, 90, 88, 86, 87, 89, 90, 92, 93,
+ 94, 95, 96, 97, 98, 98, 99, 99, 92, 90, 89, 90, 91, 92, 93, 94, 95, 96,
+ 97, 98, 98, 99, 99, 99, 92, 90, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,
+ 98, 99, 99, 99,
+ /* Size 32 */
+ 64, 62, 59, 57, 55, 56, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 75, 77,
+ 78, 80, 82, 84, 85, 86, 88, 89, 90, 91, 92, 92, 92, 92, 62, 60, 59, 58,
+ 56, 57, 57, 57, 58, 60, 61, 63, 65, 67, 69, 71, 74, 75, 77, 79, 81, 82,
+ 84, 85, 87, 88, 89, 90, 91, 91, 91, 91, 59, 59, 58, 58, 57, 57, 58, 58,
+ 58, 59, 61, 63, 65, 66, 68, 70, 73, 74, 76, 78, 80, 81, 83, 84, 86, 87,
+ 88, 89, 90, 90, 90, 90, 57, 58, 58, 58, 58, 58, 58, 58, 58, 59, 61, 62,
+ 64, 66, 67, 69, 72, 73, 75, 77, 79, 80, 82, 83, 85, 86, 87, 88, 90, 90,
+ 90, 90, 55, 56, 57, 58, 59, 59, 59, 58, 58, 59, 60, 62, 63, 65, 67, 68,
+ 70, 72, 74, 76, 78, 79, 81, 82, 84, 85, 86, 87, 89, 89, 89, 89, 56, 57,
+ 57, 58, 59, 59, 60, 60, 60, 62, 63, 64, 65, 67, 69, 70, 72, 74, 75, 77,
+ 79, 80, 82, 83, 85, 86, 87, 88, 89, 89, 89, 89, 56, 57, 58, 58, 59, 60,
+ 61, 62, 63, 64, 65, 66, 68, 69, 71, 72, 74, 75, 77, 78, 80, 81, 83, 84,
+ 85, 86, 87, 89, 90, 90, 90, 90, 57, 57, 58, 58, 58, 60, 62, 64, 66, 67,
+ 68, 69, 70, 72, 73, 74, 76, 77, 78, 80, 81, 82, 84, 85, 86, 87, 88, 89,
+ 90, 90, 90, 90, 58, 58, 58, 58, 58, 60, 63, 66, 68, 70, 71, 72, 73, 74,
+ 75, 76, 78, 79, 80, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 91, 91, 91,
+ 60, 60, 59, 59, 59, 62, 64, 67, 70, 71, 72, 73, 75, 76, 77, 78, 79, 80,
+ 81, 82, 84, 85, 86, 87, 88, 89, 89, 90, 91, 91, 91, 91, 62, 61, 61, 61,
+ 60, 63, 65, 68, 71, 72, 73, 75, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86,
+ 87, 88, 89, 89, 90, 91, 92, 92, 92, 92, 64, 63, 63, 62, 62, 64, 66, 69,
+ 72, 73, 75, 76, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 89, 90,
+ 91, 92, 92, 92, 92, 92, 66, 65, 65, 64, 63, 65, 68, 70, 73, 75, 76, 78,
+ 80, 81, 82, 83, 84, 85, 86, 87, 87, 88, 89, 90, 90, 91, 92, 92, 93, 93,
+ 93, 93, 68, 67, 66, 66, 65, 67, 69, 72, 74, 76, 77, 79, 81, 82, 83, 84,
+ 85, 86, 87, 87, 88, 89, 90, 90, 91, 92, 92, 93, 94, 94, 94, 94, 70, 69,
+ 68, 67, 67, 69, 71, 73, 75, 77, 78, 80, 82, 83, 84, 85, 86, 87, 88, 88,
+ 89, 90, 91, 91, 92, 92, 93, 94, 94, 94, 94, 94, 72, 71, 70, 69, 68, 70,
+ 72, 74, 76, 78, 80, 81, 83, 84, 85, 86, 87, 88, 89, 89, 90, 91, 91, 92,
+ 93, 93, 94, 94, 95, 95, 95, 95, 75, 74, 73, 72, 70, 72, 74, 76, 78, 79,
+ 81, 82, 84, 85, 86, 87, 88, 89, 90, 90, 91, 92, 92, 93, 93, 94, 94, 95,
+ 95, 95, 95, 95, 77, 75, 74, 73, 72, 74, 75, 77, 79, 80, 82, 83, 85, 86,
+ 87, 88, 89, 90, 90, 91, 92, 92, 93, 93, 94, 94, 95, 95, 96, 96, 96, 96,
+ 78, 77, 76, 75, 74, 75, 77, 78, 80, 81, 83, 84, 86, 87, 88, 89, 90, 90,
+ 91, 92, 92, 93, 93, 94, 94, 95, 95, 96, 96, 96, 96, 96, 80, 79, 78, 77,
+ 76, 77, 78, 80, 81, 82, 84, 85, 87, 87, 88, 89, 90, 91, 92, 92, 93, 94,
+ 94, 95, 95, 95, 96, 96, 97, 97, 97, 97, 82, 81, 80, 79, 78, 79, 80, 81,
+ 82, 84, 85, 86, 87, 88, 89, 90, 91, 92, 92, 93, 94, 94, 95, 95, 96, 96,
+ 96, 97, 97, 97, 97, 97, 84, 82, 81, 80, 79, 80, 81, 82, 83, 85, 86, 87,
+ 88, 89, 90, 91, 92, 92, 93, 94, 94, 95, 95, 96, 96, 96, 97, 97, 97, 97,
+ 97, 97, 85, 84, 83, 82, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 91,
+ 92, 93, 93, 94, 95, 95, 96, 96, 96, 97, 97, 97, 98, 98, 98, 98, 86, 85,
+ 84, 83, 82, 83, 84, 85, 86, 87, 88, 89, 90, 90, 91, 92, 93, 93, 94, 95,
+ 95, 96, 96, 96, 97, 97, 97, 98, 98, 98, 98, 98, 88, 87, 86, 85, 84, 85,
+ 85, 86, 87, 88, 89, 89, 90, 91, 92, 93, 93, 94, 94, 95, 96, 96, 96, 97,
+ 97, 97, 98, 98, 98, 98, 98, 98, 89, 88, 87, 86, 85, 86, 86, 87, 88, 89,
+ 89, 90, 91, 92, 92, 93, 94, 94, 95, 95, 96, 96, 97, 97, 97, 98, 98, 98,
+ 99, 99, 99, 99, 90, 89, 88, 87, 86, 87, 87, 88, 89, 89, 90, 91, 92, 92,
+ 93, 94, 94, 95, 95, 96, 96, 97, 97, 97, 98, 98, 98, 99, 99, 99, 99, 99,
+ 91, 90, 89, 88, 87, 88, 89, 89, 90, 90, 91, 92, 92, 93, 94, 94, 95, 95,
+ 96, 96, 97, 97, 97, 98, 98, 98, 99, 99, 99, 99, 99, 99, 92, 91, 90, 90,
+ 89, 89, 90, 90, 91, 91, 92, 92, 93, 94, 94, 95, 95, 96, 96, 97, 97, 97,
+ 98, 98, 98, 99, 99, 99, 99, 99, 99, 99, 92, 91, 90, 90, 89, 89, 90, 90,
+ 91, 91, 92, 92, 93, 94, 94, 95, 95, 96, 96, 97, 97, 97, 98, 98, 98, 99,
+ 99, 99, 99, 99, 99, 99, 92, 91, 90, 90, 89, 89, 90, 90, 91, 91, 92, 92,
+ 93, 94, 94, 95, 95, 96, 96, 97, 97, 97, 98, 98, 98, 99, 99, 99, 99, 99,
+ 99, 99, 92, 91, 90, 90, 89, 89, 90, 90, 91, 91, 92, 92, 93, 94, 94, 95,
+ 95, 96, 96, 97, 97, 97, 98, 98, 98, 99, 99, 99, 99, 99, 99, 99 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 44, 46, 58, 68, 46, 56, 64, 70, 58, 64, 71, 74, 68, 70, 74, 77,
+ /* Size 8 */
+ 49, 42, 44, 50, 57, 63, 68, 71, 42, 45, 44, 48, 54, 60, 65, 69, 44, 44,
+ 52, 56, 60, 63, 67, 70, 50, 48, 56, 61, 65, 68, 70, 72, 57, 54, 60, 65,
+ 68, 71, 73, 74, 63, 60, 63, 68, 71, 73, 74, 76, 68, 65, 67, 70, 73, 74,
+ 76, 77, 71, 69, 70, 72, 74, 76, 77, 78,
+ /* Size 16 */
+ 48, 44, 41, 42, 43, 46, 49, 53, 56, 59, 62, 65, 67, 69, 70, 70, 44, 43,
+ 43, 43, 43, 45, 48, 51, 55, 57, 61, 63, 65, 67, 69, 69, 41, 43, 44, 44,
+ 43, 45, 47, 50, 53, 56, 59, 61, 64, 66, 68, 68, 42, 43, 44, 45, 47, 49,
+ 51, 53, 56, 58, 61, 63, 65, 67, 68, 68, 43, 43, 43, 47, 51, 53, 55, 57,
+ 59, 61, 63, 64, 66, 68, 69, 69, 46, 45, 45, 49, 53, 55, 58, 59, 61, 63,
+ 64, 66, 68, 69, 70, 70, 49, 48, 47, 51, 55, 58, 61, 62, 64, 65, 67, 68,
+ 69, 70, 71, 71, 53, 51, 50, 53, 57, 59, 62, 64, 66, 67, 68, 69, 70, 71,
+ 72, 72, 56, 55, 53, 56, 59, 61, 64, 66, 67, 69, 70, 71, 72, 72, 73, 73,
+ 59, 57, 56, 58, 61, 63, 65, 67, 69, 70, 71, 72, 72, 73, 74, 74, 62, 61,
+ 59, 61, 63, 64, 67, 68, 70, 71, 72, 73, 73, 74, 75, 75, 65, 63, 61, 63,
+ 64, 66, 68, 69, 71, 72, 73, 73, 74, 75, 75, 75, 67, 65, 64, 65, 66, 68,
+ 69, 70, 72, 72, 73, 74, 75, 75, 76, 76, 69, 67, 66, 67, 68, 69, 70, 71,
+ 72, 73, 74, 75, 75, 76, 76, 76, 70, 69, 68, 68, 69, 70, 71, 72, 73, 74,
+ 75, 75, 76, 76, 76, 76, 70, 69, 68, 68, 69, 70, 71, 72, 73, 74, 75, 75,
+ 76, 76, 76, 76,
+ /* Size 32 */
+ 47, 46, 44, 42, 41, 41, 42, 42, 43, 44, 46, 47, 49, 51, 52, 54, 56, 57,
+ 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 70, 70, 70, 46, 44, 43, 42,
+ 41, 42, 42, 42, 43, 44, 45, 47, 48, 50, 52, 53, 55, 56, 58, 59, 61, 62,
+ 63, 64, 66, 66, 67, 68, 69, 69, 69, 69, 44, 43, 43, 43, 42, 42, 42, 43,
+ 43, 44, 45, 46, 48, 49, 51, 52, 54, 56, 57, 58, 60, 61, 62, 64, 65, 66,
+ 67, 68, 68, 68, 68, 68, 42, 42, 43, 43, 43, 43, 43, 43, 43, 44, 45, 46,
+ 47, 49, 50, 52, 53, 55, 56, 58, 59, 60, 61, 63, 64, 65, 66, 67, 68, 68,
+ 68, 68, 41, 41, 42, 43, 44, 44, 43, 43, 43, 44, 45, 46, 47, 48, 49, 51,
+ 53, 54, 55, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 67, 67, 67, 41, 42,
+ 42, 43, 44, 44, 44, 44, 45, 46, 46, 47, 48, 50, 51, 52, 54, 55, 56, 58,
+ 59, 60, 61, 63, 64, 65, 66, 67, 68, 68, 68, 68, 42, 42, 42, 43, 43, 44,
+ 45, 46, 47, 47, 48, 49, 50, 51, 53, 54, 55, 56, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 68, 68, 68, 42, 42, 43, 43, 43, 44, 46, 47, 49, 50,
+ 50, 51, 52, 53, 54, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 67,
+ 68, 68, 68, 68, 43, 43, 43, 43, 43, 45, 47, 49, 51, 52, 53, 54, 55, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 66, 67, 68, 69, 69, 69, 69,
+ 44, 44, 44, 44, 44, 46, 47, 50, 52, 53, 54, 55, 56, 57, 58, 58, 59, 60,
+ 61, 62, 63, 64, 65, 65, 66, 67, 68, 68, 69, 69, 69, 69, 46, 45, 45, 45,
+ 45, 46, 48, 50, 53, 54, 55, 56, 57, 58, 59, 60, 61, 61, 62, 63, 64, 65,
+ 65, 66, 67, 68, 68, 69, 70, 70, 70, 70, 47, 47, 46, 46, 46, 47, 49, 51,
+ 54, 55, 56, 57, 59, 59, 60, 61, 62, 63, 63, 64, 65, 66, 66, 67, 68, 68,
+ 69, 70, 70, 70, 70, 70, 49, 48, 48, 47, 47, 48, 50, 52, 55, 56, 57, 59,
+ 60, 61, 62, 63, 63, 64, 65, 65, 66, 67, 67, 68, 69, 69, 70, 70, 71, 71,
+ 71, 71, 51, 50, 49, 49, 48, 50, 51, 53, 55, 57, 58, 59, 61, 62, 63, 63,
+ 64, 65, 65, 66, 67, 67, 68, 69, 69, 70, 70, 71, 71, 71, 71, 71, 52, 52,
+ 51, 50, 49, 51, 53, 54, 56, 58, 59, 60, 62, 63, 63, 64, 65, 66, 66, 67,
+ 68, 68, 69, 69, 70, 70, 71, 71, 72, 72, 72, 72, 54, 53, 52, 52, 51, 52,
+ 54, 56, 57, 58, 60, 61, 63, 63, 64, 65, 66, 67, 67, 68, 68, 69, 69, 70,
+ 70, 71, 71, 72, 72, 72, 72, 72, 56, 55, 54, 53, 53, 54, 55, 57, 58, 59,
+ 61, 62, 63, 64, 65, 66, 67, 67, 68, 69, 69, 70, 70, 71, 71, 71, 72, 72,
+ 72, 72, 72, 72, 57, 56, 56, 55, 54, 55, 56, 58, 59, 60, 61, 63, 64, 65,
+ 66, 67, 67, 68, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73,
+ 59, 58, 57, 56, 55, 56, 58, 59, 60, 61, 62, 63, 65, 65, 66, 67, 68, 69,
+ 69, 70, 70, 71, 71, 71, 72, 72, 73, 73, 73, 73, 73, 73, 60, 59, 58, 58,
+ 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 69, 70, 70, 71, 71,
+ 72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 62, 61, 60, 59, 58, 59, 60, 61,
+ 62, 63, 64, 65, 66, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73,
+ 73, 74, 74, 74, 74, 74, 63, 62, 61, 60, 59, 60, 61, 62, 63, 64, 65, 66,
+ 67, 67, 68, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 73, 74, 74, 74, 74,
+ 74, 74, 64, 63, 62, 61, 61, 61, 62, 63, 64, 65, 65, 66, 67, 68, 69, 69,
+ 70, 71, 71, 72, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75, 65, 64,
+ 64, 63, 62, 63, 63, 64, 65, 65, 66, 67, 68, 69, 69, 70, 71, 71, 71, 72,
+ 72, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 66, 66, 65, 64, 63, 64,
+ 64, 65, 66, 66, 67, 68, 69, 69, 70, 70, 71, 71, 72, 72, 73, 73, 73, 74,
+ 74, 74, 75, 75, 75, 75, 75, 75, 67, 66, 66, 65, 64, 65, 65, 66, 66, 67,
+ 68, 68, 69, 70, 70, 71, 71, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75, 75,
+ 75, 75, 75, 75, 68, 67, 67, 66, 65, 66, 66, 67, 67, 68, 68, 69, 70, 70,
+ 71, 71, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75,
+ 69, 68, 68, 67, 66, 67, 67, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 72,
+ 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 76, 76, 76, 76, 70, 69, 68, 68,
+ 67, 68, 68, 68, 69, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 74, 74, 74,
+ 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 70, 69, 68, 68, 67, 68, 68, 68,
+ 69, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 74, 74, 74, 75, 75, 75, 75,
+ 75, 76, 76, 76, 76, 76, 70, 69, 68, 68, 67, 68, 68, 68, 69, 69, 70, 70,
+ 71, 71, 72, 72, 72, 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 76, 76, 76,
+ 76, 76, 70, 69, 68, 68, 67, 68, 68, 68, 69, 69, 70, 70, 71, 71, 72, 72,
+ 72, 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 80, 83, 90, 80, 86, 88, 92, 83, 88, 96, 99, 90, 92, 99, 104,
+ /* Size 8 */
+ 64, 57, 77, 78, 80, 84, 88, 91, 57, 69, 77, 74, 75, 78, 82, 86, 77, 77,
+ 82, 81, 81, 82, 85, 88, 78, 74, 81, 85, 86, 87, 89, 91, 80, 75, 81, 86,
+ 89, 91, 93, 95, 84, 78, 82, 87, 91, 94, 96, 97, 88, 82, 85, 89, 93, 96,
+ 98, 99, 91, 86, 88, 91, 95, 97, 99, 101,
+ /* Size 16 */
+ 64, 60, 57, 65, 77, 77, 78, 79, 80, 82, 84, 86, 88, 89, 91, 91, 60, 61,
+ 62, 69, 77, 76, 76, 77, 78, 79, 81, 83, 85, 86, 88, 88, 57, 62, 69, 73,
+ 77, 75, 74, 74, 75, 76, 78, 80, 82, 84, 86, 86, 65, 69, 73, 76, 80, 78,
+ 77, 78, 78, 79, 80, 82, 83, 85, 87, 87, 77, 77, 77, 80, 82, 82, 81, 81,
+ 81, 82, 82, 84, 85, 87, 88, 88, 77, 76, 75, 78, 82, 82, 83, 83, 83, 84,
+ 85, 86, 87, 88, 90, 90, 78, 76, 74, 77, 81, 83, 85, 85, 86, 87, 87, 88,
+ 89, 90, 91, 91, 79, 77, 74, 78, 81, 83, 85, 87, 88, 89, 89, 90, 91, 92,
+ 93, 93, 80, 78, 75, 78, 81, 83, 86, 88, 89, 90, 91, 92, 93, 94, 95, 95,
+ 82, 79, 76, 79, 82, 84, 87, 89, 90, 92, 93, 94, 94, 95, 96, 96, 84, 81,
+ 78, 80, 82, 85, 87, 89, 91, 93, 94, 95, 96, 97, 97, 97, 86, 83, 80, 82,
+ 84, 86, 88, 90, 92, 94, 95, 96, 97, 98, 98, 98, 88, 85, 82, 83, 85, 87,
+ 89, 91, 93, 94, 96, 97, 98, 99, 99, 99, 89, 86, 84, 85, 87, 88, 90, 92,
+ 94, 95, 97, 98, 99, 99, 100, 100, 91, 88, 86, 87, 88, 90, 91, 93, 95,
+ 96, 97, 98, 99, 100, 101, 101, 91, 88, 86, 87, 88, 90, 91, 93, 95, 96,
+ 97, 98, 99, 100, 101, 101,
+ /* Size 32 */
+ 64, 62, 60, 59, 57, 61, 65, 70, 77, 77, 77, 78, 78, 79, 79, 80, 80, 81,
+ 82, 83, 84, 85, 86, 87, 88, 88, 89, 90, 91, 91, 91, 91, 62, 61, 61, 60,
+ 60, 63, 67, 72, 77, 77, 77, 77, 77, 78, 78, 79, 79, 80, 81, 81, 82, 83,
+ 84, 85, 86, 87, 88, 89, 90, 90, 90, 90, 60, 61, 61, 62, 62, 65, 69, 73,
+ 77, 77, 76, 76, 76, 76, 77, 77, 78, 78, 79, 80, 81, 82, 83, 84, 85, 85,
+ 86, 87, 88, 88, 88, 88, 59, 60, 62, 64, 66, 68, 71, 74, 77, 76, 76, 75,
+ 75, 75, 76, 76, 76, 77, 78, 79, 79, 80, 81, 82, 83, 84, 85, 86, 87, 87,
+ 87, 87, 57, 60, 62, 66, 69, 71, 73, 75, 77, 76, 75, 75, 74, 74, 74, 75,
+ 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 86, 86, 86, 61, 63,
+ 65, 68, 71, 73, 74, 76, 78, 78, 77, 76, 76, 76, 76, 76, 76, 77, 78, 78,
+ 79, 80, 81, 82, 82, 83, 84, 85, 86, 86, 86, 86, 65, 67, 69, 71, 73, 74,
+ 76, 78, 80, 79, 78, 78, 77, 77, 78, 78, 78, 78, 79, 80, 80, 81, 82, 82,
+ 83, 84, 85, 86, 87, 87, 87, 87, 70, 72, 73, 74, 75, 76, 78, 79, 81, 80,
+ 80, 80, 79, 79, 79, 79, 79, 80, 80, 81, 81, 82, 83, 83, 84, 85, 86, 87,
+ 87, 87, 87, 87, 77, 77, 77, 77, 77, 78, 80, 81, 82, 82, 82, 81, 81, 81,
+ 81, 81, 81, 81, 82, 82, 82, 83, 84, 84, 85, 86, 87, 87, 88, 88, 88, 88,
+ 77, 77, 77, 76, 76, 78, 79, 80, 82, 82, 82, 82, 82, 82, 82, 82, 82, 83,
+ 83, 83, 84, 84, 85, 85, 86, 87, 87, 88, 89, 89, 89, 89, 77, 77, 76, 76,
+ 75, 77, 78, 80, 82, 82, 82, 83, 83, 83, 83, 83, 83, 84, 84, 85, 85, 85,
+ 86, 87, 87, 88, 88, 89, 90, 90, 90, 90, 78, 77, 76, 75, 75, 76, 78, 80,
+ 81, 82, 83, 83, 84, 84, 84, 85, 85, 85, 86, 86, 86, 87, 87, 88, 88, 89,
+ 89, 90, 90, 90, 90, 90, 78, 77, 76, 75, 74, 76, 77, 79, 81, 82, 83, 84,
+ 85, 85, 85, 86, 86, 87, 87, 87, 87, 88, 88, 89, 89, 90, 90, 91, 91, 91,
+ 91, 91, 79, 78, 76, 75, 74, 76, 77, 79, 81, 82, 83, 84, 85, 86, 86, 87,
+ 87, 87, 88, 88, 88, 89, 89, 90, 90, 91, 91, 92, 92, 92, 92, 92, 79, 78,
+ 77, 76, 74, 76, 78, 79, 81, 82, 83, 84, 85, 86, 87, 87, 88, 88, 89, 89,
+ 89, 90, 90, 91, 91, 92, 92, 92, 93, 93, 93, 93, 80, 79, 77, 76, 75, 76,
+ 78, 79, 81, 82, 83, 85, 86, 87, 87, 88, 89, 89, 89, 90, 90, 91, 91, 92,
+ 92, 92, 93, 93, 94, 94, 94, 94, 80, 79, 78, 76, 75, 76, 78, 79, 81, 82,
+ 83, 85, 86, 87, 88, 89, 89, 90, 90, 91, 91, 92, 92, 93, 93, 93, 94, 94,
+ 95, 95, 95, 95, 81, 80, 78, 77, 76, 77, 78, 80, 81, 83, 84, 85, 87, 87,
+ 88, 89, 90, 90, 91, 91, 92, 92, 93, 93, 94, 94, 94, 95, 95, 95, 95, 95,
+ 82, 81, 79, 78, 76, 78, 79, 80, 82, 83, 84, 86, 87, 88, 89, 89, 90, 91,
+ 92, 92, 93, 93, 94, 94, 94, 95, 95, 96, 96, 96, 96, 96, 83, 81, 80, 79,
+ 77, 78, 80, 81, 82, 83, 85, 86, 87, 88, 89, 90, 91, 91, 92, 93, 93, 94,
+ 94, 95, 95, 95, 96, 96, 97, 97, 97, 97, 84, 82, 81, 79, 78, 79, 80, 81,
+ 82, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 93, 94, 94, 95, 95, 96, 96,
+ 97, 97, 97, 97, 97, 97, 85, 83, 82, 80, 79, 80, 81, 82, 83, 84, 85, 87,
+ 88, 89, 90, 91, 92, 92, 93, 94, 94, 95, 95, 96, 96, 97, 97, 97, 98, 98,
+ 98, 98, 86, 84, 83, 81, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
+ 92, 93, 94, 94, 95, 95, 96, 96, 97, 97, 98, 98, 98, 98, 98, 98, 87, 85,
+ 84, 82, 81, 82, 82, 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 93, 94, 95,
+ 95, 96, 96, 97, 97, 98, 98, 98, 99, 99, 99, 99, 88, 86, 85, 83, 82, 82,
+ 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 96, 97, 97,
+ 98, 98, 99, 99, 99, 99, 99, 99, 88, 87, 85, 84, 83, 83, 84, 85, 86, 87,
+ 88, 89, 90, 91, 92, 92, 93, 94, 95, 95, 96, 97, 97, 98, 98, 99, 99, 99,
+ 100, 100, 100, 100, 89, 88, 86, 85, 84, 84, 85, 86, 87, 87, 88, 89, 90,
+ 91, 92, 93, 94, 94, 95, 96, 97, 97, 98, 98, 99, 99, 99, 100, 100, 100,
+ 100, 100, 90, 89, 87, 86, 85, 85, 86, 87, 87, 88, 89, 90, 91, 92, 92,
+ 93, 94, 95, 96, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 101,
+ 101, 91, 90, 88, 87, 86, 86, 87, 87, 88, 89, 90, 90, 91, 92, 93, 94, 95,
+ 95, 96, 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 101, 101, 101, 91,
+ 90, 88, 87, 86, 86, 87, 87, 88, 89, 90, 90, 91, 92, 93, 94, 95, 95, 96,
+ 97, 97, 98, 98, 99, 99, 100, 100, 101, 101, 101, 101, 101, 91, 90, 88,
+ 87, 86, 86, 87, 87, 88, 89, 90, 90, 91, 92, 93, 94, 95, 95, 96, 97, 97,
+ 98, 98, 99, 99, 100, 100, 101, 101, 101, 101, 101, 91, 90, 88, 87, 86,
+ 86, 87, 87, 88, 89, 90, 90, 91, 92, 93, 94, 95, 95, 96, 97, 97, 98, 98,
+ 99, 99, 100, 100, 101, 101, 101, 101, 101 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 45, 57, 59, 65, 57, 62, 63, 67, 59, 63, 69, 72, 65, 67, 72, 76,
+ /* Size 8 */
+ 47, 42, 57, 59, 60, 63, 66, 69, 42, 51, 57, 55, 56, 58, 61, 64, 57, 57,
+ 61, 61, 61, 62, 64, 66, 59, 55, 61, 64, 65, 66, 67, 69, 60, 56, 61, 65,
+ 67, 69, 70, 71, 63, 58, 62, 66, 69, 71, 72, 74, 66, 61, 64, 67, 70, 72,
+ 74, 75, 69, 64, 66, 69, 71, 74, 75, 77,
+ /* Size 16 */
+ 47, 44, 41, 48, 56, 57, 58, 59, 60, 61, 62, 64, 65, 67, 68, 68, 44, 45,
+ 46, 50, 57, 56, 56, 57, 57, 58, 60, 61, 63, 64, 66, 66, 41, 46, 51, 54,
+ 57, 56, 54, 55, 55, 56, 57, 59, 60, 62, 63, 63, 48, 50, 54, 56, 59, 58,
+ 57, 57, 57, 58, 59, 60, 62, 63, 64, 64, 56, 57, 57, 59, 61, 60, 60, 60,
+ 60, 60, 61, 62, 63, 64, 65, 65, 57, 56, 56, 58, 60, 61, 61, 62, 62, 62,
+ 63, 64, 65, 66, 67, 67, 58, 56, 54, 57, 60, 61, 63, 63, 64, 65, 65, 66,
+ 66, 67, 68, 68, 59, 57, 55, 57, 60, 62, 63, 64, 65, 66, 67, 67, 68, 69,
+ 69, 69, 60, 57, 55, 57, 60, 62, 64, 65, 67, 67, 68, 69, 69, 70, 71, 71,
+ 61, 58, 56, 58, 60, 62, 65, 66, 67, 68, 69, 70, 71, 71, 72, 72, 62, 60,
+ 57, 59, 61, 63, 65, 67, 68, 69, 70, 71, 72, 72, 73, 73, 64, 61, 59, 60,
+ 62, 64, 66, 67, 69, 70, 71, 72, 73, 73, 74, 74, 65, 63, 60, 62, 63, 65,
+ 66, 68, 69, 71, 72, 73, 73, 74, 75, 75, 67, 64, 62, 63, 64, 66, 67, 69,
+ 70, 71, 72, 73, 74, 75, 75, 75, 68, 66, 63, 64, 65, 67, 68, 69, 71, 72,
+ 73, 74, 75, 75, 76, 76, 68, 66, 63, 64, 65, 67, 68, 69, 71, 72, 73, 74,
+ 75, 75, 76, 76,
+ /* Size 32 */
+ 46, 45, 44, 42, 41, 44, 47, 51, 56, 56, 57, 57, 58, 58, 58, 59, 59, 60,
+ 61, 61, 62, 63, 63, 64, 65, 65, 66, 67, 68, 68, 68, 68, 45, 45, 44, 44,
+ 43, 46, 49, 52, 56, 56, 56, 57, 57, 57, 57, 58, 58, 59, 59, 60, 61, 61,
+ 62, 63, 64, 64, 65, 66, 66, 66, 66, 66, 44, 44, 44, 45, 45, 48, 50, 53,
+ 56, 56, 56, 56, 56, 56, 56, 57, 57, 58, 58, 59, 59, 60, 61, 62, 62, 63,
+ 64, 65, 65, 65, 65, 65, 42, 44, 45, 46, 48, 50, 52, 54, 56, 56, 56, 55,
+ 55, 55, 55, 56, 56, 56, 57, 58, 58, 59, 60, 60, 61, 62, 63, 63, 64, 64,
+ 64, 64, 41, 43, 45, 48, 50, 52, 53, 55, 57, 56, 55, 55, 54, 54, 54, 55,
+ 55, 55, 56, 57, 57, 58, 59, 59, 60, 61, 62, 62, 63, 63, 63, 63, 44, 46,
+ 48, 50, 52, 53, 54, 56, 57, 57, 56, 56, 55, 56, 56, 56, 56, 56, 57, 57,
+ 58, 59, 59, 60, 61, 61, 62, 63, 64, 64, 64, 64, 47, 49, 50, 52, 53, 54,
+ 56, 57, 58, 58, 58, 57, 57, 57, 57, 57, 57, 58, 58, 58, 59, 60, 60, 61,
+ 61, 62, 63, 63, 64, 64, 64, 64, 51, 52, 53, 54, 55, 56, 57, 58, 59, 59,
+ 59, 58, 58, 58, 58, 58, 58, 59, 59, 59, 60, 60, 61, 62, 62, 63, 63, 64,
+ 65, 65, 65, 65, 56, 56, 56, 56, 57, 57, 58, 59, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 61, 61, 62, 62, 63, 63, 64, 65, 65, 65, 65, 65,
+ 56, 56, 56, 56, 56, 57, 58, 59, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61,
+ 61, 61, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 66, 66, 57, 56, 56, 56,
+ 55, 56, 58, 59, 60, 60, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63,
+ 64, 64, 64, 65, 65, 66, 66, 66, 66, 66, 57, 57, 56, 55, 55, 56, 57, 58,
+ 60, 60, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 65, 65, 66,
+ 66, 67, 67, 67, 67, 67, 58, 57, 56, 55, 54, 55, 57, 58, 60, 60, 61, 62,
+ 63, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 68, 68,
+ 68, 68, 58, 57, 56, 55, 54, 56, 57, 58, 60, 60, 61, 62, 63, 63, 64, 64,
+ 64, 65, 65, 65, 65, 66, 66, 66, 67, 67, 68, 68, 68, 68, 68, 68, 58, 57,
+ 56, 55, 54, 56, 57, 58, 60, 60, 61, 62, 63, 64, 64, 64, 65, 65, 66, 66,
+ 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 59, 58, 57, 56, 55, 56,
+ 57, 58, 60, 60, 61, 62, 63, 64, 64, 65, 66, 66, 66, 67, 67, 67, 68, 68,
+ 68, 69, 69, 69, 70, 70, 70, 70, 59, 58, 57, 56, 55, 56, 57, 58, 60, 61,
+ 62, 63, 64, 64, 65, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 70, 70,
+ 70, 70, 70, 70, 60, 59, 58, 56, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65,
+ 65, 66, 67, 67, 67, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71, 71,
+ 61, 59, 58, 57, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 66, 67, 67,
+ 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71, 71, 71, 61, 60, 59, 58,
+ 57, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 67, 68, 68, 69, 69, 70,
+ 70, 70, 71, 71, 71, 72, 72, 72, 72, 72, 62, 61, 59, 58, 57, 58, 59, 60,
+ 61, 62, 63, 64, 65, 65, 66, 67, 68, 68, 69, 69, 70, 70, 71, 71, 71, 72,
+ 72, 72, 72, 72, 72, 72, 63, 61, 60, 59, 58, 59, 60, 60, 61, 62, 63, 64,
+ 65, 66, 67, 67, 68, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73,
+ 73, 73, 63, 62, 61, 60, 59, 59, 60, 61, 62, 63, 64, 64, 65, 66, 67, 68,
+ 68, 69, 69, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 73, 73, 64, 63,
+ 62, 60, 59, 60, 61, 62, 62, 63, 64, 65, 66, 66, 67, 68, 69, 69, 70, 70,
+ 71, 71, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 65, 64, 62, 61, 60, 61,
+ 61, 62, 63, 64, 64, 65, 66, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 73,
+ 73, 73, 74, 74, 74, 74, 74, 74, 65, 64, 63, 62, 61, 61, 62, 63, 63, 64,
+ 65, 66, 66, 67, 68, 69, 69, 70, 70, 71, 72, 72, 72, 73, 73, 74, 74, 74,
+ 74, 74, 74, 74, 66, 65, 64, 63, 62, 62, 63, 63, 64, 65, 65, 66, 67, 68,
+ 68, 69, 70, 70, 71, 71, 72, 72, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75,
+ 67, 66, 65, 63, 62, 63, 63, 64, 65, 65, 66, 67, 67, 68, 69, 69, 70, 71,
+ 71, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 68, 66, 65, 64,
+ 63, 64, 64, 65, 65, 66, 66, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 73,
+ 73, 74, 74, 74, 75, 75, 75, 75, 75, 75, 68, 66, 65, 64, 63, 64, 64, 65,
+ 65, 66, 66, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 73, 73, 74, 74, 74,
+ 75, 75, 75, 75, 75, 75, 68, 66, 65, 64, 63, 64, 64, 65, 65, 66, 66, 67,
+ 68, 68, 69, 70, 70, 71, 71, 72, 72, 73, 73, 74, 74, 74, 75, 75, 75, 75,
+ 75, 75, 68, 66, 65, 64, 63, 64, 64, 65, 65, 66, 66, 67, 68, 68, 69, 70,
+ 70, 71, 71, 72, 72, 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 75 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 67, 80, 89, 67, 77, 85, 91, 80, 85, 92, 95, 89, 91, 95, 97,
+ /* Size 8 */
+ 64, 57, 59, 66, 73, 78, 82, 85, 57, 60, 59, 63, 69, 75, 80, 83, 59, 59,
+ 68, 71, 75, 78, 82, 84, 66, 63, 71, 77, 80, 82, 84, 86, 73, 69, 75, 80,
+ 83, 85, 86, 88, 78, 75, 78, 82, 85, 87, 88, 89, 82, 80, 82, 84, 86, 88,
+ 89, 90, 85, 83, 84, 86, 88, 89, 90, 91,
+ /* Size 16 */
+ 64, 60, 57, 58, 59, 62, 66, 69, 73, 75, 78, 80, 82, 84, 85, 85, 60, 59,
+ 58, 59, 59, 62, 64, 68, 71, 74, 77, 79, 81, 83, 84, 84, 57, 58, 60, 60,
+ 59, 61, 63, 66, 69, 72, 75, 77, 80, 81, 83, 83, 58, 59, 60, 61, 63, 65,
+ 67, 69, 72, 74, 77, 79, 81, 82, 84, 84, 59, 59, 59, 63, 68, 69, 71, 73,
+ 75, 77, 78, 80, 82, 83, 84, 84, 62, 62, 61, 65, 69, 72, 74, 75, 77, 79,
+ 80, 82, 83, 84, 85, 85, 66, 64, 63, 67, 71, 74, 77, 78, 80, 81, 82, 83,
+ 84, 85, 86, 86, 69, 68, 66, 69, 73, 75, 78, 80, 81, 82, 83, 84, 85, 86,
+ 87, 87, 73, 71, 69, 72, 75, 77, 80, 81, 83, 84, 85, 86, 86, 87, 88, 88,
+ 75, 74, 72, 74, 77, 79, 81, 82, 84, 85, 86, 86, 87, 88, 88, 88, 78, 77,
+ 75, 77, 78, 80, 82, 83, 85, 86, 87, 87, 88, 88, 89, 89, 80, 79, 77, 79,
+ 80, 82, 83, 84, 86, 86, 87, 88, 89, 89, 89, 89, 82, 81, 80, 81, 82, 83,
+ 84, 85, 86, 87, 88, 89, 89, 89, 90, 90, 84, 83, 81, 82, 83, 84, 85, 86,
+ 87, 88, 88, 89, 89, 90, 90, 90, 85, 84, 83, 84, 84, 85, 86, 87, 88, 88,
+ 89, 89, 90, 90, 91, 91, 85, 84, 83, 84, 84, 85, 86, 87, 88, 88, 89, 89,
+ 90, 90, 91, 91,
+ /* Size 32 */
+ 64, 62, 60, 58, 57, 57, 58, 58, 59, 60, 62, 64, 66, 67, 69, 71, 73, 74,
+ 75, 77, 78, 79, 80, 81, 82, 83, 84, 85, 85, 85, 85, 85, 62, 61, 60, 59,
+ 57, 58, 58, 58, 59, 60, 62, 63, 65, 67, 68, 70, 72, 73, 75, 76, 77, 78,
+ 79, 81, 82, 82, 83, 84, 85, 85, 85, 85, 60, 60, 59, 59, 58, 58, 59, 59,
+ 59, 60, 62, 63, 64, 66, 68, 69, 71, 72, 74, 75, 77, 78, 79, 80, 81, 82,
+ 83, 83, 84, 84, 84, 84, 58, 59, 59, 59, 59, 59, 59, 59, 59, 60, 61, 63,
+ 64, 65, 67, 68, 70, 71, 73, 74, 76, 77, 78, 79, 80, 81, 82, 83, 84, 84,
+ 84, 84, 57, 57, 58, 59, 60, 60, 60, 59, 59, 60, 61, 62, 63, 65, 66, 68,
+ 69, 71, 72, 73, 75, 76, 77, 78, 80, 80, 81, 82, 83, 83, 83, 83, 57, 58,
+ 58, 59, 60, 60, 60, 61, 61, 62, 63, 64, 65, 66, 68, 69, 71, 72, 73, 74,
+ 76, 77, 78, 79, 80, 81, 82, 83, 83, 83, 83, 83, 58, 58, 59, 59, 60, 60,
+ 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80,
+ 81, 81, 82, 83, 84, 84, 84, 84, 58, 58, 59, 59, 59, 61, 62, 64, 65, 66,
+ 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 78, 78, 79, 80, 81, 82, 83, 83,
+ 84, 84, 84, 84, 59, 59, 59, 59, 59, 61, 63, 65, 68, 69, 69, 70, 71, 72,
+ 73, 74, 75, 76, 77, 77, 78, 79, 80, 81, 82, 82, 83, 84, 84, 84, 84, 84,
+ 60, 60, 60, 60, 60, 62, 64, 66, 69, 69, 70, 71, 73, 73, 74, 75, 76, 77,
+ 78, 78, 79, 80, 81, 82, 82, 83, 84, 84, 85, 85, 85, 85, 62, 62, 62, 61,
+ 61, 63, 65, 67, 69, 70, 72, 73, 74, 75, 75, 76, 77, 78, 79, 79, 80, 81,
+ 82, 82, 83, 84, 84, 85, 85, 85, 85, 85, 64, 63, 63, 63, 62, 64, 66, 68,
+ 70, 71, 73, 74, 75, 76, 77, 78, 78, 79, 80, 80, 81, 82, 82, 83, 84, 84,
+ 85, 85, 86, 86, 86, 86, 66, 65, 64, 64, 63, 65, 67, 69, 71, 73, 74, 75,
+ 77, 77, 78, 79, 80, 80, 81, 81, 82, 83, 83, 84, 84, 85, 85, 86, 86, 86,
+ 86, 86, 67, 67, 66, 65, 65, 66, 68, 70, 72, 73, 75, 76, 77, 78, 79, 80,
+ 80, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86, 86, 86, 86, 86, 86, 69, 68,
+ 68, 67, 66, 68, 69, 71, 73, 74, 75, 77, 78, 79, 80, 80, 81, 82, 82, 83,
+ 83, 84, 84, 85, 85, 86, 86, 86, 87, 87, 87, 87, 71, 70, 69, 68, 68, 69,
+ 71, 72, 74, 75, 76, 78, 79, 80, 80, 81, 82, 83, 83, 84, 84, 85, 85, 85,
+ 86, 86, 87, 87, 87, 87, 87, 87, 73, 72, 71, 70, 69, 71, 72, 73, 75, 76,
+ 77, 78, 80, 80, 81, 82, 83, 83, 84, 84, 85, 85, 86, 86, 86, 87, 87, 87,
+ 88, 88, 88, 88, 74, 73, 72, 71, 71, 72, 73, 74, 76, 77, 78, 79, 80, 81,
+ 82, 83, 83, 84, 84, 85, 85, 86, 86, 86, 87, 87, 87, 88, 88, 88, 88, 88,
+ 75, 75, 74, 73, 72, 73, 74, 75, 77, 78, 79, 80, 81, 82, 82, 83, 84, 84,
+ 85, 85, 86, 86, 86, 87, 87, 87, 88, 88, 88, 88, 88, 88, 77, 76, 75, 74,
+ 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 84, 85, 85, 86, 86, 87,
+ 87, 87, 88, 88, 88, 88, 89, 89, 89, 89, 78, 77, 77, 76, 75, 76, 77, 78,
+ 78, 79, 80, 81, 82, 83, 83, 84, 85, 85, 86, 86, 87, 87, 87, 88, 88, 88,
+ 88, 89, 89, 89, 89, 89, 79, 78, 78, 77, 76, 77, 78, 78, 79, 80, 81, 82,
+ 83, 83, 84, 85, 85, 86, 86, 87, 87, 87, 88, 88, 88, 88, 89, 89, 89, 89,
+ 89, 89, 80, 79, 79, 78, 77, 78, 79, 79, 80, 81, 82, 82, 83, 84, 84, 85,
+ 86, 86, 86, 87, 87, 88, 88, 88, 89, 89, 89, 89, 89, 89, 89, 89, 81, 81,
+ 80, 79, 78, 79, 80, 80, 81, 82, 82, 83, 84, 84, 85, 85, 86, 86, 87, 87,
+ 88, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 82, 82, 81, 80, 80, 80,
+ 81, 81, 82, 82, 83, 84, 84, 85, 85, 86, 86, 87, 87, 88, 88, 88, 89, 89,
+ 89, 89, 89, 90, 90, 90, 90, 90, 83, 82, 82, 81, 80, 81, 81, 82, 82, 83,
+ 84, 84, 85, 85, 86, 86, 87, 87, 87, 88, 88, 88, 89, 89, 89, 89, 90, 90,
+ 90, 90, 90, 90, 84, 83, 83, 82, 81, 82, 82, 83, 83, 84, 84, 85, 85, 86,
+ 86, 87, 87, 87, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 90, 90, 90,
+ 85, 84, 83, 83, 82, 83, 83, 83, 84, 84, 85, 85, 86, 86, 86, 87, 87, 88,
+ 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 90, 90, 90, 90, 85, 85, 84, 84,
+ 83, 83, 84, 84, 84, 85, 85, 86, 86, 86, 87, 87, 88, 88, 88, 89, 89, 89,
+ 89, 90, 90, 90, 90, 90, 91, 91, 91, 91, 85, 85, 84, 84, 83, 83, 84, 84,
+ 84, 85, 85, 86, 86, 86, 87, 87, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90,
+ 90, 90, 91, 91, 91, 91, 85, 85, 84, 84, 83, 83, 84, 84, 84, 85, 85, 86,
+ 86, 86, 87, 87, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 90, 91, 91,
+ 91, 91, 85, 85, 84, 84, 83, 83, 84, 84, 84, 85, 85, 86, 86, 86, 87, 87,
+ 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 90, 91, 91, 91, 91 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 48, 50, 60, 68, 50, 58, 64, 69, 60, 64, 70, 72, 68, 69, 72, 74,
+ /* Size 8 */
+ 52, 45, 47, 53, 59, 64, 68, 70, 45, 48, 47, 51, 56, 61, 65, 68, 47, 47,
+ 55, 58, 61, 64, 67, 69, 53, 51, 58, 62, 65, 67, 69, 71, 59, 56, 61, 65,
+ 68, 70, 71, 72, 64, 61, 64, 67, 70, 71, 72, 73, 68, 65, 67, 69, 71, 72,
+ 73, 74, 70, 68, 69, 71, 72, 73, 74, 75,
+ /* Size 16 */
+ 51, 48, 45, 46, 47, 49, 52, 55, 58, 61, 63, 65, 67, 68, 69, 69, 48, 47,
+ 46, 46, 47, 49, 51, 54, 57, 59, 62, 63, 65, 67, 68, 68, 45, 46, 48, 47,
+ 47, 48, 50, 53, 55, 58, 60, 62, 64, 66, 67, 67, 46, 46, 47, 49, 50, 52,
+ 53, 55, 58, 60, 62, 63, 65, 66, 68, 68, 47, 47, 47, 50, 54, 56, 57, 59,
+ 60, 62, 63, 65, 66, 67, 68, 68, 49, 49, 48, 52, 56, 57, 59, 61, 62, 63,
+ 65, 66, 67, 68, 69, 69, 52, 51, 50, 53, 57, 59, 62, 63, 64, 65, 66, 67,
+ 68, 69, 70, 70, 55, 54, 53, 55, 59, 61, 63, 64, 66, 67, 68, 68, 69, 70,
+ 71, 71, 58, 57, 55, 58, 60, 62, 64, 66, 67, 68, 69, 69, 70, 71, 71, 71,
+ 61, 59, 58, 60, 62, 63, 65, 67, 68, 69, 70, 70, 71, 71, 72, 72, 63, 62,
+ 60, 62, 63, 65, 66, 68, 69, 70, 70, 71, 71, 72, 72, 72, 65, 63, 62, 63,
+ 65, 66, 67, 68, 69, 70, 71, 71, 72, 72, 73, 73, 67, 65, 64, 65, 66, 67,
+ 68, 69, 70, 71, 71, 72, 72, 73, 73, 73, 68, 67, 66, 66, 67, 68, 69, 70,
+ 71, 71, 72, 72, 73, 73, 73, 73, 69, 68, 67, 68, 68, 69, 70, 71, 71, 72,
+ 72, 73, 73, 73, 74, 74, 69, 68, 67, 68, 68, 69, 70, 71, 71, 72, 72, 73,
+ 73, 73, 74, 74,
+ /* Size 32 */
+ 51, 49, 47, 46, 45, 45, 45, 46, 46, 48, 49, 50, 52, 53, 55, 56, 58, 59,
+ 60, 61, 63, 64, 64, 65, 66, 67, 68, 68, 69, 69, 69, 69, 49, 48, 47, 46,
+ 45, 46, 46, 46, 46, 48, 49, 50, 52, 53, 54, 56, 57, 58, 59, 61, 62, 63,
+ 64, 65, 66, 66, 67, 68, 68, 68, 68, 68, 47, 47, 47, 46, 46, 46, 46, 46,
+ 46, 47, 49, 50, 51, 52, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66,
+ 66, 67, 68, 68, 68, 68, 46, 46, 46, 47, 47, 47, 47, 47, 46, 47, 48, 49,
+ 50, 52, 53, 54, 56, 57, 58, 59, 61, 61, 62, 63, 64, 65, 66, 67, 67, 67,
+ 67, 67, 45, 45, 46, 47, 47, 47, 47, 47, 47, 47, 48, 49, 50, 51, 52, 54,
+ 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 65, 66, 67, 67, 67, 67, 45, 46,
+ 46, 47, 47, 47, 48, 48, 48, 49, 50, 51, 51, 53, 54, 55, 56, 57, 58, 59,
+ 61, 61, 62, 63, 64, 65, 66, 66, 67, 67, 67, 67, 45, 46, 46, 47, 47, 48,
+ 48, 49, 50, 51, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
+ 65, 65, 66, 67, 67, 67, 67, 67, 46, 46, 46, 47, 47, 48, 49, 50, 52, 52,
+ 53, 54, 55, 56, 57, 58, 58, 59, 60, 61, 62, 63, 64, 64, 65, 66, 66, 67,
+ 68, 68, 68, 68, 46, 46, 46, 46, 47, 48, 50, 52, 54, 54, 55, 56, 57, 57,
+ 58, 59, 60, 60, 61, 62, 63, 64, 64, 65, 66, 66, 67, 67, 68, 68, 68, 68,
+ 48, 48, 47, 47, 47, 49, 51, 52, 54, 55, 56, 57, 58, 59, 59, 60, 61, 61,
+ 62, 63, 64, 64, 65, 66, 66, 67, 67, 68, 68, 68, 68, 68, 49, 49, 49, 48,
+ 48, 50, 51, 53, 55, 56, 57, 58, 59, 60, 60, 61, 62, 62, 63, 64, 64, 65,
+ 66, 66, 67, 67, 68, 68, 69, 69, 69, 69, 50, 50, 50, 49, 49, 51, 52, 54,
+ 56, 57, 58, 59, 60, 61, 61, 62, 63, 63, 64, 65, 65, 66, 66, 67, 67, 68,
+ 68, 69, 69, 69, 69, 69, 52, 52, 51, 50, 50, 51, 53, 55, 57, 58, 59, 60,
+ 61, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 69, 69,
+ 69, 69, 53, 53, 52, 52, 51, 53, 54, 56, 57, 59, 60, 61, 62, 63, 63, 64,
+ 65, 65, 66, 66, 67, 67, 67, 68, 68, 69, 69, 69, 70, 70, 70, 70, 55, 54,
+ 54, 53, 52, 54, 55, 57, 58, 59, 60, 61, 63, 63, 64, 65, 65, 66, 66, 67,
+ 67, 68, 68, 68, 69, 69, 69, 70, 70, 70, 70, 70, 56, 56, 55, 54, 54, 55,
+ 56, 58, 59, 60, 61, 62, 63, 64, 65, 65, 66, 66, 67, 67, 68, 68, 68, 69,
+ 69, 70, 70, 70, 70, 70, 70, 70, 58, 57, 56, 56, 55, 56, 57, 58, 60, 61,
+ 62, 63, 64, 65, 65, 66, 67, 67, 67, 68, 68, 69, 69, 69, 70, 70, 70, 71,
+ 71, 71, 71, 71, 59, 58, 58, 57, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
+ 66, 66, 67, 67, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71, 71, 71,
+ 60, 59, 59, 58, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 66, 67, 67, 68,
+ 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 61, 61, 60, 59,
+ 59, 59, 60, 61, 62, 63, 64, 65, 65, 66, 67, 67, 68, 68, 69, 69, 70, 70,
+ 70, 70, 71, 71, 71, 71, 72, 72, 72, 72, 63, 62, 61, 61, 60, 61, 61, 62,
+ 63, 64, 64, 65, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70, 70, 71, 71, 71,
+ 71, 72, 72, 72, 72, 72, 64, 63, 62, 61, 61, 61, 62, 63, 64, 64, 65, 66,
+ 66, 67, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71, 72, 72, 72, 72,
+ 72, 72, 64, 64, 63, 62, 62, 62, 63, 64, 64, 65, 66, 66, 67, 67, 68, 68,
+ 69, 69, 70, 70, 70, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 65, 65,
+ 64, 63, 63, 63, 64, 64, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70,
+ 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72, 66, 66, 65, 64, 64, 64,
+ 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72,
+ 72, 72, 72, 73, 73, 73, 73, 73, 67, 66, 66, 65, 65, 65, 65, 66, 66, 67,
+ 67, 68, 68, 69, 69, 70, 70, 70, 71, 71, 71, 71, 72, 72, 72, 72, 73, 73,
+ 73, 73, 73, 73, 68, 67, 66, 66, 65, 66, 66, 66, 67, 67, 68, 68, 69, 69,
+ 69, 70, 70, 71, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73,
+ 68, 68, 67, 67, 66, 66, 67, 67, 67, 68, 68, 69, 69, 69, 70, 70, 71, 71,
+ 71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 69, 68, 68, 67,
+ 67, 67, 67, 68, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72, 72,
+ 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 69, 68, 68, 67, 67, 67, 67, 68,
+ 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72, 72, 72, 72, 73, 73,
+ 73, 73, 73, 73, 73, 73, 69, 68, 68, 67, 67, 67, 67, 68, 68, 68, 69, 69,
+ 69, 70, 70, 70, 71, 71, 71, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73,
+ 73, 73, 69, 68, 68, 67, 67, 67, 67, 68, 68, 68, 69, 69, 69, 70, 70, 70,
+ 71, 71, 71, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 77, 79, 85, 77, 81, 83, 86, 79, 83, 89, 91, 85, 86, 91, 94,
+ /* Size 8 */
+ 64, 58, 74, 76, 77, 80, 83, 85, 58, 68, 75, 72, 73, 75, 78, 81, 74, 75,
+ 78, 78, 78, 79, 81, 83, 76, 72, 78, 80, 82, 83, 84, 85, 77, 73, 78, 82,
+ 84, 85, 87, 88, 80, 75, 79, 83, 85, 87, 89, 90, 83, 78, 81, 84, 87, 89,
+ 90, 91, 85, 81, 83, 85, 88, 90, 91, 92,
+ /* Size 16 */
+ 64, 61, 58, 65, 74, 75, 76, 76, 77, 78, 80, 81, 83, 84, 85, 85, 61, 62,
+ 63, 68, 74, 74, 74, 74, 75, 76, 77, 79, 80, 82, 83, 83, 58, 63, 68, 71,
+ 75, 73, 72, 72, 73, 74, 75, 77, 78, 80, 81, 81, 65, 68, 71, 74, 76, 76,
+ 75, 75, 75, 76, 77, 78, 79, 81, 82, 82, 74, 74, 75, 76, 78, 78, 78, 78,
+ 78, 78, 79, 80, 81, 82, 83, 83, 75, 74, 73, 76, 78, 79, 79, 79, 79, 80,
+ 81, 81, 82, 83, 84, 84, 76, 74, 72, 75, 78, 79, 80, 81, 82, 82, 83, 83,
+ 84, 85, 85, 85, 76, 74, 72, 75, 78, 79, 81, 82, 83, 83, 84, 85, 85, 86,
+ 86, 86, 77, 75, 73, 75, 78, 79, 82, 83, 84, 85, 85, 86, 87, 87, 88, 88,
+ 78, 76, 74, 76, 78, 80, 82, 83, 85, 85, 86, 87, 88, 88, 89, 89, 80, 77,
+ 75, 77, 79, 81, 83, 84, 85, 86, 87, 88, 89, 89, 90, 90, 81, 79, 77, 78,
+ 80, 81, 83, 85, 86, 87, 88, 89, 89, 90, 90, 90, 83, 80, 78, 79, 81, 82,
+ 84, 85, 87, 88, 89, 89, 90, 91, 91, 91, 84, 82, 80, 81, 82, 83, 85, 86,
+ 87, 88, 89, 90, 91, 91, 92, 92, 85, 83, 81, 82, 83, 84, 85, 86, 88, 89,
+ 90, 90, 91, 92, 92, 92, 85, 83, 81, 82, 83, 84, 85, 86, 88, 89, 90, 90,
+ 91, 92, 92, 92,
+ /* Size 32 */
+ 64, 62, 61, 59, 58, 61, 65, 69, 74, 74, 75, 75, 76, 76, 76, 77, 77, 78,
+ 78, 79, 80, 80, 81, 82, 83, 83, 84, 85, 85, 85, 85, 85, 62, 62, 61, 61,
+ 60, 63, 67, 70, 74, 74, 74, 75, 75, 75, 75, 76, 76, 77, 77, 78, 79, 79,
+ 80, 81, 81, 82, 83, 83, 84, 84, 84, 84, 61, 61, 62, 62, 63, 65, 68, 71,
+ 74, 74, 74, 74, 74, 74, 74, 75, 75, 76, 76, 77, 77, 78, 79, 80, 80, 81,
+ 82, 82, 83, 83, 83, 83, 59, 61, 62, 64, 65, 67, 70, 72, 74, 74, 74, 73,
+ 73, 73, 73, 74, 74, 74, 75, 76, 76, 77, 78, 78, 79, 80, 81, 81, 82, 82,
+ 82, 82, 58, 60, 63, 65, 68, 70, 71, 73, 75, 74, 73, 73, 72, 72, 72, 73,
+ 73, 73, 74, 75, 75, 76, 77, 77, 78, 79, 80, 80, 81, 81, 81, 81, 61, 63,
+ 65, 67, 70, 71, 72, 74, 76, 75, 74, 74, 73, 74, 74, 74, 74, 74, 75, 76,
+ 76, 77, 77, 78, 79, 79, 80, 81, 81, 81, 81, 81, 65, 67, 68, 70, 71, 72,
+ 74, 75, 76, 76, 76, 75, 75, 75, 75, 75, 75, 76, 76, 76, 77, 78, 78, 79,
+ 79, 80, 81, 81, 82, 82, 82, 82, 69, 70, 71, 72, 73, 74, 75, 76, 77, 77,
+ 77, 76, 76, 76, 76, 76, 76, 77, 77, 77, 78, 78, 79, 79, 80, 81, 81, 82,
+ 82, 82, 82, 82, 74, 74, 74, 74, 75, 76, 76, 77, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 79, 79, 80, 80, 81, 81, 82, 82, 83, 83, 83, 83,
+ 74, 74, 74, 74, 74, 75, 76, 77, 78, 78, 78, 78, 78, 78, 78, 78, 78, 79,
+ 79, 79, 80, 80, 81, 81, 81, 82, 82, 83, 83, 83, 83, 83, 75, 74, 74, 74,
+ 73, 74, 76, 77, 78, 78, 79, 79, 79, 79, 79, 79, 79, 80, 80, 80, 81, 81,
+ 81, 82, 82, 83, 83, 84, 84, 84, 84, 84, 75, 75, 74, 73, 73, 74, 75, 76,
+ 78, 78, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 83, 83, 83,
+ 84, 84, 85, 85, 85, 85, 76, 75, 74, 73, 72, 73, 75, 76, 78, 78, 79, 80,
+ 80, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, 84, 84, 85, 85, 85, 85,
+ 85, 85, 76, 75, 74, 73, 72, 74, 75, 76, 78, 78, 79, 80, 81, 81, 81, 82,
+ 82, 82, 83, 83, 83, 84, 84, 84, 84, 85, 85, 86, 86, 86, 86, 86, 76, 75,
+ 74, 73, 72, 74, 75, 76, 78, 78, 79, 80, 81, 81, 82, 82, 83, 83, 83, 84,
+ 84, 84, 85, 85, 85, 85, 86, 86, 86, 86, 86, 86, 77, 76, 75, 74, 73, 74,
+ 75, 76, 78, 78, 79, 80, 81, 82, 82, 83, 83, 84, 84, 84, 85, 85, 85, 86,
+ 86, 86, 86, 87, 87, 87, 87, 87, 77, 76, 75, 74, 73, 74, 75, 76, 78, 78,
+ 79, 81, 82, 82, 83, 83, 84, 84, 85, 85, 85, 86, 86, 86, 87, 87, 87, 87,
+ 88, 88, 88, 88, 78, 77, 76, 74, 73, 74, 76, 77, 78, 79, 80, 81, 82, 82,
+ 83, 84, 84, 85, 85, 85, 86, 86, 86, 87, 87, 87, 88, 88, 88, 88, 88, 88,
+ 78, 77, 76, 75, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 83, 84, 85, 85,
+ 85, 86, 86, 87, 87, 87, 88, 88, 88, 88, 89, 89, 89, 89, 79, 78, 77, 76,
+ 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 84, 85, 85, 86, 86, 87, 87,
+ 87, 88, 88, 88, 89, 89, 89, 89, 89, 89, 80, 79, 77, 76, 75, 76, 77, 78,
+ 79, 80, 81, 82, 83, 83, 84, 85, 85, 86, 86, 87, 87, 88, 88, 88, 89, 89,
+ 89, 89, 90, 90, 90, 90, 80, 79, 78, 77, 76, 77, 78, 78, 79, 80, 81, 82,
+ 83, 84, 84, 85, 86, 86, 87, 87, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90,
+ 90, 90, 81, 80, 79, 78, 77, 77, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85,
+ 86, 86, 87, 87, 88, 88, 89, 89, 89, 90, 90, 90, 90, 90, 90, 90, 82, 81,
+ 80, 78, 77, 78, 79, 79, 80, 81, 82, 83, 83, 84, 85, 86, 86, 87, 87, 88,
+ 88, 89, 89, 89, 90, 90, 90, 90, 91, 91, 91, 91, 83, 81, 80, 79, 78, 79,
+ 79, 80, 81, 81, 82, 83, 84, 84, 85, 86, 87, 87, 88, 88, 89, 89, 89, 90,
+ 90, 90, 91, 91, 91, 91, 91, 91, 83, 82, 81, 80, 79, 79, 80, 81, 81, 82,
+ 83, 83, 84, 85, 85, 86, 87, 87, 88, 88, 89, 89, 90, 90, 90, 91, 91, 91,
+ 91, 91, 91, 91, 84, 83, 82, 81, 80, 80, 81, 81, 82, 82, 83, 84, 85, 85,
+ 86, 86, 87, 88, 88, 89, 89, 89, 90, 90, 91, 91, 91, 91, 92, 92, 92, 92,
+ 85, 83, 82, 81, 80, 81, 81, 82, 82, 83, 84, 84, 85, 86, 86, 87, 87, 88,
+ 88, 89, 89, 90, 90, 90, 91, 91, 91, 92, 92, 92, 92, 92, 85, 84, 83, 82,
+ 81, 81, 82, 82, 83, 83, 84, 85, 85, 86, 86, 87, 88, 88, 89, 89, 90, 90,
+ 90, 91, 91, 91, 92, 92, 92, 92, 92, 92, 85, 84, 83, 82, 81, 81, 82, 82,
+ 83, 83, 84, 85, 85, 86, 86, 87, 88, 88, 89, 89, 90, 90, 90, 91, 91, 91,
+ 92, 92, 92, 92, 92, 92, 85, 84, 83, 82, 81, 81, 82, 82, 83, 83, 84, 85,
+ 85, 86, 86, 87, 88, 88, 89, 89, 90, 90, 90, 91, 91, 91, 92, 92, 92, 92,
+ 92, 92, 85, 84, 83, 82, 81, 81, 82, 82, 83, 83, 84, 85, 85, 86, 86, 87,
+ 88, 88, 89, 89, 90, 90, 90, 91, 91, 91, 92, 92, 92, 92, 92, 92 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 48, 59, 60, 65, 59, 62, 64, 66, 60, 64, 68, 70, 65, 66, 70, 73,
+ /* Size 8 */
+ 50, 45, 59, 60, 61, 63, 66, 68, 45, 54, 59, 57, 57, 59, 62, 64, 59, 59,
+ 62, 61, 61, 62, 64, 66, 60, 57, 61, 64, 65, 66, 67, 68, 61, 57, 61, 65,
+ 67, 68, 69, 70, 63, 59, 62, 66, 68, 70, 71, 72, 66, 62, 64, 67, 69, 71,
+ 72, 73, 68, 64, 66, 68, 70, 72, 73, 74,
+ /* Size 16 */
+ 50, 47, 45, 51, 58, 59, 59, 60, 61, 62, 63, 64, 65, 66, 67, 67, 47, 48,
+ 49, 53, 58, 58, 58, 58, 59, 60, 61, 62, 63, 64, 65, 65, 45, 49, 53, 56,
+ 58, 57, 56, 57, 57, 58, 59, 60, 61, 63, 64, 64, 51, 53, 56, 58, 60, 59,
+ 59, 59, 59, 60, 60, 61, 62, 63, 65, 65, 58, 58, 58, 60, 62, 61, 61, 61,
+ 61, 61, 62, 63, 63, 64, 65, 65, 59, 58, 57, 59, 61, 62, 62, 62, 62, 63,
+ 63, 64, 65, 66, 66, 66, 59, 58, 56, 59, 61, 62, 63, 64, 64, 65, 65, 66,
+ 66, 67, 67, 67, 60, 58, 57, 59, 61, 62, 64, 64, 65, 66, 66, 67, 67, 68,
+ 68, 68, 61, 59, 57, 59, 61, 62, 64, 65, 66, 67, 67, 68, 68, 69, 69, 69,
+ 62, 60, 58, 60, 61, 63, 65, 66, 67, 68, 68, 69, 69, 70, 70, 70, 63, 61,
+ 59, 60, 62, 63, 65, 66, 67, 68, 69, 70, 70, 71, 71, 71, 64, 62, 60, 61,
+ 63, 64, 66, 67, 68, 69, 70, 70, 71, 71, 72, 72, 65, 63, 61, 62, 63, 65,
+ 66, 67, 68, 69, 70, 71, 71, 72, 72, 72, 66, 64, 63, 63, 64, 66, 67, 68,
+ 69, 70, 71, 71, 72, 72, 73, 73, 67, 65, 64, 65, 65, 66, 67, 68, 69, 70,
+ 71, 72, 72, 73, 73, 73, 67, 65, 64, 65, 65, 66, 67, 68, 69, 70, 71, 72,
+ 72, 73, 73, 73,
+ /* Size 32 */
+ 49, 48, 47, 46, 45, 47, 50, 54, 58, 58, 58, 59, 59, 59, 60, 60, 60, 61,
+ 61, 62, 62, 63, 64, 64, 65, 65, 66, 66, 67, 67, 67, 67, 48, 48, 47, 47,
+ 46, 49, 52, 55, 58, 58, 58, 58, 58, 58, 59, 59, 59, 60, 60, 61, 61, 62,
+ 63, 63, 64, 64, 65, 65, 66, 66, 66, 66, 47, 47, 48, 48, 48, 50, 53, 55,
+ 58, 58, 58, 58, 57, 58, 58, 58, 58, 59, 59, 60, 61, 61, 62, 62, 63, 63,
+ 64, 65, 65, 65, 65, 65, 46, 47, 48, 49, 51, 52, 54, 56, 58, 58, 57, 57,
+ 57, 57, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 63, 63, 64, 64, 64,
+ 64, 64, 45, 46, 48, 51, 53, 54, 55, 57, 58, 58, 57, 57, 56, 56, 56, 57,
+ 57, 57, 58, 58, 59, 59, 60, 60, 61, 62, 62, 63, 63, 63, 63, 63, 47, 49,
+ 50, 52, 54, 55, 56, 58, 59, 58, 58, 58, 57, 57, 57, 57, 58, 58, 58, 59,
+ 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 50, 52, 53, 54, 55, 56,
+ 57, 59, 60, 59, 59, 59, 58, 58, 58, 58, 59, 59, 59, 60, 60, 61, 61, 62,
+ 62, 63, 63, 64, 64, 64, 64, 64, 54, 55, 55, 56, 57, 58, 59, 60, 60, 60,
+ 60, 60, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 62, 62, 63, 63, 64, 64,
+ 65, 65, 65, 65, 58, 58, 58, 58, 58, 59, 60, 60, 61, 61, 61, 61, 61, 61,
+ 61, 61, 61, 61, 61, 61, 62, 62, 62, 63, 63, 64, 64, 65, 65, 65, 65, 65,
+ 58, 58, 58, 58, 58, 58, 59, 60, 61, 61, 61, 61, 61, 61, 61, 61, 61, 62,
+ 62, 62, 62, 63, 63, 63, 64, 64, 65, 65, 66, 66, 66, 66, 58, 58, 58, 57,
+ 57, 58, 59, 60, 61, 61, 61, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63,
+ 64, 64, 64, 65, 65, 66, 66, 66, 66, 66, 59, 58, 58, 57, 57, 58, 59, 60,
+ 61, 61, 62, 62, 62, 63, 63, 63, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65,
+ 66, 66, 67, 67, 67, 67, 59, 58, 57, 57, 56, 57, 58, 59, 61, 61, 62, 62,
+ 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67,
+ 67, 67, 59, 58, 58, 57, 56, 57, 58, 59, 61, 61, 62, 63, 63, 64, 64, 64,
+ 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 60, 59,
+ 58, 57, 56, 57, 58, 59, 61, 61, 62, 63, 63, 64, 64, 65, 65, 65, 65, 66,
+ 66, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 60, 59, 58, 57, 57, 57,
+ 58, 60, 61, 61, 62, 63, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 67,
+ 68, 68, 68, 68, 69, 69, 69, 69, 60, 59, 58, 58, 57, 58, 59, 60, 61, 61,
+ 62, 63, 64, 64, 65, 65, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 69, 69,
+ 69, 69, 69, 69, 61, 60, 59, 58, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65,
+ 65, 66, 66, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+ 61, 60, 59, 59, 58, 58, 59, 60, 61, 62, 63, 63, 64, 65, 65, 66, 67, 67,
+ 67, 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 62, 61, 60, 59,
+ 58, 59, 60, 60, 61, 62, 63, 64, 65, 65, 66, 66, 67, 67, 68, 68, 68, 69,
+ 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 62, 61, 61, 60, 59, 59, 60, 61,
+ 62, 62, 63, 64, 65, 65, 66, 67, 67, 68, 68, 68, 69, 69, 69, 70, 70, 70,
+ 70, 70, 71, 71, 71, 71, 63, 62, 61, 60, 59, 60, 61, 61, 62, 63, 63, 64,
+ 65, 66, 66, 67, 67, 68, 68, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71,
+ 71, 71, 64, 63, 62, 61, 60, 60, 61, 62, 62, 63, 64, 65, 65, 66, 66, 67,
+ 68, 68, 68, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 64, 63,
+ 62, 61, 60, 61, 62, 62, 63, 63, 64, 65, 66, 66, 67, 67, 68, 68, 69, 69,
+ 70, 70, 70, 70, 71, 71, 71, 71, 72, 72, 72, 72, 65, 64, 63, 62, 61, 62,
+ 62, 63, 63, 64, 64, 65, 66, 66, 67, 68, 68, 69, 69, 69, 70, 70, 70, 71,
+ 71, 71, 71, 72, 72, 72, 72, 72, 65, 64, 63, 63, 62, 62, 63, 63, 64, 64,
+ 65, 65, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70, 71, 71, 71, 71, 72, 72,
+ 72, 72, 72, 72, 66, 65, 64, 63, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67,
+ 67, 68, 69, 69, 69, 70, 70, 71, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72,
+ 66, 65, 65, 64, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69,
+ 70, 70, 70, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 67, 66, 65, 64,
+ 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69, 69, 69, 70, 70, 71, 71,
+ 71, 72, 72, 72, 72, 73, 73, 73, 73, 73, 67, 66, 65, 64, 63, 64, 64, 65,
+ 65, 66, 66, 67, 67, 68, 68, 69, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72,
+ 72, 73, 73, 73, 73, 73, 67, 66, 65, 64, 63, 64, 64, 65, 65, 66, 66, 67,
+ 67, 68, 68, 69, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73,
+ 73, 73, 67, 66, 65, 64, 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 68, 69,
+ 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 73 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 66, 76, 82, 66, 74, 79, 83, 76, 79, 84, 86, 82, 83, 86, 87,
+ /* Size 8 */
+ 64, 58, 60, 65, 71, 75, 77, 79, 58, 61, 60, 63, 68, 72, 75, 78, 60, 60,
+ 67, 69, 72, 75, 77, 79, 65, 63, 69, 73, 75, 77, 79, 80, 71, 68, 72, 75,
+ 78, 79, 80, 81, 75, 72, 75, 77, 79, 80, 81, 82, 77, 75, 77, 79, 80, 81,
+ 82, 82, 79, 78, 79, 80, 81, 82, 82, 83,
+ /* Size 16 */
+ 64, 61, 58, 59, 60, 62, 65, 68, 71, 72, 75, 76, 77, 78, 79, 79, 61, 60,
+ 60, 60, 60, 62, 64, 67, 69, 71, 73, 75, 76, 77, 79, 79, 58, 60, 61, 61,
+ 60, 62, 63, 66, 68, 70, 72, 74, 75, 77, 78, 78, 59, 60, 61, 62, 63, 65,
+ 66, 68, 70, 72, 73, 75, 76, 77, 78, 78, 60, 60, 60, 63, 67, 68, 69, 71,
+ 72, 73, 75, 76, 77, 78, 79, 79, 62, 62, 62, 65, 68, 70, 71, 73, 74, 75,
+ 76, 77, 78, 79, 79, 79, 65, 64, 63, 66, 69, 71, 73, 74, 75, 76, 77, 78,
+ 79, 79, 80, 80, 68, 67, 66, 68, 71, 73, 74, 75, 77, 77, 78, 79, 79, 80,
+ 80, 80, 71, 69, 68, 70, 72, 74, 75, 77, 78, 78, 79, 80, 80, 80, 81, 81,
+ 72, 71, 70, 72, 73, 75, 76, 77, 78, 79, 80, 80, 81, 81, 81, 81, 75, 73,
+ 72, 73, 75, 76, 77, 78, 79, 80, 80, 81, 81, 81, 82, 82, 76, 75, 74, 75,
+ 76, 77, 78, 79, 80, 80, 81, 81, 81, 82, 82, 82, 77, 76, 75, 76, 77, 78,
+ 79, 79, 80, 81, 81, 81, 82, 82, 82, 82, 78, 77, 77, 77, 78, 79, 79, 80,
+ 80, 81, 81, 82, 82, 82, 83, 83, 79, 79, 78, 78, 79, 79, 80, 80, 81, 81,
+ 82, 82, 82, 83, 83, 83, 79, 79, 78, 78, 79, 79, 80, 80, 81, 81, 82, 82,
+ 82, 83, 83, 83,
+ /* Size 32 */
+ 64, 62, 61, 60, 58, 59, 59, 59, 60, 61, 62, 64, 65, 67, 68, 69, 71, 71,
+ 72, 73, 75, 75, 76, 77, 77, 78, 78, 79, 79, 79, 79, 79, 62, 61, 61, 60,
+ 59, 59, 59, 60, 60, 61, 62, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74, 75,
+ 75, 76, 77, 77, 78, 78, 79, 79, 79, 79, 61, 61, 60, 60, 60, 60, 60, 60,
+ 60, 61, 62, 63, 64, 65, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 76, 77,
+ 77, 78, 79, 79, 79, 79, 60, 60, 60, 60, 60, 60, 60, 60, 60, 61, 62, 63,
+ 64, 65, 66, 67, 69, 70, 71, 72, 73, 73, 74, 75, 76, 76, 77, 78, 78, 78,
+ 78, 78, 58, 59, 60, 60, 61, 61, 61, 60, 60, 61, 62, 63, 63, 64, 66, 67,
+ 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 78, 78, 59, 59,
+ 60, 60, 61, 61, 61, 61, 62, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
+ 73, 73, 74, 75, 76, 76, 77, 77, 78, 78, 78, 78, 59, 59, 60, 60, 61, 61,
+ 62, 63, 63, 64, 65, 66, 66, 67, 68, 69, 70, 71, 72, 72, 73, 74, 75, 75,
+ 76, 77, 77, 78, 78, 78, 78, 78, 59, 60, 60, 60, 60, 61, 63, 64, 65, 66,
+ 66, 67, 68, 69, 69, 70, 71, 72, 72, 73, 74, 75, 75, 76, 77, 77, 77, 78,
+ 78, 78, 78, 78, 60, 60, 60, 60, 60, 62, 63, 65, 67, 67, 68, 69, 69, 70,
+ 71, 71, 72, 73, 73, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79, 79, 79,
+ 61, 61, 61, 61, 61, 62, 64, 66, 67, 68, 69, 70, 70, 71, 72, 72, 73, 73,
+ 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79, 79, 79, 79, 62, 62, 62, 62,
+ 62, 63, 65, 66, 68, 69, 70, 71, 71, 72, 73, 73, 74, 74, 75, 75, 76, 76,
+ 77, 77, 78, 78, 79, 79, 79, 79, 79, 79, 64, 64, 63, 63, 63, 64, 66, 67,
+ 69, 70, 71, 71, 72, 73, 73, 74, 75, 75, 76, 76, 76, 77, 77, 78, 78, 79,
+ 79, 79, 80, 80, 80, 80, 65, 65, 64, 64, 63, 65, 66, 68, 69, 70, 71, 72,
+ 73, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 78, 79, 79, 79, 80, 80, 80,
+ 80, 80, 67, 66, 65, 65, 64, 66, 67, 69, 70, 71, 72, 73, 74, 74, 75, 75,
+ 76, 76, 77, 77, 78, 78, 78, 79, 79, 79, 80, 80, 80, 80, 80, 80, 68, 67,
+ 67, 66, 66, 67, 68, 69, 71, 72, 73, 73, 74, 75, 75, 76, 77, 77, 77, 78,
+ 78, 78, 79, 79, 79, 80, 80, 80, 80, 80, 80, 80, 69, 69, 68, 67, 67, 68,
+ 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 77, 77, 78, 78, 79, 79, 79, 79,
+ 80, 80, 80, 80, 81, 81, 81, 81, 71, 70, 69, 69, 68, 69, 70, 71, 72, 73,
+ 74, 75, 75, 76, 77, 77, 78, 78, 78, 79, 79, 79, 80, 80, 80, 80, 80, 81,
+ 81, 81, 81, 81, 71, 71, 70, 70, 69, 70, 71, 72, 73, 73, 74, 75, 76, 76,
+ 77, 77, 78, 78, 79, 79, 79, 80, 80, 80, 80, 80, 81, 81, 81, 81, 81, 81,
+ 72, 72, 71, 71, 70, 71, 72, 72, 73, 74, 75, 76, 76, 77, 77, 78, 78, 79,
+ 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 81, 81, 81, 81, 73, 73, 72, 72,
+ 71, 72, 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 79, 79, 79, 80, 80, 80,
+ 80, 81, 81, 81, 81, 81, 82, 82, 82, 82, 75, 74, 73, 73, 72, 73, 73, 74,
+ 75, 75, 76, 76, 77, 78, 78, 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81,
+ 81, 82, 82, 82, 82, 82, 75, 75, 74, 73, 73, 73, 74, 75, 75, 76, 76, 77,
+ 78, 78, 78, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 81, 82, 82, 82, 82,
+ 82, 82, 76, 75, 75, 74, 74, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79,
+ 80, 80, 80, 80, 81, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 77, 76,
+ 76, 75, 75, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79, 79, 80, 80, 80, 81,
+ 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 82, 77, 77, 76, 76, 75, 76,
+ 76, 77, 77, 77, 78, 78, 79, 79, 79, 80, 80, 80, 81, 81, 81, 81, 81, 82,
+ 82, 82, 82, 82, 82, 82, 82, 82, 78, 77, 77, 76, 76, 76, 77, 77, 77, 78,
+ 78, 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82,
+ 82, 82, 82, 82, 78, 78, 77, 77, 77, 77, 77, 77, 78, 78, 79, 79, 79, 80,
+ 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83,
+ 79, 78, 78, 78, 77, 77, 78, 78, 78, 79, 79, 79, 80, 80, 80, 80, 81, 81,
+ 81, 81, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 79, 79, 79, 78,
+ 78, 78, 78, 78, 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82,
+ 82, 82, 82, 82, 83, 83, 83, 83, 83, 83, 79, 79, 79, 78, 78, 78, 78, 78,
+ 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82,
+ 83, 83, 83, 83, 83, 83, 79, 79, 79, 78, 78, 78, 78, 78, 79, 79, 79, 80,
+ 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83,
+ 83, 83, 79, 79, 79, 78, 78, 78, 78, 78, 79, 79, 79, 80, 80, 80, 80, 81,
+ 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 51, 53, 61, 67, 53, 60, 65, 68, 61, 65, 68, 70, 67, 68, 70, 71,
+ /* Size 8 */
+ 55, 49, 51, 56, 61, 64, 67, 69, 49, 52, 51, 54, 58, 62, 65, 67, 51, 51,
+ 57, 60, 62, 64, 66, 68, 56, 54, 60, 63, 65, 67, 68, 69, 61, 58, 62, 65,
+ 67, 68, 69, 70, 64, 62, 64, 67, 68, 70, 70, 71, 67, 65, 66, 68, 69, 70,
+ 71, 71, 69, 67, 68, 69, 70, 71, 71, 72,
+ /* Size 16 */
+ 54, 51, 49, 50, 50, 53, 55, 58, 60, 62, 64, 65, 66, 67, 68, 68, 51, 51,
+ 50, 50, 51, 52, 54, 57, 59, 61, 63, 64, 65, 66, 67, 67, 49, 50, 51, 51,
+ 51, 52, 54, 56, 58, 60, 61, 63, 64, 65, 67, 67, 50, 50, 51, 52, 53, 55,
+ 56, 58, 59, 61, 63, 64, 65, 66, 67, 67, 50, 51, 51, 53, 57, 58, 59, 60,
+ 61, 63, 64, 65, 66, 67, 67, 67, 53, 52, 52, 55, 58, 59, 61, 62, 63, 64,
+ 65, 66, 67, 67, 68, 68, 55, 54, 54, 56, 59, 61, 63, 63, 64, 65, 66, 67,
+ 67, 68, 69, 69, 58, 57, 56, 58, 60, 62, 63, 64, 65, 66, 67, 67, 68, 68,
+ 69, 69, 60, 59, 58, 59, 61, 63, 64, 65, 66, 67, 68, 68, 69, 69, 69, 69,
+ 62, 61, 60, 61, 63, 64, 65, 66, 67, 68, 68, 69, 69, 69, 70, 70, 64, 63,
+ 61, 63, 64, 65, 66, 67, 68, 68, 69, 69, 70, 70, 70, 70, 65, 64, 63, 64,
+ 65, 66, 67, 67, 68, 69, 69, 70, 70, 70, 70, 70, 66, 65, 64, 65, 66, 67,
+ 67, 68, 69, 69, 70, 70, 70, 71, 71, 71, 67, 66, 65, 66, 67, 67, 68, 68,
+ 69, 69, 70, 70, 71, 71, 71, 71, 68, 67, 67, 67, 67, 68, 69, 69, 69, 70,
+ 70, 70, 71, 71, 71, 71, 68, 67, 67, 67, 67, 68, 69, 69, 69, 70, 70, 70,
+ 71, 71, 71, 71,
+ /* Size 32 */
+ 54, 52, 51, 50, 49, 49, 49, 50, 50, 51, 53, 54, 55, 56, 57, 58, 60, 61,
+ 61, 62, 63, 64, 65, 65, 66, 66, 67, 67, 68, 68, 68, 68, 52, 52, 51, 50,
+ 49, 50, 50, 50, 50, 51, 52, 53, 55, 56, 57, 58, 59, 60, 61, 62, 63, 63,
+ 64, 65, 65, 66, 66, 67, 67, 67, 67, 67, 51, 51, 51, 50, 50, 50, 50, 50,
+ 50, 51, 52, 53, 54, 55, 56, 57, 59, 59, 60, 61, 62, 63, 64, 64, 65, 65,
+ 66, 67, 67, 67, 67, 67, 50, 50, 50, 50, 51, 51, 50, 50, 50, 51, 52, 53,
+ 54, 55, 56, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65, 65, 66, 66, 67, 67,
+ 67, 67, 49, 49, 50, 51, 51, 51, 51, 51, 50, 51, 52, 53, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 63, 64, 65, 65, 66, 66, 66, 66, 66, 49, 50,
+ 50, 51, 51, 51, 51, 52, 52, 52, 53, 54, 55, 55, 56, 57, 58, 59, 60, 61,
+ 62, 62, 63, 64, 64, 65, 65, 66, 67, 67, 67, 67, 49, 50, 50, 50, 51, 51,
+ 52, 53, 53, 54, 55, 55, 56, 57, 58, 58, 59, 60, 61, 61, 62, 63, 63, 64,
+ 65, 65, 66, 66, 67, 67, 67, 67, 50, 50, 50, 50, 51, 52, 53, 54, 55, 55,
+ 56, 57, 57, 58, 59, 59, 60, 61, 61, 62, 63, 63, 64, 65, 65, 66, 66, 66,
+ 67, 67, 67, 67, 50, 50, 50, 50, 50, 52, 53, 55, 56, 57, 58, 58, 59, 59,
+ 60, 60, 61, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 67, 67, 67,
+ 51, 51, 51, 51, 51, 52, 54, 55, 57, 58, 58, 59, 60, 60, 61, 61, 62, 62,
+ 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 67, 67, 67, 67, 53, 52, 52, 52,
+ 52, 53, 55, 56, 58, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 65, 65,
+ 65, 66, 66, 67, 67, 67, 68, 68, 68, 68, 54, 53, 53, 53, 53, 54, 55, 57,
+ 58, 59, 60, 61, 61, 62, 62, 63, 63, 64, 64, 65, 65, 65, 66, 66, 67, 67,
+ 67, 68, 68, 68, 68, 68, 55, 55, 54, 54, 53, 55, 56, 57, 59, 60, 60, 61,
+ 62, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68,
+ 68, 68, 56, 56, 55, 55, 54, 55, 57, 58, 59, 60, 61, 62, 63, 63, 64, 64,
+ 65, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 57, 57,
+ 56, 56, 55, 56, 58, 59, 60, 61, 62, 62, 63, 64, 64, 65, 65, 65, 66, 66,
+ 67, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 58, 58, 57, 57, 56, 57,
+ 58, 59, 60, 61, 62, 63, 64, 64, 65, 65, 66, 66, 66, 67, 67, 67, 67, 68,
+ 68, 68, 68, 69, 69, 69, 69, 69, 60, 59, 59, 58, 57, 58, 59, 60, 61, 62,
+ 63, 63, 64, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69,
+ 69, 69, 69, 69, 61, 60, 59, 59, 58, 59, 60, 61, 62, 62, 63, 64, 65, 65,
+ 65, 66, 66, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+ 61, 61, 60, 60, 59, 60, 61, 61, 62, 63, 64, 64, 65, 65, 66, 66, 67, 67,
+ 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 62, 62, 61, 61,
+ 60, 61, 61, 62, 63, 63, 64, 65, 65, 66, 66, 67, 67, 67, 68, 68, 68, 68,
+ 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 63, 63, 62, 62, 61, 62, 62, 63,
+ 63, 64, 65, 65, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 69,
+ 70, 70, 70, 70, 70, 70, 64, 63, 63, 62, 62, 62, 63, 63, 64, 64, 65, 65,
+ 66, 66, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70,
+ 70, 70, 65, 64, 64, 63, 63, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 67,
+ 68, 68, 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 65, 65,
+ 64, 64, 63, 64, 64, 65, 65, 65, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69,
+ 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 66, 65, 65, 65, 64, 64,
+ 65, 65, 65, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 70, 70,
+ 70, 70, 70, 70, 70, 70, 70, 70, 66, 66, 65, 65, 65, 65, 65, 66, 66, 66,
+ 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70,
+ 71, 71, 71, 71, 67, 66, 66, 66, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68,
+ 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71,
+ 67, 67, 67, 66, 66, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69,
+ 69, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 68, 67, 67, 67,
+ 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70,
+ 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 68, 67, 67, 67, 66, 67, 67, 67,
+ 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 71,
+ 71, 71, 71, 71, 71, 71, 68, 67, 67, 67, 66, 67, 67, 67, 67, 67, 68, 68,
+ 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71,
+ 71, 71, 68, 67, 67, 67, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 69, 69,
+ 69, 69, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 74, 75, 79, 74, 77, 78, 80, 75, 78, 82, 84, 79, 80, 84, 86,
+ /* Size 8 */
+ 64, 59, 72, 73, 74, 76, 78, 80, 59, 67, 72, 70, 71, 72, 75, 77, 72, 72,
+ 75, 74, 74, 75, 76, 78, 73, 70, 74, 76, 77, 78, 79, 80, 74, 71, 74, 77,
+ 79, 80, 80, 81, 76, 72, 75, 78, 80, 81, 82, 82, 78, 75, 76, 79, 80, 82,
+ 83, 83, 80, 77, 78, 80, 81, 82, 83, 84,
+ /* Size 16 */
+ 64, 62, 59, 65, 72, 72, 73, 73, 74, 75, 76, 77, 78, 79, 80, 80, 62, 62,
+ 63, 67, 72, 72, 71, 72, 72, 73, 74, 75, 76, 77, 78, 78, 59, 63, 67, 70,
+ 72, 71, 70, 70, 71, 72, 72, 74, 75, 76, 77, 77, 65, 67, 70, 71, 73, 73,
+ 72, 72, 72, 73, 74, 75, 75, 76, 77, 77, 72, 72, 72, 73, 75, 74, 74, 74,
+ 74, 75, 75, 76, 76, 77, 78, 78, 72, 72, 71, 73, 74, 75, 75, 75, 76, 76,
+ 76, 77, 77, 78, 79, 79, 73, 71, 70, 72, 74, 75, 76, 77, 77, 77, 78, 78,
+ 79, 79, 80, 80, 73, 72, 70, 72, 74, 75, 77, 77, 78, 78, 79, 79, 79, 80,
+ 80, 80, 74, 72, 71, 72, 74, 76, 77, 78, 79, 79, 80, 80, 80, 81, 81, 81,
+ 75, 73, 72, 73, 75, 76, 77, 78, 79, 80, 80, 81, 81, 81, 82, 82, 76, 74,
+ 72, 74, 75, 76, 78, 79, 80, 80, 81, 81, 82, 82, 82, 82, 77, 75, 74, 75,
+ 76, 77, 78, 79, 80, 81, 81, 82, 82, 83, 83, 83, 78, 76, 75, 75, 76, 77,
+ 79, 79, 80, 81, 82, 82, 83, 83, 83, 83, 79, 77, 76, 76, 77, 78, 79, 80,
+ 81, 81, 82, 83, 83, 83, 84, 84, 80, 78, 77, 77, 78, 79, 80, 80, 81, 82,
+ 82, 83, 83, 84, 84, 84, 80, 78, 77, 77, 78, 79, 80, 80, 81, 82, 82, 83,
+ 83, 84, 84, 84,
+ /* Size 32 */
+ 64, 63, 62, 60, 59, 62, 65, 68, 72, 72, 72, 72, 73, 73, 73, 74, 74, 74,
+ 75, 75, 76, 76, 77, 77, 78, 78, 79, 79, 80, 80, 80, 80, 63, 62, 62, 61,
+ 61, 63, 66, 69, 72, 72, 72, 72, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75,
+ 76, 76, 77, 77, 78, 78, 79, 79, 79, 79, 62, 62, 62, 63, 63, 65, 67, 69,
+ 72, 72, 72, 72, 71, 72, 72, 72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77,
+ 77, 78, 78, 78, 78, 78, 60, 61, 63, 64, 65, 67, 68, 70, 72, 72, 71, 71,
+ 71, 71, 71, 71, 71, 72, 72, 73, 73, 74, 74, 75, 75, 76, 76, 77, 77, 77,
+ 77, 77, 59, 61, 63, 65, 67, 68, 70, 71, 72, 72, 71, 71, 70, 70, 70, 71,
+ 71, 71, 72, 72, 72, 73, 74, 74, 75, 75, 76, 76, 77, 77, 77, 77, 62, 63,
+ 65, 67, 68, 69, 70, 72, 73, 72, 72, 72, 71, 71, 71, 71, 72, 72, 72, 73,
+ 73, 74, 74, 75, 75, 75, 76, 76, 77, 77, 77, 77, 65, 66, 67, 68, 70, 70,
+ 71, 72, 73, 73, 73, 72, 72, 72, 72, 72, 72, 73, 73, 73, 74, 74, 75, 75,
+ 75, 76, 76, 77, 77, 77, 77, 77, 68, 69, 69, 70, 71, 72, 72, 73, 74, 74,
+ 74, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 76, 76, 76, 77, 77,
+ 78, 78, 78, 78, 72, 72, 72, 72, 72, 73, 73, 74, 75, 75, 74, 74, 74, 74,
+ 74, 74, 74, 74, 75, 75, 75, 75, 76, 76, 76, 77, 77, 78, 78, 78, 78, 78,
+ 72, 72, 72, 72, 72, 72, 73, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75,
+ 75, 75, 76, 76, 76, 77, 77, 77, 78, 78, 78, 78, 78, 78, 72, 72, 72, 71,
+ 71, 72, 73, 74, 74, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 77,
+ 77, 77, 77, 78, 78, 78, 79, 79, 79, 79, 72, 72, 72, 71, 71, 72, 72, 73,
+ 74, 75, 75, 75, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 78, 78, 78,
+ 79, 79, 79, 79, 79, 79, 73, 72, 71, 71, 70, 71, 72, 73, 74, 75, 75, 76,
+ 76, 76, 77, 77, 77, 77, 77, 78, 78, 78, 78, 78, 79, 79, 79, 79, 80, 80,
+ 80, 80, 73, 72, 72, 71, 70, 71, 72, 73, 74, 75, 75, 76, 76, 77, 77, 77,
+ 77, 78, 78, 78, 78, 78, 79, 79, 79, 79, 80, 80, 80, 80, 80, 80, 73, 73,
+ 72, 71, 70, 71, 72, 73, 74, 75, 75, 76, 77, 77, 77, 78, 78, 78, 78, 78,
+ 79, 79, 79, 79, 79, 80, 80, 80, 80, 80, 80, 80, 74, 73, 72, 71, 71, 71,
+ 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, 80, 80,
+ 80, 80, 80, 81, 81, 81, 81, 81, 74, 73, 72, 71, 71, 72, 72, 73, 74, 75,
+ 76, 76, 77, 77, 78, 78, 79, 79, 79, 79, 80, 80, 80, 80, 80, 81, 81, 81,
+ 81, 81, 81, 81, 74, 74, 73, 72, 71, 72, 73, 74, 74, 75, 76, 76, 77, 78,
+ 78, 78, 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 81, 82, 82, 82, 82,
+ 75, 74, 73, 72, 72, 72, 73, 74, 75, 75, 76, 77, 77, 78, 78, 79, 79, 79,
+ 80, 80, 80, 80, 81, 81, 81, 81, 81, 82, 82, 82, 82, 82, 75, 74, 74, 73,
+ 72, 73, 73, 74, 75, 75, 76, 77, 78, 78, 78, 79, 79, 80, 80, 80, 81, 81,
+ 81, 81, 81, 82, 82, 82, 82, 82, 82, 82, 76, 75, 74, 73, 72, 73, 74, 74,
+ 75, 76, 76, 77, 78, 78, 79, 79, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82,
+ 82, 82, 82, 82, 82, 82, 76, 75, 75, 74, 73, 74, 74, 75, 75, 76, 77, 77,
+ 78, 78, 79, 79, 80, 80, 80, 81, 81, 81, 82, 82, 82, 82, 82, 83, 83, 83,
+ 83, 83, 77, 76, 75, 74, 74, 74, 75, 75, 76, 76, 77, 77, 78, 79, 79, 80,
+ 80, 80, 81, 81, 81, 82, 82, 82, 82, 82, 83, 83, 83, 83, 83, 83, 77, 76,
+ 76, 75, 74, 75, 75, 76, 76, 77, 77, 78, 78, 79, 79, 80, 80, 81, 81, 81,
+ 82, 82, 82, 82, 83, 83, 83, 83, 83, 83, 83, 83, 78, 77, 76, 75, 75, 75,
+ 75, 76, 76, 77, 77, 78, 79, 79, 79, 80, 80, 81, 81, 81, 82, 82, 82, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 78, 77, 77, 76, 75, 75, 76, 76, 77, 77,
+ 78, 78, 79, 79, 80, 80, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, 83,
+ 84, 84, 84, 84, 79, 78, 77, 76, 76, 76, 76, 77, 77, 78, 78, 79, 79, 80,
+ 80, 80, 81, 81, 81, 82, 82, 82, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84,
+ 79, 78, 78, 77, 76, 76, 77, 77, 78, 78, 78, 79, 79, 80, 80, 81, 81, 81,
+ 82, 82, 82, 83, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84, 80, 79, 78, 77,
+ 77, 77, 77, 78, 78, 78, 79, 79, 80, 80, 80, 81, 81, 82, 82, 82, 82, 83,
+ 83, 83, 83, 84, 84, 84, 84, 84, 84, 84, 80, 79, 78, 77, 77, 77, 77, 78,
+ 78, 78, 79, 79, 80, 80, 80, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, 84,
+ 84, 84, 84, 84, 84, 84, 80, 79, 78, 77, 77, 77, 77, 78, 78, 78, 79, 79,
+ 80, 80, 80, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, 84, 84, 84, 84, 84,
+ 84, 84, 80, 79, 78, 77, 77, 77, 77, 78, 78, 78, 79, 79, 80, 80, 80, 81,
+ 81, 82, 82, 82, 82, 83, 83, 83, 83, 84, 84, 84, 84, 84, 84, 84 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 52, 60, 61, 65, 60, 63, 64, 66, 61, 64, 67, 69, 65, 66, 69, 71,
+ /* Size 8 */
+ 53, 49, 60, 61, 62, 64, 65, 67, 49, 56, 60, 59, 59, 61, 63, 64, 60, 60,
+ 63, 62, 62, 63, 64, 66, 61, 59, 62, 64, 65, 65, 66, 67, 62, 59, 62, 65,
+ 66, 67, 68, 69, 64, 61, 63, 65, 67, 68, 69, 70, 65, 63, 64, 66, 68, 69,
+ 70, 71, 67, 64, 66, 67, 69, 70, 71, 71,
+ /* Size 16 */
+ 53, 51, 49, 54, 60, 60, 61, 61, 62, 62, 63, 64, 65, 66, 67, 67, 51, 51,
+ 52, 56, 60, 60, 59, 60, 60, 61, 62, 63, 64, 64, 65, 65, 49, 52, 56, 58,
+ 60, 59, 58, 58, 59, 59, 60, 61, 62, 63, 64, 64, 54, 56, 58, 59, 61, 61,
+ 60, 60, 60, 61, 61, 62, 63, 64, 65, 65, 60, 60, 60, 61, 62, 62, 62, 62,
+ 62, 62, 63, 63, 64, 64, 65, 65, 60, 60, 59, 61, 62, 62, 63, 63, 63, 63,
+ 64, 64, 65, 65, 66, 66, 61, 59, 58, 60, 62, 63, 64, 64, 64, 65, 65, 65,
+ 66, 66, 67, 67, 61, 60, 58, 60, 62, 63, 64, 64, 65, 65, 66, 66, 67, 67,
+ 67, 67, 62, 60, 59, 60, 62, 63, 64, 65, 66, 66, 67, 67, 67, 68, 68, 68,
+ 62, 61, 59, 61, 62, 63, 65, 65, 66, 67, 67, 68, 68, 68, 69, 69, 63, 62,
+ 60, 61, 63, 64, 65, 66, 67, 67, 68, 68, 69, 69, 69, 69, 64, 63, 61, 62,
+ 63, 64, 65, 66, 67, 68, 68, 69, 69, 69, 70, 70, 65, 64, 62, 63, 64, 65,
+ 66, 67, 67, 68, 69, 69, 69, 70, 70, 70, 66, 64, 63, 64, 64, 65, 66, 67,
+ 68, 68, 69, 69, 70, 70, 70, 70, 67, 65, 64, 65, 65, 66, 67, 67, 68, 69,
+ 69, 70, 70, 70, 71, 71, 67, 65, 64, 65, 65, 66, 67, 67, 68, 69, 69, 70,
+ 70, 70, 71, 71,
+ /* Size 32 */
+ 53, 52, 51, 50, 49, 51, 53, 56, 59, 60, 60, 60, 60, 61, 61, 61, 61, 62,
+ 62, 63, 63, 63, 64, 64, 65, 65, 66, 66, 66, 66, 66, 66, 52, 51, 51, 50,
+ 50, 52, 54, 57, 59, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 62, 62, 63,
+ 63, 64, 64, 64, 65, 65, 66, 66, 66, 66, 51, 51, 51, 51, 52, 54, 55, 57,
+ 60, 59, 59, 59, 59, 59, 60, 60, 60, 60, 61, 61, 62, 62, 62, 63, 63, 64,
+ 64, 65, 65, 65, 65, 65, 50, 50, 51, 52, 54, 55, 56, 58, 60, 59, 59, 59,
+ 59, 59, 59, 59, 59, 60, 60, 60, 61, 61, 62, 62, 63, 63, 63, 64, 64, 64,
+ 64, 64, 49, 50, 52, 54, 55, 56, 58, 59, 60, 59, 59, 58, 58, 58, 58, 58,
+ 59, 59, 59, 60, 60, 61, 61, 61, 62, 62, 63, 63, 64, 64, 64, 64, 51, 52,
+ 54, 55, 56, 57, 58, 59, 60, 60, 60, 59, 59, 59, 59, 59, 59, 60, 60, 60,
+ 61, 61, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64, 53, 54, 55, 56, 58, 58,
+ 59, 60, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 62, 62, 62,
+ 63, 63, 64, 64, 64, 64, 64, 64, 56, 57, 57, 58, 59, 59, 60, 61, 61, 61,
+ 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64,
+ 65, 65, 65, 65, 59, 59, 60, 60, 60, 60, 61, 61, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 65,
+ 60, 60, 59, 59, 59, 60, 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 63, 63, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 65, 65, 60, 60, 59, 59,
+ 59, 60, 60, 61, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 64,
+ 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 60, 60, 59, 59, 58, 59, 60, 61,
+ 62, 62, 62, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+ 65, 66, 66, 66, 66, 66, 60, 60, 59, 59, 58, 59, 60, 61, 62, 62, 62, 63,
+ 63, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66,
+ 66, 66, 61, 60, 59, 59, 58, 59, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64,
+ 64, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 61, 60,
+ 60, 59, 58, 59, 60, 61, 62, 62, 63, 63, 64, 64, 64, 65, 65, 65, 65, 65,
+ 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 61, 60, 60, 59, 58, 59,
+ 60, 61, 62, 62, 63, 63, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 66, 67,
+ 67, 67, 67, 67, 67, 67, 67, 67, 61, 61, 60, 59, 59, 59, 60, 61, 62, 62,
+ 63, 63, 64, 64, 65, 65, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 68,
+ 68, 68, 68, 68, 62, 61, 60, 60, 59, 60, 60, 61, 62, 62, 63, 64, 64, 65,
+ 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+ 62, 61, 61, 60, 59, 60, 61, 61, 62, 63, 63, 64, 64, 65, 65, 66, 66, 66,
+ 66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 63, 62, 61, 60,
+ 60, 60, 61, 61, 62, 63, 63, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 67,
+ 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 63, 62, 62, 61, 60, 61, 61, 62,
+ 62, 63, 63, 64, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 68, 68,
+ 69, 69, 69, 69, 69, 69, 63, 63, 62, 61, 61, 61, 62, 62, 63, 63, 64, 64,
+ 65, 65, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69,
+ 69, 69, 64, 63, 62, 62, 61, 61, 62, 62, 63, 63, 64, 65, 65, 65, 66, 66,
+ 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 64, 64,
+ 63, 62, 61, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 67, 68, 68,
+ 68, 68, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 65, 64, 63, 63, 62, 62,
+ 63, 63, 64, 64, 64, 65, 65, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69,
+ 69, 69, 70, 70, 70, 70, 70, 70, 65, 64, 64, 63, 62, 63, 63, 63, 64, 64,
+ 65, 65, 66, 66, 66, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70,
+ 70, 70, 70, 70, 66, 65, 64, 63, 63, 63, 64, 64, 64, 65, 65, 65, 66, 66,
+ 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70,
+ 66, 65, 65, 64, 63, 64, 64, 64, 65, 65, 65, 66, 66, 67, 67, 67, 68, 68,
+ 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 66, 66, 65, 64,
+ 64, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69,
+ 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 66, 66, 65, 64, 64, 64, 64, 65,
+ 65, 65, 66, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70,
+ 70, 70, 70, 70, 70, 70, 66, 66, 65, 64, 64, 64, 64, 65, 65, 65, 66, 66,
+ 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70,
+ 70, 70, 66, 66, 65, 64, 64, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67,
+ 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 65, 72, 76, 65, 71, 74, 76, 72, 74, 77, 78, 76, 76, 78, 79,
+ /* Size 8 */
+ 64, 60, 61, 65, 68, 71, 73, 74, 60, 62, 61, 64, 67, 69, 71, 73, 61, 61,
+ 66, 68, 69, 71, 72, 73, 65, 64, 68, 70, 71, 72, 73, 74, 68, 67, 69, 71,
+ 73, 74, 74, 75, 71, 69, 71, 72, 74, 74, 75, 75, 73, 71, 72, 73, 74, 75,
+ 75, 76, 74, 73, 73, 74, 75, 75, 76, 76,
+ /* Size 16 */
+ 64, 62, 60, 60, 61, 63, 65, 67, 68, 70, 71, 72, 73, 73, 74, 74, 62, 61,
+ 61, 61, 61, 63, 64, 66, 68, 69, 70, 71, 72, 73, 73, 73, 60, 61, 62, 62,
+ 61, 62, 64, 65, 67, 68, 69, 70, 71, 72, 73, 73, 60, 61, 62, 63, 64, 65,
+ 66, 67, 68, 69, 70, 71, 72, 73, 73, 73, 61, 61, 61, 64, 66, 67, 68, 68,
+ 69, 70, 71, 72, 72, 73, 73, 73, 63, 63, 62, 65, 67, 68, 69, 70, 70, 71,
+ 72, 72, 73, 73, 74, 74, 65, 64, 64, 66, 68, 69, 70, 71, 71, 72, 72, 73,
+ 73, 74, 74, 74, 67, 66, 65, 67, 68, 70, 71, 71, 72, 73, 73, 73, 74, 74,
+ 74, 74, 68, 68, 67, 68, 69, 70, 71, 72, 73, 73, 74, 74, 74, 74, 75, 75,
+ 70, 69, 68, 69, 70, 71, 72, 73, 73, 74, 74, 74, 75, 75, 75, 75, 71, 70,
+ 69, 70, 71, 72, 72, 73, 74, 74, 74, 75, 75, 75, 75, 75, 72, 71, 70, 71,
+ 72, 72, 73, 73, 74, 74, 75, 75, 75, 75, 75, 75, 73, 72, 71, 72, 72, 73,
+ 73, 74, 74, 75, 75, 75, 75, 75, 76, 76, 73, 73, 72, 73, 73, 73, 74, 74,
+ 74, 75, 75, 75, 75, 76, 76, 76, 74, 73, 73, 73, 73, 74, 74, 74, 75, 75,
+ 75, 75, 76, 76, 76, 76, 74, 73, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75,
+ 76, 76, 76, 76,
+ /* Size 32 */
+ 64, 63, 62, 61, 60, 60, 60, 61, 61, 62, 63, 64, 65, 66, 67, 67, 68, 69,
+ 70, 70, 71, 71, 72, 72, 73, 73, 73, 74, 74, 74, 74, 74, 63, 62, 62, 61,
+ 60, 61, 61, 61, 61, 62, 63, 64, 65, 65, 66, 67, 68, 69, 69, 70, 70, 71,
+ 71, 72, 72, 73, 73, 73, 74, 74, 74, 74, 62, 62, 61, 61, 61, 61, 61, 61,
+ 61, 62, 63, 63, 64, 65, 66, 67, 68, 68, 69, 69, 70, 71, 71, 72, 72, 72,
+ 73, 73, 73, 73, 73, 73, 61, 61, 61, 61, 61, 61, 61, 61, 61, 62, 63, 63,
+ 64, 65, 65, 66, 67, 68, 68, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 73,
+ 73, 73, 60, 60, 61, 61, 62, 62, 62, 61, 61, 62, 62, 63, 64, 64, 65, 66,
+ 67, 67, 68, 69, 69, 70, 70, 71, 71, 72, 72, 73, 73, 73, 73, 73, 60, 61,
+ 61, 61, 62, 62, 62, 62, 62, 63, 63, 64, 65, 65, 66, 67, 67, 68, 69, 69,
+ 70, 70, 71, 71, 72, 72, 72, 73, 73, 73, 73, 73, 60, 61, 61, 61, 62, 62,
+ 63, 63, 64, 64, 65, 65, 66, 66, 67, 67, 68, 69, 69, 70, 70, 71, 71, 71,
+ 72, 72, 73, 73, 73, 73, 73, 73, 61, 61, 61, 61, 61, 62, 63, 64, 65, 65,
+ 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 73, 73,
+ 73, 73, 73, 73, 61, 61, 61, 61, 61, 62, 64, 65, 66, 66, 67, 67, 68, 68,
+ 68, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73,
+ 62, 62, 62, 62, 62, 63, 64, 65, 66, 67, 67, 68, 68, 69, 69, 69, 70, 70,
+ 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 63, 63, 63, 63,
+ 62, 63, 65, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72,
+ 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 64, 64, 63, 63, 63, 64, 65, 66,
+ 67, 68, 68, 69, 69, 70, 70, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73,
+ 74, 74, 74, 74, 74, 74, 65, 65, 64, 64, 64, 65, 66, 67, 68, 68, 69, 69,
+ 70, 70, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74,
+ 74, 74, 66, 65, 65, 65, 64, 65, 66, 67, 68, 69, 69, 70, 70, 71, 71, 71,
+ 72, 72, 72, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 67, 66,
+ 66, 65, 65, 66, 67, 68, 68, 69, 70, 70, 71, 71, 71, 72, 72, 72, 73, 73,
+ 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 74, 67, 67, 67, 66, 66, 67,
+ 67, 68, 69, 69, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74,
+ 74, 74, 74, 74, 75, 75, 75, 75, 68, 68, 68, 67, 67, 67, 68, 69, 69, 70,
+ 70, 71, 71, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 75,
+ 75, 75, 75, 75, 69, 69, 68, 68, 67, 68, 69, 69, 70, 70, 71, 71, 72, 72,
+ 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75,
+ 70, 69, 69, 68, 68, 69, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 73, 73,
+ 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 70, 70, 69, 69,
+ 69, 69, 70, 70, 71, 71, 71, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74,
+ 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 71, 70, 70, 70, 69, 70, 70, 71,
+ 71, 71, 72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75,
+ 75, 75, 75, 75, 75, 75, 71, 71, 71, 70, 70, 70, 71, 71, 71, 72, 72, 72,
+ 73, 73, 73, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75,
+ 75, 75, 72, 71, 71, 71, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 74,
+ 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 72, 72,
+ 72, 71, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 75,
+ 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 73, 72, 72, 72, 71, 72,
+ 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75,
+ 75, 75, 75, 75, 76, 76, 76, 76, 73, 73, 72, 72, 72, 72, 72, 72, 73, 73,
+ 73, 73, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76,
+ 76, 76, 76, 76, 73, 73, 73, 72, 72, 72, 73, 73, 73, 73, 73, 74, 74, 74,
+ 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76,
+ 74, 73, 73, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 75, 75,
+ 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 74, 74, 73, 73,
+ 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75,
+ 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 74, 74, 73, 73, 73, 73, 73, 73,
+ 73, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76,
+ 76, 76, 76, 76, 76, 76, 74, 74, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74,
+ 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76,
+ 76, 76, 74, 74, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 75,
+ 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 55, 57, 62, 66, 57, 61, 65, 67, 62, 65, 67, 68, 66, 67, 68, 69,
+ /* Size 8 */
+ 58, 54, 55, 59, 62, 64, 66, 67, 54, 56, 55, 57, 60, 63, 65, 66, 55, 55,
+ 60, 61, 63, 64, 66, 67, 59, 57, 61, 64, 65, 66, 67, 68, 62, 60, 63, 65,
+ 66, 67, 68, 68, 64, 63, 64, 66, 67, 68, 68, 69, 66, 65, 66, 67, 68, 68,
+ 69, 69, 67, 66, 67, 68, 68, 69, 69, 69,
+ /* Size 16 */
+ 57, 55, 53, 54, 55, 56, 58, 60, 62, 63, 64, 65, 66, 66, 67, 67, 55, 55,
+ 54, 55, 55, 56, 58, 59, 61, 62, 63, 64, 65, 66, 66, 66, 53, 54, 55, 55,
+ 55, 56, 57, 58, 60, 61, 63, 63, 64, 65, 66, 66, 54, 55, 55, 56, 57, 58,
+ 59, 60, 61, 62, 63, 64, 65, 66, 66, 66, 55, 55, 55, 57, 59, 60, 61, 62,
+ 62, 63, 64, 65, 65, 66, 66, 66, 56, 56, 56, 58, 60, 61, 62, 63, 63, 64,
+ 65, 65, 66, 66, 67, 67, 58, 58, 57, 59, 61, 62, 63, 64, 65, 65, 66, 66,
+ 66, 67, 67, 67, 60, 59, 58, 60, 62, 63, 64, 65, 65, 66, 66, 66, 67, 67,
+ 67, 67, 62, 61, 60, 61, 62, 63, 65, 65, 66, 66, 67, 67, 67, 67, 68, 68,
+ 63, 62, 61, 62, 63, 64, 65, 66, 66, 67, 67, 67, 67, 68, 68, 68, 64, 63,
+ 63, 63, 64, 65, 66, 66, 67, 67, 67, 68, 68, 68, 68, 68, 65, 64, 63, 64,
+ 65, 65, 66, 66, 67, 67, 68, 68, 68, 68, 68, 68, 66, 65, 64, 65, 65, 66,
+ 66, 67, 67, 67, 68, 68, 68, 68, 68, 68, 66, 66, 65, 66, 66, 66, 67, 67,
+ 67, 68, 68, 68, 68, 68, 69, 69, 67, 66, 66, 66, 66, 67, 67, 67, 68, 68,
+ 68, 68, 68, 69, 69, 69, 67, 66, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68,
+ 68, 69, 69, 69,
+ /* Size 32 */
+ 57, 56, 55, 54, 53, 54, 54, 54, 54, 55, 56, 57, 58, 59, 60, 60, 61, 62,
+ 63, 63, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 67, 56, 56, 55, 54,
+ 54, 54, 54, 54, 54, 55, 56, 57, 58, 58, 59, 60, 61, 62, 62, 63, 63, 64,
+ 64, 65, 65, 65, 66, 66, 66, 66, 66, 66, 55, 55, 55, 54, 54, 54, 54, 54,
+ 55, 55, 56, 57, 57, 58, 59, 60, 61, 61, 62, 62, 63, 63, 64, 64, 65, 65,
+ 65, 66, 66, 66, 66, 66, 54, 54, 54, 55, 55, 55, 55, 55, 55, 55, 56, 56,
+ 57, 58, 59, 59, 60, 61, 61, 62, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66,
+ 66, 66, 53, 54, 54, 55, 55, 55, 55, 55, 55, 55, 56, 56, 57, 57, 58, 59,
+ 60, 60, 61, 62, 62, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66, 66, 54, 54,
+ 54, 55, 55, 55, 55, 56, 56, 56, 57, 57, 58, 58, 59, 60, 60, 61, 61, 62,
+ 63, 63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 54, 54, 54, 55, 55, 55,
+ 56, 56, 57, 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 63, 63, 63, 64, 64,
+ 65, 65, 65, 66, 66, 66, 66, 66, 54, 54, 54, 55, 55, 56, 56, 57, 58, 58,
+ 59, 59, 60, 60, 61, 61, 62, 62, 62, 63, 63, 64, 64, 65, 65, 65, 65, 66,
+ 66, 66, 66, 66, 54, 54, 55, 55, 55, 56, 57, 58, 59, 59, 60, 60, 61, 61,
+ 61, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 66, 66,
+ 55, 55, 55, 55, 55, 56, 57, 58, 59, 60, 60, 61, 61, 62, 62, 62, 63, 63,
+ 63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 56, 56, 56, 56,
+ 56, 57, 58, 59, 60, 60, 61, 61, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65,
+ 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 57, 57, 57, 56, 56, 57, 58, 59,
+ 60, 61, 61, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66,
+ 66, 66, 67, 67, 67, 67, 58, 58, 57, 57, 57, 58, 59, 60, 61, 61, 62, 62,
+ 63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 67, 67, 67,
+ 67, 67, 59, 58, 58, 58, 57, 58, 59, 60, 61, 62, 62, 63, 63, 64, 64, 64,
+ 65, 65, 65, 65, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 60, 59,
+ 59, 59, 58, 59, 60, 61, 61, 62, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66,
+ 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 60, 60, 60, 59, 59, 60,
+ 60, 61, 62, 62, 63, 63, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 66, 67,
+ 67, 67, 67, 67, 67, 67, 67, 67, 61, 61, 61, 60, 60, 60, 61, 62, 62, 63,
+ 63, 64, 64, 65, 65, 65, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67,
+ 67, 67, 67, 67, 62, 62, 61, 61, 60, 61, 61, 62, 63, 63, 64, 64, 65, 65,
+ 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68,
+ 63, 62, 62, 61, 61, 61, 62, 62, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66,
+ 66, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 63, 63, 62, 62,
+ 62, 62, 63, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67,
+ 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 64, 63, 63, 63, 62, 63, 63, 63,
+ 64, 64, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 68, 68,
+ 68, 68, 68, 68, 68, 68, 64, 64, 63, 63, 63, 63, 63, 64, 64, 64, 65, 65,
+ 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 65, 64, 64, 64, 63, 64, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66,
+ 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 65, 65,
+ 64, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67,
+ 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 65, 65, 65, 65, 64, 64,
+ 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68,
+ 68, 68, 68, 68, 68, 68, 68, 68, 66, 65, 65, 65, 65, 65, 65, 65, 65, 66,
+ 66, 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 68, 68, 66, 66, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 67,
+ 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+ 66, 66, 66, 66, 65, 65, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67,
+ 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 67, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68,
+ 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 67, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 68, 68, 68, 68, 67, 66, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67,
+ 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 67, 66, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67,
+ 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 71, 72, 74, 71, 73, 73, 75, 72, 73, 76, 77, 74, 75, 77, 78,
+ /* Size 8 */
+ 64, 61, 69, 70, 71, 72, 73, 74, 61, 66, 69, 68, 69, 70, 71, 72, 69, 69,
+ 71, 71, 71, 71, 72, 73, 70, 68, 71, 72, 73, 73, 74, 74, 71, 69, 71, 73,
+ 74, 74, 75, 75, 72, 70, 71, 73, 74, 75, 75, 76, 73, 71, 72, 74, 75, 75,
+ 76, 76, 74, 72, 73, 74, 75, 76, 76, 77,
+ /* Size 16 */
+ 64, 62, 61, 65, 69, 70, 70, 70, 71, 71, 72, 72, 73, 74, 74, 74, 62, 63,
+ 63, 66, 69, 69, 69, 69, 70, 70, 71, 71, 72, 73, 73, 73, 61, 63, 66, 68,
+ 69, 69, 68, 68, 69, 69, 70, 70, 71, 72, 72, 72, 65, 66, 68, 69, 70, 70,
+ 69, 70, 70, 70, 70, 71, 72, 72, 73, 73, 69, 69, 69, 70, 71, 71, 71, 71,
+ 71, 71, 71, 72, 72, 73, 73, 73, 70, 69, 69, 70, 71, 71, 71, 72, 72, 72,
+ 72, 72, 73, 73, 74, 74, 70, 69, 68, 69, 71, 71, 72, 72, 73, 73, 73, 73,
+ 74, 74, 74, 74, 70, 69, 68, 70, 71, 72, 72, 73, 73, 73, 74, 74, 74, 74,
+ 75, 75, 71, 70, 69, 70, 71, 72, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75,
+ 71, 70, 69, 70, 71, 72, 73, 73, 74, 74, 75, 75, 75, 75, 76, 76, 72, 71,
+ 70, 70, 71, 72, 73, 74, 74, 75, 75, 75, 75, 76, 76, 76, 72, 71, 70, 71,
+ 72, 72, 73, 74, 74, 75, 75, 75, 76, 76, 76, 76, 73, 72, 71, 72, 72, 73,
+ 74, 74, 75, 75, 75, 76, 76, 76, 76, 76, 74, 73, 72, 72, 73, 73, 74, 74,
+ 75, 75, 76, 76, 76, 76, 77, 77, 74, 73, 72, 73, 73, 74, 74, 75, 75, 76,
+ 76, 76, 76, 77, 77, 77, 74, 73, 72, 73, 73, 74, 74, 75, 75, 76, 76, 76,
+ 76, 77, 77, 77,
+ /* Size 32 */
+ 64, 63, 62, 61, 61, 63, 65, 67, 69, 69, 70, 70, 70, 70, 70, 70, 71, 71,
+ 71, 72, 72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 74, 74, 63, 63, 62, 62,
+ 62, 64, 65, 67, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 71, 71, 71, 72,
+ 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 62, 62, 63, 63, 63, 65, 66, 68,
+ 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 72, 72, 72,
+ 73, 73, 73, 73, 73, 73, 61, 62, 63, 64, 65, 66, 67, 68, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72, 72, 72, 73, 73,
+ 73, 73, 61, 62, 63, 65, 66, 67, 68, 69, 69, 69, 69, 68, 68, 68, 68, 68,
+ 69, 69, 69, 69, 70, 70, 70, 71, 71, 71, 72, 72, 72, 72, 72, 72, 63, 64,
+ 65, 66, 67, 68, 68, 69, 70, 70, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70,
+ 70, 70, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 65, 65, 66, 67, 68, 68,
+ 69, 70, 70, 70, 70, 70, 69, 70, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71,
+ 72, 72, 72, 72, 73, 73, 73, 73, 67, 67, 68, 68, 69, 69, 70, 70, 71, 71,
+ 70, 70, 70, 70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 72, 72, 72, 72, 73,
+ 73, 73, 73, 73, 69, 69, 69, 69, 69, 70, 70, 71, 71, 71, 71, 71, 71, 71,
+ 71, 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73,
+ 69, 69, 69, 69, 69, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
+ 71, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 70, 69, 69, 69,
+ 69, 69, 70, 70, 71, 71, 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72,
+ 72, 73, 73, 73, 73, 73, 74, 74, 74, 74, 70, 69, 69, 69, 68, 69, 70, 70,
+ 71, 71, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73,
+ 74, 74, 74, 74, 74, 74, 70, 69, 69, 69, 68, 69, 69, 70, 71, 71, 71, 72,
+ 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74,
+ 74, 74, 70, 70, 69, 69, 68, 69, 70, 70, 71, 71, 72, 72, 72, 72, 73, 73,
+ 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 70, 70,
+ 69, 69, 68, 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73,
+ 74, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 70, 70, 69, 69, 68, 69,
+ 70, 70, 71, 71, 72, 72, 72, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74,
+ 74, 74, 75, 75, 75, 75, 75, 75, 71, 70, 70, 69, 69, 69, 70, 70, 71, 71,
+ 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75,
+ 75, 75, 75, 75, 71, 70, 70, 69, 69, 69, 70, 70, 71, 71, 72, 72, 73, 73,
+ 73, 73, 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75,
+ 71, 71, 70, 70, 69, 70, 70, 71, 71, 71, 72, 72, 73, 73, 73, 74, 74, 74,
+ 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 72, 71, 70, 70,
+ 69, 70, 70, 71, 71, 72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75,
+ 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 72, 71, 71, 70, 70, 70, 70, 71,
+ 71, 72, 72, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 76,
+ 76, 76, 76, 76, 76, 76, 72, 72, 71, 71, 70, 70, 71, 71, 72, 72, 72, 73,
+ 73, 73, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76,
+ 76, 76, 72, 72, 71, 71, 70, 71, 71, 71, 72, 72, 72, 73, 73, 74, 74, 74,
+ 74, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 76, 73, 72,
+ 72, 71, 71, 71, 71, 72, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75,
+ 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 73, 73, 72, 72, 71, 71,
+ 72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 75, 76, 76, 76,
+ 76, 76, 76, 76, 76, 76, 76, 76, 73, 73, 72, 72, 71, 72, 72, 72, 72, 73,
+ 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76,
+ 77, 77, 77, 77, 74, 73, 73, 72, 72, 72, 72, 72, 73, 73, 73, 74, 74, 74,
+ 74, 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77,
+ 74, 73, 73, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 75, 75, 75, 75,
+ 75, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 74, 74, 73, 73,
+ 72, 73, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 76, 76, 76, 76,
+ 76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 74, 74, 73, 73, 72, 73, 73, 73,
+ 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 77,
+ 77, 77, 77, 77, 77, 77, 74, 74, 73, 73, 72, 73, 73, 73, 73, 73, 74, 74,
+ 74, 74, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77,
+ 77, 77, 74, 74, 73, 73, 72, 73, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75,
+ 75, 75, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 55, 61, 62, 65, 61, 63, 64, 65, 62, 64, 66, 67, 65, 65, 67, 68,
+ /* Size 8 */
+ 57, 53, 61, 62, 63, 64, 65, 66, 53, 59, 62, 60, 61, 62, 63, 64, 61, 62,
+ 63, 63, 63, 63, 64, 65, 62, 60, 63, 64, 65, 65, 66, 66, 63, 61, 63, 65,
+ 66, 66, 67, 67, 64, 62, 63, 65, 66, 67, 67, 68, 65, 63, 64, 66, 67, 67,
+ 68, 68, 66, 64, 65, 66, 67, 68, 68, 69,
+ /* Size 16 */
+ 56, 55, 53, 57, 61, 61, 62, 62, 62, 63, 64, 64, 65, 65, 66, 66, 55, 55,
+ 56, 58, 61, 61, 61, 61, 61, 62, 63, 63, 64, 64, 65, 65, 53, 56, 58, 60,
+ 61, 61, 60, 60, 61, 61, 62, 62, 63, 63, 64, 64, 57, 58, 60, 61, 62, 62,
+ 61, 61, 62, 62, 62, 63, 63, 64, 64, 64, 61, 61, 61, 62, 63, 63, 63, 63,
+ 63, 63, 63, 64, 64, 64, 65, 65, 61, 61, 61, 62, 63, 63, 63, 63, 63, 64,
+ 64, 64, 65, 65, 65, 65, 62, 61, 60, 61, 63, 63, 64, 64, 64, 64, 65, 65,
+ 65, 65, 66, 66, 62, 61, 60, 61, 63, 63, 64, 64, 65, 65, 65, 65, 66, 66,
+ 66, 66, 62, 61, 61, 62, 63, 63, 64, 65, 65, 66, 66, 66, 66, 66, 67, 67,
+ 63, 62, 61, 62, 63, 64, 64, 65, 66, 66, 66, 66, 67, 67, 67, 67, 64, 63,
+ 62, 62, 63, 64, 65, 65, 66, 66, 67, 67, 67, 67, 67, 67, 64, 63, 62, 63,
+ 64, 64, 65, 65, 66, 66, 67, 67, 67, 68, 68, 68, 65, 64, 63, 63, 64, 65,
+ 65, 66, 66, 67, 67, 67, 68, 68, 68, 68, 65, 64, 63, 64, 64, 65, 65, 66,
+ 66, 67, 67, 68, 68, 68, 68, 68, 66, 65, 64, 64, 65, 65, 66, 66, 67, 67,
+ 67, 68, 68, 68, 68, 68, 66, 65, 64, 64, 65, 65, 66, 66, 67, 67, 67, 68,
+ 68, 68, 68, 68,
+ /* Size 32 */
+ 56, 55, 55, 54, 53, 55, 57, 59, 61, 61, 61, 61, 62, 62, 62, 62, 62, 63,
+ 63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 55, 55, 55, 54,
+ 54, 56, 57, 59, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 63, 63, 63,
+ 63, 64, 64, 64, 65, 65, 65, 65, 65, 65, 55, 55, 55, 55, 55, 57, 58, 60,
+ 61, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 64, 64,
+ 64, 64, 65, 65, 65, 65, 54, 54, 55, 56, 57, 58, 59, 60, 61, 61, 61, 61,
+ 60, 61, 61, 61, 61, 61, 61, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64,
+ 64, 64, 53, 54, 55, 57, 58, 59, 60, 60, 61, 61, 61, 60, 60, 60, 60, 60,
+ 60, 61, 61, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64, 64, 64, 64, 55, 56,
+ 57, 58, 59, 60, 60, 61, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 62,
+ 62, 62, 62, 63, 63, 63, 64, 64, 64, 64, 64, 64, 57, 57, 58, 59, 60, 60,
+ 61, 61, 62, 62, 62, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 63, 63,
+ 63, 64, 64, 64, 64, 64, 64, 64, 59, 59, 60, 60, 60, 61, 61, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64,
+ 64, 64, 64, 64, 61, 61, 61, 61, 61, 62, 62, 62, 63, 63, 63, 63, 62, 62,
+ 62, 62, 62, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+ 61, 61, 61, 61, 61, 61, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 61, 61, 61, 61,
+ 61, 61, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64,
+ 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 61, 61, 61, 61, 60, 61, 61, 62,
+ 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 62, 61, 61, 60, 60, 61, 61, 62, 62, 63, 63, 63,
+ 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66,
+ 66, 66, 62, 61, 61, 61, 60, 61, 61, 62, 62, 63, 63, 63, 64, 64, 64, 64,
+ 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 62, 62,
+ 61, 61, 60, 61, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+ 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 62, 62, 61, 61, 60, 61,
+ 61, 62, 62, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 62, 62, 61, 61, 60, 61, 61, 62, 62, 63,
+ 63, 64, 64, 64, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 67, 67, 67, 67, 63, 62, 62, 61, 61, 61, 62, 62, 63, 63, 63, 64, 64, 64,
+ 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67,
+ 63, 62, 62, 61, 61, 61, 62, 62, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66,
+ 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 63, 63, 62, 62,
+ 61, 62, 62, 62, 63, 63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 66, 66,
+ 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 63, 63, 62, 62, 61, 62, 62, 63,
+ 63, 63, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67,
+ 67, 67, 67, 67, 67, 67, 64, 63, 63, 62, 62, 62, 62, 63, 63, 64, 64, 64,
+ 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67,
+ 67, 67, 64, 63, 63, 63, 62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 66,
+ 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 64, 64,
+ 63, 63, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67,
+ 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 65, 64, 64, 63, 63, 63,
+ 63, 64, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67,
+ 67, 68, 68, 68, 68, 68, 68, 68, 65, 64, 64, 63, 63, 63, 64, 64, 64, 64,
+ 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68,
+ 68, 68, 68, 68, 65, 65, 64, 64, 63, 64, 64, 64, 64, 65, 65, 65, 65, 66,
+ 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68,
+ 65, 65, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67,
+ 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 66, 65, 65, 64,
+ 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67,
+ 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 66, 65, 65, 64, 64, 64, 64, 64,
+ 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68,
+ 68, 68, 68, 68, 68, 68, 66, 65, 65, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+ 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 66, 65, 65, 64, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66,
+ 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 65, 68, 70, 65, 67, 69, 70, 68, 69, 70, 71, 70, 70, 71, 71,
+ /* Size 8 */
+ 64, 62, 62, 64, 66, 67, 68, 69, 62, 63, 63, 64, 65, 67, 68, 68, 62, 63,
+ 65, 66, 67, 67, 68, 69, 64, 64, 66, 67, 68, 68, 69, 69, 66, 65, 67, 68,
+ 68, 69, 69, 69, 67, 67, 67, 68, 69, 69, 69, 69, 68, 68, 68, 69, 69, 69,
+ 69, 69, 69, 68, 69, 69, 69, 69, 69, 70,
+ /* Size 16 */
+ 64, 63, 62, 62, 62, 63, 64, 65, 66, 67, 67, 68, 68, 68, 69, 69, 63, 63,
+ 62, 62, 63, 63, 64, 65, 66, 66, 67, 67, 68, 68, 69, 69, 62, 62, 63, 63,
+ 63, 63, 64, 65, 65, 66, 67, 67, 68, 68, 68, 68, 62, 62, 63, 63, 64, 64,
+ 65, 65, 66, 67, 67, 67, 68, 68, 68, 68, 62, 63, 63, 64, 65, 65, 66, 66,
+ 67, 67, 67, 68, 68, 68, 69, 69, 63, 63, 63, 64, 65, 66, 66, 67, 67, 67,
+ 68, 68, 68, 68, 69, 69, 64, 64, 64, 65, 66, 66, 67, 67, 68, 68, 68, 68,
+ 69, 69, 69, 69, 65, 65, 65, 65, 66, 67, 67, 68, 68, 68, 68, 69, 69, 69,
+ 69, 69, 66, 66, 65, 66, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69,
+ 67, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 67, 67,
+ 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 68, 67, 67, 67,
+ 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 68, 68, 68, 68,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 68, 68, 68, 68, 69, 69,
+ 69, 69, 69, 69, 69, 69, 70, 70, 69, 69, 68, 68, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 70, 70, 70, 69, 69, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 70, 70, 70,
+ /* Size 32 */
+ 64, 63, 63, 62, 62, 62, 62, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 66,
+ 67, 67, 67, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 63, 63, 63, 62,
+ 62, 62, 62, 62, 62, 63, 63, 64, 64, 65, 65, 66, 66, 66, 67, 67, 67, 67,
+ 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 63, 63, 63, 62, 62, 62, 62, 62,
+ 63, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68,
+ 68, 68, 69, 69, 69, 69, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 64,
+ 64, 64, 65, 65, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 65, 65,
+ 65, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 62, 62,
+ 62, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 67,
+ 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 62, 62, 62, 63, 63, 63,
+ 63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 67, 67, 67, 67, 67, 68,
+ 68, 68, 68, 68, 68, 68, 68, 68, 62, 62, 62, 63, 63, 63, 63, 64, 64, 65,
+ 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68,
+ 68, 68, 68, 68, 62, 62, 63, 63, 63, 63, 64, 64, 65, 65, 65, 66, 66, 66,
+ 66, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+ 63, 63, 63, 63, 63, 63, 64, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67,
+ 67, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 63, 63, 63, 63,
+ 63, 64, 64, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68,
+ 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 64, 64, 64, 64, 63, 64, 65, 65,
+ 66, 66, 66, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+ 69, 69, 69, 69, 69, 69, 64, 64, 64, 64, 64, 64, 65, 65, 66, 66, 66, 67,
+ 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69,
+ 69, 69, 65, 65, 65, 64, 64, 65, 65, 66, 66, 66, 67, 67, 67, 67, 67, 68,
+ 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 65, 65,
+ 65, 65, 65, 65, 65, 66, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68,
+ 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 66, 66, 65, 65, 65, 65,
+ 66, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 66, 66, 66, 66, 65, 66, 66, 66, 67, 67,
+ 67, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68,
+ 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 67, 67, 66, 66, 66, 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 67, 67, 67, 67,
+ 66, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 67, 67, 67, 67, 67, 67, 67, 67,
+ 67, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 68, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68,
+ 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 68, 68, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68,
+ 68, 68, 67, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 68, 68, 68, 68,
+ 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 70, 70, 70, 70, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70,
+ 69, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 69, 69, 69, 68,
+ 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 69, 69, 69, 68, 68, 68, 68, 68,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70,
+ 70, 70, 70, 70, 70, 70, 69, 69, 69, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70,
+ 70, 70, 69, 69, 69, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 59, 60, 63, 65, 60, 63, 64, 65, 63, 64, 66, 66, 65, 65, 66, 66,
+ /* Size 8 */
+ 61, 59, 59, 61, 63, 64, 65, 66, 59, 60, 59, 61, 62, 64, 65, 65, 59, 59,
+ 62, 63, 64, 64, 65, 65, 61, 61, 63, 64, 65, 65, 65, 66, 63, 62, 64, 65,
+ 65, 66, 66, 66, 64, 64, 64, 65, 66, 66, 66, 66, 65, 65, 65, 65, 66, 66,
+ 66, 66, 66, 65, 65, 66, 66, 66, 66, 67,
+ /* Size 16 */
+ 61, 60, 58, 59, 59, 60, 61, 62, 63, 63, 64, 64, 65, 65, 65, 65, 60, 59,
+ 59, 59, 59, 60, 61, 62, 62, 63, 64, 64, 65, 65, 65, 65, 58, 59, 60, 59,
+ 59, 60, 60, 61, 62, 63, 63, 64, 64, 65, 65, 65, 59, 59, 59, 60, 60, 61,
+ 61, 62, 63, 63, 64, 64, 65, 65, 65, 65, 59, 59, 59, 60, 62, 62, 63, 63,
+ 63, 64, 64, 64, 65, 65, 65, 65, 60, 60, 60, 61, 62, 63, 63, 63, 64, 64,
+ 64, 65, 65, 65, 65, 65, 61, 61, 60, 61, 63, 63, 64, 64, 64, 65, 65, 65,
+ 65, 65, 66, 66, 62, 62, 61, 62, 63, 63, 64, 64, 65, 65, 65, 65, 65, 66,
+ 66, 66, 63, 62, 62, 63, 63, 64, 64, 65, 65, 65, 65, 65, 66, 66, 66, 66,
+ 63, 63, 63, 63, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 64, 64,
+ 63, 64, 64, 64, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 64, 64, 64, 64,
+ 64, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 65, 65, 64, 65, 65, 65,
+ 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66,
+ 66, 66, 66, 66,
+ /* Size 32 */
+ 61, 60, 59, 59, 58, 59, 59, 59, 59, 60, 60, 61, 61, 61, 62, 62, 63, 63,
+ 63, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 60, 60, 59, 59,
+ 59, 59, 59, 59, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63, 63, 63, 64, 64,
+ 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 60, 60, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64, 64, 64, 65, 65,
+ 65, 65, 65, 65, 65, 65, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60,
+ 61, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, 65,
+ 65, 65, 58, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 61, 61, 62,
+ 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 59, 59,
+ 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 62, 62, 62, 63, 63, 63,
+ 63, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 59, 59, 59, 59, 59, 60,
+ 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64,
+ 64, 65, 65, 65, 65, 65, 65, 65, 59, 59, 59, 59, 59, 60, 60, 60, 61, 61,
+ 61, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+ 65, 65, 65, 65, 59, 59, 59, 59, 59, 60, 60, 61, 62, 62, 62, 62, 62, 63,
+ 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65,
+ 60, 59, 59, 59, 59, 60, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, 63, 64,
+ 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 60, 60, 60, 60,
+ 60, 60, 61, 61, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64,
+ 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 61, 60, 60, 60, 60, 61, 61, 62,
+ 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 61, 61, 61, 61, 60, 61, 61, 62, 62, 63, 63, 63,
+ 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 61, 61, 61, 61, 61, 61, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64,
+ 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 62, 62,
+ 62, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 62, 62, 62, 62, 62, 62,
+ 62, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 66, 66, 66, 66, 66, 66, 63, 63, 62, 62, 62, 62, 63, 63, 63, 63,
+ 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66,
+ 66, 66, 66, 66, 63, 63, 63, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 65,
+ 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65,
+ 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 64, 63, 63, 63,
+ 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 64, 64, 64, 63, 63, 63, 64, 64,
+ 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 64, 64, 64, 64, 63, 64, 64, 64, 64, 64, 64, 65,
+ 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65,
+ 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 64,
+ 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 64, 64, 64,
+ 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 67, 68, 69, 67, 68, 69, 69, 68, 69, 70, 70, 69, 69, 70, 71,
+ /* Size 8 */
+ 64, 62, 67, 67, 67, 68, 68, 69, 62, 65, 67, 66, 66, 67, 68, 68, 67, 67,
+ 68, 67, 67, 68, 68, 69, 67, 66, 67, 68, 68, 68, 69, 69, 67, 66, 67, 68,
+ 69, 69, 69, 69, 68, 67, 68, 68, 69, 69, 70, 70, 68, 68, 68, 69, 69, 70,
+ 70, 70, 69, 68, 69, 69, 69, 70, 70, 70,
+ /* Size 16 */
+ 64, 63, 62, 64, 67, 67, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 63, 63,
+ 64, 65, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 69, 69, 62, 64, 65, 66,
+ 67, 66, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 64, 65, 66, 67, 67, 67,
+ 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 67, 67, 67, 67, 68, 67, 67, 67,
+ 67, 68, 68, 68, 68, 68, 69, 69, 67, 67, 66, 67, 67, 68, 68, 68, 68, 68,
+ 68, 68, 68, 69, 69, 69, 67, 67, 66, 67, 67, 68, 68, 68, 68, 68, 68, 69,
+ 69, 69, 69, 69, 67, 67, 66, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 69,
+ 69, 69, 67, 67, 66, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+ 68, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 70, 70, 68, 67,
+ 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 68, 68, 67, 68,
+ 68, 68, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 68, 68, 68, 68, 68, 68,
+ 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 69, 68, 68, 68, 68, 69, 69, 69,
+ 69, 69, 70, 70, 70, 70, 70, 70, 69, 69, 68, 68, 69, 69, 69, 69, 69, 70,
+ 70, 70, 70, 70, 70, 70, 69, 69, 68, 68, 69, 69, 69, 69, 69, 70, 70, 70,
+ 70, 70, 70, 70,
+ /* Size 32 */
+ 64, 64, 63, 63, 62, 63, 64, 65, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67,
+ 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 64, 63, 63, 63,
+ 63, 64, 65, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68,
+ 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 63, 63, 63, 63, 64, 64, 65, 66,
+ 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68,
+ 68, 68, 69, 69, 69, 69, 63, 63, 63, 64, 64, 65, 66, 66, 67, 67, 67, 66,
+ 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 62, 63, 64, 64, 65, 66, 66, 66, 67, 67, 66, 66, 66, 66, 66, 66,
+ 66, 66, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 63, 64,
+ 64, 65, 66, 66, 66, 67, 67, 67, 67, 67, 66, 66, 67, 67, 67, 67, 67, 67,
+ 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 64, 65, 65, 66, 66, 66,
+ 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68,
+ 68, 68, 68, 68, 68, 68, 68, 68, 65, 66, 66, 66, 66, 67, 67, 67, 67, 67,
+ 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 68, 68, 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 67, 67, 67, 67,
+ 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+ 67, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 67, 67, 67, 67,
+ 66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 67, 67, 67, 66, 66, 67, 67, 67,
+ 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69,
+ 69, 69, 69, 69, 69, 69, 67, 67, 67, 66, 66, 66, 67, 67, 67, 68, 68, 68,
+ 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 67, 67, 67, 66, 66, 66, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 67, 67,
+ 67, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 67, 67, 67, 67, 66, 67,
+ 67, 67, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 67, 67, 67, 67, 66, 67, 67, 67, 67, 68,
+ 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 67, 67, 67, 67, 66, 67, 67, 67, 67, 68, 68, 68, 68, 68,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 68, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 68, 67, 67, 67,
+ 67, 67, 67, 67, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 68, 68, 67, 67, 67, 67, 67, 67,
+ 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70,
+ 70, 70, 70, 70, 70, 70, 68, 68, 68, 67, 67, 67, 67, 68, 68, 68, 68, 68,
+ 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70,
+ 70, 70, 68, 68, 68, 67, 67, 67, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68, 68,
+ 68, 68, 67, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+ 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 68, 68, 68, 68, 68, 68,
+ 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70,
+ 70, 70, 70, 70, 70, 70, 70, 70, 69, 68, 68, 68, 68, 68, 68, 68, 68, 68,
+ 68, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70,
+ 70, 70, 70, 70, 69, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
+ 69, 69, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69,
+ 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 69, 69, 69, 68,
+ 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70,
+ 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 69, 69, 69, 68, 68, 68, 68, 68,
+ 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70,
+ 70, 70, 70, 70, 70, 70, 69, 69, 69, 68, 68, 68, 68, 68, 69, 69, 69, 69,
+ 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
+ 70, 70, 69, 69, 69, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69,
+ 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 59, 63, 63, 64, 63, 64, 64, 65, 63, 64, 65, 66, 64, 65, 66, 66,
+ /* Size 8 */
+ 60, 58, 63, 63, 63, 64, 65, 65, 58, 61, 63, 62, 62, 63, 64, 64, 63, 63,
+ 64, 64, 64, 64, 64, 65, 63, 62, 64, 64, 64, 65, 65, 65, 63, 62, 64, 64,
+ 65, 65, 65, 66, 64, 63, 64, 65, 65, 65, 66, 66, 65, 64, 64, 65, 65, 66,
+ 66, 66, 65, 64, 65, 65, 66, 66, 66, 66,
+ /* Size 16 */
+ 60, 59, 58, 60, 63, 63, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 59, 59,
+ 60, 61, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 58, 60, 61, 62,
+ 63, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 60, 61, 62, 62, 63, 63,
+ 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 63, 63, 63, 63, 64, 63, 63, 63,
+ 63, 63, 64, 64, 64, 64, 64, 64, 63, 63, 62, 63, 63, 64, 64, 64, 64, 64,
+ 64, 64, 64, 65, 65, 65, 63, 63, 62, 63, 63, 64, 64, 64, 64, 64, 64, 65,
+ 65, 65, 65, 65, 63, 63, 62, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, 65,
+ 65, 65, 63, 63, 62, 63, 63, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65,
+ 64, 63, 63, 63, 63, 64, 64, 65, 65, 65, 65, 65, 65, 65, 66, 66, 64, 63,
+ 63, 63, 64, 64, 64, 65, 65, 65, 65, 65, 66, 66, 66, 66, 64, 64, 63, 63,
+ 64, 64, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 64, 64, 63, 64, 64, 64,
+ 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 65, 64, 64, 64, 64, 65, 65, 65,
+ 65, 65, 66, 66, 66, 66, 66, 66, 65, 64, 64, 64, 64, 65, 65, 65, 65, 66,
+ 66, 66, 66, 66, 66, 66, 65, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66,
+ 66, 66, 66, 66,
+ /* Size 32 */
+ 60, 59, 59, 59, 58, 59, 60, 61, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 59, 59, 59, 59,
+ 59, 60, 61, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64,
+ 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 59, 59, 59, 59, 60, 60, 61, 62,
+ 63, 63, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 59, 59, 59, 60, 60, 61, 61, 62, 63, 62, 62, 62,
+ 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64,
+ 64, 64, 58, 59, 60, 60, 61, 61, 62, 62, 63, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 59, 60,
+ 60, 61, 61, 62, 62, 62, 63, 63, 63, 62, 62, 62, 62, 62, 62, 63, 63, 63,
+ 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 60, 61, 61, 61, 62, 62,
+ 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 61, 62, 62, 62, 62, 62, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 63, 63, 63, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 62, 62,
+ 62, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 63, 63, 62, 62, 62, 62, 63, 63,
+ 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 65, 65, 65, 65, 65, 65, 63, 63, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65,
+ 65, 65, 63, 63, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 63, 63,
+ 63, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65,
+ 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 63, 63, 63, 62, 62, 62,
+ 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, 65, 63, 63, 63, 62, 62, 62, 63, 63, 63, 63,
+ 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 65, 65, 63, 63, 63, 63, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64,
+ 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 63, 63, 63, 63, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64, 63, 63, 63,
+ 63, 63, 63, 63, 63, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 64, 64, 63, 63, 63, 63, 63, 63,
+ 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 66, 66, 66, 66, 66, 66, 64, 64, 63, 63, 63, 63, 63, 63, 64, 64, 64, 64,
+ 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66,
+ 66, 66, 64, 64, 64, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 64, 64,
+ 64, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 64, 64, 64, 64, 63, 64,
+ 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 66, 66, 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 64, 64,
+ 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 64, 64, 64, 64, 64, 64,
+ 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65,
+ 65, 65, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 8 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 16 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64,
+ /* Size 32 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 8 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 16 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64,
+ /* Size 32 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 8 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 16 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64,
+ /* Size 32 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 8 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 16 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64,
+ /* Size 32 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 } } }
+};
+
+static uint16_t wt_matrix_ref[NUM_QM_LEVELS][2][2][4 * 4 + 8 * 8 + 16 * 16 +
+ 32 * 32] = {
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 58, 33, 19, 58, 37, 25, 17, 33, 25, 16, 12, 19, 17, 12, 10,
+ /* Size 8 */
+ 64, 87, 80, 59, 42, 31, 24, 19, 87, 75, 79, 66, 50, 38, 29, 23, 80, 79,
+ 54, 46, 38, 31, 25, 20, 59, 66, 46, 34, 29, 24, 21, 18, 42, 50, 38, 29,
+ 23, 20, 17, 15, 31, 38, 31, 24, 20, 17, 15, 13, 24, 29, 25, 21, 17, 15,
+ 13, 12, 19, 23, 20, 18, 15, 13, 12, 11,
+ /* Size 16 */
+ 64, 76, 87, 84, 80, 70, 59, 51, 42, 37, 31, 27, 24, 21, 19, 19, 76, 79,
+ 81, 81, 80, 71, 63, 55, 46, 40, 34, 30, 26, 23, 21, 21, 87, 81, 75, 77,
+ 79, 73, 66, 58, 50, 44, 38, 33, 29, 26, 23, 23, 84, 81, 77, 72, 67, 61,
+ 56, 50, 44, 39, 34, 31, 27, 24, 21, 21, 80, 80, 79, 67, 54, 50, 46, 42,
+ 38, 34, 31, 28, 25, 23, 20, 20, 70, 71, 73, 61, 50, 45, 40, 37, 33, 30,
+ 28, 25, 23, 21, 19, 19, 59, 63, 66, 56, 46, 40, 34, 31, 29, 26, 24, 22,
+ 21, 19, 18, 18, 51, 55, 58, 50, 42, 37, 31, 29, 26, 24, 22, 20, 19, 18,
+ 16, 16, 42, 46, 50, 44, 38, 33, 29, 26, 23, 21, 20, 18, 17, 16, 15, 15,
+ 37, 40, 44, 39, 34, 30, 26, 24, 21, 20, 18, 17, 16, 15, 14, 14, 31, 34,
+ 38, 34, 31, 28, 24, 22, 20, 18, 17, 16, 15, 14, 13, 13, 27, 30, 33, 31,
+ 28, 25, 22, 20, 18, 17, 16, 15, 14, 13, 13, 13, 24, 26, 29, 27, 25, 23,
+ 21, 19, 17, 16, 15, 14, 13, 13, 12, 12, 21, 23, 26, 24, 23, 21, 19, 18,
+ 16, 15, 14, 13, 13, 12, 12, 12, 19, 21, 23, 21, 20, 19, 18, 16, 15, 14,
+ 13, 13, 12, 12, 11, 11, 19, 21, 23, 21, 20, 19, 18, 16, 15, 14, 13, 13,
+ 12, 12, 11, 11,
+ /* Size 32 */
+ 64, 70, 76, 82, 87, 86, 84, 82, 80, 75, 70, 65, 59, 55, 51, 47, 42, 40,
+ 37, 34, 31, 29, 27, 26, 24, 22, 21, 20, 19, 19, 19, 19, 70, 74, 77, 81,
+ 84, 83, 82, 81, 80, 75, 71, 66, 61, 57, 53, 49, 44, 41, 39, 36, 33, 31,
+ 29, 27, 25, 24, 22, 21, 20, 20, 20, 20, 76, 77, 79, 80, 81, 81, 81, 80,
+ 80, 75, 71, 67, 63, 59, 55, 51, 46, 43, 40, 37, 34, 32, 30, 28, 26, 25,
+ 23, 22, 21, 21, 21, 21, 82, 81, 80, 79, 78, 79, 79, 79, 79, 76, 72, 68,
+ 65, 61, 56, 52, 48, 45, 42, 39, 36, 34, 32, 30, 27, 26, 25, 23, 22, 22,
+ 22, 22, 87, 84, 81, 78, 75, 76, 77, 78, 79, 76, 73, 70, 66, 62, 58, 54,
+ 50, 47, 44, 41, 38, 36, 33, 31, 29, 27, 26, 24, 23, 23, 23, 23, 86, 83,
+ 81, 79, 76, 75, 75, 74, 73, 70, 67, 64, 61, 58, 54, 51, 47, 44, 42, 39,
+ 36, 34, 32, 30, 28, 26, 25, 23, 22, 22, 22, 22, 84, 82, 81, 79, 77, 75,
+ 72, 69, 67, 64, 61, 59, 56, 53, 50, 47, 44, 42, 39, 37, 34, 32, 31, 29,
+ 27, 25, 24, 23, 21, 21, 21, 21, 82, 81, 80, 79, 78, 74, 69, 65, 61, 58,
+ 56, 53, 51, 48, 46, 44, 41, 39, 37, 35, 33, 31, 29, 28, 26, 25, 23, 22,
+ 21, 21, 21, 21, 80, 80, 80, 79, 79, 73, 67, 61, 54, 52, 50, 48, 46, 44,
+ 42, 40, 38, 36, 34, 33, 31, 29, 28, 26, 25, 24, 23, 22, 20, 20, 20, 20,
+ 75, 75, 75, 76, 76, 70, 64, 58, 52, 50, 47, 45, 43, 41, 39, 37, 36, 34,
+ 32, 31, 29, 28, 27, 25, 24, 23, 22, 21, 20, 20, 20, 20, 70, 71, 71, 72,
+ 73, 67, 61, 56, 50, 47, 45, 42, 40, 38, 37, 35, 33, 32, 30, 29, 28, 26,
+ 25, 24, 23, 22, 21, 20, 19, 19, 19, 19, 65, 66, 67, 68, 70, 64, 59, 53,
+ 48, 45, 42, 40, 37, 36, 34, 32, 31, 30, 28, 27, 26, 25, 24, 23, 22, 21,
+ 20, 19, 18, 18, 18, 18, 59, 61, 63, 65, 66, 61, 56, 51, 46, 43, 40, 37,
+ 34, 33, 31, 30, 29, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 18, 18, 18,
+ 18, 18, 55, 57, 59, 61, 62, 58, 53, 48, 44, 41, 38, 36, 33, 31, 30, 29,
+ 27, 26, 25, 24, 23, 22, 21, 21, 20, 19, 18, 18, 17, 17, 17, 17, 51, 53,
+ 55, 56, 58, 54, 50, 46, 42, 39, 37, 34, 31, 30, 29, 27, 26, 25, 24, 23,
+ 22, 21, 20, 20, 19, 18, 18, 17, 16, 16, 16, 16, 47, 49, 51, 52, 54, 51,
+ 47, 44, 40, 37, 35, 32, 30, 29, 27, 26, 24, 24, 23, 22, 21, 20, 19, 19,
+ 18, 18, 17, 16, 16, 16, 16, 16, 42, 44, 46, 48, 50, 47, 44, 41, 38, 36,
+ 33, 31, 29, 27, 26, 24, 23, 22, 21, 21, 20, 19, 18, 18, 17, 17, 16, 16,
+ 15, 15, 15, 15, 40, 41, 43, 45, 47, 44, 42, 39, 36, 34, 32, 30, 27, 26,
+ 25, 24, 22, 21, 21, 20, 19, 18, 18, 17, 17, 16, 16, 15, 15, 15, 15, 15,
+ 37, 39, 40, 42, 44, 42, 39, 37, 34, 32, 30, 28, 26, 25, 24, 23, 21, 21,
+ 20, 19, 18, 18, 17, 17, 16, 16, 15, 15, 14, 14, 14, 14, 34, 36, 37, 39,
+ 41, 39, 37, 35, 33, 31, 29, 27, 25, 24, 23, 22, 21, 20, 19, 18, 18, 17,
+ 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 31, 33, 34, 36, 38, 36, 34, 33,
+ 31, 29, 28, 26, 24, 23, 22, 21, 20, 19, 18, 18, 17, 16, 16, 15, 15, 15,
+ 14, 14, 13, 13, 13, 13, 29, 31, 32, 34, 36, 34, 32, 31, 29, 28, 26, 25,
+ 23, 22, 21, 20, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 13,
+ 13, 13, 27, 29, 30, 32, 33, 32, 31, 29, 28, 27, 25, 24, 22, 21, 20, 19,
+ 18, 18, 17, 16, 16, 15, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 26, 27,
+ 28, 30, 31, 30, 29, 28, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 17, 16,
+ 15, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 12, 24, 25, 26, 27, 29, 28,
+ 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 17, 16, 15, 15, 14, 14, 14,
+ 13, 13, 13, 12, 12, 12, 12, 12, 22, 24, 25, 26, 27, 26, 25, 25, 24, 23,
+ 22, 21, 20, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12,
+ 12, 12, 12, 12, 21, 22, 23, 25, 26, 25, 24, 23, 23, 22, 21, 20, 19, 18,
+ 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12,
+ 20, 21, 22, 23, 24, 23, 23, 22, 22, 21, 20, 19, 18, 18, 17, 16, 16, 15,
+ 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 19, 20, 21, 22,
+ 23, 22, 21, 21, 20, 20, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13,
+ 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 19, 20, 21, 22, 23, 22, 21, 21,
+ 20, 20, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12,
+ 12, 11, 11, 11, 11, 11, 19, 20, 21, 22, 23, 22, 21, 21, 20, 20, 19, 18,
+ 18, 17, 16, 16, 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11,
+ 11, 11, 19, 20, 21, 22, 23, 22, 21, 21, 20, 20, 19, 18, 18, 17, 16, 16,
+ 15, 15, 14, 14, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 249, 225, 124, 69, 225, 139, 91, 60, 124, 91, 57, 42, 69, 60, 42, 32,
+ /* Size 8 */
+ 206, 285, 261, 191, 134, 96, 71, 55, 285, 245, 257, 214, 161, 118, 88,
+ 68, 261, 257, 174, 145, 119, 95, 75, 60, 191, 214, 145, 107, 87, 73, 61,
+ 51, 134, 161, 119, 87, 69, 58, 50, 43, 96, 118, 95, 73, 58, 48, 42, 37,
+ 71, 88, 75, 61, 50, 42, 36, 32, 55, 68, 60, 51, 43, 37, 32, 29,
+ /* Size 16 */
+ 217, 259, 300, 287, 275, 237, 200, 171, 141, 121, 101, 88, 75, 66, 57,
+ 57, 259, 269, 279, 275, 272, 243, 213, 184, 155, 134, 113, 98, 84, 74,
+ 64, 64, 300, 279, 257, 264, 270, 248, 226, 197, 169, 147, 124, 109, 93,
+ 82, 71, 71, 287, 275, 264, 245, 227, 208, 189, 168, 147, 130, 112, 99,
+ 86, 77, 67, 67, 275, 272, 270, 227, 183, 168, 152, 139, 125, 113, 100,
+ 90, 79, 71, 63, 63, 237, 243, 248, 208, 168, 150, 132, 120, 109, 98, 88,
+ 80, 72, 65, 58, 58, 200, 213, 226, 189, 152, 132, 113, 102, 92, 84, 77,
+ 70, 64, 59, 54, 54, 171, 184, 197, 168, 139, 120, 102, 92, 82, 75, 69,
+ 63, 58, 54, 49, 49, 141, 155, 169, 147, 125, 109, 92, 82, 73, 67, 61,
+ 56, 52, 49, 45, 45, 121, 134, 147, 130, 113, 98, 84, 75, 67, 61, 56, 52,
+ 48, 45, 42, 42, 101, 113, 124, 112, 100, 88, 77, 69, 61, 56, 50, 47, 44,
+ 41, 39, 39, 88, 98, 109, 99, 90, 80, 70, 63, 56, 52, 47, 44, 41, 39, 36,
+ 36, 75, 84, 93, 86, 79, 72, 64, 58, 52, 48, 44, 41, 38, 36, 34, 34, 66,
+ 74, 82, 77, 71, 65, 59, 54, 49, 45, 41, 39, 36, 34, 32, 32, 57, 64, 71,
+ 67, 63, 58, 54, 49, 45, 42, 39, 36, 34, 32, 31, 31, 57, 64, 71, 67, 63,
+ 58, 54, 49, 45, 42, 39, 36, 34, 32, 31, 31,
+ /* Size 32 */
+ 223, 244, 265, 287, 308, 301, 295, 288, 282, 263, 244, 225, 206, 190,
+ 175, 160, 145, 134, 124, 114, 104, 97, 90, 83, 77, 72, 68, 63, 59, 59,
+ 59, 59, 244, 257, 271, 284, 297, 293, 289, 285, 281, 264, 246, 229, 212,
+ 197, 182, 167, 152, 141, 131, 120, 110, 103, 95, 88, 81, 77, 72, 67, 62,
+ 62, 62, 62, 265, 271, 276, 281, 286, 284, 283, 281, 280, 264, 249, 234,
+ 219, 204, 189, 174, 159, 148, 137, 127, 116, 108, 101, 93, 86, 81, 76,
+ 71, 66, 66, 66, 66, 287, 284, 281, 278, 275, 276, 277, 278, 278, 265,
+ 252, 238, 225, 210, 196, 181, 166, 155, 144, 133, 122, 114, 106, 98, 91,
+ 85, 80, 75, 69, 69, 69, 69, 308, 297, 286, 275, 264, 267, 271, 274, 277,
+ 266, 254, 243, 231, 217, 203, 188, 174, 162, 151, 139, 128, 120, 111,
+ 103, 95, 90, 84, 78, 73, 73, 73, 73, 301, 293, 284, 276, 267, 264, 261,
+ 258, 255, 244, 234, 223, 213, 200, 187, 175, 162, 152, 142, 132, 121,
+ 114, 107, 99, 92, 87, 81, 76, 71, 71, 71, 71, 295, 289, 283, 277, 271,
+ 261, 252, 242, 233, 223, 213, 203, 194, 183, 172, 162, 151, 142, 133,
+ 124, 115, 108, 102, 95, 88, 83, 79, 74, 69, 69, 69, 69, 288, 285, 281,
+ 278, 274, 258, 242, 226, 210, 201, 193, 184, 175, 166, 157, 149, 140,
+ 132, 124, 117, 109, 103, 97, 91, 85, 80, 76, 71, 67, 67, 67, 67, 282,
+ 281, 280, 278, 277, 255, 233, 210, 188, 180, 172, 164, 156, 149, 142,
+ 135, 129, 122, 116, 109, 103, 97, 92, 87, 81, 77, 73, 69, 65, 65, 65,
+ 65, 263, 264, 264, 265, 266, 244, 223, 201, 180, 171, 163, 154, 146,
+ 139, 133, 126, 120, 114, 108, 103, 97, 92, 87, 82, 77, 74, 70, 66, 62,
+ 62, 62, 62, 244, 246, 249, 252, 254, 234, 213, 193, 172, 163, 154, 145,
+ 136, 130, 124, 118, 111, 106, 101, 96, 91, 86, 82, 78, 73, 70, 67, 63,
+ 60, 60, 60, 60, 225, 229, 234, 238, 243, 223, 203, 184, 164, 154, 145,
+ 135, 126, 120, 114, 109, 103, 98, 94, 89, 85, 81, 77, 73, 70, 67, 64,
+ 61, 57, 57, 57, 57, 206, 212, 219, 225, 231, 213, 194, 175, 156, 146,
+ 136, 126, 116, 110, 105, 100, 94, 90, 87, 83, 79, 76, 72, 69, 66, 63,
+ 60, 58, 55, 55, 55, 55, 190, 197, 204, 210, 217, 200, 183, 166, 149,
+ 139, 130, 120, 110, 105, 100, 95, 89, 86, 82, 78, 75, 72, 69, 66, 63,
+ 60, 58, 55, 53, 53, 53, 53, 175, 182, 189, 196, 203, 187, 172, 157, 142,
+ 133, 124, 114, 105, 100, 95, 90, 84, 81, 77, 74, 71, 68, 65, 62, 60, 57,
+ 55, 53, 51, 51, 51, 51, 160, 167, 174, 181, 188, 175, 162, 149, 135,
+ 126, 118, 109, 100, 95, 90, 84, 79, 76, 73, 70, 66, 64, 61, 59, 57, 55,
+ 53, 51, 49, 49, 49, 49, 145, 152, 159, 166, 174, 162, 151, 140, 129,
+ 120, 111, 103, 94, 89, 84, 79, 74, 71, 68, 65, 62, 60, 58, 56, 53, 52,
+ 50, 48, 46, 46, 46, 46, 134, 141, 148, 155, 162, 152, 142, 132, 122,
+ 114, 106, 98, 90, 86, 81, 76, 71, 68, 66, 63, 60, 58, 55, 53, 51, 50,
+ 48, 46, 45, 45, 45, 45, 124, 131, 137, 144, 151, 142, 133, 124, 116,
+ 108, 101, 94, 87, 82, 77, 73, 68, 66, 63, 60, 57, 55, 53, 51, 49, 48,
+ 46, 45, 43, 43, 43, 43, 114, 120, 127, 133, 139, 132, 124, 117, 109,
+ 103, 96, 89, 83, 78, 74, 70, 65, 63, 60, 57, 54, 53, 51, 49, 47, 46, 44,
+ 43, 41, 41, 41, 41, 104, 110, 116, 122, 128, 121, 115, 109, 103, 97, 91,
+ 85, 79, 75, 71, 66, 62, 60, 57, 54, 52, 50, 48, 47, 45, 44, 42, 41, 40,
+ 40, 40, 40, 97, 103, 108, 114, 120, 114, 108, 103, 97, 92, 86, 81, 76,
+ 72, 68, 64, 60, 58, 55, 53, 50, 48, 47, 45, 43, 42, 41, 40, 38, 38, 38,
+ 38, 90, 95, 101, 106, 111, 107, 102, 97, 92, 87, 82, 77, 72, 69, 65, 61,
+ 58, 55, 53, 51, 48, 47, 45, 44, 42, 41, 40, 38, 37, 37, 37, 37, 83, 88,
+ 93, 98, 103, 99, 95, 91, 87, 82, 78, 73, 69, 66, 62, 59, 56, 53, 51, 49,
+ 47, 45, 44, 42, 40, 39, 38, 37, 36, 36, 36, 36, 77, 81, 86, 91, 95, 92,
+ 88, 85, 81, 77, 73, 70, 66, 63, 60, 57, 53, 51, 49, 47, 45, 43, 42, 40,
+ 39, 38, 37, 36, 35, 35, 35, 35, 72, 77, 81, 85, 90, 87, 83, 80, 77, 74,
+ 70, 67, 63, 60, 57, 55, 52, 50, 48, 46, 44, 42, 41, 39, 38, 37, 36, 35,
+ 34, 34, 34, 34, 68, 72, 76, 80, 84, 81, 79, 76, 73, 70, 67, 64, 60, 58,
+ 55, 53, 50, 48, 46, 44, 42, 41, 40, 38, 37, 36, 35, 34, 33, 33, 33, 33,
+ 63, 67, 71, 75, 78, 76, 74, 71, 69, 66, 63, 61, 58, 55, 53, 51, 48, 46,
+ 45, 43, 41, 40, 38, 37, 36, 35, 34, 33, 32, 32, 32, 32, 59, 62, 66, 69,
+ 73, 71, 69, 67, 65, 62, 60, 57, 55, 53, 51, 49, 46, 45, 43, 41, 40, 38,
+ 37, 36, 35, 34, 33, 32, 31, 31, 31, 31, 59, 62, 66, 69, 73, 71, 69, 67,
+ 65, 62, 60, 57, 55, 53, 51, 49, 46, 45, 43, 41, 40, 38, 37, 36, 35, 34,
+ 33, 32, 31, 31, 31, 31, 59, 62, 66, 69, 73, 71, 69, 67, 65, 62, 60, 57,
+ 55, 53, 51, 49, 46, 45, 43, 41, 40, 38, 37, 36, 35, 34, 33, 32, 31, 31,
+ 31, 31, 59, 62, 66, 69, 73, 71, 69, 67, 65, 62, 60, 57, 55, 53, 51, 49,
+ 46, 45, 43, 41, 40, 38, 37, 36, 35, 34, 33, 32, 31, 31, 31, 31 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 39, 35, 27, 39, 31, 29, 25, 35, 29, 21, 18, 27, 25, 18, 15,
+ /* Size 8 */
+ 64, 81, 42, 39, 36, 32, 28, 24, 81, 54, 41, 46, 44, 40, 35, 30, 42, 41,
+ 34, 36, 36, 34, 31, 27, 39, 46, 36, 31, 29, 28, 26, 24, 36, 44, 36, 29,
+ 26, 24, 22, 20, 32, 40, 34, 28, 24, 21, 19, 18, 28, 35, 31, 26, 22, 19,
+ 17, 16, 24, 30, 27, 24, 20, 18, 16, 15,
+ /* Size 16 */
+ 64, 72, 81, 61, 42, 41, 39, 38, 36, 34, 32, 30, 28, 26, 24, 24, 72, 70,
+ 67, 54, 42, 42, 43, 42, 40, 38, 36, 34, 31, 29, 27, 27, 81, 67, 54, 48,
+ 41, 44, 46, 45, 44, 42, 40, 37, 35, 32, 30, 30, 61, 54, 48, 43, 38, 39,
+ 41, 40, 40, 39, 37, 35, 33, 31, 29, 29, 42, 42, 41, 38, 34, 35, 36, 36,
+ 36, 35, 34, 32, 31, 29, 27, 27, 41, 42, 44, 39, 35, 34, 33, 33, 33, 32,
+ 31, 30, 28, 27, 25, 25, 39, 43, 46, 41, 36, 33, 31, 30, 29, 29, 28, 27,
+ 26, 25, 24, 24, 38, 42, 45, 40, 36, 33, 30, 29, 27, 27, 26, 25, 24, 23,
+ 22, 22, 36, 40, 44, 40, 36, 33, 29, 27, 26, 25, 24, 23, 22, 21, 20, 20,
+ 34, 38, 42, 39, 35, 32, 29, 27, 25, 23, 22, 21, 21, 20, 19, 19, 32, 36,
+ 40, 37, 34, 31, 28, 26, 24, 22, 21, 20, 19, 19, 18, 18, 30, 34, 37, 35,
+ 32, 30, 27, 25, 23, 21, 20, 19, 18, 18, 17, 17, 28, 31, 35, 33, 31, 28,
+ 26, 24, 22, 21, 19, 18, 17, 17, 16, 16, 26, 29, 32, 31, 29, 27, 25, 23,
+ 21, 20, 19, 18, 17, 16, 15, 15, 24, 27, 30, 29, 27, 25, 24, 22, 20, 19,
+ 18, 17, 16, 15, 15, 15, 24, 27, 30, 29, 27, 25, 24, 22, 20, 19, 18, 17,
+ 16, 15, 15, 15,
+ /* Size 32 */
+ 64, 68, 72, 76, 81, 71, 61, 52, 42, 41, 41, 40, 39, 39, 38, 37, 36, 35,
+ 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 24, 24, 24, 68, 70, 71, 73,
+ 74, 66, 58, 50, 42, 42, 41, 41, 41, 40, 40, 39, 38, 37, 36, 35, 34, 33,
+ 32, 31, 29, 28, 27, 26, 25, 25, 25, 25, 72, 71, 70, 69, 67, 61, 54, 48,
+ 42, 42, 42, 42, 43, 42, 42, 41, 40, 39, 38, 37, 36, 35, 34, 32, 31, 30,
+ 29, 28, 27, 27, 27, 27, 76, 73, 69, 65, 61, 56, 51, 46, 41, 42, 43, 44,
+ 44, 44, 43, 43, 42, 41, 40, 39, 38, 37, 36, 34, 33, 32, 31, 30, 29, 29,
+ 29, 29, 81, 74, 67, 61, 54, 51, 48, 44, 41, 42, 44, 45, 46, 46, 45, 45,
+ 44, 43, 42, 41, 40, 39, 37, 36, 35, 34, 32, 31, 30, 30, 30, 30, 71, 66,
+ 61, 56, 51, 48, 45, 42, 39, 40, 41, 42, 43, 43, 43, 43, 42, 41, 40, 39,
+ 38, 37, 36, 35, 34, 33, 32, 30, 29, 29, 29, 29, 61, 58, 54, 51, 48, 45,
+ 43, 40, 38, 38, 39, 40, 41, 41, 40, 40, 40, 39, 39, 38, 37, 36, 35, 34,
+ 33, 32, 31, 30, 29, 29, 29, 29, 52, 50, 48, 46, 44, 42, 40, 38, 36, 37,
+ 37, 38, 38, 38, 38, 38, 38, 37, 37, 36, 35, 34, 33, 33, 32, 31, 30, 29,
+ 28, 28, 28, 28, 42, 42, 42, 41, 41, 39, 38, 36, 34, 35, 35, 35, 36, 36,
+ 36, 36, 36, 35, 35, 34, 34, 33, 32, 31, 31, 30, 29, 28, 27, 27, 27, 27,
+ 41, 42, 42, 42, 42, 40, 38, 37, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34,
+ 33, 33, 32, 32, 31, 30, 29, 29, 28, 27, 26, 26, 26, 26, 41, 41, 42, 43,
+ 44, 41, 39, 37, 35, 35, 34, 34, 33, 33, 33, 33, 33, 32, 32, 31, 31, 30,
+ 30, 29, 28, 28, 27, 26, 25, 25, 25, 25, 40, 41, 42, 44, 45, 42, 40, 38,
+ 35, 35, 34, 33, 32, 32, 32, 31, 31, 30, 30, 30, 29, 29, 28, 28, 27, 26,
+ 26, 25, 25, 25, 25, 25, 39, 41, 43, 44, 46, 43, 41, 38, 36, 35, 33, 32,
+ 31, 31, 30, 30, 29, 29, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24,
+ 24, 24, 39, 40, 42, 44, 46, 43, 41, 38, 36, 34, 33, 32, 31, 30, 29, 29,
+ 28, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 23, 23, 23, 23, 23, 38, 40,
+ 42, 43, 45, 43, 40, 38, 36, 34, 33, 32, 30, 29, 29, 28, 27, 27, 27, 26,
+ 26, 25, 25, 24, 24, 23, 23, 23, 22, 22, 22, 22, 37, 39, 41, 43, 45, 43,
+ 40, 38, 36, 34, 33, 31, 30, 29, 28, 27, 27, 26, 26, 25, 25, 24, 24, 23,
+ 23, 23, 22, 22, 21, 21, 21, 21, 36, 38, 40, 42, 44, 42, 40, 38, 36, 34,
+ 33, 31, 29, 28, 27, 27, 26, 25, 25, 24, 24, 23, 23, 22, 22, 22, 21, 21,
+ 20, 20, 20, 20, 35, 37, 39, 41, 43, 41, 39, 37, 35, 34, 32, 30, 29, 28,
+ 27, 26, 25, 25, 24, 23, 23, 23, 22, 22, 21, 21, 21, 20, 20, 20, 20, 20,
+ 34, 36, 38, 40, 42, 40, 39, 37, 35, 33, 32, 30, 29, 28, 27, 26, 25, 24,
+ 23, 23, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 33, 35, 37, 39,
+ 41, 39, 38, 36, 34, 33, 31, 30, 28, 27, 26, 25, 24, 23, 23, 22, 22, 21,
+ 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 32, 34, 36, 38, 40, 38, 37, 35,
+ 34, 32, 31, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 21, 20, 20, 19, 19,
+ 19, 18, 18, 18, 18, 18, 31, 33, 35, 37, 39, 37, 36, 34, 33, 32, 30, 29,
+ 27, 26, 25, 24, 23, 23, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17,
+ 17, 17, 30, 32, 34, 36, 37, 36, 35, 33, 32, 31, 30, 28, 27, 26, 25, 24,
+ 23, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 29, 31,
+ 32, 34, 36, 35, 34, 33, 31, 30, 29, 28, 26, 25, 24, 23, 22, 22, 21, 20,
+ 20, 19, 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 28, 29, 31, 33, 35, 34,
+ 33, 32, 31, 29, 28, 27, 26, 25, 24, 23, 22, 21, 21, 20, 19, 19, 18, 18,
+ 17, 17, 17, 16, 16, 16, 16, 16, 27, 28, 30, 32, 34, 33, 32, 31, 30, 29,
+ 28, 26, 25, 24, 23, 23, 22, 21, 20, 20, 19, 18, 18, 17, 17, 17, 16, 16,
+ 16, 16, 16, 16, 26, 27, 29, 31, 32, 32, 31, 30, 29, 28, 27, 26, 25, 24,
+ 23, 22, 21, 21, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15,
+ 25, 26, 28, 30, 31, 30, 30, 29, 28, 27, 26, 25, 24, 23, 23, 22, 21, 20,
+ 20, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 24, 25, 27, 29,
+ 30, 29, 29, 28, 27, 26, 25, 25, 24, 23, 22, 21, 20, 20, 19, 19, 18, 17,
+ 17, 16, 16, 16, 15, 15, 15, 15, 15, 15, 24, 25, 27, 29, 30, 29, 29, 28,
+ 27, 26, 25, 25, 24, 23, 22, 21, 20, 20, 19, 19, 18, 17, 17, 16, 16, 16,
+ 15, 15, 15, 15, 15, 15, 24, 25, 27, 29, 30, 29, 29, 28, 27, 26, 25, 25,
+ 24, 23, 22, 21, 20, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15,
+ 15, 15, 24, 25, 27, 29, 30, 29, 29, 28, 27, 26, 25, 25, 24, 23, 22, 21,
+ 20, 20, 19, 19, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 15 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 175, 103, 93, 70, 103, 83, 76, 64, 93, 76, 55, 46, 70, 64, 46, 36,
+ /* Size 8 */
+ 162, 205, 104, 97, 90, 78, 67, 57, 205, 136, 102, 115, 111, 99, 86, 73,
+ 104, 102, 84, 88, 88, 83, 75, 66, 97, 115, 88, 75, 71, 67, 62, 56, 90,
+ 111, 88, 71, 62, 56, 52, 48, 78, 99, 83, 67, 56, 49, 45, 41, 67, 86, 75,
+ 62, 52, 45, 40, 36, 57, 73, 66, 56, 48, 41, 36, 33,
+ /* Size 16 */
+ 168, 190, 213, 160, 108, 104, 101, 97, 93, 87, 81, 75, 69, 64, 59, 59,
+ 190, 184, 177, 142, 107, 108, 110, 107, 104, 98, 92, 85, 79, 73, 67, 67,
+ 213, 177, 141, 123, 106, 112, 119, 117, 115, 109, 103, 96, 89, 82, 76,
+ 76, 160, 142, 123, 110, 96, 101, 105, 104, 103, 99, 94, 89, 83, 77, 72,
+ 72, 108, 107, 106, 96, 87, 89, 91, 91, 91, 89, 86, 82, 77, 73, 68, 68,
+ 104, 108, 112, 101, 89, 87, 85, 84, 82, 80, 78, 74, 71, 67, 63, 63, 101,
+ 110, 119, 105, 91, 85, 78, 76, 74, 72, 70, 67, 64, 61, 58, 58, 97, 107,
+ 117, 104, 91, 84, 76, 72, 69, 66, 64, 62, 59, 57, 54, 54, 93, 104, 115,
+ 103, 91, 82, 74, 69, 64, 61, 58, 56, 54, 52, 50, 50, 87, 98, 109, 99,
+ 89, 80, 72, 66, 61, 58, 55, 52, 50, 48, 46, 46, 81, 92, 103, 94, 86, 78,
+ 70, 64, 58, 55, 51, 49, 46, 45, 43, 43, 75, 85, 96, 89, 82, 74, 67, 62,
+ 56, 52, 49, 46, 44, 42, 40, 40, 69, 79, 89, 83, 77, 71, 64, 59, 54, 50,
+ 46, 44, 41, 40, 38, 38, 64, 73, 82, 77, 73, 67, 61, 57, 52, 48, 45, 42,
+ 40, 38, 36, 36, 59, 67, 76, 72, 68, 63, 58, 54, 50, 46, 43, 40, 38, 36,
+ 34, 34, 59, 67, 76, 72, 68, 63, 58, 54, 50, 46, 43, 40, 38, 36, 34, 34,
+ /* Size 32 */
+ 171, 182, 194, 205, 217, 190, 163, 137, 110, 108, 106, 104, 103, 101,
+ 99, 97, 95, 92, 89, 86, 83, 80, 77, 74, 71, 68, 65, 63, 60, 60, 60, 60,
+ 182, 186, 190, 194, 198, 176, 154, 132, 109, 109, 108, 108, 107, 105,
+ 104, 102, 100, 97, 94, 91, 88, 85, 82, 79, 76, 73, 70, 67, 64, 64, 64,
+ 64, 194, 190, 187, 184, 180, 162, 144, 127, 109, 110, 110, 111, 112,
+ 110, 109, 107, 106, 103, 100, 97, 94, 90, 87, 84, 80, 78, 75, 72, 69,
+ 69, 69, 69, 205, 194, 184, 173, 162, 148, 135, 122, 108, 110, 112, 114,
+ 116, 115, 114, 113, 111, 108, 105, 102, 99, 96, 92, 89, 85, 82, 79, 76,
+ 73, 73, 73, 73, 217, 198, 180, 162, 144, 135, 126, 117, 108, 111, 114,
+ 118, 121, 120, 119, 118, 117, 114, 111, 108, 104, 101, 97, 94, 90, 87,
+ 84, 80, 77, 77, 77, 77, 190, 176, 162, 148, 135, 127, 119, 111, 103,
+ 106, 108, 111, 114, 113, 112, 112, 111, 108, 106, 103, 100, 97, 94, 91,
+ 87, 84, 81, 78, 75, 75, 75, 75, 163, 154, 144, 135, 126, 119, 112, 105,
+ 98, 100, 103, 105, 107, 106, 106, 105, 105, 103, 101, 98, 96, 93, 90,
+ 87, 85, 82, 79, 76, 73, 73, 73, 73, 137, 132, 127, 122, 117, 111, 105,
+ 99, 94, 95, 97, 98, 100, 100, 99, 99, 99, 97, 95, 94, 92, 89, 87, 84,
+ 82, 79, 76, 74, 71, 71, 71, 71, 110, 109, 109, 108, 108, 103, 98, 94,
+ 89, 90, 91, 92, 93, 93, 93, 93, 93, 92, 90, 89, 87, 85, 83, 81, 79, 76,
+ 74, 72, 69, 69, 69, 69, 108, 109, 110, 110, 111, 106, 100, 95, 90, 90,
+ 90, 90, 89, 89, 89, 89, 89, 87, 86, 85, 83, 81, 79, 77, 75, 73, 71, 69,
+ 67, 67, 67, 67, 106, 108, 110, 112, 114, 108, 103, 97, 91, 90, 89, 87,
+ 86, 86, 85, 85, 84, 83, 82, 80, 79, 77, 76, 74, 72, 70, 68, 66, 64, 64,
+ 64, 64, 104, 108, 111, 114, 118, 111, 105, 98, 92, 90, 87, 85, 83, 82,
+ 81, 80, 79, 78, 77, 76, 75, 74, 72, 70, 69, 67, 65, 64, 62, 62, 62, 62,
+ 103, 107, 112, 116, 121, 114, 107, 100, 93, 89, 86, 83, 80, 78, 77, 76,
+ 75, 74, 73, 72, 71, 70, 68, 67, 66, 64, 63, 61, 60, 60, 60, 60, 101,
+ 105, 110, 115, 120, 113, 106, 100, 93, 89, 86, 82, 78, 77, 75, 74, 72,
+ 71, 70, 69, 68, 67, 65, 64, 63, 62, 60, 59, 57, 57, 57, 57, 99, 104,
+ 109, 114, 119, 112, 106, 99, 93, 89, 85, 81, 77, 75, 74, 72, 70, 69, 68,
+ 66, 65, 64, 63, 61, 60, 59, 58, 56, 55, 55, 55, 55, 97, 102, 107, 113,
+ 118, 112, 105, 99, 93, 89, 85, 80, 76, 74, 72, 70, 67, 66, 65, 64, 62,
+ 61, 60, 59, 58, 56, 55, 54, 53, 53, 53, 53, 95, 100, 106, 111, 117, 111,
+ 105, 99, 93, 89, 84, 79, 75, 72, 70, 67, 65, 64, 62, 61, 59, 58, 57, 56,
+ 55, 54, 53, 52, 51, 51, 51, 51, 92, 97, 103, 108, 114, 108, 103, 97, 92,
+ 87, 83, 78, 74, 71, 69, 66, 64, 62, 61, 59, 57, 56, 55, 54, 53, 52, 51,
+ 50, 49, 49, 49, 49, 89, 94, 100, 105, 111, 106, 101, 95, 90, 86, 82, 77,
+ 73, 70, 68, 65, 62, 61, 59, 57, 56, 55, 53, 52, 51, 50, 49, 48, 47, 47,
+ 47, 47, 86, 91, 97, 102, 108, 103, 98, 94, 89, 85, 80, 76, 72, 69, 66,
+ 64, 61, 59, 57, 56, 54, 53, 52, 50, 49, 48, 47, 46, 45, 45, 45, 45, 83,
+ 88, 94, 99, 104, 100, 96, 92, 87, 83, 79, 75, 71, 68, 65, 62, 59, 57,
+ 56, 54, 52, 51, 50, 48, 47, 46, 45, 44, 44, 44, 44, 44, 80, 85, 90, 96,
+ 101, 97, 93, 89, 85, 81, 77, 74, 70, 67, 64, 61, 58, 56, 55, 53, 51, 50,
+ 48, 47, 46, 45, 44, 43, 42, 42, 42, 42, 77, 82, 87, 92, 97, 94, 90, 87,
+ 83, 79, 76, 72, 68, 65, 63, 60, 57, 55, 53, 52, 50, 48, 47, 46, 45, 44,
+ 43, 42, 41, 41, 41, 41, 74, 79, 84, 89, 94, 91, 87, 84, 81, 77, 74, 70,
+ 67, 64, 61, 59, 56, 54, 52, 50, 48, 47, 46, 45, 43, 42, 42, 41, 40, 40,
+ 40, 40, 71, 76, 80, 85, 90, 87, 85, 82, 79, 75, 72, 69, 66, 63, 60, 58,
+ 55, 53, 51, 49, 47, 46, 45, 43, 42, 41, 40, 39, 38, 38, 38, 38, 68, 73,
+ 78, 82, 87, 84, 82, 79, 76, 73, 70, 67, 64, 62, 59, 56, 54, 52, 50, 48,
+ 46, 45, 44, 42, 41, 40, 39, 38, 37, 37, 37, 37, 65, 70, 75, 79, 84, 81,
+ 79, 76, 74, 71, 68, 65, 63, 60, 58, 55, 53, 51, 49, 47, 45, 44, 43, 42,
+ 40, 39, 38, 37, 37, 37, 37, 37, 63, 67, 72, 76, 80, 78, 76, 74, 72, 69,
+ 66, 64, 61, 59, 56, 54, 52, 50, 48, 46, 44, 43, 42, 41, 39, 38, 37, 37,
+ 36, 36, 36, 36, 60, 64, 69, 73, 77, 75, 73, 71, 69, 67, 64, 62, 60, 57,
+ 55, 53, 51, 49, 47, 45, 44, 42, 41, 40, 38, 37, 37, 36, 35, 35, 35, 35,
+ 60, 64, 69, 73, 77, 75, 73, 71, 69, 67, 64, 62, 60, 57, 55, 53, 51, 49,
+ 47, 45, 44, 42, 41, 40, 38, 37, 37, 36, 35, 35, 35, 35, 60, 64, 69, 73,
+ 77, 75, 73, 71, 69, 67, 64, 62, 60, 57, 55, 53, 51, 49, 47, 45, 44, 42,
+ 41, 40, 38, 37, 37, 36, 35, 35, 35, 35, 60, 64, 69, 73, 77, 75, 73, 71,
+ 69, 67, 64, 62, 60, 57, 55, 53, 51, 49, 47, 45, 44, 42, 41, 40, 38, 37,
+ 37, 36, 35, 35, 35, 35 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 58, 34, 21, 58, 38, 26, 19, 34, 26, 18, 14, 21, 19, 14, 12,
+ /* Size 8 */
+ 64, 86, 80, 59, 43, 33, 25, 21, 86, 75, 78, 66, 51, 39, 30, 24, 80, 78,
+ 55, 46, 39, 32, 27, 22, 59, 66, 46, 36, 30, 26, 23, 20, 43, 51, 39, 30,
+ 25, 22, 19, 17, 33, 39, 32, 26, 22, 19, 17, 16, 25, 30, 27, 23, 19, 17,
+ 16, 14, 21, 24, 22, 20, 17, 16, 14, 14,
+ /* Size 16 */
+ 64, 75, 86, 83, 80, 70, 59, 51, 43, 38, 33, 29, 25, 23, 21, 21, 75, 78,
+ 81, 80, 79, 71, 63, 55, 47, 41, 36, 32, 28, 25, 23, 23, 86, 81, 75, 77,
+ 78, 72, 66, 59, 51, 45, 39, 35, 30, 27, 24, 24, 83, 80, 77, 72, 67, 61,
+ 56, 51, 45, 40, 36, 32, 29, 26, 23, 23, 80, 79, 78, 67, 55, 51, 46, 43,
+ 39, 36, 32, 29, 27, 24, 22, 22, 70, 71, 72, 61, 51, 46, 41, 38, 35, 32,
+ 29, 27, 25, 23, 21, 21, 59, 63, 66, 56, 46, 41, 36, 33, 30, 28, 26, 24,
+ 23, 21, 20, 20, 51, 55, 59, 51, 43, 38, 33, 30, 28, 26, 24, 22, 21, 20,
+ 19, 19, 43, 47, 51, 45, 39, 35, 30, 28, 25, 23, 22, 21, 19, 18, 17, 17,
+ 38, 41, 45, 40, 36, 32, 28, 26, 23, 22, 20, 19, 18, 17, 17, 17, 33, 36,
+ 39, 36, 32, 29, 26, 24, 22, 20, 19, 18, 17, 16, 16, 16, 29, 32, 35, 32,
+ 29, 27, 24, 22, 21, 19, 18, 17, 16, 16, 15, 15, 25, 28, 30, 29, 27, 25,
+ 23, 21, 19, 18, 17, 16, 16, 15, 14, 14, 23, 25, 27, 26, 24, 23, 21, 20,
+ 18, 17, 16, 16, 15, 14, 14, 14, 21, 23, 24, 23, 22, 21, 20, 19, 17, 17,
+ 16, 15, 14, 14, 14, 14, 21, 23, 24, 23, 22, 21, 20, 19, 17, 17, 16, 15,
+ 14, 14, 14, 14,
+ /* Size 32 */
+ 64, 70, 75, 81, 86, 85, 83, 81, 80, 75, 70, 64, 59, 55, 51, 47, 43, 41,
+ 38, 35, 33, 31, 29, 27, 25, 24, 23, 22, 21, 21, 21, 21, 70, 73, 77, 80,
+ 84, 82, 81, 80, 79, 75, 70, 66, 61, 57, 53, 49, 45, 43, 40, 37, 34, 32,
+ 30, 29, 27, 25, 24, 23, 22, 22, 22, 22, 75, 77, 78, 79, 81, 80, 80, 79,
+ 79, 75, 71, 67, 63, 59, 55, 51, 47, 44, 41, 39, 36, 34, 32, 30, 28, 27,
+ 25, 24, 23, 23, 23, 23, 81, 80, 79, 78, 78, 78, 78, 78, 79, 75, 72, 68,
+ 65, 61, 57, 53, 49, 46, 43, 40, 37, 35, 33, 31, 29, 28, 26, 25, 24, 24,
+ 24, 24, 86, 84, 81, 78, 75, 76, 77, 77, 78, 75, 72, 69, 66, 62, 59, 55,
+ 51, 48, 45, 42, 39, 37, 35, 33, 30, 29, 27, 26, 24, 24, 24, 24, 85, 82,
+ 80, 78, 76, 75, 74, 73, 72, 70, 67, 64, 61, 58, 55, 51, 48, 45, 43, 40,
+ 37, 35, 33, 31, 29, 28, 27, 25, 24, 24, 24, 24, 83, 81, 80, 78, 77, 74,
+ 72, 69, 67, 64, 61, 59, 56, 54, 51, 48, 45, 43, 40, 38, 36, 34, 32, 30,
+ 29, 27, 26, 25, 23, 23, 23, 23, 81, 80, 79, 78, 77, 73, 69, 65, 61, 58,
+ 56, 54, 51, 49, 47, 44, 42, 40, 38, 36, 34, 32, 31, 29, 28, 26, 25, 24,
+ 23, 23, 23, 23, 80, 79, 79, 79, 78, 72, 67, 61, 55, 53, 51, 48, 46, 45,
+ 43, 41, 39, 37, 36, 34, 32, 31, 29, 28, 27, 26, 24, 23, 22, 22, 22, 22,
+ 75, 75, 75, 75, 75, 70, 64, 58, 53, 50, 48, 46, 44, 42, 40, 39, 37, 35,
+ 34, 32, 31, 29, 28, 27, 26, 25, 24, 23, 22, 22, 22, 22, 70, 70, 71, 72,
+ 72, 67, 61, 56, 51, 48, 46, 43, 41, 39, 38, 36, 35, 33, 32, 31, 29, 28,
+ 27, 26, 25, 24, 23, 22, 21, 21, 21, 21, 64, 66, 67, 68, 69, 64, 59, 54,
+ 48, 46, 43, 41, 38, 37, 35, 34, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23,
+ 22, 21, 20, 20, 20, 20, 59, 61, 63, 65, 66, 61, 56, 51, 46, 44, 41, 38,
+ 36, 34, 33, 32, 30, 29, 28, 27, 26, 25, 24, 23, 23, 22, 21, 20, 20, 20,
+ 20, 20, 55, 57, 59, 61, 62, 58, 54, 49, 45, 42, 39, 37, 34, 33, 32, 30,
+ 29, 28, 27, 26, 25, 24, 23, 23, 22, 21, 20, 20, 19, 19, 19, 19, 51, 53,
+ 55, 57, 59, 55, 51, 47, 43, 40, 38, 35, 33, 32, 30, 29, 28, 27, 26, 25,
+ 24, 23, 22, 22, 21, 20, 20, 19, 19, 19, 19, 19, 47, 49, 51, 53, 55, 51,
+ 48, 44, 41, 39, 36, 34, 32, 30, 29, 28, 26, 25, 24, 24, 23, 22, 21, 21,
+ 20, 20, 19, 19, 18, 18, 18, 18, 43, 45, 47, 49, 51, 48, 45, 42, 39, 37,
+ 35, 32, 30, 29, 28, 26, 25, 24, 23, 22, 22, 21, 21, 20, 19, 19, 18, 18,
+ 17, 17, 17, 17, 41, 43, 44, 46, 48, 45, 43, 40, 37, 35, 33, 31, 29, 28,
+ 27, 25, 24, 23, 23, 22, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 17, 17,
+ 38, 40, 41, 43, 45, 43, 40, 38, 36, 34, 32, 30, 28, 27, 26, 24, 23, 23,
+ 22, 21, 20, 20, 19, 19, 18, 18, 17, 17, 17, 17, 17, 17, 35, 37, 39, 40,
+ 42, 40, 38, 36, 34, 32, 31, 29, 27, 26, 25, 24, 22, 22, 21, 20, 20, 19,
+ 19, 18, 18, 17, 17, 17, 16, 16, 16, 16, 33, 34, 36, 37, 39, 37, 36, 34,
+ 32, 31, 29, 28, 26, 25, 24, 23, 22, 21, 20, 20, 19, 18, 18, 18, 17, 17,
+ 16, 16, 16, 16, 16, 16, 31, 32, 34, 35, 37, 35, 34, 32, 31, 29, 28, 27,
+ 25, 24, 23, 22, 21, 20, 20, 19, 18, 18, 18, 17, 17, 16, 16, 16, 15, 15,
+ 15, 15, 29, 30, 32, 33, 35, 33, 32, 31, 29, 28, 27, 26, 24, 23, 22, 21,
+ 21, 20, 19, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 27, 29,
+ 30, 31, 33, 31, 30, 29, 28, 27, 26, 25, 23, 23, 22, 21, 20, 19, 19, 18,
+ 18, 17, 17, 16, 16, 16, 15, 15, 15, 15, 15, 15, 25, 27, 28, 29, 30, 29,
+ 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 19, 18, 18, 17, 17, 16, 16,
+ 16, 15, 15, 15, 14, 14, 14, 14, 24, 25, 27, 28, 29, 28, 27, 26, 26, 25,
+ 24, 23, 22, 21, 20, 20, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, 14,
+ 14, 14, 14, 14, 23, 24, 25, 26, 27, 27, 26, 25, 24, 24, 23, 22, 21, 20,
+ 20, 19, 18, 18, 17, 17, 16, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14,
+ 22, 23, 24, 25, 26, 25, 25, 24, 23, 23, 22, 21, 20, 20, 19, 19, 18, 17,
+ 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 21, 22, 23, 24,
+ 24, 24, 23, 23, 22, 22, 21, 20, 20, 19, 19, 18, 17, 17, 17, 16, 16, 15,
+ 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 21, 22, 23, 24, 24, 24, 23, 23,
+ 22, 22, 21, 20, 20, 19, 19, 18, 17, 17, 17, 16, 16, 15, 15, 15, 14, 14,
+ 14, 14, 14, 14, 14, 14, 21, 22, 23, 24, 24, 24, 23, 23, 22, 22, 21, 20,
+ 20, 19, 19, 18, 17, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14,
+ 14, 14, 21, 22, 23, 24, 24, 24, 23, 23, 22, 22, 21, 20, 20, 19, 19, 18,
+ 17, 17, 17, 16, 16, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 217, 196, 112, 65, 196, 124, 84, 58, 112, 84, 55, 43, 65, 58, 43, 34,
+ /* Size 8 */
+ 180, 246, 226, 167, 120, 88, 67, 53, 246, 212, 222, 187, 142, 107, 81,
+ 64, 226, 222, 153, 128, 107, 87, 71, 58, 167, 187, 128, 97, 81, 69, 59,
+ 50, 120, 142, 107, 81, 65, 56, 49, 44, 88, 107, 87, 69, 56, 48, 43, 39,
+ 67, 81, 71, 59, 49, 43, 38, 35, 53, 64, 58, 50, 44, 39, 35, 32,
+ /* Size 16 */
+ 188, 223, 257, 246, 236, 205, 174, 150, 125, 108, 92, 81, 70, 63, 56,
+ 56, 223, 231, 239, 237, 234, 209, 185, 161, 137, 119, 102, 90, 78, 70,
+ 62, 62, 257, 239, 221, 227, 232, 214, 195, 172, 148, 130, 111, 98, 85,
+ 76, 67, 67, 246, 237, 227, 211, 196, 180, 165, 148, 130, 116, 101, 90,
+ 80, 72, 64, 64, 236, 234, 232, 196, 160, 147, 134, 123, 112, 102, 91,
+ 83, 74, 67, 61, 61, 205, 209, 214, 180, 147, 133, 118, 108, 98, 90, 82,
+ 75, 68, 62, 57, 57, 174, 185, 195, 165, 134, 118, 102, 93, 84, 78, 72,
+ 67, 61, 57, 53, 53, 150, 161, 172, 148, 123, 108, 93, 85, 76, 71, 65,
+ 61, 56, 53, 49, 49, 125, 137, 148, 130, 112, 98, 84, 76, 68, 64, 59, 55,
+ 51, 49, 46, 46, 108, 119, 130, 116, 102, 90, 78, 71, 64, 59, 54, 51, 48,
+ 46, 43, 43, 92, 102, 111, 101, 91, 82, 72, 65, 59, 54, 50, 47, 45, 42,
+ 40, 40, 81, 90, 98, 90, 83, 75, 67, 61, 55, 51, 47, 45, 42, 40, 38, 38,
+ 70, 78, 85, 80, 74, 68, 61, 56, 51, 48, 45, 42, 40, 38, 36, 36, 63, 70,
+ 76, 72, 67, 62, 57, 53, 49, 46, 42, 40, 38, 37, 35, 35, 56, 62, 67, 64,
+ 61, 57, 53, 49, 46, 43, 40, 38, 36, 35, 34, 34, 56, 62, 67, 64, 61, 57,
+ 53, 49, 46, 43, 40, 38, 36, 35, 34, 34,
+ /* Size 32 */
+ 193, 210, 228, 245, 263, 258, 252, 247, 241, 226, 210, 194, 178, 166,
+ 153, 141, 128, 120, 111, 103, 94, 89, 83, 77, 72, 68, 65, 61, 57, 57,
+ 57, 57, 210, 221, 232, 243, 254, 250, 247, 244, 240, 226, 212, 198, 184,
+ 171, 159, 146, 134, 125, 117, 108, 99, 93, 87, 82, 76, 72, 68, 64, 60,
+ 60, 60, 60, 228, 232, 236, 240, 245, 243, 242, 241, 239, 227, 214, 202,
+ 189, 177, 165, 152, 140, 131, 122, 113, 104, 98, 92, 86, 80, 75, 71, 67,
+ 63, 63, 63, 63, 245, 243, 240, 238, 236, 236, 237, 238, 238, 227, 216,
+ 205, 194, 182, 170, 158, 146, 137, 127, 118, 109, 103, 96, 90, 83, 79,
+ 75, 70, 66, 66, 66, 66, 263, 254, 245, 236, 227, 229, 232, 235, 238,
+ 228, 219, 209, 200, 188, 176, 164, 152, 142, 133, 123, 114, 107, 101,
+ 94, 87, 83, 78, 73, 69, 69, 69, 69, 258, 250, 243, 236, 229, 227, 224,
+ 222, 219, 210, 202, 193, 184, 174, 163, 153, 143, 134, 126, 117, 109,
+ 103, 97, 90, 84, 80, 76, 71, 67, 67, 67, 67, 252, 247, 242, 237, 232,
+ 224, 216, 208, 201, 193, 185, 177, 169, 160, 151, 142, 133, 126, 118,
+ 111, 104, 98, 93, 87, 81, 77, 73, 69, 65, 65, 65, 65, 247, 244, 241,
+ 238, 235, 222, 208, 195, 182, 175, 168, 160, 153, 146, 139, 131, 124,
+ 118, 111, 105, 98, 93, 88, 83, 78, 75, 71, 67, 64, 64, 64, 64, 241, 240,
+ 239, 238, 238, 219, 201, 182, 164, 157, 151, 144, 137, 132, 126, 120,
+ 115, 109, 104, 99, 93, 89, 84, 80, 76, 72, 69, 65, 62, 62, 62, 62, 226,
+ 226, 227, 227, 228, 210, 193, 175, 157, 150, 143, 136, 129, 124, 118,
+ 113, 108, 103, 98, 93, 88, 84, 80, 76, 72, 69, 66, 63, 60, 60, 60, 60,
+ 210, 212, 214, 216, 219, 202, 185, 168, 151, 143, 136, 128, 121, 116,
+ 111, 106, 101, 96, 92, 88, 83, 80, 76, 73, 69, 66, 64, 61, 58, 58, 58,
+ 58, 194, 198, 202, 205, 209, 193, 177, 160, 144, 136, 128, 120, 112,
+ 108, 103, 98, 94, 90, 86, 82, 79, 75, 72, 69, 66, 64, 61, 59, 56, 56,
+ 56, 56, 178, 184, 189, 194, 200, 184, 169, 153, 137, 129, 121, 112, 104,
+ 100, 95, 91, 86, 83, 80, 77, 74, 71, 68, 66, 63, 61, 58, 56, 54, 54, 54,
+ 54, 166, 171, 177, 182, 188, 174, 160, 146, 132, 124, 116, 108, 100, 95,
+ 91, 87, 82, 79, 76, 73, 70, 68, 65, 63, 60, 58, 56, 54, 52, 52, 52, 52,
+ 153, 159, 165, 170, 176, 163, 151, 139, 126, 118, 111, 103, 95, 91, 87,
+ 82, 78, 75, 73, 70, 67, 65, 62, 60, 58, 56, 54, 52, 50, 50, 50, 50, 141,
+ 146, 152, 158, 164, 153, 142, 131, 120, 113, 106, 98, 91, 87, 82, 78,
+ 74, 71, 69, 66, 63, 61, 59, 57, 55, 54, 52, 50, 49, 49, 49, 49, 128,
+ 134, 140, 146, 152, 143, 133, 124, 115, 108, 101, 94, 86, 82, 78, 74,
+ 70, 68, 65, 63, 60, 58, 56, 54, 53, 51, 50, 48, 47, 47, 47, 47, 120,
+ 125, 131, 137, 142, 134, 126, 118, 109, 103, 96, 90, 83, 79, 75, 71, 68,
+ 65, 63, 60, 58, 56, 54, 53, 51, 50, 48, 47, 45, 45, 45, 45, 111, 117,
+ 122, 127, 133, 126, 118, 111, 104, 98, 92, 86, 80, 76, 73, 69, 65, 63,
+ 60, 58, 56, 54, 52, 51, 49, 48, 47, 45, 44, 44, 44, 44, 103, 108, 113,
+ 118, 123, 117, 111, 105, 99, 93, 88, 82, 77, 73, 70, 66, 63, 60, 58, 56,
+ 53, 52, 50, 49, 47, 46, 45, 44, 43, 43, 43, 43, 94, 99, 104, 109, 114,
+ 109, 104, 98, 93, 88, 83, 79, 74, 70, 67, 63, 60, 58, 56, 53, 51, 50,
+ 48, 47, 46, 44, 43, 42, 41, 41, 41, 41, 89, 93, 98, 103, 107, 103, 98,
+ 93, 89, 84, 80, 75, 71, 68, 65, 61, 58, 56, 54, 52, 50, 49, 47, 46, 44,
+ 43, 42, 41, 40, 40, 40, 40, 83, 87, 92, 96, 101, 97, 93, 88, 84, 80, 76,
+ 72, 68, 65, 62, 59, 56, 54, 52, 50, 48, 47, 46, 44, 43, 42, 41, 40, 39,
+ 39, 39, 39, 77, 82, 86, 90, 94, 90, 87, 83, 80, 76, 73, 69, 66, 63, 60,
+ 57, 54, 53, 51, 49, 47, 46, 44, 43, 42, 41, 40, 39, 38, 38, 38, 38, 72,
+ 76, 80, 83, 87, 84, 81, 78, 76, 72, 69, 66, 63, 60, 58, 55, 53, 51, 49,
+ 47, 46, 44, 43, 42, 41, 40, 39, 38, 37, 37, 37, 37, 68, 72, 75, 79, 83,
+ 80, 77, 75, 72, 69, 66, 64, 61, 58, 56, 54, 51, 50, 48, 46, 44, 43, 42,
+ 41, 40, 39, 38, 37, 37, 37, 37, 37, 65, 68, 71, 75, 78, 76, 73, 71, 69,
+ 66, 64, 61, 58, 56, 54, 52, 50, 48, 47, 45, 43, 42, 41, 40, 39, 38, 37,
+ 37, 36, 36, 36, 36, 61, 64, 67, 70, 73, 71, 69, 67, 65, 63, 61, 59, 56,
+ 54, 52, 50, 48, 47, 45, 44, 42, 41, 40, 39, 38, 37, 37, 36, 35, 35, 35,
+ 35, 57, 60, 63, 66, 69, 67, 65, 64, 62, 60, 58, 56, 54, 52, 50, 49, 47,
+ 45, 44, 43, 41, 40, 39, 38, 37, 37, 36, 35, 34, 34, 34, 34, 57, 60, 63,
+ 66, 69, 67, 65, 64, 62, 60, 58, 56, 54, 52, 50, 49, 47, 45, 44, 43, 41,
+ 40, 39, 38, 37, 37, 36, 35, 34, 34, 34, 34, 57, 60, 63, 66, 69, 67, 65,
+ 64, 62, 60, 58, 56, 54, 52, 50, 49, 47, 45, 44, 43, 41, 40, 39, 38, 37,
+ 37, 36, 35, 34, 34, 34, 34, 57, 60, 63, 66, 69, 67, 65, 64, 62, 60, 58,
+ 56, 54, 52, 50, 49, 47, 45, 44, 43, 41, 40, 39, 38, 37, 37, 36, 35, 34,
+ 34, 34, 34 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 39, 36, 28, 39, 32, 30, 26, 36, 30, 23, 20, 28, 26, 20, 16,
+ /* Size 8 */
+ 64, 80, 43, 40, 37, 33, 29, 25, 80, 55, 42, 47, 45, 41, 36, 31, 43, 42,
+ 35, 37, 37, 35, 32, 29, 40, 47, 37, 32, 31, 29, 27, 25, 37, 45, 37, 31,
+ 27, 25, 24, 22, 33, 41, 35, 29, 25, 23, 21, 20, 29, 36, 32, 27, 24, 21,
+ 19, 18, 25, 31, 29, 25, 22, 20, 18, 17,
+ /* Size 16 */
+ 64, 72, 80, 61, 43, 41, 40, 39, 37, 35, 33, 31, 29, 27, 25, 25, 72, 70,
+ 67, 55, 42, 43, 43, 42, 41, 39, 37, 35, 32, 30, 28, 28, 80, 67, 55, 48,
+ 42, 44, 47, 46, 45, 43, 41, 38, 36, 34, 31, 31, 61, 55, 48, 43, 39, 40,
+ 42, 41, 41, 39, 38, 36, 34, 32, 30, 30, 43, 42, 42, 39, 35, 36, 37, 37,
+ 37, 36, 35, 33, 32, 30, 29, 29, 41, 43, 44, 40, 36, 35, 34, 34, 34, 33,
+ 32, 31, 30, 28, 27, 27, 40, 43, 47, 42, 37, 34, 32, 31, 31, 30, 29, 28,
+ 27, 26, 25, 25, 39, 42, 46, 41, 37, 34, 31, 30, 29, 28, 27, 26, 25, 25,
+ 24, 24, 37, 41, 45, 41, 37, 34, 31, 29, 27, 26, 25, 24, 24, 23, 22, 22,
+ 35, 39, 43, 39, 36, 33, 30, 28, 26, 25, 24, 23, 22, 22, 21, 21, 33, 37,
+ 41, 38, 35, 32, 29, 27, 25, 24, 23, 22, 21, 20, 20, 20, 31, 35, 38, 36,
+ 33, 31, 28, 26, 24, 23, 22, 21, 20, 19, 19, 19, 29, 32, 36, 34, 32, 30,
+ 27, 25, 24, 22, 21, 20, 19, 18, 18, 18, 27, 30, 34, 32, 30, 28, 26, 25,
+ 23, 22, 20, 19, 18, 18, 17, 17, 25, 28, 31, 30, 29, 27, 25, 24, 22, 21,
+ 20, 19, 18, 17, 17, 17, 25, 28, 31, 30, 29, 27, 25, 24, 22, 21, 20, 19,
+ 18, 17, 17, 17,
+ /* Size 32 */
+ 64, 68, 72, 76, 80, 71, 61, 52, 43, 42, 41, 41, 40, 39, 39, 38, 37, 36,
+ 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 25, 25, 25, 68, 69, 71, 72,
+ 74, 66, 58, 50, 43, 42, 42, 42, 42, 41, 41, 40, 39, 38, 37, 36, 35, 34,
+ 33, 32, 31, 30, 29, 28, 27, 27, 27, 27, 72, 71, 70, 68, 67, 61, 55, 49,
+ 42, 43, 43, 43, 43, 43, 42, 42, 41, 40, 39, 38, 37, 36, 35, 34, 32, 31,
+ 30, 29, 28, 28, 28, 28, 76, 72, 68, 65, 61, 56, 52, 47, 42, 43, 44, 44,
+ 45, 45, 44, 44, 43, 42, 41, 40, 39, 38, 37, 35, 34, 33, 32, 31, 30, 30,
+ 30, 30, 80, 74, 67, 61, 55, 51, 48, 45, 42, 43, 44, 45, 47, 46, 46, 46,
+ 45, 44, 43, 42, 41, 40, 38, 37, 36, 35, 34, 33, 31, 31, 31, 31, 71, 66,
+ 61, 56, 51, 49, 46, 43, 40, 41, 42, 43, 44, 44, 44, 43, 43, 42, 41, 40,
+ 39, 38, 37, 36, 35, 34, 33, 32, 31, 31, 31, 31, 61, 58, 55, 52, 48, 46,
+ 43, 41, 39, 39, 40, 41, 42, 42, 41, 41, 41, 40, 39, 39, 38, 37, 36, 35,
+ 34, 33, 32, 31, 30, 30, 30, 30, 52, 50, 49, 47, 45, 43, 41, 39, 37, 38,
+ 38, 39, 39, 39, 39, 39, 39, 38, 38, 37, 36, 36, 35, 34, 33, 32, 31, 30,
+ 29, 29, 29, 29, 43, 43, 42, 42, 42, 40, 39, 37, 35, 36, 36, 36, 37, 37,
+ 37, 37, 37, 36, 36, 35, 35, 34, 33, 33, 32, 31, 30, 29, 29, 29, 29, 29,
+ 42, 42, 43, 43, 43, 41, 39, 38, 36, 36, 36, 36, 36, 36, 35, 35, 35, 35,
+ 34, 34, 33, 33, 32, 31, 31, 30, 29, 28, 28, 28, 28, 28, 41, 42, 43, 44,
+ 44, 42, 40, 38, 36, 36, 35, 35, 34, 34, 34, 34, 34, 33, 33, 32, 32, 31,
+ 31, 30, 30, 29, 28, 28, 27, 27, 27, 27, 41, 42, 43, 44, 45, 43, 41, 39,
+ 36, 36, 35, 34, 33, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 29, 28, 28,
+ 27, 27, 26, 26, 26, 26, 40, 42, 43, 45, 47, 44, 42, 39, 37, 36, 34, 33,
+ 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, 25, 25,
+ 25, 25, 39, 41, 43, 45, 46, 44, 42, 39, 37, 36, 34, 33, 32, 31, 31, 30,
+ 30, 29, 29, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 24, 39, 41,
+ 42, 44, 46, 44, 41, 39, 37, 35, 34, 33, 31, 31, 30, 29, 29, 28, 28, 28,
+ 27, 27, 26, 26, 25, 25, 25, 24, 24, 24, 24, 24, 38, 40, 42, 44, 46, 43,
+ 41, 39, 37, 35, 34, 32, 31, 30, 29, 29, 28, 27, 27, 27, 26, 26, 25, 25,
+ 25, 24, 24, 23, 23, 23, 23, 23, 37, 39, 41, 43, 45, 43, 41, 39, 37, 35,
+ 34, 32, 31, 30, 29, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 22,
+ 22, 22, 22, 22, 36, 38, 40, 42, 44, 42, 40, 38, 36, 35, 33, 32, 30, 29,
+ 28, 27, 27, 26, 26, 25, 24, 24, 24, 23, 23, 23, 22, 22, 21, 21, 21, 21,
+ 35, 37, 39, 41, 43, 41, 39, 38, 36, 34, 33, 31, 30, 29, 28, 27, 26, 26,
+ 25, 24, 24, 23, 23, 23, 22, 22, 22, 21, 21, 21, 21, 21, 34, 36, 38, 40,
+ 42, 40, 39, 37, 35, 34, 32, 31, 30, 29, 28, 27, 26, 25, 24, 24, 23, 23,
+ 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 33, 35, 37, 39, 41, 39, 38, 36,
+ 35, 33, 32, 31, 29, 28, 27, 26, 25, 24, 24, 23, 23, 22, 22, 21, 21, 21,
+ 20, 20, 20, 20, 20, 20, 32, 34, 36, 38, 40, 38, 37, 36, 34, 33, 31, 30,
+ 29, 28, 27, 26, 25, 24, 23, 23, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19,
+ 19, 19, 31, 33, 35, 37, 38, 37, 36, 35, 33, 32, 31, 30, 28, 27, 26, 25,
+ 24, 24, 23, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 30, 32,
+ 34, 35, 37, 36, 35, 34, 33, 31, 30, 29, 28, 27, 26, 25, 24, 23, 23, 22,
+ 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 18, 29, 31, 32, 34, 36, 35,
+ 34, 33, 32, 31, 30, 28, 27, 26, 25, 25, 24, 23, 22, 22, 21, 20, 20, 20,
+ 19, 19, 18, 18, 18, 18, 18, 18, 28, 30, 31, 33, 35, 34, 33, 32, 31, 30,
+ 29, 28, 27, 26, 25, 24, 23, 23, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18,
+ 18, 18, 18, 18, 27, 29, 30, 32, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25,
+ 25, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17,
+ 26, 28, 29, 31, 33, 32, 31, 30, 29, 28, 28, 27, 26, 25, 24, 23, 22, 22,
+ 21, 21, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 25, 27, 28, 30,
+ 31, 31, 30, 29, 29, 28, 27, 26, 25, 24, 24, 23, 22, 21, 21, 20, 20, 19,
+ 19, 18, 18, 18, 17, 17, 17, 17, 17, 17, 25, 27, 28, 30, 31, 31, 30, 29,
+ 29, 28, 27, 26, 25, 24, 24, 23, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18,
+ 17, 17, 17, 17, 17, 17, 25, 27, 28, 30, 31, 31, 30, 29, 29, 28, 27, 26,
+ 25, 24, 24, 23, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17,
+ 17, 17, 25, 27, 28, 30, 31, 31, 30, 29, 29, 28, 27, 26, 25, 24, 24, 23,
+ 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 17 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 162, 98, 89, 68, 98, 80, 74, 63, 89, 74, 55, 47, 68, 63, 47, 38,
+ /* Size 8 */
+ 151, 190, 99, 93, 86, 76, 66, 57, 190, 128, 97, 108, 105, 94, 82, 71,
+ 99, 97, 81, 84, 85, 80, 72, 64, 93, 108, 84, 73, 69, 66, 61, 56, 86,
+ 105, 85, 69, 61, 56, 52, 49, 76, 94, 80, 66, 56, 50, 46, 43, 66, 82, 72,
+ 61, 52, 46, 41, 38, 57, 71, 64, 56, 49, 43, 38, 35,
+ /* Size 16 */
+ 156, 176, 196, 149, 102, 99, 96, 92, 89, 83, 78, 73, 68, 63, 58, 58,
+ 176, 170, 164, 133, 101, 103, 104, 101, 99, 93, 88, 82, 76, 71, 66, 66,
+ 196, 164, 132, 116, 100, 106, 112, 110, 108, 103, 97, 91, 85, 79, 73,
+ 73, 149, 133, 116, 104, 92, 96, 99, 99, 98, 94, 90, 85, 80, 75, 70, 70,
+ 102, 101, 100, 92, 84, 85, 87, 87, 87, 85, 82, 79, 75, 71, 67, 67, 99,
+ 103, 106, 96, 85, 83, 81, 80, 79, 77, 75, 72, 69, 66, 62, 62, 96, 104,
+ 112, 99, 87, 81, 76, 74, 71, 70, 68, 66, 63, 61, 58, 58, 92, 101, 110,
+ 99, 87, 80, 74, 70, 67, 65, 63, 61, 59, 56, 54, 54, 89, 99, 108, 98, 87,
+ 79, 71, 67, 63, 60, 58, 56, 54, 52, 50, 50, 83, 93, 103, 94, 85, 77, 70,
+ 65, 60, 57, 55, 53, 51, 49, 47, 47, 78, 88, 97, 90, 82, 75, 68, 63, 58,
+ 55, 51, 49, 47, 46, 44, 44, 73, 82, 91, 85, 79, 72, 66, 61, 56, 53, 49,
+ 47, 45, 43, 42, 42, 68, 76, 85, 80, 75, 69, 63, 59, 54, 51, 47, 45, 43,
+ 41, 39, 39, 63, 71, 79, 75, 71, 66, 61, 56, 52, 49, 46, 43, 41, 39, 38,
+ 38, 58, 66, 73, 70, 67, 62, 58, 54, 50, 47, 44, 42, 39, 38, 36, 36, 58,
+ 66, 73, 70, 67, 62, 58, 54, 50, 47, 44, 42, 39, 38, 36, 36,
+ /* Size 32 */
+ 158, 169, 179, 189, 199, 175, 152, 128, 104, 102, 101, 99, 97, 96, 94,
+ 92, 90, 88, 85, 82, 79, 77, 74, 71, 69, 66, 64, 62, 59, 59, 59, 59, 169,
+ 172, 176, 179, 183, 163, 143, 123, 103, 103, 102, 102, 101, 100, 98, 97,
+ 95, 93, 90, 87, 84, 82, 79, 76, 73, 71, 68, 66, 63, 63, 63, 63, 179,
+ 176, 173, 170, 167, 151, 135, 119, 103, 104, 104, 105, 106, 104, 103,
+ 102, 100, 97, 95, 92, 89, 86, 83, 81, 78, 75, 72, 70, 67, 67, 67, 67,
+ 189, 179, 170, 160, 150, 138, 126, 114, 102, 104, 106, 108, 110, 109,
+ 107, 106, 105, 102, 100, 97, 94, 91, 88, 85, 82, 79, 76, 74, 71, 71, 71,
+ 71, 199, 183, 167, 150, 134, 126, 118, 110, 102, 105, 108, 111, 114,
+ 113, 112, 111, 110, 107, 105, 102, 99, 96, 93, 90, 86, 84, 81, 78, 75,
+ 75, 75, 75, 175, 163, 151, 138, 126, 119, 112, 105, 98, 100, 103, 105,
+ 107, 107, 106, 106, 105, 102, 100, 98, 95, 92, 90, 87, 84, 81, 78, 76,
+ 73, 73, 73, 73, 152, 143, 135, 126, 118, 112, 106, 100, 94, 95, 97, 99,
+ 101, 101, 100, 100, 100, 98, 96, 94, 91, 89, 86, 84, 81, 79, 76, 74, 71,
+ 71, 71, 71, 128, 123, 119, 114, 110, 105, 100, 95, 89, 91, 92, 94, 95,
+ 95, 95, 94, 94, 93, 91, 89, 88, 85, 83, 81, 79, 76, 74, 72, 69, 69, 69,
+ 69, 104, 103, 103, 102, 102, 98, 94, 89, 85, 86, 87, 88, 89, 89, 89, 89,
+ 89, 88, 86, 85, 84, 82, 80, 78, 76, 74, 72, 70, 68, 68, 68, 68, 102,
+ 103, 104, 104, 105, 100, 95, 91, 86, 86, 86, 86, 86, 85, 85, 85, 85, 84,
+ 83, 81, 80, 78, 77, 75, 73, 71, 69, 67, 66, 66, 66, 66, 101, 102, 104,
+ 106, 108, 103, 97, 92, 87, 86, 85, 84, 83, 82, 82, 81, 81, 80, 79, 78,
+ 77, 75, 73, 72, 70, 69, 67, 65, 63, 63, 63, 63, 99, 102, 105, 108, 111,
+ 105, 99, 94, 88, 86, 84, 82, 80, 79, 78, 78, 77, 76, 75, 74, 73, 71, 70,
+ 69, 67, 66, 64, 63, 61, 61, 61, 61, 97, 101, 106, 110, 114, 107, 101,
+ 95, 89, 86, 83, 80, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 64,
+ 63, 62, 60, 59, 59, 59, 59, 96, 100, 104, 109, 113, 107, 101, 95, 89,
+ 85, 82, 79, 76, 74, 73, 72, 70, 69, 68, 68, 67, 65, 64, 63, 62, 61, 60,
+ 58, 57, 57, 57, 57, 94, 98, 103, 107, 112, 106, 100, 95, 89, 85, 82, 78,
+ 75, 73, 72, 70, 68, 67, 66, 65, 64, 63, 62, 61, 60, 58, 57, 56, 55, 55,
+ 55, 55, 92, 97, 102, 106, 111, 106, 100, 94, 89, 85, 81, 78, 74, 72, 70,
+ 68, 66, 65, 64, 63, 61, 60, 59, 58, 57, 56, 55, 54, 53, 53, 53, 53, 90,
+ 95, 100, 105, 110, 105, 100, 94, 89, 85, 81, 77, 73, 70, 68, 66, 64, 63,
+ 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 51, 51, 51, 88, 93, 97, 102,
+ 107, 102, 98, 93, 88, 84, 80, 76, 72, 69, 67, 65, 63, 61, 60, 59, 57,
+ 56, 55, 54, 53, 52, 51, 50, 49, 49, 49, 49, 85, 90, 95, 100, 105, 100,
+ 96, 91, 86, 83, 79, 75, 71, 68, 66, 64, 61, 60, 58, 57, 56, 55, 53, 52,
+ 51, 51, 50, 49, 48, 48, 48, 48, 82, 87, 92, 97, 102, 98, 94, 89, 85, 81,
+ 78, 74, 70, 68, 65, 63, 60, 59, 57, 55, 54, 53, 52, 51, 50, 49, 48, 47,
+ 46, 46, 46, 46, 79, 84, 89, 94, 99, 95, 91, 88, 84, 80, 77, 73, 69, 67,
+ 64, 61, 59, 57, 56, 54, 52, 51, 50, 49, 48, 47, 46, 46, 45, 45, 45, 45,
+ 77, 82, 86, 91, 96, 92, 89, 85, 82, 78, 75, 71, 68, 65, 63, 60, 58, 56,
+ 55, 53, 51, 50, 49, 48, 47, 46, 45, 44, 44, 44, 44, 44, 74, 79, 83, 88,
+ 93, 90, 86, 83, 80, 77, 73, 70, 67, 64, 62, 59, 57, 55, 53, 52, 50, 49,
+ 48, 47, 46, 45, 44, 43, 42, 42, 42, 42, 71, 76, 81, 85, 90, 87, 84, 81,
+ 78, 75, 72, 69, 66, 63, 61, 58, 56, 54, 52, 51, 49, 48, 47, 46, 45, 44,
+ 43, 42, 41, 41, 41, 41, 69, 73, 78, 82, 86, 84, 81, 79, 76, 73, 70, 67,
+ 64, 62, 60, 57, 55, 53, 51, 50, 48, 47, 46, 45, 43, 43, 42, 41, 40, 40,
+ 40, 40, 66, 71, 75, 79, 84, 81, 79, 76, 74, 71, 69, 66, 63, 61, 58, 56,
+ 54, 52, 51, 49, 47, 46, 45, 44, 43, 42, 41, 40, 39, 39, 39, 39, 64, 68,
+ 72, 76, 81, 78, 76, 74, 72, 69, 67, 64, 62, 60, 57, 55, 53, 51, 50, 48,
+ 46, 45, 44, 43, 42, 41, 40, 39, 38, 38, 38, 38, 62, 66, 70, 74, 78, 76,
+ 74, 72, 70, 67, 65, 63, 60, 58, 56, 54, 52, 50, 49, 47, 46, 44, 43, 42,
+ 41, 40, 39, 38, 38, 38, 38, 38, 59, 63, 67, 71, 75, 73, 71, 69, 68, 66,
+ 63, 61, 59, 57, 55, 53, 51, 49, 48, 46, 45, 44, 42, 41, 40, 39, 38, 38,
+ 37, 37, 37, 37, 59, 63, 67, 71, 75, 73, 71, 69, 68, 66, 63, 61, 59, 57,
+ 55, 53, 51, 49, 48, 46, 45, 44, 42, 41, 40, 39, 38, 38, 37, 37, 37, 37,
+ 59, 63, 67, 71, 75, 73, 71, 69, 68, 66, 63, 61, 59, 57, 55, 53, 51, 49,
+ 48, 46, 45, 44, 42, 41, 40, 39, 38, 38, 37, 37, 37, 37, 59, 63, 67, 71,
+ 75, 73, 71, 69, 68, 66, 63, 61, 59, 57, 55, 53, 51, 49, 48, 46, 45, 44,
+ 42, 41, 40, 39, 38, 38, 37, 37, 37, 37 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 58, 35, 23, 58, 39, 28, 21, 35, 28, 20, 16, 23, 21, 16, 14,
+ /* Size 8 */
+ 64, 85, 79, 60, 44, 34, 27, 23, 85, 74, 78, 66, 52, 40, 32, 26, 79, 78,
+ 55, 47, 40, 34, 29, 24, 60, 66, 47, 37, 32, 28, 25, 22, 44, 52, 40, 32,
+ 27, 24, 22, 20, 34, 40, 34, 28, 24, 21, 19, 18, 27, 32, 29, 25, 22, 19,
+ 18, 17, 23, 26, 24, 22, 20, 18, 17, 16,
+ /* Size 16 */
+ 64, 75, 85, 82, 79, 69, 60, 52, 44, 39, 34, 31, 27, 25, 23, 23, 75, 77,
+ 80, 79, 78, 71, 63, 55, 48, 43, 37, 33, 30, 27, 25, 25, 85, 80, 74, 76,
+ 78, 72, 66, 59, 52, 46, 40, 36, 32, 29, 26, 26, 82, 79, 76, 71, 66, 62,
+ 57, 51, 46, 42, 37, 34, 30, 28, 25, 25, 79, 78, 78, 66, 55, 51, 47, 44,
+ 40, 37, 34, 31, 29, 26, 24, 24, 69, 71, 72, 62, 51, 47, 42, 39, 36, 33,
+ 31, 29, 27, 25, 23, 23, 60, 63, 66, 57, 47, 42, 37, 34, 32, 30, 28, 26,
+ 25, 23, 22, 22, 52, 55, 59, 51, 44, 39, 34, 32, 29, 28, 26, 24, 23, 22,
+ 21, 21, 44, 48, 52, 46, 40, 36, 32, 29, 27, 25, 24, 23, 22, 21, 20, 20,
+ 39, 43, 46, 42, 37, 33, 30, 28, 25, 24, 22, 21, 20, 20, 19, 19, 34, 37,
+ 40, 37, 34, 31, 28, 26, 24, 22, 21, 20, 19, 19, 18, 18, 31, 33, 36, 34,
+ 31, 29, 26, 24, 23, 21, 20, 19, 19, 18, 17, 17, 27, 30, 32, 30, 29, 27,
+ 25, 23, 22, 20, 19, 19, 18, 17, 17, 17, 25, 27, 29, 28, 26, 25, 23, 22,
+ 21, 20, 19, 18, 17, 17, 16, 16, 23, 25, 26, 25, 24, 23, 22, 21, 20, 19,
+ 18, 17, 17, 16, 16, 16, 23, 25, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+ 17, 16, 16, 16,
+ /* Size 32 */
+ 64, 69, 75, 80, 85, 84, 82, 80, 79, 74, 69, 64, 60, 56, 52, 48, 44, 42,
+ 39, 37, 34, 32, 31, 29, 27, 26, 25, 24, 23, 23, 23, 23, 69, 73, 76, 79,
+ 83, 82, 81, 80, 78, 74, 70, 66, 61, 58, 54, 50, 46, 44, 41, 38, 36, 34,
+ 32, 30, 29, 27, 26, 25, 24, 24, 24, 24, 75, 76, 77, 79, 80, 79, 79, 79,
+ 78, 74, 71, 67, 63, 59, 55, 52, 48, 45, 43, 40, 37, 35, 33, 32, 30, 28,
+ 27, 26, 25, 25, 25, 25, 80, 79, 79, 78, 77, 77, 77, 78, 78, 75, 71, 68,
+ 65, 61, 57, 54, 50, 47, 44, 41, 39, 37, 35, 33, 31, 30, 28, 27, 26, 26,
+ 26, 26, 85, 83, 80, 77, 74, 75, 76, 77, 78, 75, 72, 69, 66, 63, 59, 55,
+ 52, 49, 46, 43, 40, 38, 36, 34, 32, 31, 29, 28, 26, 26, 26, 26, 84, 82,
+ 79, 77, 75, 74, 74, 73, 72, 69, 67, 64, 61, 58, 55, 52, 49, 46, 44, 41,
+ 39, 37, 35, 33, 31, 30, 29, 27, 26, 26, 26, 26, 82, 81, 79, 77, 76, 74,
+ 71, 69, 66, 64, 62, 59, 57, 54, 51, 49, 46, 44, 42, 39, 37, 35, 34, 32,
+ 30, 29, 28, 27, 25, 25, 25, 25, 80, 80, 79, 78, 77, 73, 69, 65, 61, 59,
+ 56, 54, 52, 50, 48, 45, 43, 41, 39, 37, 35, 34, 32, 31, 29, 28, 27, 26,
+ 25, 25, 25, 25, 79, 78, 78, 78, 78, 72, 66, 61, 55, 53, 51, 49, 47, 46,
+ 44, 42, 40, 39, 37, 36, 34, 33, 31, 30, 29, 27, 26, 25, 24, 24, 24, 24,
+ 74, 74, 74, 75, 75, 69, 64, 59, 53, 51, 49, 47, 45, 43, 41, 40, 38, 37,
+ 35, 34, 32, 31, 30, 29, 28, 27, 26, 25, 24, 24, 24, 24, 69, 70, 71, 71,
+ 72, 67, 62, 56, 51, 49, 47, 44, 42, 41, 39, 38, 36, 35, 33, 32, 31, 30,
+ 29, 28, 27, 26, 25, 24, 23, 23, 23, 23, 64, 66, 67, 68, 69, 64, 59, 54,
+ 49, 47, 44, 42, 40, 38, 37, 35, 34, 33, 32, 31, 29, 28, 28, 27, 26, 25,
+ 24, 23, 23, 23, 23, 23, 60, 61, 63, 65, 66, 61, 57, 52, 47, 45, 42, 40,
+ 37, 36, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 25, 24, 23, 23, 22, 22,
+ 22, 22, 56, 58, 59, 61, 63, 58, 54, 50, 46, 43, 41, 38, 36, 34, 33, 32,
+ 31, 30, 29, 28, 27, 26, 25, 25, 24, 23, 23, 22, 21, 21, 21, 21, 52, 54,
+ 55, 57, 59, 55, 51, 48, 44, 41, 39, 37, 34, 33, 32, 31, 29, 28, 28, 27,
+ 26, 25, 24, 24, 23, 23, 22, 21, 21, 21, 21, 21, 48, 50, 52, 54, 55, 52,
+ 49, 45, 42, 40, 38, 35, 33, 32, 31, 29, 28, 27, 26, 26, 25, 24, 24, 23,
+ 22, 22, 21, 21, 20, 20, 20, 20, 44, 46, 48, 50, 52, 49, 46, 43, 40, 38,
+ 36, 34, 32, 31, 29, 28, 27, 26, 25, 25, 24, 23, 23, 22, 22, 21, 21, 20,
+ 20, 20, 20, 20, 42, 44, 45, 47, 49, 46, 44, 41, 39, 37, 35, 33, 31, 30,
+ 28, 27, 26, 25, 25, 24, 23, 23, 22, 22, 21, 21, 20, 20, 19, 19, 19, 19,
+ 39, 41, 43, 44, 46, 44, 42, 39, 37, 35, 33, 32, 30, 29, 28, 26, 25, 25,
+ 24, 23, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 37, 38, 40, 41,
+ 43, 41, 39, 37, 36, 34, 32, 31, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21,
+ 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 34, 36, 37, 39, 40, 39, 37, 35,
+ 34, 32, 31, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 21, 20, 20, 19, 19,
+ 19, 18, 18, 18, 18, 18, 32, 34, 35, 37, 38, 37, 35, 34, 33, 31, 30, 28,
+ 27, 26, 25, 24, 23, 23, 22, 21, 21, 20, 20, 19, 19, 19, 18, 18, 18, 18,
+ 18, 18, 31, 32, 33, 35, 36, 35, 34, 32, 31, 30, 29, 28, 26, 25, 24, 24,
+ 23, 22, 21, 21, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 29, 30,
+ 32, 33, 34, 33, 32, 31, 30, 29, 28, 27, 25, 25, 24, 23, 22, 22, 21, 20,
+ 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 17, 17, 27, 29, 30, 31, 32, 31,
+ 30, 29, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 20, 19, 19, 19, 18,
+ 18, 18, 17, 17, 17, 17, 17, 17, 26, 27, 28, 30, 31, 30, 29, 28, 27, 27,
+ 26, 25, 24, 23, 23, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17,
+ 17, 17, 17, 17, 25, 26, 27, 28, 29, 29, 28, 27, 26, 26, 25, 24, 23, 23,
+ 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16,
+ 24, 25, 26, 27, 28, 27, 27, 26, 25, 25, 24, 23, 23, 22, 21, 21, 20, 20,
+ 19, 19, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 23, 24, 25, 26,
+ 26, 26, 25, 25, 24, 24, 23, 23, 22, 21, 21, 20, 20, 19, 19, 19, 18, 18,
+ 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 23, 24, 25, 26, 26, 26, 25, 25,
+ 24, 24, 23, 23, 22, 21, 21, 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17,
+ 16, 16, 16, 16, 16, 16, 23, 24, 25, 26, 26, 26, 25, 25, 24, 24, 23, 23,
+ 22, 21, 21, 20, 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16,
+ 16, 16, 23, 24, 25, 26, 26, 26, 25, 25, 24, 24, 23, 23, 22, 21, 21, 20,
+ 20, 19, 19, 19, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 192, 174, 103, 63, 174, 113, 79, 57, 103, 79, 55, 44, 63, 57, 44, 37,
+ /* Size 8 */
+ 160, 216, 199, 149, 109, 82, 65, 53, 216, 187, 195, 166, 128, 98, 77,
+ 62, 199, 195, 137, 116, 98, 81, 67, 57, 149, 166, 116, 90, 76, 66, 57,
+ 50, 109, 128, 98, 76, 63, 55, 49, 45, 82, 98, 81, 66, 55, 48, 44, 40,
+ 65, 77, 67, 57, 49, 44, 40, 37, 53, 62, 57, 50, 45, 40, 37, 35,
+ /* Size 16 */
+ 167, 195, 224, 216, 207, 181, 155, 134, 113, 99, 85, 76, 67, 61, 55, 55,
+ 195, 202, 209, 207, 205, 184, 164, 143, 123, 108, 94, 84, 74, 67, 60,
+ 60, 224, 209, 194, 199, 203, 188, 172, 153, 133, 117, 102, 91, 80, 72,
+ 65, 65, 216, 207, 199, 186, 173, 160, 147, 132, 118, 106, 93, 84, 75,
+ 68, 62, 62, 207, 205, 203, 173, 143, 132, 121, 112, 102, 94, 85, 78, 70,
+ 65, 59, 59, 181, 184, 188, 160, 132, 120, 107, 99, 91, 84, 77, 71, 65,
+ 60, 56, 56, 155, 164, 172, 147, 121, 107, 94, 86, 79, 74, 69, 64, 60,
+ 56, 53, 53, 134, 143, 153, 132, 112, 99, 86, 79, 72, 68, 63, 59, 56, 53,
+ 50, 50, 113, 123, 133, 118, 102, 91, 79, 72, 66, 62, 57, 54, 51, 49, 47,
+ 47, 99, 108, 117, 106, 94, 84, 74, 68, 62, 58, 54, 51, 48, 46, 44, 44,
+ 85, 94, 102, 93, 85, 77, 69, 63, 57, 54, 50, 48, 46, 44, 42, 42, 76, 84,
+ 91, 84, 78, 71, 64, 59, 54, 51, 48, 46, 44, 42, 40, 40, 67, 74, 80, 75,
+ 70, 65, 60, 56, 51, 48, 46, 44, 42, 40, 39, 39, 61, 67, 72, 68, 65, 60,
+ 56, 53, 49, 46, 44, 42, 40, 39, 38, 38, 55, 60, 65, 62, 59, 56, 53, 50,
+ 47, 44, 42, 40, 39, 38, 36, 36, 55, 60, 65, 62, 59, 56, 53, 50, 47, 44,
+ 42, 40, 39, 38, 36, 36,
+ /* Size 32 */
+ 170, 185, 200, 214, 229, 225, 220, 215, 211, 198, 185, 171, 158, 147,
+ 137, 126, 116, 109, 101, 94, 87, 83, 78, 73, 69, 66, 62, 59, 56, 56, 56,
+ 56, 185, 194, 203, 212, 221, 219, 216, 213, 210, 198, 186, 174, 163,
+ 152, 142, 131, 121, 113, 106, 99, 91, 87, 82, 77, 72, 69, 65, 62, 59,
+ 59, 59, 59, 200, 203, 207, 210, 214, 213, 212, 210, 209, 199, 188, 178,
+ 167, 157, 146, 136, 126, 118, 111, 103, 96, 90, 85, 80, 75, 72, 68, 65,
+ 61, 61, 61, 61, 214, 212, 210, 208, 206, 207, 207, 208, 209, 199, 190,
+ 181, 172, 161, 151, 141, 131, 123, 115, 108, 100, 94, 89, 84, 78, 75,
+ 71, 67, 64, 64, 64, 64, 229, 221, 214, 206, 199, 201, 203, 205, 208,
+ 200, 192, 184, 176, 166, 156, 146, 136, 128, 120, 112, 104, 98, 93, 87,
+ 81, 78, 74, 70, 66, 66, 66, 66, 225, 219, 213, 207, 201, 199, 197, 194,
+ 192, 185, 178, 170, 163, 154, 145, 137, 128, 121, 114, 107, 100, 95, 89,
+ 84, 79, 75, 72, 68, 65, 65, 65, 65, 220, 216, 212, 207, 203, 197, 190,
+ 183, 177, 170, 163, 157, 150, 142, 135, 128, 120, 114, 108, 102, 95, 91,
+ 86, 81, 77, 73, 70, 67, 63, 63, 63, 63, 215, 213, 210, 208, 205, 194,
+ 183, 172, 161, 155, 149, 143, 137, 131, 125, 118, 112, 107, 102, 96, 91,
+ 87, 83, 78, 74, 71, 68, 65, 62, 62, 62, 62, 211, 210, 209, 209, 208,
+ 192, 177, 161, 146, 140, 135, 129, 124, 119, 114, 109, 105, 100, 96, 91,
+ 87, 83, 79, 75, 72, 69, 66, 63, 60, 60, 60, 60, 198, 198, 199, 199, 200,
+ 185, 170, 155, 140, 134, 128, 123, 117, 112, 108, 103, 99, 95, 91, 87,
+ 82, 79, 76, 72, 69, 66, 64, 61, 59, 59, 59, 59, 185, 186, 188, 190, 192,
+ 178, 163, 149, 135, 128, 122, 116, 110, 105, 101, 97, 93, 89, 86, 82,
+ 78, 75, 72, 69, 66, 64, 62, 59, 57, 57, 57, 57, 171, 174, 178, 181, 184,
+ 170, 157, 143, 129, 123, 116, 109, 103, 99, 95, 91, 87, 84, 81, 77, 74,
+ 72, 69, 66, 64, 62, 60, 57, 55, 55, 55, 55, 158, 163, 167, 172, 176,
+ 163, 150, 137, 124, 117, 110, 103, 96, 92, 88, 85, 81, 78, 75, 73, 70,
+ 68, 66, 63, 61, 59, 57, 55, 54, 54, 54, 54, 147, 152, 157, 161, 166,
+ 154, 142, 131, 119, 112, 105, 99, 92, 88, 85, 81, 77, 75, 72, 70, 67,
+ 65, 63, 61, 59, 57, 55, 54, 52, 52, 52, 52, 137, 142, 146, 151, 156,
+ 145, 135, 125, 114, 108, 101, 95, 88, 85, 81, 78, 74, 72, 69, 67, 64,
+ 62, 61, 59, 57, 55, 54, 52, 51, 51, 51, 51, 126, 131, 136, 141, 146,
+ 137, 128, 118, 109, 103, 97, 91, 85, 81, 78, 74, 71, 68, 66, 64, 61, 60,
+ 58, 56, 55, 53, 52, 50, 49, 49, 49, 49, 116, 121, 126, 131, 136, 128,
+ 120, 112, 105, 99, 93, 87, 81, 77, 74, 71, 67, 65, 63, 61, 59, 57, 56,
+ 54, 52, 51, 50, 49, 48, 48, 48, 48, 109, 113, 118, 123, 128, 121, 114,
+ 107, 100, 95, 89, 84, 78, 75, 72, 68, 65, 63, 61, 59, 57, 55, 54, 52,
+ 51, 50, 49, 48, 46, 46, 46, 46, 101, 106, 111, 115, 120, 114, 108, 102,
+ 96, 91, 86, 81, 75, 72, 69, 66, 63, 61, 59, 57, 55, 54, 52, 51, 49, 48,
+ 47, 46, 45, 45, 45, 45, 94, 99, 103, 108, 112, 107, 102, 96, 91, 87, 82,
+ 77, 73, 70, 67, 64, 61, 59, 57, 55, 53, 52, 51, 49, 48, 47, 46, 45, 44,
+ 44, 44, 44, 87, 91, 96, 100, 104, 100, 95, 91, 87, 82, 78, 74, 70, 67,
+ 64, 61, 59, 57, 55, 53, 51, 50, 49, 48, 47, 46, 45, 44, 43, 43, 43, 43,
+ 83, 87, 90, 94, 98, 95, 91, 87, 83, 79, 75, 72, 68, 65, 62, 60, 57, 55,
+ 54, 52, 50, 49, 48, 47, 45, 45, 44, 43, 42, 42, 42, 42, 78, 82, 85, 89,
+ 93, 89, 86, 83, 79, 76, 72, 69, 66, 63, 61, 58, 56, 54, 52, 51, 49, 48,
+ 47, 46, 44, 44, 43, 42, 41, 41, 41, 41, 73, 77, 80, 84, 87, 84, 81, 78,
+ 75, 72, 69, 66, 63, 61, 59, 56, 54, 52, 51, 49, 48, 47, 46, 45, 43, 43,
+ 42, 41, 40, 40, 40, 40, 69, 72, 75, 78, 81, 79, 77, 74, 72, 69, 66, 64,
+ 61, 59, 57, 55, 52, 51, 49, 48, 47, 45, 44, 43, 42, 42, 41, 40, 40, 40,
+ 40, 40, 66, 69, 72, 75, 78, 75, 73, 71, 69, 66, 64, 62, 59, 57, 55, 53,
+ 51, 50, 48, 47, 46, 45, 44, 43, 42, 41, 40, 40, 39, 39, 39, 39, 62, 65,
+ 68, 71, 74, 72, 70, 68, 66, 64, 62, 60, 57, 55, 54, 52, 50, 49, 47, 46,
+ 45, 44, 43, 42, 41, 40, 40, 39, 38, 38, 38, 38, 59, 62, 65, 67, 70, 68,
+ 67, 65, 63, 61, 59, 57, 55, 54, 52, 50, 49, 48, 46, 45, 44, 43, 42, 41,
+ 40, 40, 39, 38, 38, 38, 38, 38, 56, 59, 61, 64, 66, 65, 63, 62, 60, 59,
+ 57, 55, 54, 52, 51, 49, 48, 46, 45, 44, 43, 42, 41, 40, 40, 39, 38, 38,
+ 37, 37, 37, 37, 56, 59, 61, 64, 66, 65, 63, 62, 60, 59, 57, 55, 54, 52,
+ 51, 49, 48, 46, 45, 44, 43, 42, 41, 40, 40, 39, 38, 38, 37, 37, 37, 37,
+ 56, 59, 61, 64, 66, 65, 63, 62, 60, 59, 57, 55, 54, 52, 51, 49, 48, 46,
+ 45, 44, 43, 42, 41, 40, 40, 39, 38, 38, 37, 37, 37, 37, 56, 59, 61, 64,
+ 66, 65, 63, 62, 60, 59, 57, 55, 54, 52, 51, 49, 48, 46, 45, 44, 43, 42,
+ 41, 40, 40, 39, 38, 38, 37, 37, 37, 37 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 40, 37, 29, 40, 34, 32, 27, 37, 32, 25, 22, 29, 27, 22, 18,
+ /* Size 8 */
+ 64, 79, 44, 41, 39, 35, 31, 27, 79, 55, 43, 47, 46, 42, 37, 33, 44, 43,
+ 37, 38, 38, 36, 33, 30, 41, 47, 38, 34, 32, 31, 29, 27, 39, 46, 38, 32,
+ 29, 27, 25, 24, 35, 42, 36, 31, 27, 24, 23, 21, 31, 37, 33, 29, 25, 23,
+ 21, 20, 27, 33, 30, 27, 24, 21, 20, 19,
+ /* Size 16 */
+ 64, 72, 79, 62, 44, 42, 41, 40, 39, 37, 35, 33, 31, 29, 27, 27, 72, 69,
+ 67, 55, 43, 44, 44, 43, 42, 40, 38, 36, 34, 32, 30, 30, 79, 67, 55, 49,
+ 43, 45, 47, 47, 46, 44, 42, 39, 37, 35, 33, 33, 62, 55, 49, 44, 40, 41,
+ 43, 42, 42, 41, 39, 37, 35, 33, 31, 31, 44, 43, 43, 40, 37, 37, 38, 38,
+ 38, 37, 36, 35, 33, 32, 30, 30, 42, 44, 45, 41, 37, 37, 36, 35, 35, 34,
+ 33, 32, 31, 30, 28, 28, 41, 44, 47, 43, 38, 36, 34, 33, 32, 31, 31, 30,
+ 29, 28, 27, 27, 40, 43, 47, 42, 38, 35, 33, 32, 30, 29, 29, 28, 27, 26,
+ 25, 25, 39, 42, 46, 42, 38, 35, 32, 30, 29, 28, 27, 26, 25, 25, 24, 24,
+ 37, 40, 44, 41, 37, 34, 31, 29, 28, 27, 26, 25, 24, 23, 23, 23, 35, 38,
+ 42, 39, 36, 33, 31, 29, 27, 26, 24, 24, 23, 22, 21, 21, 33, 36, 39, 37,
+ 35, 32, 30, 28, 26, 25, 24, 23, 22, 21, 21, 21, 31, 34, 37, 35, 33, 31,
+ 29, 27, 25, 24, 23, 22, 21, 20, 20, 20, 29, 32, 35, 33, 32, 30, 28, 26,
+ 25, 23, 22, 21, 20, 20, 19, 19, 27, 30, 33, 31, 30, 28, 27, 25, 24, 23,
+ 21, 21, 20, 19, 19, 19, 27, 30, 33, 31, 30, 28, 27, 25, 24, 23, 21, 21,
+ 20, 19, 19, 19,
+ /* Size 32 */
+ 64, 68, 72, 75, 79, 70, 62, 53, 44, 43, 42, 42, 41, 41, 40, 39, 39, 38,
+ 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 27, 27, 27, 68, 69, 71, 72,
+ 73, 66, 58, 51, 43, 43, 43, 43, 43, 42, 42, 41, 40, 39, 38, 37, 36, 35,
+ 34, 33, 32, 31, 30, 29, 28, 28, 28, 28, 72, 71, 69, 68, 67, 61, 55, 49,
+ 43, 44, 44, 44, 44, 44, 43, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33,
+ 32, 31, 30, 30, 30, 30, 75, 72, 68, 65, 61, 57, 52, 48, 43, 44, 44, 45,
+ 46, 45, 45, 45, 44, 43, 42, 41, 40, 39, 38, 37, 35, 34, 33, 32, 31, 31,
+ 31, 31, 79, 73, 67, 61, 55, 52, 49, 46, 43, 44, 45, 46, 47, 47, 47, 46,
+ 46, 45, 44, 43, 42, 41, 39, 38, 37, 36, 35, 34, 33, 33, 33, 33, 70, 66,
+ 61, 57, 52, 49, 47, 44, 41, 42, 43, 44, 45, 45, 44, 44, 44, 43, 42, 41,
+ 40, 39, 38, 37, 36, 35, 34, 33, 32, 32, 32, 32, 62, 58, 55, 52, 49, 47,
+ 44, 42, 40, 40, 41, 42, 43, 42, 42, 42, 42, 41, 41, 40, 39, 38, 37, 36,
+ 35, 34, 33, 32, 31, 31, 31, 31, 53, 51, 49, 48, 46, 44, 42, 40, 38, 39,
+ 39, 40, 40, 40, 40, 40, 40, 39, 39, 38, 38, 37, 36, 35, 34, 33, 32, 32,
+ 31, 31, 31, 31, 44, 43, 43, 43, 43, 41, 40, 38, 37, 37, 37, 38, 38, 38,
+ 38, 38, 38, 38, 37, 37, 36, 35, 35, 34, 33, 32, 32, 31, 30, 30, 30, 30,
+ 43, 43, 44, 44, 44, 42, 40, 39, 37, 37, 37, 37, 37, 37, 37, 37, 37, 36,
+ 36, 35, 35, 34, 33, 33, 32, 31, 31, 30, 29, 29, 29, 29, 42, 43, 44, 44,
+ 45, 43, 41, 39, 37, 37, 37, 36, 36, 36, 35, 35, 35, 35, 34, 34, 33, 33,
+ 32, 32, 31, 30, 30, 29, 28, 28, 28, 28, 42, 43, 44, 45, 46, 44, 42, 40,
+ 38, 37, 36, 35, 35, 34, 34, 34, 33, 33, 33, 32, 32, 31, 31, 30, 30, 29,
+ 29, 28, 28, 28, 28, 28, 41, 43, 44, 46, 47, 45, 43, 40, 38, 37, 36, 35,
+ 34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 27,
+ 27, 27, 41, 42, 44, 45, 47, 45, 42, 40, 38, 37, 36, 34, 33, 33, 32, 32,
+ 31, 31, 30, 30, 30, 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 26, 40, 42,
+ 43, 45, 47, 44, 42, 40, 38, 37, 35, 34, 33, 32, 32, 31, 30, 30, 29, 29,
+ 29, 28, 28, 27, 27, 27, 26, 26, 25, 25, 25, 25, 39, 41, 43, 45, 46, 44,
+ 42, 40, 38, 37, 35, 34, 32, 32, 31, 30, 29, 29, 29, 28, 28, 27, 27, 27,
+ 26, 26, 25, 25, 25, 25, 25, 25, 39, 40, 42, 44, 46, 44, 42, 40, 38, 37,
+ 35, 33, 32, 31, 30, 29, 29, 28, 28, 27, 27, 26, 26, 26, 25, 25, 25, 24,
+ 24, 24, 24, 24, 38, 39, 41, 43, 45, 43, 41, 39, 38, 36, 35, 33, 32, 31,
+ 30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 25, 24, 24, 24, 23, 23, 23, 23,
+ 37, 38, 40, 42, 44, 42, 41, 39, 37, 36, 34, 33, 31, 30, 29, 29, 28, 27,
+ 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 23, 23, 23, 36, 37, 39, 41,
+ 43, 41, 40, 38, 37, 35, 34, 32, 31, 30, 29, 28, 27, 27, 26, 26, 25, 25,
+ 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 35, 36, 38, 40, 42, 40, 39, 38,
+ 36, 35, 33, 32, 31, 30, 29, 28, 27, 26, 26, 25, 24, 24, 24, 23, 23, 22,
+ 22, 22, 21, 21, 21, 21, 34, 35, 37, 39, 41, 39, 38, 37, 35, 34, 33, 31,
+ 30, 29, 28, 27, 26, 26, 25, 25, 24, 24, 23, 23, 22, 22, 22, 21, 21, 21,
+ 21, 21, 33, 34, 36, 38, 39, 38, 37, 36, 35, 33, 32, 31, 30, 29, 28, 27,
+ 26, 25, 25, 24, 24, 23, 23, 22, 22, 22, 21, 21, 21, 21, 21, 21, 32, 33,
+ 35, 37, 38, 37, 36, 35, 34, 33, 32, 30, 29, 28, 27, 27, 26, 25, 24, 24,
+ 23, 23, 22, 22, 21, 21, 21, 21, 20, 20, 20, 20, 31, 32, 34, 35, 37, 36,
+ 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 25, 24, 23, 23, 22, 22, 21,
+ 21, 21, 20, 20, 20, 20, 20, 20, 30, 31, 33, 34, 36, 35, 34, 33, 32, 31,
+ 30, 29, 28, 27, 27, 26, 25, 24, 24, 23, 22, 22, 22, 21, 21, 20, 20, 20,
+ 19, 19, 19, 19, 29, 30, 32, 33, 35, 34, 33, 32, 32, 31, 30, 29, 28, 27,
+ 26, 25, 25, 24, 23, 23, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19,
+ 28, 29, 31, 32, 34, 33, 32, 32, 31, 30, 29, 28, 27, 27, 26, 25, 24, 24,
+ 23, 22, 22, 21, 21, 21, 20, 20, 19, 19, 19, 19, 19, 19, 27, 28, 30, 31,
+ 33, 32, 31, 31, 30, 29, 28, 28, 27, 26, 25, 25, 24, 23, 23, 22, 21, 21,
+ 21, 20, 20, 19, 19, 19, 19, 19, 19, 19, 27, 28, 30, 31, 33, 32, 31, 31,
+ 30, 29, 28, 28, 27, 26, 25, 25, 24, 23, 23, 22, 21, 21, 21, 20, 20, 19,
+ 19, 19, 19, 19, 19, 19, 27, 28, 30, 31, 33, 32, 31, 31, 30, 29, 28, 28,
+ 27, 26, 25, 25, 24, 23, 23, 22, 21, 21, 21, 20, 20, 19, 19, 19, 19, 19,
+ 19, 19, 27, 28, 30, 31, 33, 32, 31, 31, 30, 29, 28, 28, 27, 26, 25, 25,
+ 24, 23, 23, 22, 21, 21, 21, 20, 20, 19, 19, 19, 19, 19, 19, 19 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 152, 94, 86, 67, 94, 77, 72, 62, 86, 72, 55, 48, 67, 62, 48, 40,
+ /* Size 8 */
+ 141, 176, 95, 89, 83, 74, 65, 57, 176, 120, 93, 103, 100, 90, 80, 70,
+ 95, 93, 79, 81, 82, 77, 71, 64, 89, 103, 81, 71, 68, 65, 61, 56, 83,
+ 100, 82, 68, 60, 56, 53, 49, 74, 90, 77, 65, 56, 50, 47, 44, 65, 80, 71,
+ 61, 53, 47, 43, 40, 57, 70, 64, 56, 49, 44, 40, 37,
+ /* Size 16 */
+ 145, 163, 181, 139, 97, 94, 91, 88, 85, 81, 76, 71, 66, 62, 58, 58, 163,
+ 158, 152, 124, 96, 98, 99, 96, 94, 89, 84, 79, 74, 70, 65, 65, 181, 152,
+ 124, 110, 95, 101, 106, 104, 103, 98, 93, 87, 82, 77, 72, 72, 139, 124,
+ 110, 99, 88, 91, 95, 94, 93, 90, 86, 82, 77, 73, 69, 69, 97, 96, 95, 88,
+ 81, 82, 84, 84, 84, 82, 80, 76, 73, 69, 65, 65, 94, 98, 101, 91, 82, 80,
+ 79, 78, 77, 75, 73, 70, 68, 65, 62, 62, 91, 99, 106, 95, 84, 79, 74, 72,
+ 70, 68, 67, 65, 63, 60, 58, 58, 88, 96, 104, 94, 84, 78, 72, 69, 66, 64,
+ 62, 60, 58, 56, 54, 54, 85, 94, 103, 93, 84, 77, 70, 66, 62, 60, 58, 56,
+ 54, 52, 51, 51, 81, 89, 98, 90, 82, 75, 68, 64, 60, 57, 55, 53, 51, 50,
+ 48, 48, 76, 84, 93, 86, 80, 73, 67, 62, 58, 55, 52, 50, 48, 47, 45, 45,
+ 71, 79, 87, 82, 76, 70, 65, 60, 56, 53, 50, 48, 46, 45, 43, 43, 66, 74,
+ 82, 77, 73, 68, 63, 58, 54, 51, 48, 46, 44, 43, 41, 41, 62, 70, 77, 73,
+ 69, 65, 60, 56, 52, 50, 47, 45, 43, 41, 40, 40, 58, 65, 72, 69, 65, 62,
+ 58, 54, 51, 48, 45, 43, 41, 40, 38, 38, 58, 65, 72, 69, 65, 62, 58, 54,
+ 51, 48, 45, 43, 41, 40, 38, 38,
+ /* Size 32 */
+ 147, 156, 165, 175, 184, 163, 141, 120, 99, 97, 96, 94, 93, 91, 90, 88,
+ 87, 84, 82, 79, 77, 75, 72, 70, 67, 65, 63, 61, 59, 59, 59, 59, 156,
+ 160, 163, 166, 169, 151, 134, 116, 98, 98, 97, 97, 97, 95, 94, 92, 91,
+ 89, 86, 84, 81, 79, 76, 74, 71, 69, 67, 65, 62, 62, 62, 62, 165, 163,
+ 160, 157, 155, 140, 126, 112, 98, 98, 99, 100, 100, 99, 98, 97, 95, 93,
+ 91, 88, 86, 83, 80, 78, 75, 73, 71, 68, 66, 66, 66, 66, 175, 166, 157,
+ 149, 140, 129, 119, 108, 97, 99, 101, 102, 104, 103, 102, 101, 100, 97,
+ 95, 93, 90, 87, 85, 82, 79, 77, 74, 72, 69, 69, 69, 69, 184, 169, 155,
+ 140, 126, 118, 111, 104, 97, 100, 102, 105, 108, 107, 106, 105, 104,
+ 102, 99, 97, 94, 92, 89, 86, 83, 81, 78, 75, 73, 73, 73, 73, 163, 151,
+ 140, 129, 118, 112, 106, 99, 93, 95, 98, 100, 102, 101, 101, 100, 100,
+ 97, 95, 93, 91, 88, 86, 83, 81, 78, 76, 74, 71, 71, 71, 71, 141, 134,
+ 126, 119, 111, 106, 100, 95, 89, 91, 93, 95, 96, 96, 96, 95, 95, 93, 91,
+ 89, 88, 85, 83, 81, 79, 76, 74, 72, 70, 70, 70, 70, 120, 116, 112, 108,
+ 104, 99, 95, 90, 86, 87, 88, 89, 91, 91, 90, 90, 90, 89, 87, 86, 84, 82,
+ 80, 78, 76, 74, 72, 70, 68, 68, 68, 68, 99, 98, 98, 97, 97, 93, 89, 86,
+ 82, 83, 84, 84, 85, 85, 85, 85, 85, 84, 83, 82, 81, 79, 77, 76, 74, 72,
+ 70, 68, 66, 66, 66, 66, 97, 98, 98, 99, 100, 95, 91, 87, 83, 83, 83, 83,
+ 82, 82, 82, 82, 82, 81, 80, 79, 78, 76, 74, 73, 71, 70, 68, 66, 64, 64,
+ 64, 64, 96, 97, 99, 101, 102, 98, 93, 88, 84, 83, 82, 81, 80, 79, 79,
+ 79, 78, 77, 76, 75, 74, 73, 71, 70, 69, 67, 66, 64, 63, 63, 63, 63, 94,
+ 97, 100, 102, 105, 100, 95, 89, 84, 83, 81, 79, 77, 77, 76, 75, 75, 74,
+ 73, 72, 71, 70, 69, 67, 66, 65, 63, 62, 61, 61, 61, 61, 93, 97, 100,
+ 104, 108, 102, 96, 91, 85, 82, 80, 77, 75, 74, 73, 72, 71, 70, 69, 68,
+ 68, 67, 66, 65, 64, 62, 61, 60, 59, 59, 59, 59, 91, 95, 99, 103, 107,
+ 101, 96, 91, 85, 82, 79, 77, 74, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63,
+ 62, 61, 60, 59, 58, 57, 57, 57, 57, 90, 94, 98, 102, 106, 101, 96, 90,
+ 85, 82, 79, 76, 73, 71, 70, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58,
+ 57, 56, 55, 55, 55, 55, 88, 92, 97, 101, 105, 100, 95, 90, 85, 82, 79,
+ 75, 72, 70, 68, 67, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53,
+ 53, 53, 53, 87, 91, 95, 100, 104, 100, 95, 90, 85, 82, 78, 75, 71, 69,
+ 67, 65, 63, 62, 61, 60, 58, 58, 57, 56, 55, 54, 53, 52, 52, 52, 52, 52,
+ 84, 89, 93, 97, 102, 97, 93, 89, 84, 81, 77, 74, 70, 68, 66, 64, 62, 61,
+ 59, 58, 57, 56, 55, 54, 53, 53, 52, 51, 50, 50, 50, 50, 82, 86, 91, 95,
+ 99, 95, 91, 87, 83, 80, 76, 73, 69, 67, 65, 63, 61, 59, 58, 57, 56, 55,
+ 54, 53, 52, 51, 50, 50, 49, 49, 49, 49, 79, 84, 88, 93, 97, 93, 89, 86,
+ 82, 79, 75, 72, 68, 66, 64, 62, 60, 58, 57, 56, 54, 53, 52, 51, 50, 50,
+ 49, 48, 47, 47, 47, 47, 77, 81, 86, 90, 94, 91, 88, 84, 81, 78, 74, 71,
+ 68, 65, 63, 61, 58, 57, 56, 54, 53, 52, 51, 50, 49, 48, 47, 47, 46, 46,
+ 46, 46, 75, 79, 83, 87, 92, 88, 85, 82, 79, 76, 73, 70, 67, 64, 62, 60,
+ 58, 56, 55, 53, 52, 51, 50, 49, 48, 47, 46, 46, 45, 45, 45, 45, 72, 76,
+ 80, 85, 89, 86, 83, 80, 77, 74, 71, 69, 66, 63, 61, 59, 57, 55, 54, 52,
+ 51, 50, 49, 48, 47, 46, 45, 45, 44, 44, 44, 44, 70, 74, 78, 82, 86, 83,
+ 81, 78, 76, 73, 70, 67, 65, 62, 60, 58, 56, 54, 53, 51, 50, 49, 48, 47,
+ 46, 45, 44, 44, 43, 43, 43, 43, 67, 71, 75, 79, 83, 81, 79, 76, 74, 71,
+ 69, 66, 64, 61, 59, 57, 55, 53, 52, 50, 49, 48, 47, 46, 45, 44, 43, 43,
+ 42, 42, 42, 42, 65, 69, 73, 77, 81, 78, 76, 74, 72, 70, 67, 65, 62, 60,
+ 58, 56, 54, 53, 51, 50, 48, 47, 46, 45, 44, 43, 43, 42, 41, 41, 41, 41,
+ 63, 67, 71, 74, 78, 76, 74, 72, 70, 68, 66, 63, 61, 59, 57, 55, 53, 52,
+ 50, 49, 47, 46, 45, 44, 43, 43, 42, 41, 40, 40, 40, 40, 61, 65, 68, 72,
+ 75, 74, 72, 70, 68, 66, 64, 62, 60, 58, 56, 54, 52, 51, 50, 48, 47, 46,
+ 45, 44, 43, 42, 41, 40, 40, 40, 40, 40, 59, 62, 66, 69, 73, 71, 70, 68,
+ 66, 64, 63, 61, 59, 57, 55, 53, 52, 50, 49, 47, 46, 45, 44, 43, 42, 41,
+ 40, 40, 39, 39, 39, 39, 59, 62, 66, 69, 73, 71, 70, 68, 66, 64, 63, 61,
+ 59, 57, 55, 53, 52, 50, 49, 47, 46, 45, 44, 43, 42, 41, 40, 40, 39, 39,
+ 39, 39, 59, 62, 66, 69, 73, 71, 70, 68, 66, 64, 63, 61, 59, 57, 55, 53,
+ 52, 50, 49, 47, 46, 45, 44, 43, 42, 41, 40, 40, 39, 39, 39, 39, 59, 62,
+ 66, 69, 73, 71, 70, 68, 66, 64, 63, 61, 59, 57, 55, 53, 52, 50, 49, 47,
+ 46, 45, 44, 43, 42, 41, 40, 40, 39, 39, 39, 39 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 59, 37, 24, 59, 40, 29, 23, 37, 29, 22, 19, 24, 23, 19, 16,
+ /* Size 8 */
+ 64, 84, 78, 60, 45, 36, 29, 25, 84, 74, 77, 66, 52, 41, 34, 28, 78, 77,
+ 56, 48, 42, 36, 30, 27, 60, 66, 48, 39, 34, 30, 27, 24, 45, 52, 42, 34,
+ 29, 26, 24, 22, 36, 41, 36, 30, 26, 23, 22, 21, 29, 34, 30, 27, 24, 22,
+ 20, 19, 25, 28, 27, 24, 22, 21, 19, 19,
+ /* Size 16 */
+ 64, 74, 84, 81, 78, 69, 60, 53, 45, 41, 36, 33, 29, 27, 25, 25, 74, 77,
+ 79, 78, 77, 70, 63, 56, 49, 44, 39, 35, 32, 29, 27, 27, 84, 79, 74, 75,
+ 77, 71, 66, 59, 52, 47, 41, 38, 34, 31, 28, 28, 81, 78, 75, 71, 66, 62,
+ 57, 52, 47, 43, 38, 35, 32, 30, 28, 28, 78, 77, 77, 66, 56, 52, 48, 45,
+ 42, 39, 36, 33, 30, 29, 27, 27, 69, 70, 71, 62, 52, 48, 43, 40, 38, 35,
+ 33, 31, 29, 27, 25, 25, 60, 63, 66, 57, 48, 43, 39, 36, 34, 32, 30, 28,
+ 27, 26, 24, 24, 53, 56, 59, 52, 45, 40, 36, 34, 31, 30, 28, 27, 25, 24,
+ 23, 23, 45, 49, 52, 47, 42, 38, 34, 31, 29, 27, 26, 25, 24, 23, 22, 22,
+ 41, 44, 47, 43, 39, 35, 32, 30, 27, 26, 25, 24, 23, 22, 21, 21, 36, 39,
+ 41, 38, 36, 33, 30, 28, 26, 25, 23, 23, 22, 21, 21, 21, 33, 35, 38, 35,
+ 33, 31, 28, 27, 25, 24, 23, 22, 21, 21, 20, 20, 29, 32, 34, 32, 30, 29,
+ 27, 25, 24, 23, 22, 21, 20, 20, 19, 19, 27, 29, 31, 30, 29, 27, 26, 24,
+ 23, 22, 21, 21, 20, 20, 19, 19, 25, 27, 28, 28, 27, 25, 24, 23, 22, 21,
+ 21, 20, 19, 19, 19, 19, 25, 27, 28, 28, 27, 25, 24, 23, 22, 21, 21, 20,
+ 19, 19, 19, 19,
+ /* Size 32 */
+ 64, 69, 74, 79, 84, 83, 81, 80, 78, 73, 69, 64, 60, 56, 53, 49, 45, 43,
+ 41, 38, 36, 34, 33, 31, 29, 28, 27, 26, 25, 25, 25, 25, 69, 72, 75, 78,
+ 82, 81, 80, 79, 78, 74, 70, 66, 61, 58, 54, 51, 47, 45, 42, 40, 37, 35,
+ 34, 32, 30, 29, 28, 27, 26, 26, 26, 26, 74, 75, 77, 78, 79, 79, 78, 78,
+ 77, 74, 70, 67, 63, 59, 56, 52, 49, 46, 44, 41, 39, 37, 35, 33, 32, 30,
+ 29, 28, 27, 27, 27, 27, 79, 78, 78, 77, 76, 77, 77, 77, 77, 74, 71, 68,
+ 65, 61, 58, 54, 51, 48, 45, 43, 40, 38, 36, 35, 33, 31, 30, 29, 28, 28,
+ 28, 28, 84, 82, 79, 76, 74, 75, 75, 76, 77, 74, 71, 69, 66, 63, 59, 56,
+ 52, 50, 47, 44, 41, 40, 38, 36, 34, 32, 31, 30, 28, 28, 28, 28, 83, 81,
+ 79, 77, 75, 74, 73, 72, 72, 69, 67, 64, 62, 59, 56, 53, 50, 47, 45, 42,
+ 40, 38, 36, 35, 33, 32, 30, 29, 28, 28, 28, 28, 81, 80, 78, 77, 75, 73,
+ 71, 69, 66, 64, 62, 59, 57, 55, 52, 50, 47, 45, 43, 41, 38, 37, 35, 34,
+ 32, 31, 30, 29, 28, 28, 28, 28, 80, 79, 78, 77, 76, 72, 69, 65, 61, 59,
+ 57, 55, 53, 51, 48, 46, 44, 43, 41, 39, 37, 36, 34, 33, 31, 30, 29, 28,
+ 27, 27, 27, 27, 78, 78, 77, 77, 77, 72, 66, 61, 56, 54, 52, 50, 48, 47,
+ 45, 43, 42, 40, 39, 37, 36, 34, 33, 32, 30, 29, 29, 28, 27, 27, 27, 27,
+ 73, 74, 74, 74, 74, 69, 64, 59, 54, 52, 50, 48, 46, 44, 43, 41, 40, 38,
+ 37, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 26, 26, 26, 69, 70, 70, 71,
+ 71, 67, 62, 57, 52, 50, 48, 46, 43, 42, 40, 39, 38, 36, 35, 34, 33, 32,
+ 31, 30, 29, 28, 27, 26, 25, 25, 25, 25, 64, 66, 67, 68, 69, 64, 59, 55,
+ 50, 48, 46, 43, 41, 40, 38, 37, 36, 35, 33, 32, 31, 30, 30, 29, 28, 27,
+ 26, 26, 25, 25, 25, 25, 60, 61, 63, 65, 66, 62, 57, 53, 48, 46, 43, 41,
+ 39, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 28, 27, 26, 26, 25, 24, 24,
+ 24, 24, 56, 58, 59, 61, 63, 59, 55, 51, 47, 44, 42, 40, 37, 36, 35, 34,
+ 32, 32, 31, 30, 29, 28, 27, 27, 26, 25, 25, 24, 24, 24, 24, 24, 53, 54,
+ 56, 58, 59, 56, 52, 48, 45, 43, 40, 38, 36, 35, 34, 32, 31, 30, 30, 29,
+ 28, 27, 27, 26, 25, 25, 24, 24, 23, 23, 23, 23, 49, 51, 52, 54, 56, 53,
+ 50, 46, 43, 41, 39, 37, 35, 34, 32, 31, 30, 29, 28, 28, 27, 26, 26, 25,
+ 25, 24, 24, 23, 23, 23, 23, 23, 45, 47, 49, 51, 52, 50, 47, 44, 42, 40,
+ 38, 36, 34, 32, 31, 30, 29, 28, 27, 27, 26, 25, 25, 24, 24, 23, 23, 23,
+ 22, 22, 22, 22, 43, 45, 46, 48, 50, 47, 45, 43, 40, 38, 36, 35, 33, 32,
+ 30, 29, 28, 27, 27, 26, 25, 25, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22,
+ 41, 42, 44, 45, 47, 45, 43, 41, 39, 37, 35, 33, 32, 31, 30, 28, 27, 27,
+ 26, 25, 25, 24, 24, 23, 23, 22, 22, 22, 21, 21, 21, 21, 38, 40, 41, 43,
+ 44, 42, 41, 39, 37, 35, 34, 32, 31, 30, 29, 28, 27, 26, 25, 25, 24, 24,
+ 23, 23, 22, 22, 22, 21, 21, 21, 21, 21, 36, 37, 39, 40, 41, 40, 38, 37,
+ 36, 34, 33, 31, 30, 29, 28, 27, 26, 25, 25, 24, 23, 23, 23, 22, 22, 22,
+ 21, 21, 21, 21, 21, 21, 34, 35, 37, 38, 40, 38, 37, 36, 34, 33, 32, 30,
+ 29, 28, 27, 26, 25, 25, 24, 24, 23, 23, 22, 22, 21, 21, 21, 21, 20, 20,
+ 20, 20, 33, 34, 35, 36, 38, 36, 35, 34, 33, 32, 31, 30, 28, 27, 27, 26,
+ 25, 24, 24, 23, 23, 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 20, 31, 32,
+ 33, 35, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 24, 23, 23,
+ 22, 22, 22, 21, 21, 21, 20, 20, 20, 20, 20, 20, 29, 30, 32, 33, 34, 33,
+ 32, 31, 30, 30, 29, 28, 27, 26, 25, 25, 24, 23, 23, 22, 22, 21, 21, 21,
+ 20, 20, 20, 20, 19, 19, 19, 19, 28, 29, 30, 31, 32, 32, 31, 30, 29, 29,
+ 28, 27, 26, 25, 25, 24, 23, 23, 22, 22, 22, 21, 21, 21, 20, 20, 20, 19,
+ 19, 19, 19, 19, 27, 28, 29, 30, 31, 30, 30, 29, 29, 28, 27, 26, 26, 25,
+ 24, 24, 23, 23, 22, 22, 21, 21, 21, 20, 20, 20, 20, 19, 19, 19, 19, 19,
+ 26, 27, 28, 29, 30, 29, 29, 28, 28, 27, 26, 26, 25, 24, 24, 23, 23, 22,
+ 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 25, 26, 27, 28,
+ 28, 28, 28, 27, 27, 26, 25, 25, 24, 24, 23, 23, 22, 22, 21, 21, 21, 20,
+ 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 25, 26, 27, 28, 28, 28, 28, 27,
+ 27, 26, 25, 25, 24, 24, 23, 23, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19,
+ 19, 19, 19, 19, 19, 19, 25, 26, 27, 28, 28, 28, 28, 27, 27, 26, 25, 25,
+ 24, 24, 23, 23, 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19,
+ 19, 19, 25, 26, 27, 28, 28, 28, 28, 27, 27, 26, 25, 25, 24, 24, 23, 23,
+ 22, 22, 21, 21, 21, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 171, 157, 95, 62, 157, 104, 75, 57, 95, 75, 54, 45, 62, 57, 45, 39,
+ /* Size 8 */
+ 144, 192, 177, 135, 101, 78, 63, 53, 192, 167, 174, 149, 117, 91, 73,
+ 61, 177, 174, 125, 107, 92, 77, 65, 56, 135, 149, 107, 84, 73, 64, 57,
+ 51, 101, 117, 92, 73, 62, 55, 50, 46, 78, 91, 77, 64, 55, 49, 45, 42,
+ 63, 73, 65, 57, 50, 45, 42, 39, 53, 61, 56, 51, 46, 42, 39, 38,
+ /* Size 16 */
+ 149, 174, 199, 191, 184, 162, 140, 122, 104, 92, 81, 73, 65, 60, 55, 55,
+ 174, 180, 186, 184, 182, 165, 147, 130, 113, 100, 88, 79, 70, 65, 59,
+ 59, 199, 186, 173, 177, 181, 168, 154, 138, 121, 108, 95, 85, 76, 69,
+ 63, 63, 191, 184, 177, 166, 155, 144, 133, 120, 108, 98, 87, 80, 72, 66,
+ 61, 61, 184, 182, 181, 155, 129, 120, 111, 103, 95, 88, 80, 74, 68, 63,
+ 58, 58, 162, 165, 168, 144, 120, 110, 99, 92, 85, 79, 73, 68, 63, 59,
+ 55, 55, 140, 147, 154, 133, 111, 99, 88, 81, 75, 71, 66, 63, 59, 56, 53,
+ 53, 122, 130, 138, 120, 103, 92, 81, 75, 70, 66, 62, 58, 55, 53, 50, 50,
+ 104, 113, 121, 108, 95, 85, 75, 70, 64, 60, 57, 54, 52, 50, 48, 48, 92,
+ 100, 108, 98, 88, 79, 71, 66, 60, 57, 54, 51, 49, 47, 46, 46, 81, 88,
+ 95, 87, 80, 73, 66, 62, 57, 54, 51, 49, 47, 45, 44, 44, 73, 79, 85, 80,
+ 74, 68, 63, 58, 54, 51, 49, 47, 45, 44, 42, 42, 65, 70, 76, 72, 68, 63,
+ 59, 55, 52, 49, 47, 45, 43, 42, 41, 41, 60, 65, 69, 66, 63, 59, 56, 53,
+ 50, 47, 45, 44, 42, 41, 40, 40, 55, 59, 63, 61, 58, 55, 53, 50, 48, 46,
+ 44, 42, 41, 40, 39, 39, 55, 59, 63, 61, 58, 55, 53, 50, 48, 46, 44, 42,
+ 41, 40, 39, 39,
+ /* Size 32 */
+ 152, 165, 177, 190, 202, 198, 195, 191, 187, 176, 165, 153, 142, 133,
+ 124, 115, 106, 100, 94, 88, 82, 78, 74, 70, 66, 64, 61, 58, 56, 56, 56,
+ 56, 165, 173, 180, 188, 196, 193, 191, 189, 186, 176, 166, 156, 146,
+ 137, 128, 119, 111, 104, 98, 92, 86, 82, 77, 73, 69, 66, 63, 61, 58, 58,
+ 58, 58, 177, 180, 183, 186, 189, 188, 187, 187, 186, 177, 168, 159, 150,
+ 141, 132, 124, 115, 108, 102, 96, 89, 85, 80, 76, 72, 69, 66, 63, 60,
+ 60, 60, 60, 190, 188, 186, 185, 183, 183, 184, 184, 185, 177, 169, 161,
+ 154, 145, 136, 128, 119, 112, 106, 99, 93, 88, 84, 79, 74, 71, 68, 65,
+ 62, 62, 62, 62, 202, 196, 189, 183, 176, 178, 180, 182, 184, 177, 171,
+ 164, 157, 149, 140, 132, 123, 117, 110, 103, 96, 92, 87, 82, 77, 74, 71,
+ 67, 64, 64, 64, 64, 198, 193, 188, 183, 178, 177, 175, 173, 171, 165,
+ 159, 152, 146, 139, 131, 124, 117, 111, 105, 99, 93, 88, 84, 80, 75, 72,
+ 69, 66, 63, 63, 63, 63, 195, 191, 187, 184, 180, 175, 169, 164, 158,
+ 152, 147, 141, 135, 129, 123, 116, 110, 105, 99, 94, 89, 85, 81, 77, 73,
+ 70, 67, 65, 62, 62, 62, 62, 191, 189, 187, 184, 182, 173, 164, 154, 145,
+ 140, 134, 129, 124, 119, 114, 109, 103, 99, 94, 90, 85, 82, 78, 75, 71,
+ 68, 66, 63, 61, 61, 61, 61, 187, 186, 186, 185, 184, 171, 158, 145, 132,
+ 127, 122, 118, 113, 109, 105, 101, 97, 93, 89, 85, 82, 78, 75, 72, 69,
+ 67, 64, 62, 59, 59, 59, 59, 176, 176, 177, 177, 177, 165, 152, 140, 127,
+ 122, 117, 112, 107, 103, 99, 96, 92, 88, 85, 81, 78, 75, 72, 70, 67, 64,
+ 62, 60, 58, 58, 58, 58, 165, 166, 168, 169, 171, 159, 147, 134, 122,
+ 117, 112, 106, 101, 97, 94, 90, 87, 84, 81, 78, 75, 72, 69, 67, 64, 62,
+ 60, 58, 56, 56, 56, 56, 153, 156, 159, 161, 164, 152, 141, 129, 118,
+ 112, 106, 101, 95, 92, 88, 85, 82, 79, 76, 74, 71, 69, 67, 64, 62, 60,
+ 59, 57, 55, 55, 55, 55, 142, 146, 150, 154, 157, 146, 135, 124, 113,
+ 107, 101, 95, 89, 86, 83, 80, 77, 74, 72, 70, 68, 66, 64, 62, 60, 58,
+ 57, 55, 54, 54, 54, 54, 133, 137, 141, 145, 149, 139, 129, 119, 109,
+ 103, 97, 92, 86, 83, 80, 77, 74, 72, 69, 67, 65, 63, 62, 60, 58, 57, 55,
+ 54, 52, 52, 52, 52, 124, 128, 132, 136, 140, 131, 123, 114, 105, 99, 94,
+ 88, 83, 80, 77, 74, 71, 69, 67, 65, 63, 61, 59, 58, 56, 55, 54, 52, 51,
+ 51, 51, 51, 115, 119, 124, 128, 132, 124, 116, 109, 101, 96, 90, 85, 80,
+ 77, 74, 71, 68, 66, 64, 62, 60, 59, 57, 56, 54, 53, 52, 51, 50, 50, 50,
+ 50, 106, 111, 115, 119, 123, 117, 110, 103, 97, 92, 87, 82, 77, 74, 71,
+ 68, 65, 63, 61, 60, 58, 57, 55, 54, 53, 52, 51, 50, 48, 48, 48, 48, 100,
+ 104, 108, 112, 117, 111, 105, 99, 93, 88, 84, 79, 74, 72, 69, 66, 63,
+ 61, 60, 58, 56, 55, 54, 53, 51, 50, 49, 48, 47, 47, 47, 47, 94, 98, 102,
+ 106, 110, 105, 99, 94, 89, 85, 81, 76, 72, 69, 67, 64, 61, 60, 58, 56,
+ 55, 54, 52, 51, 50, 49, 48, 47, 46, 46, 46, 46, 88, 92, 96, 99, 103, 99,
+ 94, 90, 85, 81, 78, 74, 70, 67, 65, 62, 60, 58, 56, 55, 53, 52, 51, 50,
+ 49, 48, 47, 46, 46, 46, 46, 46, 82, 86, 89, 93, 96, 93, 89, 85, 82, 78,
+ 75, 71, 68, 65, 63, 60, 58, 56, 55, 53, 52, 51, 50, 49, 48, 47, 46, 45,
+ 45, 45, 45, 45, 78, 82, 85, 88, 92, 88, 85, 82, 78, 75, 72, 69, 66, 63,
+ 61, 59, 57, 55, 54, 52, 51, 50, 49, 48, 47, 46, 45, 45, 44, 44, 44, 44,
+ 74, 77, 80, 84, 87, 84, 81, 78, 75, 72, 69, 67, 64, 62, 59, 57, 55, 54,
+ 52, 51, 50, 49, 48, 47, 46, 45, 44, 44, 43, 43, 43, 43, 70, 73, 76, 79,
+ 82, 80, 77, 75, 72, 70, 67, 64, 62, 60, 58, 56, 54, 53, 51, 50, 49, 48,
+ 47, 46, 45, 44, 44, 43, 42, 42, 42, 42, 66, 69, 72, 74, 77, 75, 73, 71,
+ 69, 67, 64, 62, 60, 58, 56, 54, 53, 51, 50, 49, 48, 47, 46, 45, 44, 44,
+ 43, 42, 42, 42, 42, 42, 64, 66, 69, 71, 74, 72, 70, 68, 67, 64, 62, 60,
+ 58, 57, 55, 53, 52, 50, 49, 48, 47, 46, 45, 44, 44, 43, 42, 42, 41, 41,
+ 41, 41, 61, 63, 66, 68, 71, 69, 67, 66, 64, 62, 60, 59, 57, 55, 54, 52,
+ 51, 49, 48, 47, 46, 45, 44, 44, 43, 42, 42, 41, 41, 41, 41, 41, 58, 61,
+ 63, 65, 67, 66, 65, 63, 62, 60, 58, 57, 55, 54, 52, 51, 50, 48, 47, 46,
+ 45, 45, 44, 43, 42, 42, 41, 41, 40, 40, 40, 40, 56, 58, 60, 62, 64, 63,
+ 62, 61, 59, 58, 56, 55, 54, 52, 51, 50, 48, 47, 46, 46, 45, 44, 43, 42,
+ 42, 41, 41, 40, 40, 40, 40, 40, 56, 58, 60, 62, 64, 63, 62, 61, 59, 58,
+ 56, 55, 54, 52, 51, 50, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 41, 40,
+ 40, 40, 40, 40, 56, 58, 60, 62, 64, 63, 62, 61, 59, 58, 56, 55, 54, 52,
+ 51, 50, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 41, 40, 40, 40, 40, 40,
+ 56, 58, 60, 62, 64, 63, 62, 61, 59, 58, 56, 55, 54, 52, 51, 50, 48, 47,
+ 46, 46, 45, 44, 43, 42, 42, 41, 41, 40, 40, 40, 40, 40 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 41, 38, 31, 41, 35, 33, 29, 38, 33, 26, 23, 31, 29, 23, 20,
+ /* Size 8 */
+ 64, 79, 45, 42, 40, 36, 32, 29, 79, 55, 44, 48, 47, 43, 38, 34, 45, 44,
+ 38, 39, 39, 37, 35, 32, 42, 48, 39, 35, 33, 32, 31, 29, 40, 47, 39, 33,
+ 30, 28, 27, 26, 36, 43, 37, 32, 28, 26, 25, 23, 32, 38, 35, 31, 27, 25,
+ 23, 22, 29, 34, 32, 29, 26, 23, 22, 21,
+ /* Size 16 */
+ 64, 71, 79, 62, 45, 43, 42, 41, 40, 38, 36, 34, 32, 30, 29, 29, 71, 69,
+ 67, 56, 44, 45, 45, 44, 43, 41, 39, 37, 35, 33, 31, 31, 79, 67, 55, 50,
+ 44, 46, 48, 47, 47, 45, 43, 41, 38, 36, 34, 34, 62, 56, 50, 45, 41, 42,
+ 44, 43, 43, 42, 40, 38, 37, 35, 33, 33, 45, 44, 44, 41, 38, 39, 39, 39,
+ 39, 38, 37, 36, 35, 33, 32, 32, 43, 45, 46, 42, 39, 38, 37, 37, 36, 36,
+ 35, 34, 33, 31, 30, 30, 42, 45, 48, 44, 39, 37, 35, 34, 33, 33, 32, 31,
+ 31, 30, 29, 29, 41, 44, 47, 43, 39, 37, 34, 33, 32, 31, 30, 30, 29, 28,
+ 27, 27, 40, 43, 47, 43, 39, 36, 33, 32, 30, 29, 28, 28, 27, 26, 26, 26,
+ 38, 41, 45, 42, 38, 36, 33, 31, 29, 28, 27, 27, 26, 25, 25, 25, 36, 39,
+ 43, 40, 37, 35, 32, 30, 28, 27, 26, 25, 25, 24, 23, 23, 34, 37, 41, 38,
+ 36, 34, 31, 30, 28, 27, 25, 25, 24, 23, 23, 23, 32, 35, 38, 37, 35, 33,
+ 31, 29, 27, 26, 25, 24, 23, 22, 22, 22, 30, 33, 36, 35, 33, 31, 30, 28,
+ 26, 25, 24, 23, 22, 22, 21, 21, 29, 31, 34, 33, 32, 30, 29, 27, 26, 25,
+ 23, 23, 22, 21, 21, 21, 29, 31, 34, 33, 32, 30, 29, 27, 26, 25, 23, 23,
+ 22, 21, 21, 21,
+ /* Size 32 */
+ 64, 68, 71, 75, 79, 70, 62, 53, 45, 44, 43, 43, 42, 42, 41, 40, 40, 39,
+ 38, 37, 36, 35, 34, 33, 32, 31, 30, 30, 29, 29, 29, 29, 68, 69, 70, 72,
+ 73, 66, 59, 52, 44, 44, 44, 44, 44, 43, 43, 42, 42, 41, 40, 39, 38, 37,
+ 36, 35, 34, 33, 32, 31, 30, 30, 30, 30, 71, 70, 69, 68, 67, 61, 56, 50,
+ 44, 44, 45, 45, 45, 45, 44, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34,
+ 33, 32, 31, 31, 31, 31, 75, 72, 68, 65, 61, 57, 53, 48, 44, 45, 45, 46,
+ 47, 46, 46, 45, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 33,
+ 33, 33, 79, 73, 67, 61, 55, 52, 50, 47, 44, 45, 46, 47, 48, 48, 47, 47,
+ 47, 46, 45, 44, 43, 42, 41, 40, 38, 37, 36, 35, 34, 34, 34, 34, 70, 66,
+ 61, 57, 52, 50, 47, 45, 42, 43, 44, 45, 46, 46, 45, 45, 45, 44, 43, 42,
+ 42, 41, 39, 38, 37, 36, 36, 35, 34, 34, 34, 34, 62, 59, 56, 53, 50, 47,
+ 45, 43, 41, 42, 42, 43, 44, 43, 43, 43, 43, 42, 42, 41, 40, 39, 38, 37,
+ 37, 36, 35, 34, 33, 33, 33, 33, 53, 52, 50, 48, 47, 45, 43, 41, 39, 40,
+ 40, 41, 41, 41, 41, 41, 41, 41, 40, 39, 39, 38, 37, 36, 36, 35, 34, 33,
+ 32, 32, 32, 32, 45, 44, 44, 44, 44, 42, 41, 39, 38, 38, 39, 39, 39, 39,
+ 39, 39, 39, 39, 38, 38, 37, 37, 36, 35, 35, 34, 33, 32, 32, 32, 32, 32,
+ 44, 44, 44, 45, 45, 43, 42, 40, 38, 38, 38, 38, 38, 38, 38, 38, 38, 37,
+ 37, 37, 36, 36, 35, 34, 34, 33, 32, 32, 31, 31, 31, 31, 43, 44, 45, 45,
+ 46, 44, 42, 40, 39, 38, 38, 37, 37, 37, 37, 37, 36, 36, 36, 35, 35, 34,
+ 34, 33, 33, 32, 31, 31, 30, 30, 30, 30, 43, 44, 45, 46, 47, 45, 43, 41,
+ 39, 38, 37, 37, 36, 36, 35, 35, 35, 35, 34, 34, 34, 33, 33, 32, 32, 31,
+ 30, 30, 29, 29, 29, 29, 42, 44, 45, 47, 48, 46, 44, 41, 39, 38, 37, 36,
+ 35, 35, 34, 34, 33, 33, 33, 33, 32, 32, 31, 31, 31, 30, 30, 29, 29, 29,
+ 29, 29, 42, 43, 45, 46, 48, 46, 43, 41, 39, 38, 37, 36, 35, 34, 34, 33,
+ 33, 32, 32, 32, 31, 31, 30, 30, 30, 29, 29, 28, 28, 28, 28, 28, 41, 43,
+ 44, 46, 47, 45, 43, 41, 39, 38, 37, 35, 34, 34, 33, 32, 32, 31, 31, 31,
+ 30, 30, 30, 29, 29, 28, 28, 28, 27, 27, 27, 27, 40, 42, 44, 45, 47, 45,
+ 43, 41, 39, 38, 37, 35, 34, 33, 32, 32, 31, 31, 30, 30, 29, 29, 29, 28,
+ 28, 28, 27, 27, 26, 26, 26, 26, 40, 42, 43, 45, 47, 45, 43, 41, 39, 38,
+ 36, 35, 33, 33, 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 27, 26, 26,
+ 26, 26, 26, 26, 39, 41, 42, 44, 46, 44, 42, 41, 39, 37, 36, 35, 33, 32,
+ 31, 31, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 26, 25, 25, 25, 25, 25,
+ 38, 40, 41, 43, 45, 43, 42, 40, 38, 37, 36, 34, 33, 32, 31, 30, 29, 29,
+ 28, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 25, 37, 39, 40, 42,
+ 44, 42, 41, 39, 38, 37, 35, 34, 33, 32, 31, 30, 29, 28, 28, 27, 27, 26,
+ 26, 26, 25, 25, 25, 24, 24, 24, 24, 24, 36, 38, 39, 41, 43, 42, 40, 39,
+ 37, 36, 35, 34, 32, 31, 30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 25, 24,
+ 24, 24, 23, 23, 23, 23, 35, 37, 38, 40, 42, 41, 39, 38, 37, 36, 34, 33,
+ 32, 31, 30, 29, 28, 28, 27, 26, 26, 25, 25, 25, 24, 24, 24, 23, 23, 23,
+ 23, 23, 34, 36, 37, 39, 41, 39, 38, 37, 36, 35, 34, 33, 31, 30, 30, 29,
+ 28, 27, 27, 26, 25, 25, 25, 24, 24, 24, 23, 23, 23, 23, 23, 23, 33, 35,
+ 36, 38, 40, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 27, 26, 26,
+ 25, 25, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 32, 34, 35, 37, 38, 37,
+ 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 26, 25, 25, 24, 24, 23,
+ 23, 23, 22, 22, 22, 22, 22, 22, 31, 33, 34, 36, 37, 36, 36, 35, 34, 33,
+ 32, 31, 30, 29, 28, 28, 27, 26, 26, 25, 24, 24, 24, 23, 23, 22, 22, 22,
+ 22, 22, 22, 22, 30, 32, 33, 35, 36, 36, 35, 34, 33, 32, 31, 30, 30, 29,
+ 28, 27, 26, 26, 25, 25, 24, 24, 23, 23, 22, 22, 22, 22, 21, 21, 21, 21,
+ 30, 31, 32, 34, 35, 35, 34, 33, 32, 32, 31, 30, 29, 28, 28, 27, 26, 25,
+ 25, 24, 24, 23, 23, 23, 22, 22, 22, 21, 21, 21, 21, 21, 29, 30, 31, 33,
+ 34, 34, 33, 32, 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, 25, 24, 23, 23,
+ 23, 22, 22, 22, 21, 21, 21, 21, 21, 21, 29, 30, 31, 33, 34, 34, 33, 32,
+ 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, 25, 24, 23, 23, 23, 22, 22, 22,
+ 21, 21, 21, 21, 21, 21, 29, 30, 31, 33, 34, 34, 33, 32, 32, 31, 30, 29,
+ 29, 28, 27, 26, 26, 25, 25, 24, 23, 23, 23, 22, 22, 22, 21, 21, 21, 21,
+ 21, 21, 29, 30, 31, 33, 34, 34, 33, 32, 32, 31, 30, 29, 29, 28, 27, 26,
+ 26, 25, 25, 24, 23, 23, 23, 22, 22, 22, 21, 21, 21, 21, 21, 21 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 142, 90, 83, 66, 90, 75, 71, 62, 83, 71, 55, 49, 66, 62, 49, 41,
+ /* Size 8 */
+ 132, 163, 91, 85, 80, 72, 64, 57, 163, 113, 89, 98, 95, 87, 77, 68, 91,
+ 89, 76, 79, 79, 75, 69, 63, 85, 98, 79, 70, 67, 64, 60, 56, 80, 95, 79,
+ 67, 60, 56, 53, 50, 72, 87, 75, 64, 56, 51, 48, 45, 64, 77, 69, 60, 53,
+ 48, 44, 42, 57, 68, 63, 56, 50, 45, 42, 39,
+ /* Size 16 */
+ 136, 152, 168, 130, 93, 90, 88, 85, 82, 78, 74, 70, 65, 62, 58, 58, 152,
+ 147, 142, 117, 92, 93, 94, 92, 90, 86, 81, 77, 72, 68, 64, 64, 168, 142,
+ 116, 104, 91, 96, 101, 99, 98, 93, 89, 84, 79, 75, 70, 70, 130, 117,
+ 104, 94, 85, 88, 91, 90, 90, 86, 83, 79, 75, 71, 67, 67, 93, 92, 91, 85,
+ 78, 80, 81, 81, 81, 79, 77, 74, 71, 68, 65, 65, 90, 93, 96, 88, 80, 78,
+ 76, 76, 75, 73, 71, 69, 67, 64, 61, 61, 88, 94, 101, 91, 81, 76, 72, 70,
+ 68, 67, 66, 64, 62, 60, 58, 58, 85, 92, 99, 90, 81, 76, 70, 68, 65, 63,
+ 62, 60, 58, 56, 55, 55, 82, 90, 98, 90, 81, 75, 68, 65, 61, 60, 58, 56,
+ 54, 53, 51, 51, 78, 86, 93, 86, 79, 73, 67, 63, 60, 57, 55, 53, 52, 50,
+ 49, 49, 74, 81, 89, 83, 77, 71, 66, 62, 58, 55, 52, 51, 49, 48, 47, 47,
+ 70, 77, 84, 79, 74, 69, 64, 60, 56, 53, 51, 49, 47, 46, 45, 45, 65, 72,
+ 79, 75, 71, 67, 62, 58, 54, 52, 49, 47, 46, 44, 43, 43, 62, 68, 75, 71,
+ 68, 64, 60, 56, 53, 50, 48, 46, 44, 43, 42, 42, 58, 64, 70, 67, 65, 61,
+ 58, 55, 51, 49, 47, 45, 43, 42, 40, 40, 58, 64, 70, 67, 65, 61, 58, 55,
+ 51, 49, 47, 45, 43, 42, 40, 40,
+ /* Size 32 */
+ 137, 146, 154, 162, 170, 151, 132, 113, 94, 93, 92, 90, 89, 88, 86, 85,
+ 83, 81, 79, 77, 75, 73, 71, 68, 66, 64, 63, 61, 59, 59, 59, 59, 146,
+ 148, 151, 154, 157, 141, 125, 110, 94, 93, 93, 93, 92, 91, 90, 89, 87,
+ 85, 83, 81, 79, 76, 74, 72, 70, 68, 66, 64, 62, 62, 62, 62, 154, 151,
+ 149, 146, 144, 131, 119, 106, 93, 94, 94, 95, 96, 94, 93, 92, 91, 89,
+ 87, 85, 83, 80, 78, 76, 73, 71, 69, 67, 65, 65, 65, 65, 162, 154, 146,
+ 139, 131, 122, 112, 103, 93, 94, 96, 97, 99, 98, 97, 96, 95, 93, 91, 89,
+ 86, 84, 82, 79, 77, 75, 72, 70, 68, 68, 68, 68, 170, 157, 144, 131, 118,
+ 112, 105, 99, 93, 95, 97, 100, 102, 101, 101, 100, 99, 97, 95, 93, 90,
+ 88, 85, 83, 80, 78, 76, 73, 71, 71, 71, 71, 151, 141, 131, 122, 112,
+ 106, 100, 95, 89, 91, 93, 95, 97, 97, 96, 95, 95, 93, 91, 89, 87, 85,
+ 83, 81, 78, 76, 74, 72, 70, 70, 70, 70, 132, 125, 119, 112, 105, 100,
+ 96, 91, 86, 87, 89, 91, 92, 92, 91, 91, 91, 89, 88, 86, 84, 82, 80, 78,
+ 76, 74, 72, 70, 68, 68, 68, 68, 113, 110, 106, 103, 99, 95, 91, 87, 83,
+ 84, 85, 86, 87, 87, 87, 87, 87, 85, 84, 83, 81, 80, 78, 76, 74, 72, 70,
+ 69, 67, 67, 67, 67, 94, 94, 93, 93, 93, 89, 86, 83, 79, 80, 81, 81, 82,
+ 82, 82, 82, 82, 81, 80, 79, 78, 77, 75, 74, 72, 70, 69, 67, 65, 65, 65,
+ 65, 93, 93, 94, 94, 95, 91, 87, 84, 80, 80, 80, 80, 80, 80, 79, 79, 79,
+ 78, 77, 76, 75, 74, 73, 71, 70, 68, 67, 65, 64, 64, 64, 64, 92, 93, 94,
+ 96, 97, 93, 89, 85, 81, 80, 79, 78, 77, 77, 77, 76, 76, 75, 74, 73, 72,
+ 71, 70, 69, 67, 66, 65, 63, 62, 62, 62, 62, 90, 93, 95, 97, 100, 95, 91,
+ 86, 81, 80, 78, 77, 75, 74, 74, 73, 73, 72, 71, 70, 69, 68, 67, 66, 65,
+ 64, 63, 61, 60, 60, 60, 60, 89, 92, 96, 99, 102, 97, 92, 87, 82, 80, 77,
+ 75, 73, 72, 71, 70, 69, 69, 68, 67, 67, 66, 65, 64, 63, 62, 61, 60, 59,
+ 59, 59, 59, 88, 91, 94, 98, 101, 97, 92, 87, 82, 80, 77, 74, 72, 71, 70,
+ 69, 68, 67, 66, 65, 64, 64, 63, 62, 61, 60, 59, 58, 57, 57, 57, 57, 86,
+ 90, 93, 97, 101, 96, 91, 87, 82, 79, 77, 74, 71, 70, 68, 67, 66, 65, 64,
+ 63, 62, 62, 61, 60, 59, 58, 57, 56, 55, 55, 55, 55, 85, 89, 92, 96, 100,
+ 95, 91, 87, 82, 79, 76, 73, 70, 69, 67, 66, 64, 63, 62, 61, 60, 60, 59,
+ 58, 57, 56, 55, 55, 54, 54, 54, 54, 83, 87, 91, 95, 99, 95, 91, 87, 82,
+ 79, 76, 73, 69, 68, 66, 64, 62, 61, 60, 59, 58, 58, 57, 56, 55, 54, 54,
+ 53, 52, 52, 52, 52, 81, 85, 89, 93, 97, 93, 89, 85, 81, 78, 75, 72, 69,
+ 67, 65, 63, 61, 60, 59, 58, 57, 56, 55, 55, 54, 53, 52, 52, 51, 51, 51,
+ 51, 79, 83, 87, 91, 95, 91, 88, 84, 80, 77, 74, 71, 68, 66, 64, 62, 60,
+ 59, 58, 57, 56, 55, 54, 53, 53, 52, 51, 50, 50, 50, 50, 50, 77, 81, 85,
+ 89, 93, 89, 86, 83, 79, 76, 73, 70, 67, 65, 63, 61, 59, 58, 57, 56, 54,
+ 54, 53, 52, 51, 50, 50, 49, 48, 48, 48, 48, 75, 79, 83, 86, 90, 87, 84,
+ 81, 78, 75, 72, 69, 67, 64, 62, 60, 58, 57, 56, 54, 53, 52, 52, 51, 50,
+ 49, 48, 48, 47, 47, 47, 47, 73, 76, 80, 84, 88, 85, 82, 80, 77, 74, 71,
+ 68, 66, 64, 62, 60, 58, 56, 55, 54, 52, 52, 51, 50, 49, 48, 48, 47, 46,
+ 46, 46, 46, 71, 74, 78, 82, 85, 83, 80, 78, 75, 73, 70, 67, 65, 63, 61,
+ 59, 57, 55, 54, 53, 52, 51, 50, 49, 48, 47, 47, 46, 45, 45, 45, 45, 68,
+ 72, 76, 79, 83, 81, 78, 76, 74, 71, 69, 66, 64, 62, 60, 58, 56, 55, 53,
+ 52, 51, 50, 49, 48, 47, 46, 46, 45, 44, 44, 44, 44, 66, 70, 73, 77, 80,
+ 78, 76, 74, 72, 70, 67, 65, 63, 61, 59, 57, 55, 54, 53, 51, 50, 49, 48,
+ 47, 46, 45, 45, 44, 44, 44, 44, 44, 64, 68, 71, 75, 78, 76, 74, 72, 70,
+ 68, 66, 64, 62, 60, 58, 56, 54, 53, 52, 50, 49, 48, 47, 46, 45, 45, 44,
+ 44, 43, 43, 43, 43, 63, 66, 69, 72, 76, 74, 72, 70, 69, 67, 65, 63, 61,
+ 59, 57, 55, 54, 52, 51, 50, 48, 48, 47, 46, 45, 44, 44, 43, 42, 42, 42,
+ 42, 61, 64, 67, 70, 73, 72, 70, 69, 67, 65, 63, 61, 60, 58, 56, 55, 53,
+ 52, 50, 49, 48, 47, 46, 45, 44, 44, 43, 42, 42, 42, 42, 42, 59, 62, 65,
+ 68, 71, 70, 68, 67, 65, 64, 62, 60, 59, 57, 55, 54, 52, 51, 50, 48, 47,
+ 46, 45, 44, 44, 43, 42, 42, 41, 41, 41, 41, 59, 62, 65, 68, 71, 70, 68,
+ 67, 65, 64, 62, 60, 59, 57, 55, 54, 52, 51, 50, 48, 47, 46, 45, 44, 44,
+ 43, 42, 42, 41, 41, 41, 41, 59, 62, 65, 68, 71, 70, 68, 67, 65, 64, 62,
+ 60, 59, 57, 55, 54, 52, 51, 50, 48, 47, 46, 45, 44, 44, 43, 42, 42, 41,
+ 41, 41, 41, 59, 62, 65, 68, 71, 70, 68, 67, 65, 64, 62, 60, 59, 57, 55,
+ 54, 52, 51, 50, 48, 47, 46, 45, 44, 44, 43, 42, 42, 41, 41, 41,
+ 41 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 59, 38, 26, 59, 41, 31, 25, 38, 31, 24, 21, 26, 25, 21, 19,
+ /* Size 8 */
+ 64, 83, 77, 60, 47, 37, 31, 28, 83, 73, 76, 66, 53, 43, 36, 31, 77, 76,
+ 56, 49, 43, 37, 32, 29, 60, 66, 49, 40, 35, 32, 29, 27, 47, 53, 43, 35,
+ 31, 28, 26, 25, 37, 43, 37, 32, 28, 26, 24, 23, 31, 36, 32, 29, 26, 24,
+ 23, 22, 28, 31, 29, 27, 25, 23, 22, 21,
+ /* Size 16 */
+ 64, 73, 83, 80, 77, 69, 60, 53, 47, 42, 37, 34, 31, 30, 28, 28, 73, 76,
+ 78, 77, 77, 70, 63, 56, 50, 45, 40, 37, 34, 31, 29, 29, 83, 78, 73, 75,
+ 76, 71, 66, 59, 53, 48, 43, 39, 36, 33, 31, 31, 80, 77, 75, 70, 66, 62,
+ 58, 53, 48, 44, 40, 37, 34, 32, 30, 30, 77, 77, 76, 66, 56, 53, 49, 46,
+ 43, 40, 37, 35, 32, 31, 29, 29, 69, 70, 71, 62, 53, 49, 45, 42, 39, 37,
+ 35, 33, 31, 29, 28, 28, 60, 63, 66, 58, 49, 45, 40, 38, 35, 34, 32, 31,
+ 29, 28, 27, 27, 53, 56, 59, 53, 46, 42, 38, 35, 33, 32, 30, 29, 28, 27,
+ 26, 26, 47, 50, 53, 48, 43, 39, 35, 33, 31, 30, 28, 27, 26, 26, 25, 25,
+ 42, 45, 48, 44, 40, 37, 34, 32, 30, 28, 27, 26, 25, 25, 24, 24, 37, 40,
+ 43, 40, 37, 35, 32, 30, 28, 27, 26, 25, 24, 24, 23, 23, 34, 37, 39, 37,
+ 35, 33, 31, 29, 27, 26, 25, 24, 24, 23, 23, 23, 31, 34, 36, 34, 32, 31,
+ 29, 28, 26, 25, 24, 24, 23, 23, 22, 22, 30, 31, 33, 32, 31, 29, 28, 27,
+ 26, 25, 24, 23, 23, 22, 22, 22, 28, 29, 31, 30, 29, 28, 27, 26, 25, 24,
+ 23, 23, 22, 22, 21, 21, 28, 29, 31, 30, 29, 28, 27, 26, 25, 24, 23, 23,
+ 22, 22, 21, 21,
+ /* Size 32 */
+ 64, 69, 73, 78, 83, 81, 80, 79, 77, 73, 69, 64, 60, 57, 53, 50, 47, 44,
+ 42, 40, 37, 36, 34, 33, 31, 30, 30, 29, 28, 28, 28, 28, 69, 72, 75, 78,
+ 80, 80, 79, 78, 77, 73, 69, 65, 62, 58, 55, 52, 48, 46, 44, 41, 39, 37,
+ 36, 34, 33, 31, 30, 29, 28, 28, 28, 28, 73, 75, 76, 77, 78, 78, 77, 77,
+ 77, 73, 70, 66, 63, 60, 56, 53, 50, 47, 45, 43, 40, 38, 37, 35, 34, 32,
+ 31, 30, 29, 29, 29, 29, 78, 78, 77, 76, 76, 76, 76, 76, 76, 73, 70, 67,
+ 64, 61, 58, 55, 51, 49, 46, 44, 41, 40, 38, 36, 35, 33, 32, 31, 30, 30,
+ 30, 30, 83, 80, 78, 76, 73, 74, 75, 75, 76, 74, 71, 68, 66, 63, 59, 56,
+ 53, 50, 48, 45, 43, 41, 39, 37, 36, 34, 33, 32, 31, 31, 31, 31, 81, 80,
+ 78, 76, 74, 73, 72, 72, 71, 69, 66, 64, 62, 59, 56, 53, 51, 48, 46, 44,
+ 41, 40, 38, 36, 35, 34, 33, 31, 30, 30, 30, 30, 80, 79, 77, 76, 75, 72,
+ 70, 68, 66, 64, 62, 60, 58, 55, 53, 50, 48, 46, 44, 42, 40, 39, 37, 36,
+ 34, 33, 32, 31, 30, 30, 30, 30, 79, 78, 77, 76, 75, 72, 68, 65, 61, 59,
+ 57, 55, 53, 51, 49, 47, 46, 44, 42, 40, 39, 37, 36, 35, 33, 32, 31, 30,
+ 29, 29, 29, 29, 77, 77, 77, 76, 76, 71, 66, 61, 56, 54, 53, 51, 49, 48,
+ 46, 45, 43, 42, 40, 39, 37, 36, 35, 34, 32, 32, 31, 30, 29, 29, 29, 29,
+ 73, 73, 73, 73, 74, 69, 64, 59, 54, 53, 51, 49, 47, 45, 44, 43, 41, 40,
+ 39, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 28, 28, 28, 69, 69, 70, 70,
+ 71, 66, 62, 57, 53, 51, 49, 47, 45, 43, 42, 41, 39, 38, 37, 36, 35, 34,
+ 33, 32, 31, 30, 29, 29, 28, 28, 28, 28, 64, 65, 66, 67, 68, 64, 60, 55,
+ 51, 49, 47, 45, 42, 41, 40, 39, 37, 36, 35, 34, 33, 32, 32, 31, 30, 29,
+ 29, 28, 27, 27, 27, 27, 60, 62, 63, 64, 66, 62, 58, 53, 49, 47, 45, 42,
+ 40, 39, 38, 37, 35, 35, 34, 33, 32, 31, 31, 30, 29, 28, 28, 27, 27, 27,
+ 27, 27, 57, 58, 60, 61, 63, 59, 55, 51, 48, 45, 43, 41, 39, 38, 37, 35,
+ 34, 33, 33, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 26, 26, 26, 53, 55,
+ 56, 58, 59, 56, 53, 49, 46, 44, 42, 40, 38, 37, 35, 34, 33, 32, 32, 31,
+ 30, 30, 29, 28, 28, 27, 27, 26, 26, 26, 26, 26, 50, 52, 53, 55, 56, 53,
+ 50, 47, 45, 43, 41, 39, 37, 35, 34, 33, 32, 31, 31, 30, 29, 29, 28, 28,
+ 27, 27, 26, 26, 25, 25, 25, 25, 47, 48, 50, 51, 53, 51, 48, 46, 43, 41,
+ 39, 37, 35, 34, 33, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 26, 26, 25,
+ 25, 25, 25, 25, 44, 46, 47, 49, 50, 48, 46, 44, 42, 40, 38, 36, 35, 33,
+ 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 25, 24, 24, 24, 24,
+ 42, 44, 45, 46, 48, 46, 44, 42, 40, 39, 37, 35, 34, 33, 32, 31, 30, 29,
+ 28, 28, 27, 27, 26, 26, 25, 25, 25, 24, 24, 24, 24, 24, 40, 41, 43, 44,
+ 45, 44, 42, 40, 39, 37, 36, 34, 33, 32, 31, 30, 29, 28, 28, 27, 27, 26,
+ 26, 25, 25, 25, 24, 24, 24, 24, 24, 24, 37, 39, 40, 41, 43, 41, 40, 39,
+ 37, 36, 35, 33, 32, 31, 30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24,
+ 24, 24, 23, 23, 23, 23, 36, 37, 38, 40, 41, 40, 39, 37, 36, 35, 34, 32,
+ 31, 30, 30, 29, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 24, 23, 23, 23,
+ 23, 23, 34, 36, 37, 38, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28,
+ 27, 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 23, 23, 23, 23, 33, 34,
+ 35, 36, 37, 36, 36, 35, 34, 33, 32, 31, 30, 29, 28, 28, 27, 26, 26, 25,
+ 25, 24, 24, 24, 23, 23, 23, 23, 22, 22, 22, 22, 31, 33, 34, 35, 36, 35,
+ 34, 33, 32, 32, 31, 30, 29, 28, 28, 27, 26, 26, 25, 25, 24, 24, 24, 23,
+ 23, 23, 23, 22, 22, 22, 22, 22, 30, 31, 32, 33, 34, 34, 33, 32, 32, 31,
+ 30, 29, 28, 28, 27, 27, 26, 25, 25, 25, 24, 24, 23, 23, 23, 23, 22, 22,
+ 22, 22, 22, 22, 30, 30, 31, 32, 33, 33, 32, 31, 31, 30, 29, 29, 28, 27,
+ 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22,
+ 29, 29, 30, 31, 32, 31, 31, 30, 30, 29, 29, 28, 27, 27, 26, 26, 25, 25,
+ 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 28, 28, 29, 30,
+ 31, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 23, 23,
+ 23, 22, 22, 22, 22, 22, 21, 21, 21, 21, 28, 28, 29, 30, 31, 30, 30, 29,
+ 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 22, 22, 22,
+ 22, 22, 21, 21, 21, 21, 28, 28, 29, 30, 31, 30, 30, 29, 29, 28, 28, 27,
+ 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21,
+ 21, 21, 28, 28, 29, 30, 31, 30, 30, 29, 29, 28, 28, 27, 27, 26, 26, 25,
+ 25, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 21, 21, 21, 21 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 155, 142, 90, 61, 142, 97, 72, 56, 90, 72, 54, 46, 61, 56, 46, 41,
+ /* Size 8 */
+ 131, 172, 159, 123, 94, 74, 62, 53, 172, 151, 157, 135, 108, 86, 70, 60,
+ 159, 157, 115, 99, 86, 74, 64, 56, 123, 135, 99, 80, 70, 63, 56, 51, 94,
+ 108, 86, 70, 61, 55, 50, 47, 74, 86, 74, 63, 55, 50, 46, 44, 62, 70, 64,
+ 56, 50, 46, 44, 42, 53, 60, 56, 51, 47, 44, 42, 40,
+ /* Size 16 */
+ 136, 157, 177, 171, 165, 146, 127, 112, 97, 87, 77, 70, 64, 59, 55, 55,
+ 157, 162, 167, 165, 163, 148, 133, 119, 104, 93, 83, 75, 68, 63, 58, 58,
+ 177, 167, 156, 159, 162, 151, 140, 126, 111, 100, 89, 81, 73, 67, 62,
+ 62, 171, 165, 159, 150, 140, 131, 121, 111, 100, 91, 83, 76, 69, 65, 60,
+ 60, 165, 163, 162, 140, 118, 110, 103, 96, 89, 83, 76, 71, 66, 62, 58,
+ 58, 146, 148, 151, 131, 110, 102, 93, 87, 81, 76, 71, 66, 62, 59, 55,
+ 55, 127, 133, 140, 121, 103, 93, 83, 78, 72, 68, 65, 61, 58, 56, 53, 53,
+ 112, 119, 126, 111, 96, 87, 78, 72, 67, 64, 61, 58, 55, 53, 51, 51, 97,
+ 104, 111, 100, 89, 81, 72, 67, 63, 60, 57, 54, 52, 50, 49, 49, 87, 93,
+ 100, 91, 83, 76, 68, 64, 60, 57, 54, 52, 50, 49, 47, 47, 77, 83, 89, 83,
+ 76, 71, 65, 61, 57, 54, 51, 50, 48, 47, 45, 45, 70, 75, 81, 76, 71, 66,
+ 61, 58, 54, 52, 50, 48, 46, 45, 44, 44, 64, 68, 73, 69, 66, 62, 58, 55,
+ 52, 50, 48, 46, 45, 44, 43, 43, 59, 63, 67, 65, 62, 59, 56, 53, 50, 49,
+ 47, 45, 44, 43, 42, 42, 55, 58, 62, 60, 58, 55, 53, 51, 49, 47, 45, 44,
+ 43, 42, 41, 41, 55, 58, 62, 60, 58, 55, 53, 51, 49, 47, 45, 44, 43, 42,
+ 41, 41,
+ /* Size 32 */
+ 138, 148, 159, 170, 180, 177, 174, 171, 167, 158, 148, 139, 129, 122,
+ 114, 106, 99, 94, 88, 83, 78, 75, 71, 68, 65, 62, 60, 58, 56, 56, 56,
+ 56, 148, 155, 162, 168, 175, 173, 171, 169, 167, 158, 150, 141, 132,
+ 125, 117, 110, 102, 97, 92, 86, 81, 78, 74, 71, 67, 65, 62, 60, 58, 58,
+ 58, 58, 159, 162, 164, 167, 169, 169, 168, 167, 166, 159, 151, 143, 136,
+ 128, 121, 113, 106, 100, 95, 90, 84, 80, 77, 73, 69, 67, 64, 62, 59, 59,
+ 59, 59, 170, 168, 167, 165, 164, 164, 165, 165, 166, 159, 152, 146, 139,
+ 132, 124, 117, 110, 104, 98, 93, 87, 83, 79, 76, 72, 69, 66, 64, 61, 61,
+ 61, 61, 180, 175, 169, 164, 158, 160, 162, 163, 165, 159, 154, 148, 142,
+ 135, 128, 120, 113, 107, 102, 96, 90, 86, 82, 78, 74, 71, 68, 66, 63,
+ 63, 63, 63, 177, 173, 169, 164, 160, 159, 157, 155, 154, 149, 143, 138,
+ 133, 126, 120, 114, 108, 102, 97, 92, 87, 83, 80, 76, 72, 70, 67, 64,
+ 62, 62, 62, 62, 174, 171, 168, 165, 162, 157, 152, 147, 143, 138, 133,
+ 128, 123, 118, 113, 107, 102, 97, 93, 88, 84, 81, 77, 74, 70, 68, 66,
+ 63, 61, 61, 61, 61, 171, 169, 167, 165, 163, 155, 147, 139, 132, 127,
+ 123, 118, 114, 109, 105, 101, 96, 92, 89, 85, 81, 78, 75, 72, 69, 66,
+ 64, 62, 60, 60, 60, 60, 167, 167, 166, 166, 165, 154, 143, 132, 120,
+ 116, 112, 108, 104, 101, 98, 94, 91, 87, 84, 81, 78, 75, 72, 70, 67, 65,
+ 63, 61, 59, 59, 59, 59, 158, 158, 159, 159, 159, 149, 138, 127, 116,
+ 112, 108, 104, 99, 96, 93, 90, 86, 83, 81, 78, 75, 72, 70, 67, 65, 63,
+ 61, 59, 57, 57, 57, 57, 148, 150, 151, 152, 154, 143, 133, 123, 112,
+ 108, 103, 99, 94, 91, 88, 85, 82, 79, 77, 74, 72, 70, 67, 65, 63, 61,
+ 60, 58, 56, 56, 56, 56, 139, 141, 143, 146, 148, 138, 128, 118, 108,
+ 104, 99, 94, 89, 86, 83, 81, 78, 76, 73, 71, 69, 67, 65, 63, 61, 60, 58,
+ 57, 55, 55, 55, 55, 129, 132, 136, 139, 142, 133, 123, 114, 104, 99, 94,
+ 89, 84, 81, 79, 76, 74, 72, 70, 68, 66, 64, 62, 61, 59, 58, 57, 55, 54,
+ 54, 54, 54, 122, 125, 128, 132, 135, 126, 118, 109, 101, 96, 91, 86, 81,
+ 79, 76, 74, 71, 69, 67, 66, 64, 62, 61, 59, 58, 56, 55, 54, 53, 53, 53,
+ 53, 114, 117, 121, 124, 128, 120, 113, 105, 98, 93, 88, 83, 79, 76, 74,
+ 71, 69, 67, 65, 63, 62, 60, 59, 57, 56, 55, 54, 53, 52, 52, 52, 52, 106,
+ 110, 113, 117, 120, 114, 107, 101, 94, 90, 85, 81, 76, 74, 71, 69, 66,
+ 64, 63, 61, 60, 58, 57, 56, 55, 54, 53, 52, 51, 51, 51, 51, 99, 102,
+ 106, 110, 113, 108, 102, 96, 91, 86, 82, 78, 74, 71, 69, 66, 64, 62, 61,
+ 59, 57, 56, 55, 54, 53, 52, 51, 50, 49, 49, 49, 49, 94, 97, 100, 104,
+ 107, 102, 97, 92, 87, 83, 79, 76, 72, 69, 67, 64, 62, 61, 59, 58, 56,
+ 55, 54, 53, 52, 51, 50, 49, 49, 49, 49, 49, 88, 92, 95, 98, 102, 97, 93,
+ 89, 84, 81, 77, 73, 70, 67, 65, 63, 61, 59, 58, 56, 55, 54, 53, 52, 51,
+ 50, 49, 49, 48, 48, 48, 48, 83, 86, 90, 93, 96, 92, 88, 85, 81, 78, 74,
+ 71, 68, 66, 63, 61, 59, 58, 56, 55, 54, 53, 52, 51, 50, 49, 48, 48, 47,
+ 47, 47, 47, 78, 81, 84, 87, 90, 87, 84, 81, 78, 75, 72, 69, 66, 64, 62,
+ 60, 57, 56, 55, 54, 52, 51, 50, 50, 49, 48, 47, 47, 46, 46, 46, 46, 75,
+ 78, 80, 83, 86, 83, 81, 78, 75, 72, 70, 67, 64, 62, 60, 58, 56, 55, 54,
+ 53, 51, 51, 50, 49, 48, 47, 47, 46, 46, 46, 46, 46, 71, 74, 77, 79, 82,
+ 80, 77, 75, 72, 70, 67, 65, 62, 61, 59, 57, 55, 54, 53, 52, 50, 50, 49,
+ 48, 47, 47, 46, 45, 45, 45, 45, 45, 68, 71, 73, 76, 78, 76, 74, 72, 70,
+ 67, 65, 63, 61, 59, 57, 56, 54, 53, 52, 51, 50, 49, 48, 47, 47, 46, 45,
+ 45, 44, 44, 44, 44, 65, 67, 69, 72, 74, 72, 70, 69, 67, 65, 63, 61, 59,
+ 58, 56, 55, 53, 52, 51, 50, 49, 48, 47, 47, 46, 45, 45, 44, 44, 44, 44,
+ 44, 62, 65, 67, 69, 71, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55, 54, 52,
+ 51, 50, 49, 48, 47, 47, 46, 45, 45, 44, 44, 43, 43, 43, 43, 60, 62, 64,
+ 66, 68, 67, 66, 64, 63, 61, 60, 58, 57, 55, 54, 53, 51, 50, 49, 48, 47,
+ 47, 46, 45, 45, 44, 44, 43, 43, 43, 43, 43, 58, 60, 62, 64, 66, 64, 63,
+ 62, 61, 59, 58, 57, 55, 54, 53, 52, 50, 49, 49, 48, 47, 46, 45, 45, 44,
+ 44, 43, 43, 42, 42, 42, 42, 56, 58, 59, 61, 63, 62, 61, 60, 59, 57, 56,
+ 55, 54, 53, 52, 51, 49, 49, 48, 47, 46, 46, 45, 44, 44, 43, 43, 42, 42,
+ 42, 42, 42, 56, 58, 59, 61, 63, 62, 61, 60, 59, 57, 56, 55, 54, 53, 52,
+ 51, 49, 49, 48, 47, 46, 46, 45, 44, 44, 43, 43, 42, 42, 42, 42, 42, 56,
+ 58, 59, 61, 63, 62, 61, 60, 59, 57, 56, 55, 54, 53, 52, 51, 49, 49, 48,
+ 47, 46, 46, 45, 44, 44, 43, 43, 42, 42, 42, 42, 42, 56, 58, 59, 61, 63,
+ 62, 61, 60, 59, 57, 56, 55, 54, 53, 52, 51, 49, 49, 48, 47, 46, 46, 45,
+ 44, 44, 43, 43, 42, 42, 42, 42, 42 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 42, 40, 32, 42, 36, 34, 31, 40, 34, 28, 25, 32, 31, 25, 22,
+ /* Size 8 */
+ 64, 78, 46, 43, 41, 37, 34, 31, 78, 56, 45, 49, 48, 44, 40, 36, 46, 45,
+ 39, 40, 41, 39, 36, 33, 43, 49, 40, 36, 35, 34, 32, 30, 41, 48, 41, 35,
+ 32, 30, 29, 28, 37, 44, 39, 34, 30, 28, 27, 26, 34, 40, 36, 32, 29, 27,
+ 25, 24, 31, 36, 33, 30, 28, 26, 24, 23,
+ /* Size 16 */
+ 64, 71, 78, 62, 46, 45, 43, 42, 41, 39, 37, 36, 34, 32, 31, 31, 71, 69,
+ 67, 56, 45, 46, 46, 45, 44, 43, 41, 39, 37, 35, 33, 33, 78, 67, 56, 50,
+ 45, 47, 49, 48, 48, 46, 44, 42, 40, 38, 36, 36, 62, 56, 50, 46, 42, 43,
+ 45, 44, 44, 43, 41, 40, 38, 36, 35, 35, 46, 45, 45, 42, 39, 40, 40, 40,
+ 41, 40, 39, 38, 36, 35, 33, 33, 45, 46, 47, 43, 40, 39, 38, 38, 38, 37,
+ 36, 35, 34, 33, 32, 32, 43, 46, 49, 45, 40, 38, 36, 36, 35, 34, 34, 33,
+ 32, 31, 30, 30, 42, 45, 48, 44, 40, 38, 36, 35, 34, 33, 32, 31, 31, 30,
+ 29, 29, 41, 44, 48, 44, 41, 38, 35, 34, 32, 31, 30, 30, 29, 28, 28, 28,
+ 39, 43, 46, 43, 40, 37, 34, 33, 31, 30, 29, 29, 28, 27, 27, 27, 37, 41,
+ 44, 41, 39, 36, 34, 32, 30, 29, 28, 27, 27, 26, 26, 26, 36, 39, 42, 40,
+ 38, 35, 33, 31, 30, 29, 27, 27, 26, 25, 25, 25, 34, 37, 40, 38, 36, 34,
+ 32, 31, 29, 28, 27, 26, 25, 25, 24, 24, 32, 35, 38, 36, 35, 33, 31, 30,
+ 28, 27, 26, 25, 25, 24, 23, 23, 31, 33, 36, 35, 33, 32, 30, 29, 28, 27,
+ 26, 25, 24, 23, 23, 23, 31, 33, 36, 35, 33, 32, 30, 29, 28, 27, 26, 25,
+ 24, 23, 23, 23,
+ /* Size 32 */
+ 64, 67, 71, 74, 78, 70, 62, 54, 46, 45, 45, 44, 43, 43, 42, 42, 41, 40,
+ 39, 38, 37, 36, 36, 35, 34, 33, 32, 31, 31, 31, 31, 31, 67, 69, 70, 71,
+ 72, 66, 59, 52, 45, 45, 45, 45, 45, 44, 44, 43, 43, 42, 41, 40, 39, 38,
+ 37, 36, 35, 34, 34, 33, 32, 32, 32, 32, 71, 70, 69, 68, 67, 61, 56, 51,
+ 45, 46, 46, 46, 46, 46, 45, 45, 44, 43, 43, 42, 41, 40, 39, 38, 37, 36,
+ 35, 34, 33, 33, 33, 33, 74, 71, 68, 65, 61, 57, 53, 49, 45, 46, 46, 47,
+ 48, 47, 47, 46, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 34,
+ 34, 34, 78, 72, 67, 61, 56, 53, 50, 48, 45, 46, 47, 48, 49, 49, 48, 48,
+ 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 36, 36, 36, 70, 66,
+ 61, 57, 53, 51, 48, 46, 44, 44, 45, 46, 47, 47, 46, 46, 46, 45, 44, 44,
+ 43, 42, 41, 40, 39, 38, 37, 36, 35, 35, 35, 35, 62, 59, 56, 53, 50, 48,
+ 46, 44, 42, 43, 43, 44, 45, 45, 44, 44, 44, 43, 43, 42, 41, 41, 40, 39,
+ 38, 37, 36, 35, 35, 35, 35, 35, 54, 52, 51, 49, 48, 46, 44, 42, 41, 41,
+ 42, 42, 43, 43, 42, 42, 42, 42, 41, 41, 40, 39, 39, 38, 37, 36, 36, 35,
+ 34, 34, 34, 34, 46, 45, 45, 45, 45, 44, 42, 41, 39, 40, 40, 40, 40, 40,
+ 40, 41, 41, 40, 40, 39, 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 33, 33,
+ 45, 45, 46, 46, 46, 44, 43, 41, 40, 40, 40, 39, 39, 39, 39, 39, 39, 39,
+ 38, 38, 38, 37, 36, 36, 35, 35, 34, 33, 33, 33, 33, 33, 45, 45, 46, 46,
+ 47, 45, 43, 42, 40, 40, 39, 39, 38, 38, 38, 38, 38, 37, 37, 37, 36, 36,
+ 35, 35, 34, 34, 33, 32, 32, 32, 32, 32, 44, 45, 46, 47, 48, 46, 44, 42,
+ 40, 39, 39, 38, 37, 37, 37, 37, 36, 36, 36, 35, 35, 35, 34, 34, 33, 33,
+ 32, 32, 31, 31, 31, 31, 43, 45, 46, 48, 49, 47, 45, 43, 40, 39, 38, 37,
+ 36, 36, 36, 35, 35, 35, 34, 34, 34, 33, 33, 33, 32, 32, 31, 31, 30, 30,
+ 30, 30, 43, 44, 46, 47, 49, 47, 45, 43, 40, 39, 38, 37, 36, 36, 35, 35,
+ 34, 34, 34, 33, 33, 33, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 42, 44,
+ 45, 47, 48, 46, 44, 42, 40, 39, 38, 37, 36, 35, 35, 34, 34, 33, 33, 32,
+ 32, 32, 31, 31, 31, 30, 30, 29, 29, 29, 29, 29, 42, 43, 45, 46, 48, 46,
+ 44, 42, 41, 39, 38, 37, 35, 35, 34, 33, 33, 32, 32, 32, 31, 31, 31, 30,
+ 30, 29, 29, 29, 28, 28, 28, 28, 41, 43, 44, 46, 48, 46, 44, 42, 41, 39,
+ 38, 36, 35, 34, 34, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28,
+ 28, 28, 28, 28, 40, 42, 43, 45, 47, 45, 43, 42, 40, 39, 37, 36, 35, 34,
+ 33, 32, 32, 31, 31, 30, 30, 29, 29, 29, 28, 28, 28, 28, 27, 27, 27, 27,
+ 39, 41, 43, 44, 46, 44, 43, 41, 40, 38, 37, 36, 34, 34, 33, 32, 31, 31,
+ 30, 30, 29, 29, 29, 28, 28, 28, 27, 27, 27, 27, 27, 27, 38, 40, 42, 43,
+ 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32, 32, 31, 30, 30, 29, 29, 28,
+ 28, 28, 27, 27, 27, 26, 26, 26, 26, 26, 37, 39, 41, 42, 44, 43, 41, 40,
+ 39, 38, 36, 35, 34, 33, 32, 31, 30, 30, 29, 29, 28, 28, 27, 27, 27, 26,
+ 26, 26, 26, 26, 26, 26, 36, 38, 40, 41, 43, 42, 41, 39, 38, 37, 36, 35,
+ 33, 33, 32, 31, 30, 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25,
+ 25, 25, 36, 37, 39, 40, 42, 41, 40, 39, 38, 36, 35, 34, 33, 32, 31, 31,
+ 30, 29, 29, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 25, 35, 36,
+ 38, 39, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 29, 28, 28,
+ 27, 27, 26, 26, 26, 25, 25, 25, 24, 24, 24, 24, 34, 35, 37, 38, 40, 39,
+ 38, 37, 36, 35, 34, 33, 32, 31, 31, 30, 29, 28, 28, 27, 27, 26, 26, 26,
+ 25, 25, 25, 24, 24, 24, 24, 24, 33, 34, 36, 37, 39, 38, 37, 36, 35, 35,
+ 34, 33, 32, 31, 30, 29, 29, 28, 28, 27, 26, 26, 26, 25, 25, 25, 24, 24,
+ 24, 24, 24, 24, 32, 34, 35, 36, 38, 37, 36, 36, 35, 34, 33, 32, 31, 31,
+ 30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 25, 24, 24, 24, 23, 23, 23, 23,
+ 31, 33, 34, 35, 37, 36, 35, 35, 34, 33, 32, 32, 31, 30, 29, 29, 28, 28,
+ 27, 26, 26, 25, 25, 25, 24, 24, 24, 23, 23, 23, 23, 23, 31, 32, 33, 34,
+ 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 26, 25,
+ 25, 24, 24, 24, 23, 23, 23, 23, 23, 23, 31, 32, 33, 34, 36, 35, 35, 34,
+ 33, 33, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24,
+ 23, 23, 23, 23, 23, 23, 31, 32, 33, 34, 36, 35, 35, 34, 33, 33, 32, 31,
+ 30, 30, 29, 28, 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 23,
+ 23, 23, 31, 32, 33, 34, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
+ 28, 27, 27, 26, 26, 25, 25, 24, 24, 24, 23, 23, 23, 23, 23, 23 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 133, 87, 80, 65, 87, 73, 69, 61, 80, 69, 56, 50, 65, 61, 50, 43,
+ /* Size 8 */
+ 124, 152, 87, 83, 78, 70, 63, 57, 152, 107, 86, 94, 91, 84, 75, 67, 87,
+ 86, 74, 77, 77, 73, 68, 62, 83, 94, 77, 69, 66, 63, 60, 56, 78, 91, 77,
+ 66, 60, 56, 54, 51, 70, 84, 73, 63, 56, 52, 49, 47, 63, 75, 68, 60, 54,
+ 49, 46, 44, 57, 67, 62, 56, 51, 47, 44, 41,
+ /* Size 16 */
+ 127, 141, 155, 122, 89, 87, 84, 82, 80, 76, 72, 68, 65, 61, 58, 58, 141,
+ 137, 133, 111, 88, 89, 90, 88, 87, 83, 79, 75, 71, 67, 63, 63, 155, 133,
+ 110, 99, 88, 92, 96, 95, 93, 90, 86, 81, 77, 73, 69, 69, 122, 111, 99,
+ 90, 82, 85, 87, 87, 86, 83, 80, 77, 73, 70, 66, 66, 89, 88, 88, 82, 76,
+ 77, 78, 79, 79, 77, 75, 72, 70, 67, 64, 64, 87, 89, 92, 85, 77, 76, 74,
+ 74, 73, 71, 70, 68, 66, 63, 61, 61, 84, 90, 96, 87, 78, 74, 70, 69, 67,
+ 66, 65, 63, 62, 60, 58, 58, 82, 88, 95, 87, 79, 74, 69, 67, 64, 63, 61,
+ 60, 58, 57, 55, 55, 80, 87, 93, 86, 79, 73, 67, 64, 61, 59, 58, 56, 55,
+ 54, 52, 52, 76, 83, 90, 83, 77, 71, 66, 63, 59, 57, 55, 54, 53, 51, 50,
+ 50, 72, 79, 86, 80, 75, 70, 65, 61, 58, 55, 53, 52, 50, 49, 48, 48, 68,
+ 75, 81, 77, 72, 68, 63, 60, 56, 54, 52, 50, 49, 47, 46, 46, 65, 71, 77,
+ 73, 70, 66, 62, 58, 55, 53, 50, 49, 47, 46, 45, 45, 61, 67, 73, 70, 67,
+ 63, 60, 57, 54, 51, 49, 47, 46, 45, 43, 43, 58, 63, 69, 66, 64, 61, 58,
+ 55, 52, 50, 48, 46, 45, 43, 42, 42, 58, 63, 69, 66, 64, 61, 58, 55, 52,
+ 50, 48, 46, 45, 43, 42, 42,
+ /* Size 32 */
+ 129, 136, 143, 150, 157, 141, 124, 107, 90, 89, 88, 87, 86, 84, 83, 82,
+ 81, 79, 77, 75, 73, 71, 69, 67, 65, 64, 62, 60, 59, 59, 59, 59, 136,
+ 138, 141, 143, 146, 132, 118, 104, 90, 90, 89, 89, 88, 87, 86, 85, 84,
+ 82, 80, 78, 76, 74, 72, 71, 69, 67, 65, 63, 62, 62, 62, 62, 143, 141,
+ 139, 137, 134, 123, 112, 101, 89, 90, 90, 91, 91, 90, 89, 89, 88, 86,
+ 84, 82, 80, 78, 76, 74, 72, 70, 68, 66, 64, 64, 64, 64, 150, 143, 137,
+ 130, 123, 114, 106, 98, 89, 90, 92, 93, 94, 93, 93, 92, 91, 89, 87, 85,
+ 83, 81, 79, 77, 75, 73, 71, 69, 67, 67, 67, 67, 157, 146, 134, 123, 111,
+ 106, 100, 94, 89, 91, 93, 95, 97, 97, 96, 95, 95, 93, 91, 89, 87, 85,
+ 82, 80, 78, 76, 74, 72, 70, 70, 70, 70, 141, 132, 123, 114, 106, 101,
+ 96, 91, 86, 88, 89, 91, 93, 92, 92, 91, 91, 89, 87, 86, 84, 82, 80, 78,
+ 76, 74, 72, 70, 68, 68, 68, 68, 124, 118, 112, 106, 100, 96, 91, 87, 83,
+ 84, 86, 87, 88, 88, 88, 87, 87, 86, 84, 83, 81, 80, 78, 76, 74, 72, 71,
+ 69, 67, 67, 67, 67, 107, 104, 101, 98, 94, 91, 87, 84, 80, 81, 82, 83,
+ 84, 84, 84, 83, 83, 82, 81, 80, 79, 77, 76, 74, 72, 71, 69, 68, 66, 66,
+ 66, 66, 90, 90, 89, 89, 89, 86, 83, 80, 77, 78, 78, 79, 79, 79, 80, 80,
+ 80, 79, 78, 77, 76, 75, 73, 72, 71, 69, 68, 66, 65, 65, 65, 65, 89, 90,
+ 90, 90, 91, 88, 84, 81, 78, 78, 77, 77, 77, 77, 77, 77, 77, 76, 75, 74,
+ 73, 72, 71, 70, 69, 67, 66, 64, 63, 63, 63, 63, 88, 89, 90, 92, 93, 89,
+ 86, 82, 78, 77, 77, 76, 75, 75, 75, 74, 74, 73, 72, 72, 71, 70, 69, 68,
+ 66, 65, 64, 63, 62, 62, 62, 62, 87, 89, 91, 93, 95, 91, 87, 83, 79, 77,
+ 76, 75, 73, 73, 72, 72, 71, 70, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61,
+ 60, 60, 60, 60, 86, 88, 91, 94, 97, 93, 88, 84, 79, 77, 75, 73, 71, 70,
+ 70, 69, 68, 68, 67, 66, 66, 65, 64, 63, 62, 61, 60, 59, 59, 59, 59, 59,
+ 84, 87, 90, 93, 97, 92, 88, 84, 79, 77, 75, 73, 70, 69, 68, 68, 67, 66,
+ 65, 65, 64, 63, 62, 61, 61, 60, 59, 58, 57, 57, 57, 57, 83, 86, 89, 93,
+ 96, 92, 88, 84, 80, 77, 75, 72, 70, 68, 67, 66, 65, 64, 64, 63, 62, 61,
+ 60, 60, 59, 58, 57, 57, 56, 56, 56, 56, 82, 85, 89, 92, 95, 91, 87, 83,
+ 80, 77, 74, 72, 69, 68, 66, 65, 63, 63, 62, 61, 60, 59, 59, 58, 57, 57,
+ 56, 55, 54, 54, 54, 54, 81, 84, 88, 91, 95, 91, 87, 83, 80, 77, 74, 71,
+ 68, 67, 65, 63, 62, 61, 60, 59, 58, 58, 57, 56, 56, 55, 54, 54, 53, 53,
+ 53, 53, 79, 82, 86, 89, 93, 89, 86, 82, 79, 76, 73, 70, 68, 66, 64, 63,
+ 61, 60, 59, 58, 57, 57, 56, 55, 54, 54, 53, 52, 52, 52, 52, 52, 77, 80,
+ 84, 87, 91, 87, 84, 81, 78, 75, 72, 70, 67, 65, 64, 62, 60, 59, 58, 57,
+ 56, 55, 55, 54, 53, 53, 52, 51, 51, 51, 51, 51, 75, 78, 82, 85, 89, 86,
+ 83, 80, 77, 74, 72, 69, 66, 65, 63, 61, 59, 58, 57, 56, 55, 54, 53, 53,
+ 52, 51, 51, 50, 50, 50, 50, 50, 73, 76, 80, 83, 87, 84, 81, 79, 76, 73,
+ 71, 68, 66, 64, 62, 60, 58, 57, 56, 55, 54, 53, 52, 52, 51, 50, 50, 49,
+ 48, 48, 48, 48, 71, 74, 78, 81, 85, 82, 80, 77, 75, 72, 70, 67, 65, 63,
+ 61, 59, 58, 57, 55, 54, 53, 52, 52, 51, 50, 49, 49, 48, 48, 48, 48, 48,
+ 69, 72, 76, 79, 82, 80, 78, 76, 73, 71, 69, 66, 64, 62, 60, 59, 57, 56,
+ 55, 53, 52, 52, 51, 50, 49, 49, 48, 47, 47, 47, 47, 47, 67, 71, 74, 77,
+ 80, 78, 76, 74, 72, 70, 68, 65, 63, 61, 60, 58, 56, 55, 54, 53, 52, 51,
+ 50, 49, 48, 48, 47, 47, 46, 46, 46, 46, 65, 69, 72, 75, 78, 76, 74, 72,
+ 71, 69, 66, 64, 62, 61, 59, 57, 56, 54, 53, 52, 51, 50, 49, 48, 48, 47,
+ 46, 46, 45, 45, 45, 45, 64, 67, 70, 73, 76, 74, 72, 71, 69, 67, 65, 63,
+ 61, 60, 58, 57, 55, 54, 53, 51, 50, 49, 49, 48, 47, 46, 46, 45, 45, 45,
+ 45, 45, 62, 65, 68, 71, 74, 72, 71, 69, 68, 66, 64, 62, 60, 59, 57, 56,
+ 54, 53, 52, 51, 50, 49, 48, 47, 46, 46, 45, 45, 44, 44, 44, 44, 60, 63,
+ 66, 69, 72, 70, 69, 68, 66, 64, 63, 61, 59, 58, 57, 55, 54, 52, 51, 50,
+ 49, 48, 47, 47, 46, 45, 45, 44, 43, 43, 43, 43, 59, 62, 64, 67, 70, 68,
+ 67, 66, 65, 63, 62, 60, 59, 57, 56, 54, 53, 52, 51, 50, 48, 48, 47, 46,
+ 45, 45, 44, 43, 43, 43, 43, 43, 59, 62, 64, 67, 70, 68, 67, 66, 65, 63,
+ 62, 60, 59, 57, 56, 54, 53, 52, 51, 50, 48, 48, 47, 46, 45, 45, 44, 43,
+ 43, 43, 43, 43, 59, 62, 64, 67, 70, 68, 67, 66, 65, 63, 62, 60, 59, 57,
+ 56, 54, 53, 52, 51, 50, 48, 48, 47, 46, 45, 45, 44, 43, 43, 43, 43, 43,
+ 59, 62, 64, 67, 70, 68, 67, 66, 65, 63, 62, 60, 59, 57, 56, 54, 53, 52,
+ 51, 50, 48, 48, 47, 46, 45, 45, 44, 43, 43, 43, 43, 43 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 59, 40, 29, 59, 42, 33, 27, 40, 33, 26, 23, 29, 27, 23, 21,
+ /* Size 8 */
+ 64, 82, 76, 60, 48, 39, 34, 30, 82, 73, 75, 66, 54, 44, 38, 33, 76, 75,
+ 57, 50, 44, 39, 35, 31, 60, 66, 50, 42, 37, 34, 31, 29, 48, 54, 44, 37,
+ 33, 31, 29, 27, 39, 44, 39, 34, 31, 29, 27, 26, 34, 38, 35, 31, 29, 27,
+ 26, 25, 30, 33, 31, 29, 27, 26, 25, 24,
+ /* Size 16 */
+ 64, 73, 82, 79, 76, 68, 60, 54, 48, 44, 39, 36, 34, 32, 30, 30, 73, 75,
+ 77, 76, 76, 69, 63, 57, 51, 46, 42, 39, 36, 34, 31, 31, 82, 77, 73, 74,
+ 75, 71, 66, 60, 54, 49, 44, 41, 38, 35, 33, 33, 79, 76, 74, 70, 66, 62,
+ 58, 54, 49, 45, 42, 39, 36, 34, 32, 32, 76, 76, 75, 66, 57, 53, 50, 47,
+ 44, 42, 39, 37, 35, 33, 31, 31, 68, 69, 71, 62, 53, 50, 46, 43, 41, 39,
+ 37, 35, 33, 32, 30, 30, 60, 63, 66, 58, 50, 46, 42, 40, 37, 36, 34, 33,
+ 31, 30, 29, 29, 54, 57, 60, 54, 47, 43, 40, 37, 35, 34, 32, 31, 30, 29,
+ 28, 28, 48, 51, 54, 49, 44, 41, 37, 35, 33, 32, 31, 30, 29, 28, 27, 27,
+ 44, 46, 49, 45, 42, 39, 36, 34, 32, 31, 30, 29, 28, 27, 27, 27, 39, 42,
+ 44, 42, 39, 37, 34, 32, 31, 30, 29, 28, 27, 27, 26, 26, 36, 39, 41, 39,
+ 37, 35, 33, 31, 30, 29, 28, 27, 26, 26, 25, 25, 34, 36, 38, 36, 35, 33,
+ 31, 30, 29, 28, 27, 26, 26, 25, 25, 25, 32, 34, 35, 34, 33, 32, 30, 29,
+ 28, 27, 27, 26, 25, 25, 25, 25, 30, 31, 33, 32, 31, 30, 29, 28, 27, 27,
+ 26, 25, 25, 25, 24, 24, 30, 31, 33, 32, 31, 30, 29, 28, 27, 27, 26, 25,
+ 25, 25, 24, 24,
+ /* Size 32 */
+ 64, 68, 73, 77, 82, 80, 79, 78, 76, 72, 68, 64, 60, 57, 54, 51, 48, 46,
+ 44, 41, 39, 38, 36, 35, 34, 33, 32, 31, 30, 30, 30, 30, 68, 71, 74, 77,
+ 79, 79, 78, 77, 76, 72, 69, 65, 62, 59, 56, 52, 49, 47, 45, 43, 41, 39,
+ 38, 36, 35, 34, 33, 32, 31, 31, 31, 31, 73, 74, 75, 76, 77, 77, 76, 76,
+ 76, 73, 69, 66, 63, 60, 57, 54, 51, 49, 46, 44, 42, 40, 39, 37, 36, 35,
+ 34, 32, 31, 31, 31, 31, 77, 77, 76, 75, 75, 75, 75, 75, 76, 73, 70, 67,
+ 64, 61, 58, 55, 52, 50, 48, 45, 43, 41, 40, 38, 37, 35, 34, 33, 32, 32,
+ 32, 32, 82, 79, 77, 75, 73, 73, 74, 75, 75, 73, 71, 68, 66, 63, 60, 57,
+ 54, 51, 49, 47, 44, 43, 41, 39, 38, 36, 35, 34, 33, 33, 33, 33, 80, 79,
+ 77, 75, 73, 73, 72, 71, 71, 68, 66, 64, 62, 59, 57, 54, 51, 49, 47, 45,
+ 43, 41, 40, 38, 37, 36, 35, 34, 32, 32, 32, 32, 79, 78, 76, 75, 74, 72,
+ 70, 68, 66, 64, 62, 60, 58, 56, 54, 51, 49, 47, 45, 44, 42, 40, 39, 37,
+ 36, 35, 34, 33, 32, 32, 32, 32, 78, 77, 76, 75, 75, 71, 68, 65, 61, 60,
+ 58, 56, 54, 52, 50, 49, 47, 45, 44, 42, 40, 39, 38, 37, 35, 34, 33, 33,
+ 32, 32, 32, 32, 76, 76, 76, 76, 75, 71, 66, 61, 57, 55, 53, 52, 50, 49,
+ 47, 46, 44, 43, 42, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 31, 31, 31,
+ 72, 72, 73, 73, 73, 68, 64, 60, 55, 53, 52, 50, 48, 47, 45, 44, 43, 41,
+ 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 31, 31, 31, 31, 68, 69, 69, 70,
+ 71, 66, 62, 58, 53, 52, 50, 48, 46, 45, 43, 42, 41, 40, 39, 38, 37, 36,
+ 35, 34, 33, 32, 32, 31, 30, 30, 30, 30, 64, 65, 66, 67, 68, 64, 60, 56,
+ 52, 50, 48, 46, 44, 43, 41, 40, 39, 38, 37, 36, 35, 35, 34, 33, 32, 32,
+ 31, 30, 30, 30, 30, 30, 60, 62, 63, 64, 66, 62, 58, 54, 50, 48, 46, 44,
+ 42, 41, 40, 38, 37, 37, 36, 35, 34, 33, 33, 32, 31, 31, 30, 30, 29, 29,
+ 29, 29, 57, 59, 60, 61, 63, 59, 56, 52, 49, 47, 45, 43, 41, 40, 38, 37,
+ 36, 36, 35, 34, 33, 33, 32, 31, 31, 30, 30, 29, 29, 29, 29, 29, 54, 56,
+ 57, 58, 60, 57, 54, 50, 47, 45, 43, 41, 40, 38, 37, 36, 35, 35, 34, 33,
+ 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 28, 28, 51, 52, 54, 55, 57, 54,
+ 51, 49, 46, 44, 42, 40, 38, 37, 36, 35, 34, 34, 33, 32, 32, 31, 31, 30,
+ 30, 29, 29, 28, 28, 28, 28, 28, 48, 49, 51, 52, 54, 51, 49, 47, 44, 43,
+ 41, 39, 37, 36, 35, 34, 33, 33, 32, 31, 31, 30, 30, 29, 29, 28, 28, 28,
+ 27, 27, 27, 27, 46, 47, 49, 50, 51, 49, 47, 45, 43, 41, 40, 38, 37, 36,
+ 35, 34, 33, 32, 31, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 27, 27, 27,
+ 44, 45, 46, 48, 49, 47, 45, 44, 42, 40, 39, 37, 36, 35, 34, 33, 32, 31,
+ 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 27, 27, 27, 27, 41, 43, 44, 45,
+ 47, 45, 44, 42, 40, 39, 38, 36, 35, 34, 33, 32, 31, 31, 30, 30, 29, 29,
+ 28, 28, 28, 27, 27, 27, 26, 26, 26, 26, 39, 41, 42, 43, 44, 43, 42, 40,
+ 39, 38, 37, 35, 34, 33, 32, 32, 31, 30, 30, 29, 29, 28, 28, 27, 27, 27,
+ 27, 26, 26, 26, 26, 26, 38, 39, 40, 41, 43, 41, 40, 39, 38, 37, 36, 35,
+ 33, 33, 32, 31, 30, 30, 29, 29, 28, 28, 27, 27, 27, 27, 26, 26, 26, 26,
+ 26, 26, 36, 38, 39, 40, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 31,
+ 30, 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 26, 25, 25, 25, 25, 35, 36,
+ 37, 38, 39, 38, 37, 37, 36, 35, 34, 33, 32, 31, 31, 30, 29, 29, 28, 28,
+ 27, 27, 27, 26, 26, 26, 26, 25, 25, 25, 25, 25, 34, 35, 36, 37, 38, 37,
+ 36, 35, 35, 34, 33, 32, 31, 31, 30, 30, 29, 28, 28, 28, 27, 27, 26, 26,
+ 26, 26, 25, 25, 25, 25, 25, 25, 33, 34, 35, 35, 36, 36, 35, 34, 34, 33,
+ 32, 32, 31, 30, 30, 29, 28, 28, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25,
+ 25, 25, 25, 25, 32, 33, 34, 34, 35, 35, 34, 33, 33, 32, 32, 31, 30, 30,
+ 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 25, 25, 25,
+ 31, 32, 32, 33, 34, 34, 33, 33, 32, 31, 31, 30, 30, 29, 29, 28, 28, 27,
+ 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 24, 24, 24, 24, 30, 31, 31, 32,
+ 33, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 27, 26, 26, 26,
+ 25, 25, 25, 25, 25, 24, 24, 24, 24, 24, 30, 31, 31, 32, 33, 32, 32, 32,
+ 31, 31, 30, 30, 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25, 25,
+ 25, 24, 24, 24, 24, 24, 30, 31, 31, 32, 33, 32, 32, 32, 31, 31, 30, 30,
+ 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 24, 24, 24,
+ 24, 24, 30, 31, 31, 32, 33, 32, 32, 32, 31, 31, 30, 30, 29, 29, 28, 28,
+ 27, 27, 27, 26, 26, 26, 25, 25, 25, 25, 25, 24, 24, 24, 24, 24 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 141, 130, 85, 60, 130, 91, 70, 56, 85, 70, 55, 48, 60, 56, 48, 43,
+ /* Size 8 */
+ 120, 155, 144, 113, 88, 72, 61, 53, 155, 137, 143, 124, 100, 82, 68, 59,
+ 144, 143, 106, 93, 82, 71, 63, 56, 113, 124, 93, 77, 68, 62, 56, 52, 88,
+ 100, 82, 68, 60, 55, 51, 48, 72, 82, 71, 62, 55, 51, 48, 46, 61, 68, 63,
+ 56, 51, 48, 45, 44, 53, 59, 56, 52, 48, 46, 44, 42,
+ /* Size 16 */
+ 124, 142, 160, 154, 149, 133, 117, 104, 91, 82, 74, 68, 63, 59, 55, 55,
+ 142, 146, 150, 149, 148, 135, 122, 110, 97, 88, 79, 73, 66, 62, 58, 58,
+ 160, 150, 141, 144, 147, 137, 128, 115, 103, 94, 84, 77, 70, 66, 61, 61,
+ 154, 149, 144, 136, 128, 120, 112, 103, 94, 86, 79, 73, 67, 63, 59, 59,
+ 149, 148, 147, 128, 109, 103, 96, 90, 84, 79, 73, 69, 64, 61, 58, 58,
+ 133, 135, 137, 120, 103, 95, 87, 82, 77, 73, 68, 65, 61, 58, 56, 56,
+ 117, 122, 128, 112, 96, 87, 79, 74, 70, 67, 63, 61, 58, 56, 53, 53, 104,
+ 110, 115, 103, 90, 82, 74, 70, 66, 63, 60, 58, 55, 53, 52, 52, 91, 97,
+ 103, 94, 84, 77, 70, 66, 62, 59, 57, 55, 53, 51, 50, 50, 82, 88, 94, 86,
+ 79, 73, 67, 63, 59, 57, 54, 53, 51, 50, 48, 48, 74, 79, 84, 79, 73, 68,
+ 63, 60, 57, 54, 52, 51, 49, 48, 47, 47, 68, 73, 77, 73, 69, 65, 61, 58,
+ 55, 53, 51, 49, 48, 47, 46, 46, 63, 66, 70, 67, 64, 61, 58, 55, 53, 51,
+ 49, 48, 47, 46, 45, 45, 59, 62, 66, 63, 61, 58, 56, 53, 51, 50, 48, 47,
+ 46, 45, 44, 44, 55, 58, 61, 59, 58, 56, 53, 52, 50, 48, 47, 46, 45, 44,
+ 44, 44, 55, 58, 61, 59, 58, 56, 53, 52, 50, 48, 47, 46, 45, 44, 44, 44,
+ /* Size 32 */
+ 126, 135, 144, 153, 162, 159, 157, 154, 151, 143, 135, 127, 118, 112,
+ 105, 99, 92, 88, 84, 79, 75, 72, 69, 66, 63, 62, 60, 58, 56, 56, 56, 56,
+ 135, 141, 146, 152, 157, 156, 154, 152, 151, 143, 136, 129, 121, 115,
+ 108, 102, 96, 91, 87, 82, 78, 75, 71, 68, 65, 63, 61, 59, 57, 57, 57,
+ 57, 144, 146, 148, 151, 153, 152, 151, 151, 150, 144, 137, 130, 124,
+ 118, 111, 105, 99, 94, 89, 85, 80, 77, 74, 71, 67, 65, 63, 61, 59, 59,
+ 59, 59, 153, 152, 151, 149, 148, 148, 149, 149, 150, 144, 138, 132, 127,
+ 120, 114, 108, 102, 97, 92, 87, 83, 79, 76, 73, 69, 67, 65, 63, 60, 60,
+ 60, 60, 162, 157, 153, 148, 143, 145, 146, 148, 149, 144, 139, 134, 130,
+ 123, 117, 111, 105, 100, 95, 90, 85, 82, 78, 75, 71, 69, 67, 64, 62, 62,
+ 62, 62, 159, 156, 152, 148, 145, 143, 142, 141, 139, 135, 130, 126, 121,
+ 116, 111, 105, 100, 96, 91, 87, 83, 79, 76, 73, 70, 68, 65, 63, 61, 61,
+ 61, 61, 157, 154, 151, 149, 146, 142, 138, 134, 130, 126, 122, 118, 113,
+ 109, 104, 100, 95, 91, 88, 84, 80, 77, 74, 71, 68, 66, 64, 62, 60, 60,
+ 60, 60, 154, 152, 151, 149, 148, 141, 134, 127, 120, 117, 113, 109, 105,
+ 102, 98, 94, 90, 87, 84, 81, 77, 75, 72, 69, 67, 65, 63, 61, 59, 59, 59,
+ 59, 151, 151, 150, 150, 149, 139, 130, 120, 111, 108, 104, 101, 97, 94,
+ 91, 89, 86, 83, 80, 77, 75, 72, 70, 68, 65, 64, 62, 60, 58, 58, 58, 58,
+ 143, 143, 144, 144, 144, 135, 126, 117, 108, 104, 100, 97, 93, 90, 87,
+ 85, 82, 79, 77, 74, 72, 70, 68, 66, 64, 62, 61, 59, 57, 57, 57, 57, 135,
+ 136, 137, 138, 139, 130, 122, 113, 104, 100, 96, 93, 89, 86, 84, 81, 78,
+ 76, 74, 72, 69, 68, 66, 64, 62, 61, 59, 58, 56, 56, 56, 56, 127, 129,
+ 130, 132, 134, 126, 118, 109, 101, 97, 93, 88, 84, 82, 80, 77, 75, 73,
+ 71, 69, 67, 65, 64, 62, 60, 59, 58, 57, 55, 55, 55, 55, 118, 121, 124,
+ 127, 130, 121, 113, 105, 97, 93, 89, 84, 80, 78, 76, 73, 71, 69, 68, 66,
+ 64, 63, 62, 60, 59, 58, 57, 55, 54, 54, 54, 54, 112, 115, 118, 120, 123,
+ 116, 109, 102, 94, 90, 86, 82, 78, 76, 73, 71, 69, 67, 66, 64, 63, 61,
+ 60, 59, 58, 56, 55, 54, 53, 53, 53, 53, 105, 108, 111, 114, 117, 111,
+ 104, 98, 91, 87, 84, 80, 76, 73, 71, 69, 67, 65, 64, 62, 61, 60, 59, 57,
+ 56, 55, 54, 53, 52, 52, 52, 52, 99, 102, 105, 108, 111, 105, 100, 94,
+ 89, 85, 81, 77, 73, 71, 69, 67, 65, 63, 62, 60, 59, 58, 57, 56, 55, 54,
+ 53, 52, 51, 51, 51, 51, 92, 96, 99, 102, 105, 100, 95, 90, 86, 82, 78,
+ 75, 71, 69, 67, 65, 63, 61, 60, 59, 57, 56, 55, 55, 54, 53, 52, 51, 51,
+ 51, 51, 51, 88, 91, 94, 97, 100, 96, 91, 87, 83, 79, 76, 73, 69, 67, 65,
+ 63, 61, 60, 59, 57, 56, 55, 54, 54, 53, 52, 51, 51, 50, 50, 50, 50, 84,
+ 87, 89, 92, 95, 91, 88, 84, 80, 77, 74, 71, 68, 66, 64, 62, 60, 59, 58,
+ 56, 55, 54, 53, 53, 52, 51, 50, 50, 49, 49, 49, 49, 79, 82, 85, 87, 90,
+ 87, 84, 81, 77, 74, 72, 69, 66, 64, 62, 60, 59, 57, 56, 55, 54, 53, 52,
+ 52, 51, 50, 50, 49, 48, 48, 48, 48, 75, 78, 80, 83, 85, 83, 80, 77, 75,
+ 72, 69, 67, 64, 63, 61, 59, 57, 56, 55, 54, 53, 52, 51, 51, 50, 49, 49,
+ 48, 48, 48, 48, 48, 72, 75, 77, 79, 82, 79, 77, 75, 72, 70, 68, 65, 63,
+ 61, 60, 58, 56, 55, 54, 53, 52, 51, 51, 50, 49, 49, 48, 48, 47, 47, 47,
+ 47, 69, 71, 74, 76, 78, 76, 74, 72, 70, 68, 66, 64, 62, 60, 59, 57, 55,
+ 54, 53, 52, 51, 51, 50, 49, 49, 48, 48, 47, 47, 47, 47, 47, 66, 68, 71,
+ 73, 75, 73, 71, 69, 68, 66, 64, 62, 60, 59, 57, 56, 55, 54, 53, 52, 51,
+ 50, 49, 49, 48, 48, 47, 47, 46, 46, 46, 46, 63, 65, 67, 69, 71, 70, 68,
+ 67, 65, 64, 62, 60, 59, 58, 56, 55, 54, 53, 52, 51, 50, 49, 49, 48, 47,
+ 47, 47, 46, 46, 46, 46, 46, 62, 63, 65, 67, 69, 68, 66, 65, 64, 62, 61,
+ 59, 58, 56, 55, 54, 53, 52, 51, 50, 49, 49, 48, 48, 47, 47, 46, 46, 45,
+ 45, 45, 45, 60, 61, 63, 65, 67, 65, 64, 63, 62, 61, 59, 58, 57, 55, 54,
+ 53, 52, 51, 50, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45, 45, 58,
+ 59, 61, 63, 64, 63, 62, 61, 60, 59, 58, 57, 55, 54, 53, 52, 51, 51, 50,
+ 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45, 45, 45, 56, 57, 59, 60, 62,
+ 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 51, 50, 49, 48, 48, 47, 47,
+ 46, 46, 45, 45, 45, 44, 44, 44, 44, 56, 57, 59, 60, 62, 61, 60, 59, 58,
+ 57, 56, 55, 54, 53, 52, 51, 51, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45,
+ 45, 44, 44, 44, 44, 56, 57, 59, 60, 62, 61, 60, 59, 58, 57, 56, 55, 54,
+ 53, 52, 51, 51, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 44, 44, 44,
+ 44, 56, 57, 59, 60, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 51,
+ 50, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 44, 44, 44, 44 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 44, 41, 34, 44, 38, 36, 33, 41, 36, 30, 27, 34, 33, 27, 25,
+ /* Size 8 */
+ 64, 77, 47, 45, 42, 39, 36, 33, 77, 56, 46, 50, 49, 45, 41, 37, 47, 46,
+ 41, 42, 42, 40, 38, 35, 45, 50, 42, 38, 37, 36, 34, 32, 42, 49, 42, 37,
+ 34, 32, 31, 30, 39, 45, 40, 36, 32, 30, 29, 28, 36, 41, 38, 34, 31, 29,
+ 27, 26, 33, 37, 35, 32, 30, 28, 26, 25,
+ /* Size 16 */
+ 64, 71, 77, 62, 47, 46, 45, 43, 42, 41, 39, 37, 36, 34, 33, 33, 71, 69,
+ 67, 57, 46, 47, 47, 46, 46, 44, 42, 40, 38, 37, 35, 35, 77, 67, 56, 51,
+ 46, 48, 50, 49, 49, 47, 45, 43, 41, 39, 37, 37, 62, 57, 51, 47, 43, 45,
+ 46, 46, 45, 44, 43, 41, 40, 38, 36, 36, 47, 46, 46, 43, 41, 41, 42, 42,
+ 42, 41, 40, 39, 38, 37, 35, 35, 46, 47, 48, 45, 41, 41, 40, 40, 39, 39,
+ 38, 37, 36, 35, 34, 34, 45, 47, 50, 46, 42, 40, 38, 37, 37, 36, 36, 35,
+ 34, 33, 32, 32, 43, 46, 49, 46, 42, 40, 37, 36, 35, 35, 34, 33, 33, 32,
+ 31, 31, 42, 46, 49, 45, 42, 39, 37, 35, 34, 33, 32, 32, 31, 30, 30, 30,
+ 41, 44, 47, 44, 41, 39, 36, 35, 33, 32, 31, 31, 30, 29, 29, 29, 39, 42,
+ 45, 43, 40, 38, 36, 34, 32, 31, 30, 30, 29, 28, 28, 28, 37, 40, 43, 41,
+ 39, 37, 35, 33, 32, 31, 30, 29, 28, 28, 27, 27, 36, 38, 41, 40, 38, 36,
+ 34, 33, 31, 30, 29, 28, 27, 27, 26, 26, 34, 37, 39, 38, 37, 35, 33, 32,
+ 30, 29, 28, 28, 27, 26, 26, 26, 33, 35, 37, 36, 35, 34, 32, 31, 30, 29,
+ 28, 27, 26, 26, 25, 25, 33, 35, 37, 36, 35, 34, 32, 31, 30, 29, 28, 27,
+ 26, 26, 25, 25,
+ /* Size 32 */
+ 64, 67, 71, 74, 77, 69, 62, 54, 47, 46, 46, 45, 45, 44, 43, 43, 42, 42,
+ 41, 40, 39, 38, 37, 36, 36, 35, 34, 33, 33, 33, 33, 33, 67, 68, 70, 71,
+ 72, 66, 59, 53, 47, 46, 46, 46, 46, 45, 45, 44, 44, 43, 42, 41, 40, 40,
+ 39, 38, 37, 36, 35, 35, 34, 34, 34, 34, 71, 70, 69, 68, 67, 62, 57, 51,
+ 46, 47, 47, 47, 47, 47, 46, 46, 46, 45, 44, 43, 42, 41, 40, 39, 38, 38,
+ 37, 36, 35, 35, 35, 35, 74, 71, 68, 65, 61, 58, 54, 50, 46, 47, 47, 48,
+ 49, 48, 48, 47, 47, 46, 45, 44, 44, 43, 42, 41, 40, 39, 38, 37, 36, 36,
+ 36, 36, 77, 72, 67, 61, 56, 54, 51, 49, 46, 47, 48, 49, 50, 50, 49, 49,
+ 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 37, 37, 37, 69, 66,
+ 62, 58, 54, 51, 49, 47, 45, 46, 46, 47, 48, 48, 47, 47, 47, 46, 45, 45,
+ 44, 43, 42, 41, 40, 39, 39, 38, 37, 37, 37, 37, 62, 59, 57, 54, 51, 49,
+ 47, 45, 43, 44, 45, 45, 46, 46, 46, 45, 45, 45, 44, 43, 43, 42, 41, 40,
+ 40, 39, 38, 37, 36, 36, 36, 36, 54, 53, 51, 50, 49, 47, 45, 44, 42, 43,
+ 43, 43, 44, 44, 44, 44, 44, 43, 43, 42, 42, 41, 40, 39, 39, 38, 37, 36,
+ 36, 36, 36, 36, 47, 47, 46, 46, 46, 45, 43, 42, 41, 41, 41, 42, 42, 42,
+ 42, 42, 42, 42, 41, 41, 40, 40, 39, 38, 38, 37, 37, 36, 35, 35, 35, 35,
+ 46, 46, 47, 47, 47, 46, 44, 43, 41, 41, 41, 41, 41, 41, 41, 41, 41, 40,
+ 40, 40, 39, 39, 38, 37, 37, 36, 36, 35, 34, 34, 34, 34, 46, 46, 47, 47,
+ 48, 46, 45, 43, 41, 41, 41, 40, 40, 40, 40, 40, 39, 39, 39, 38, 38, 37,
+ 37, 36, 36, 35, 35, 34, 34, 34, 34, 34, 45, 46, 47, 48, 49, 47, 45, 43,
+ 42, 41, 40, 40, 39, 39, 39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 35, 35,
+ 34, 34, 33, 33, 33, 33, 45, 46, 47, 49, 50, 48, 46, 44, 42, 41, 40, 39,
+ 38, 38, 37, 37, 37, 36, 36, 36, 36, 35, 35, 35, 34, 34, 33, 33, 32, 32,
+ 32, 32, 44, 45, 47, 48, 50, 48, 46, 44, 42, 41, 40, 39, 38, 37, 37, 36,
+ 36, 36, 35, 35, 35, 34, 34, 34, 33, 33, 33, 32, 32, 32, 32, 32, 43, 45,
+ 46, 48, 49, 47, 46, 44, 42, 41, 40, 39, 37, 37, 36, 36, 35, 35, 35, 34,
+ 34, 34, 33, 33, 33, 32, 32, 32, 31, 31, 31, 31, 43, 44, 46, 47, 49, 47,
+ 45, 44, 42, 41, 40, 38, 37, 36, 36, 35, 35, 34, 34, 34, 33, 33, 33, 32,
+ 32, 32, 31, 31, 31, 31, 31, 31, 42, 44, 46, 47, 49, 47, 45, 44, 42, 41,
+ 39, 38, 37, 36, 35, 35, 34, 34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30,
+ 30, 30, 30, 30, 42, 43, 45, 46, 48, 46, 45, 43, 42, 40, 39, 38, 36, 36,
+ 35, 34, 34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 29,
+ 41, 42, 44, 45, 47, 45, 44, 43, 41, 40, 39, 37, 36, 35, 35, 34, 33, 33,
+ 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 29, 29, 29, 40, 41, 43, 44,
+ 46, 45, 43, 42, 41, 40, 38, 37, 36, 35, 34, 34, 33, 32, 32, 31, 31, 30,
+ 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 39, 40, 42, 44, 45, 44, 43, 42,
+ 40, 39, 38, 37, 36, 35, 34, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29,
+ 28, 28, 28, 28, 28, 28, 38, 40, 41, 43, 44, 43, 42, 41, 40, 39, 37, 36,
+ 35, 34, 34, 33, 32, 32, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28,
+ 28, 28, 37, 39, 40, 42, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 33,
+ 32, 31, 31, 30, 30, 29, 29, 29, 28, 28, 28, 27, 27, 27, 27, 27, 36, 38,
+ 39, 41, 42, 41, 40, 39, 38, 37, 36, 36, 35, 34, 33, 32, 31, 31, 30, 30,
+ 29, 29, 29, 28, 28, 28, 27, 27, 27, 27, 27, 27, 36, 37, 38, 40, 41, 40,
+ 40, 39, 38, 37, 36, 35, 34, 33, 33, 32, 31, 31, 30, 29, 29, 29, 28, 28,
+ 27, 27, 27, 27, 26, 26, 26, 26, 35, 36, 38, 39, 40, 39, 39, 38, 37, 36,
+ 35, 35, 34, 33, 32, 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 27, 26,
+ 26, 26, 26, 26, 34, 35, 37, 38, 39, 39, 38, 37, 37, 36, 35, 34, 33, 33,
+ 32, 31, 30, 30, 29, 29, 28, 28, 28, 27, 27, 27, 26, 26, 26, 26, 26, 26,
+ 33, 35, 36, 37, 38, 38, 37, 36, 36, 35, 34, 34, 33, 32, 32, 31, 30, 30,
+ 29, 29, 28, 28, 27, 27, 27, 26, 26, 26, 26, 26, 26, 26, 33, 34, 35, 36,
+ 37, 37, 36, 36, 35, 34, 34, 33, 32, 32, 31, 31, 30, 29, 29, 28, 28, 28,
+ 27, 27, 26, 26, 26, 26, 25, 25, 25, 25, 33, 34, 35, 36, 37, 37, 36, 36,
+ 35, 34, 34, 33, 32, 32, 31, 31, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26,
+ 26, 26, 25, 25, 25, 25, 33, 34, 35, 36, 37, 37, 36, 36, 35, 34, 34, 33,
+ 32, 32, 31, 31, 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 26, 26, 25, 25,
+ 25, 25, 33, 34, 35, 36, 37, 37, 36, 36, 35, 34, 34, 33, 32, 32, 31, 31,
+ 30, 29, 29, 28, 28, 28, 27, 27, 26, 26, 26, 26, 25, 25, 25, 25 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 125, 84, 78, 64, 84, 72, 68, 61, 78, 68, 56, 51, 64, 61, 51, 45,
+ /* Size 8 */
+ 117, 141, 84, 80, 76, 69, 63, 57, 141, 102, 83, 90, 88, 81, 73, 66, 84,
+ 83, 73, 75, 75, 72, 67, 62, 80, 90, 75, 68, 65, 63, 60, 57, 76, 88, 75,
+ 65, 60, 57, 54, 52, 69, 81, 72, 63, 57, 53, 50, 48, 63, 73, 67, 60, 54,
+ 50, 47, 45, 57, 66, 62, 57, 52, 48, 45, 43,
+ /* Size 16 */
+ 119, 132, 144, 115, 86, 84, 82, 79, 77, 74, 71, 67, 64, 61, 58, 58, 132,
+ 128, 124, 105, 85, 86, 87, 85, 83, 80, 77, 73, 69, 66, 63, 63, 144, 124,
+ 104, 94, 84, 88, 92, 91, 89, 86, 83, 79, 75, 71, 68, 68, 115, 105, 94,
+ 87, 79, 82, 84, 83, 83, 80, 78, 75, 72, 69, 65, 65, 86, 85, 84, 79, 74,
+ 75, 76, 76, 76, 75, 73, 71, 68, 66, 63, 63, 84, 86, 88, 82, 75, 74, 73,
+ 72, 71, 70, 69, 67, 65, 63, 61, 61, 82, 87, 92, 84, 76, 73, 69, 68, 66,
+ 65, 64, 63, 61, 60, 58, 58, 79, 85, 91, 83, 76, 72, 68, 66, 64, 62, 61,
+ 60, 58, 57, 56, 56, 77, 83, 89, 83, 76, 71, 66, 64, 61, 59, 58, 57, 55,
+ 54, 53, 53, 74, 80, 86, 80, 75, 70, 65, 62, 59, 58, 56, 55, 53, 52, 51,
+ 51, 71, 77, 83, 78, 73, 69, 64, 61, 58, 56, 54, 53, 51, 50, 49, 49, 67,
+ 73, 79, 75, 71, 67, 63, 60, 57, 55, 53, 51, 50, 49, 48, 48, 64, 69, 75,
+ 72, 68, 65, 61, 58, 55, 53, 51, 50, 48, 47, 46, 46, 61, 66, 71, 69, 66,
+ 63, 60, 57, 54, 52, 50, 49, 47, 46, 45, 45, 58, 63, 68, 65, 63, 61, 58,
+ 56, 53, 51, 49, 48, 46, 45, 44, 44, 58, 63, 68, 65, 63, 61, 58, 56, 53,
+ 51, 49, 48, 46, 45, 44, 44,
+ /* Size 32 */
+ 120, 127, 133, 140, 146, 131, 116, 101, 87, 86, 85, 84, 82, 81, 80, 79,
+ 78, 76, 75, 73, 71, 70, 68, 66, 65, 63, 62, 60, 59, 59, 59, 59, 127,
+ 129, 131, 134, 136, 123, 111, 99, 86, 86, 86, 85, 85, 84, 83, 82, 81,
+ 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, 63, 61, 61, 61, 61, 133, 131,
+ 129, 127, 126, 116, 106, 96, 86, 86, 87, 87, 88, 87, 86, 85, 84, 83, 81,
+ 79, 77, 76, 74, 72, 70, 69, 67, 65, 64, 64, 64, 64, 140, 134, 127, 121,
+ 115, 108, 101, 93, 86, 87, 88, 89, 90, 89, 89, 88, 87, 86, 84, 82, 81,
+ 79, 77, 75, 73, 71, 70, 68, 66, 66, 66, 66, 146, 136, 126, 115, 105,
+ 100, 95, 90, 85, 87, 89, 91, 93, 92, 92, 91, 90, 89, 87, 85, 84, 82, 80,
+ 78, 76, 74, 72, 70, 68, 68, 68, 68, 131, 123, 116, 108, 100, 96, 92, 87,
+ 83, 84, 86, 87, 89, 88, 88, 88, 87, 86, 84, 83, 81, 79, 78, 76, 74, 72,
+ 71, 69, 67, 67, 67, 67, 116, 111, 106, 101, 95, 92, 88, 84, 80, 81, 83,
+ 84, 85, 85, 84, 84, 84, 83, 81, 80, 79, 77, 76, 74, 72, 71, 69, 68, 66,
+ 66, 66, 66, 101, 99, 96, 93, 90, 87, 84, 81, 78, 78, 79, 80, 81, 81, 81,
+ 81, 81, 80, 79, 78, 76, 75, 74, 72, 71, 69, 68, 67, 65, 65, 65, 65, 87,
+ 86, 86, 86, 85, 83, 80, 78, 75, 75, 76, 77, 77, 77, 77, 77, 77, 76, 76,
+ 75, 74, 73, 72, 70, 69, 68, 67, 65, 64, 64, 64, 64, 86, 86, 86, 87, 87,
+ 84, 81, 78, 75, 75, 75, 75, 75, 75, 75, 75, 75, 74, 73, 73, 72, 71, 70,
+ 69, 67, 66, 65, 64, 63, 63, 63, 63, 85, 86, 87, 88, 89, 86, 83, 79, 76,
+ 75, 75, 74, 73, 73, 73, 73, 72, 72, 71, 70, 70, 69, 68, 67, 66, 65, 63,
+ 62, 61, 61, 61, 61, 84, 85, 87, 89, 91, 87, 84, 80, 77, 75, 74, 73, 72,
+ 71, 71, 70, 70, 69, 68, 68, 67, 66, 66, 65, 64, 63, 62, 61, 60, 60, 60,
+ 60, 82, 85, 88, 90, 93, 89, 85, 81, 77, 75, 73, 72, 70, 69, 68, 68, 67,
+ 67, 66, 65, 65, 64, 63, 63, 62, 61, 60, 59, 59, 59, 59, 59, 81, 84, 87,
+ 89, 92, 88, 85, 81, 77, 75, 73, 71, 69, 68, 67, 67, 66, 65, 65, 64, 63,
+ 63, 62, 61, 61, 60, 59, 58, 57, 57, 57, 57, 80, 83, 86, 89, 92, 88, 84,
+ 81, 77, 75, 73, 71, 68, 67, 66, 65, 64, 64, 63, 62, 62, 61, 60, 60, 59,
+ 58, 58, 57, 56, 56, 56, 56, 79, 82, 85, 88, 91, 88, 84, 81, 77, 75, 73,
+ 70, 68, 67, 65, 64, 63, 62, 62, 61, 60, 59, 59, 58, 58, 57, 56, 56, 55,
+ 55, 55, 55, 78, 81, 84, 87, 90, 87, 84, 81, 77, 75, 72, 70, 67, 66, 64,
+ 63, 62, 61, 60, 59, 58, 58, 57, 57, 56, 55, 55, 54, 54, 54, 54, 54, 76,
+ 80, 83, 86, 89, 86, 83, 80, 76, 74, 72, 69, 67, 65, 64, 62, 61, 60, 59,
+ 58, 57, 57, 56, 56, 55, 54, 54, 53, 53, 53, 53, 53, 75, 78, 81, 84, 87,
+ 84, 81, 79, 76, 73, 71, 68, 66, 65, 63, 62, 60, 59, 58, 57, 56, 56, 55,
+ 55, 54, 53, 53, 52, 52, 52, 52, 52, 73, 76, 79, 82, 85, 83, 80, 78, 75,
+ 73, 70, 68, 65, 64, 62, 61, 59, 58, 57, 56, 55, 55, 54, 54, 53, 52, 52,
+ 51, 51, 51, 51, 51, 71, 74, 77, 81, 84, 81, 79, 76, 74, 72, 70, 67, 65,
+ 63, 62, 60, 58, 57, 56, 55, 54, 54, 53, 52, 52, 51, 51, 50, 50, 50, 50,
+ 50, 70, 73, 76, 79, 82, 79, 77, 75, 73, 71, 69, 66, 64, 63, 61, 59, 58,
+ 57, 56, 55, 54, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 49, 68, 71, 74,
+ 77, 80, 78, 76, 74, 72, 70, 68, 66, 63, 62, 60, 59, 57, 56, 55, 54, 53,
+ 52, 52, 51, 50, 50, 49, 49, 48, 48, 48, 48, 66, 69, 72, 75, 78, 76, 74,
+ 72, 70, 69, 67, 65, 63, 61, 60, 58, 57, 56, 55, 54, 52, 52, 51, 50, 50,
+ 49, 49, 48, 48, 48, 48, 48, 65, 67, 70, 73, 76, 74, 72, 71, 69, 67, 66,
+ 64, 62, 61, 59, 58, 56, 55, 54, 53, 52, 51, 50, 50, 49, 48, 48, 47, 47,
+ 47, 47, 47, 63, 66, 69, 71, 74, 72, 71, 69, 68, 66, 65, 63, 61, 60, 58,
+ 57, 55, 54, 53, 52, 51, 51, 50, 49, 48, 48, 47, 47, 46, 46, 46, 46, 62,
+ 64, 67, 70, 72, 71, 69, 68, 67, 65, 63, 62, 60, 59, 58, 56, 55, 54, 53,
+ 52, 51, 50, 49, 49, 48, 47, 47, 46, 46, 46, 46, 46, 60, 63, 65, 68, 70,
+ 69, 68, 67, 65, 64, 62, 61, 59, 58, 57, 56, 54, 53, 52, 51, 50, 50, 49,
+ 48, 47, 47, 46, 46, 45, 45, 45, 45, 59, 61, 64, 66, 68, 67, 66, 65, 64,
+ 63, 61, 60, 59, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 48, 47, 46, 46,
+ 45, 45, 45, 45, 45, 59, 61, 64, 66, 68, 67, 66, 65, 64, 63, 61, 60, 59,
+ 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 48, 47, 46, 46, 45, 45, 45, 45,
+ 45, 59, 61, 64, 66, 68, 67, 66, 65, 64, 63, 61, 60, 59, 57, 56, 55, 54,
+ 53, 52, 51, 50, 49, 48, 48, 47, 46, 46, 45, 45, 45, 45, 45, 59, 61, 64,
+ 66, 68, 67, 66, 65, 64, 63, 61, 60, 59, 57, 56, 55, 54, 53, 52, 51, 50,
+ 49, 48, 48, 47, 46, 46, 45, 45, 45, 45, 45 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 60, 41, 31, 60, 44, 35, 29, 41, 35, 29, 26, 31, 29, 26, 24,
+ /* Size 8 */
+ 64, 80, 75, 61, 49, 41, 36, 33, 80, 72, 74, 66, 55, 46, 40, 35, 75, 74,
+ 57, 51, 46, 41, 37, 34, 61, 66, 51, 43, 39, 36, 34, 32, 49, 55, 46, 39,
+ 36, 33, 32, 30, 41, 46, 41, 36, 33, 31, 30, 29, 36, 40, 37, 34, 32, 30,
+ 29, 28, 33, 35, 34, 32, 30, 29, 28, 27,
+ /* Size 16 */
+ 64, 72, 80, 78, 75, 68, 61, 55, 49, 45, 41, 39, 36, 34, 33, 33, 72, 74,
+ 76, 75, 75, 69, 63, 57, 52, 48, 43, 41, 38, 36, 34, 34, 80, 76, 72, 73,
+ 74, 70, 66, 60, 55, 50, 46, 43, 40, 37, 35, 35, 78, 75, 73, 69, 66, 62,
+ 58, 54, 50, 47, 43, 41, 38, 36, 34, 34, 75, 75, 74, 66, 57, 54, 51, 49,
+ 46, 43, 41, 39, 37, 35, 34, 34, 68, 69, 70, 62, 54, 51, 47, 45, 43, 41,
+ 39, 37, 35, 34, 33, 33, 61, 63, 66, 58, 51, 47, 43, 41, 39, 38, 36, 35,
+ 34, 33, 32, 32, 55, 57, 60, 54, 49, 45, 41, 39, 37, 36, 35, 34, 33, 32,
+ 31, 31, 49, 52, 55, 50, 46, 43, 39, 37, 36, 34, 33, 32, 32, 31, 30, 30,
+ 45, 48, 50, 47, 43, 41, 38, 36, 34, 33, 32, 31, 31, 30, 30, 30, 41, 43,
+ 46, 43, 41, 39, 36, 35, 33, 32, 31, 31, 30, 29, 29, 29, 39, 41, 43, 41,
+ 39, 37, 35, 34, 32, 31, 31, 30, 29, 29, 28, 28, 36, 38, 40, 38, 37, 35,
+ 34, 33, 32, 31, 30, 29, 29, 28, 28, 28, 34, 36, 37, 36, 35, 34, 33, 32,
+ 31, 30, 29, 29, 28, 28, 28, 28, 33, 34, 35, 34, 34, 33, 32, 31, 30, 30,
+ 29, 28, 28, 28, 27, 27, 33, 34, 35, 34, 34, 33, 32, 31, 30, 30, 29, 28,
+ 28, 28, 27, 27,
+ /* Size 32 */
+ 64, 68, 72, 76, 80, 79, 78, 77, 75, 72, 68, 64, 61, 58, 55, 52, 49, 47,
+ 45, 43, 41, 40, 39, 37, 36, 35, 34, 33, 33, 33, 33, 33, 68, 71, 73, 76,
+ 78, 77, 77, 76, 75, 72, 69, 65, 62, 59, 56, 53, 50, 48, 46, 44, 42, 41,
+ 40, 38, 37, 36, 35, 34, 33, 33, 33, 33, 72, 73, 74, 75, 76, 76, 75, 75,
+ 75, 72, 69, 66, 63, 60, 57, 55, 52, 50, 48, 46, 43, 42, 41, 39, 38, 37,
+ 36, 35, 34, 34, 34, 34, 76, 76, 75, 75, 74, 74, 74, 74, 75, 72, 70, 67,
+ 64, 62, 59, 56, 53, 51, 49, 47, 45, 43, 42, 40, 39, 38, 37, 36, 35, 35,
+ 35, 35, 80, 78, 76, 74, 72, 72, 73, 74, 74, 72, 70, 68, 66, 63, 60, 57,
+ 55, 52, 50, 48, 46, 44, 43, 41, 40, 38, 37, 36, 35, 35, 35, 35, 79, 77,
+ 76, 74, 72, 72, 71, 71, 70, 68, 66, 64, 62, 60, 57, 55, 52, 50, 48, 47,
+ 45, 43, 42, 40, 39, 38, 37, 36, 35, 35, 35, 35, 78, 77, 75, 74, 73, 71,
+ 69, 68, 66, 64, 62, 60, 58, 56, 54, 52, 50, 49, 47, 45, 43, 42, 41, 39,
+ 38, 37, 36, 35, 34, 34, 34, 34, 77, 76, 75, 74, 74, 71, 68, 65, 62, 60,
+ 58, 56, 55, 53, 51, 50, 48, 47, 45, 44, 42, 41, 40, 39, 38, 37, 36, 35,
+ 34, 34, 34, 34, 75, 75, 75, 75, 74, 70, 66, 62, 57, 56, 54, 53, 51, 50,
+ 49, 47, 46, 45, 43, 42, 41, 40, 39, 38, 37, 36, 35, 35, 34, 34, 34, 34,
+ 72, 72, 72, 72, 72, 68, 64, 60, 56, 54, 53, 51, 49, 48, 47, 46, 44, 43,
+ 42, 41, 40, 39, 38, 37, 36, 35, 35, 34, 33, 33, 33, 33, 68, 69, 69, 70,
+ 70, 66, 62, 58, 54, 53, 51, 49, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38,
+ 37, 36, 35, 35, 34, 33, 33, 33, 33, 33, 64, 65, 66, 67, 68, 64, 60, 56,
+ 53, 51, 49, 47, 45, 44, 43, 42, 41, 40, 39, 38, 38, 37, 36, 35, 35, 34,
+ 33, 33, 32, 32, 32, 32, 61, 62, 63, 64, 66, 62, 58, 55, 51, 49, 47, 45,
+ 43, 42, 41, 40, 39, 39, 38, 37, 36, 36, 35, 35, 34, 33, 33, 32, 32, 32,
+ 32, 32, 58, 59, 60, 62, 63, 60, 56, 53, 50, 48, 46, 44, 42, 41, 40, 39,
+ 38, 38, 37, 36, 36, 35, 34, 34, 33, 33, 32, 32, 31, 31, 31, 31, 55, 56,
+ 57, 59, 60, 57, 54, 51, 49, 47, 45, 43, 41, 40, 39, 38, 37, 37, 36, 35,
+ 35, 34, 34, 33, 33, 32, 32, 31, 31, 31, 31, 31, 52, 53, 55, 56, 57, 55,
+ 52, 50, 47, 46, 44, 42, 40, 39, 38, 37, 37, 36, 35, 35, 34, 34, 33, 33,
+ 32, 32, 31, 31, 31, 31, 31, 31, 49, 50, 52, 53, 55, 52, 50, 48, 46, 44,
+ 43, 41, 39, 38, 37, 37, 36, 35, 34, 34, 33, 33, 32, 32, 32, 31, 31, 31,
+ 30, 30, 30, 30, 47, 48, 50, 51, 52, 50, 49, 47, 45, 43, 42, 40, 39, 38,
+ 37, 36, 35, 34, 34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30,
+ 45, 46, 48, 49, 50, 48, 47, 45, 43, 42, 41, 39, 38, 37, 36, 35, 34, 34,
+ 33, 33, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 43, 44, 46, 47,
+ 48, 47, 45, 44, 42, 41, 40, 38, 37, 36, 35, 35, 34, 33, 33, 32, 32, 31,
+ 31, 31, 30, 30, 30, 29, 29, 29, 29, 29, 41, 42, 43, 45, 46, 45, 43, 42,
+ 41, 40, 39, 38, 36, 36, 35, 34, 33, 33, 32, 32, 31, 31, 31, 30, 30, 30,
+ 29, 29, 29, 29, 29, 29, 40, 41, 42, 43, 44, 43, 42, 41, 40, 39, 38, 37,
+ 36, 35, 34, 34, 33, 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 29, 29,
+ 29, 29, 39, 40, 41, 42, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 34, 33,
+ 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 37, 38,
+ 39, 40, 41, 40, 39, 39, 38, 37, 36, 35, 35, 34, 33, 33, 32, 32, 31, 31,
+ 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 36, 37, 38, 39, 40, 39,
+ 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29,
+ 29, 29, 28, 28, 28, 28, 28, 28, 35, 36, 37, 38, 38, 38, 37, 37, 36, 35,
+ 35, 34, 33, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28,
+ 28, 28, 28, 28, 34, 35, 36, 37, 37, 37, 36, 36, 35, 35, 34, 33, 33, 32,
+ 32, 31, 31, 31, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 28, 28,
+ 33, 34, 35, 36, 36, 36, 35, 35, 35, 34, 33, 33, 32, 32, 31, 31, 31, 30,
+ 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 33, 33, 34, 35,
+ 35, 35, 34, 34, 34, 33, 33, 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29,
+ 28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 33, 33, 34, 35, 35, 35, 34, 34,
+ 34, 33, 33, 32, 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28,
+ 28, 27, 27, 27, 27, 27, 33, 33, 34, 35, 35, 35, 34, 34, 34, 33, 33, 32,
+ 32, 31, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27,
+ 27, 27, 33, 33, 34, 35, 35, 35, 34, 34, 34, 33, 33, 32, 32, 31, 31, 31,
+ 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 129, 119, 81, 60, 119, 86, 68, 56, 81, 68, 55, 49, 60, 56, 49, 45,
+ /* Size 8 */
+ 111, 141, 132, 105, 84, 70, 60, 54, 141, 125, 130, 114, 94, 78, 67, 59,
+ 132, 130, 99, 88, 78, 69, 62, 56, 105, 114, 88, 74, 66, 61, 56, 53, 84,
+ 94, 78, 66, 59, 55, 52, 50, 70, 78, 69, 61, 55, 52, 49, 47, 60, 67, 62,
+ 56, 52, 49, 47, 46, 54, 59, 56, 53, 50, 47, 46, 44,
+ /* Size 16 */
+ 114, 129, 144, 140, 135, 122, 108, 97, 86, 79, 71, 67, 62, 59, 55, 55,
+ 129, 133, 137, 135, 134, 123, 113, 102, 91, 84, 76, 70, 65, 62, 58, 58,
+ 144, 137, 129, 131, 134, 125, 117, 107, 96, 88, 80, 74, 68, 64, 60, 60,
+ 140, 135, 131, 124, 118, 111, 104, 96, 88, 82, 76, 71, 66, 62, 59, 59,
+ 135, 134, 134, 118, 102, 96, 90, 85, 80, 76, 71, 67, 63, 60, 58, 58,
+ 122, 123, 125, 111, 96, 89, 83, 79, 74, 71, 67, 64, 61, 58, 56, 56, 108,
+ 113, 117, 104, 90, 83, 76, 72, 68, 65, 63, 60, 58, 56, 54, 54, 97, 102,
+ 107, 96, 85, 79, 72, 68, 65, 62, 60, 58, 56, 54, 53, 53, 86, 91, 96, 88,
+ 80, 74, 68, 65, 61, 59, 57, 55, 54, 52, 51, 51, 79, 84, 88, 82, 76, 71,
+ 65, 62, 59, 57, 55, 53, 52, 51, 50, 50, 71, 76, 80, 76, 71, 67, 63, 60,
+ 57, 55, 53, 52, 50, 50, 49, 49, 67, 70, 74, 71, 67, 64, 60, 58, 55, 53,
+ 52, 51, 49, 49, 48, 48, 62, 65, 68, 66, 63, 61, 58, 56, 54, 52, 50, 49,
+ 48, 48, 47, 47, 59, 62, 64, 62, 60, 58, 56, 54, 52, 51, 50, 49, 48, 47,
+ 46, 46, 55, 58, 60, 59, 58, 56, 54, 53, 51, 50, 49, 48, 47, 46, 46, 46,
+ 55, 58, 60, 59, 58, 56, 54, 53, 51, 50, 49, 48, 47, 46, 46, 46,
+ /* Size 32 */
+ 116, 123, 131, 139, 146, 144, 142, 139, 137, 130, 123, 116, 109, 104,
+ 98, 93, 87, 84, 80, 76, 72, 70, 68, 65, 63, 61, 59, 58, 56, 56, 56, 56,
+ 123, 128, 133, 138, 142, 141, 140, 138, 137, 130, 124, 118, 112, 106,
+ 101, 95, 90, 86, 82, 78, 75, 72, 69, 67, 64, 63, 61, 59, 57, 57, 57, 57,
+ 131, 133, 135, 137, 138, 138, 137, 137, 136, 131, 125, 120, 114, 109,
+ 103, 98, 93, 89, 85, 81, 77, 74, 71, 69, 66, 64, 62, 61, 59, 59, 59, 59,
+ 139, 138, 137, 136, 134, 135, 135, 135, 136, 131, 126, 121, 116, 111,
+ 106, 100, 95, 91, 87, 83, 79, 76, 73, 71, 68, 66, 64, 62, 60, 60, 60,
+ 60, 146, 142, 138, 134, 130, 132, 133, 134, 135, 131, 127, 123, 119,
+ 113, 108, 103, 98, 94, 89, 85, 81, 78, 75, 72, 69, 67, 65, 63, 61, 61,
+ 61, 61, 144, 141, 138, 135, 132, 131, 129, 128, 127, 123, 120, 116, 112,
+ 107, 103, 98, 94, 90, 86, 83, 79, 76, 73, 71, 68, 66, 64, 62, 61, 61,
+ 61, 61, 142, 140, 137, 135, 133, 129, 126, 123, 119, 116, 112, 109, 105,
+ 101, 97, 93, 90, 86, 83, 80, 77, 74, 72, 69, 67, 65, 63, 62, 60, 60, 60,
+ 60, 139, 138, 137, 135, 134, 128, 123, 117, 111, 108, 105, 101, 98, 95,
+ 92, 89, 86, 83, 80, 77, 74, 72, 70, 68, 66, 64, 62, 61, 59, 59, 59, 59,
+ 137, 137, 136, 136, 135, 127, 119, 111, 103, 100, 97, 94, 91, 89, 86,
+ 84, 81, 79, 77, 74, 72, 70, 68, 66, 64, 63, 61, 60, 58, 58, 58, 58, 130,
+ 130, 131, 131, 131, 123, 116, 108, 100, 97, 94, 91, 88, 85, 83, 81, 78,
+ 76, 74, 72, 70, 68, 66, 65, 63, 62, 60, 59, 57, 57, 57, 57, 123, 124,
+ 125, 126, 127, 120, 112, 105, 97, 94, 91, 87, 84, 82, 80, 77, 75, 73,
+ 71, 70, 68, 66, 65, 63, 61, 60, 59, 58, 57, 57, 57, 57, 116, 118, 120,
+ 121, 123, 116, 109, 101, 94, 91, 87, 84, 80, 78, 76, 74, 72, 71, 69, 67,
+ 66, 64, 63, 61, 60, 59, 58, 57, 56, 56, 56, 56, 109, 112, 114, 116, 119,
+ 112, 105, 98, 91, 88, 84, 80, 77, 75, 73, 71, 69, 68, 66, 65, 63, 62,
+ 61, 60, 59, 58, 57, 56, 55, 55, 55, 55, 104, 106, 109, 111, 113, 107,
+ 101, 95, 89, 85, 82, 78, 75, 73, 71, 69, 67, 66, 65, 63, 62, 61, 60, 59,
+ 58, 57, 56, 55, 54, 54, 54, 54, 98, 101, 103, 106, 108, 103, 97, 92, 86,
+ 83, 80, 76, 73, 71, 69, 67, 65, 64, 63, 62, 60, 59, 58, 57, 56, 56, 55,
+ 54, 53, 53, 53, 53, 93, 95, 98, 100, 103, 98, 93, 89, 84, 81, 77, 74,
+ 71, 69, 67, 65, 64, 62, 61, 60, 59, 58, 57, 56, 55, 55, 54, 53, 52, 52,
+ 52, 52, 87, 90, 93, 95, 98, 94, 90, 86, 81, 78, 75, 72, 69, 67, 65, 64,
+ 62, 61, 60, 59, 57, 57, 56, 55, 54, 54, 53, 52, 52, 52, 52, 52, 84, 86,
+ 89, 91, 94, 90, 86, 83, 79, 76, 73, 71, 68, 66, 64, 62, 61, 60, 59, 58,
+ 56, 56, 55, 54, 53, 53, 52, 52, 51, 51, 51, 51, 80, 82, 85, 87, 89, 86,
+ 83, 80, 77, 74, 71, 69, 66, 65, 63, 61, 60, 59, 58, 57, 56, 55, 54, 53,
+ 53, 52, 52, 51, 50, 50, 50, 50, 76, 78, 81, 83, 85, 83, 80, 77, 74, 72,
+ 70, 67, 65, 63, 62, 60, 59, 58, 57, 56, 55, 54, 53, 53, 52, 51, 51, 50,
+ 50, 50, 50, 50, 72, 75, 77, 79, 81, 79, 77, 74, 72, 70, 68, 66, 63, 62,
+ 60, 59, 57, 56, 56, 55, 54, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 49,
+ 70, 72, 74, 76, 78, 76, 74, 72, 70, 68, 66, 64, 62, 61, 59, 58, 57, 56,
+ 55, 54, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 49, 49, 68, 69, 71, 73,
+ 75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 57, 56, 55, 54, 53, 52, 52,
+ 51, 51, 50, 50, 49, 49, 48, 48, 48, 48, 65, 67, 69, 71, 72, 71, 69, 68,
+ 66, 65, 63, 61, 60, 59, 57, 56, 55, 54, 53, 53, 52, 51, 51, 50, 50, 49,
+ 49, 48, 48, 48, 48, 48, 63, 64, 66, 68, 69, 68, 67, 66, 64, 63, 61, 60,
+ 59, 58, 56, 55, 54, 53, 53, 52, 51, 51, 50, 50, 49, 49, 48, 48, 47, 47,
+ 47, 47, 61, 63, 64, 66, 67, 66, 65, 64, 63, 62, 60, 59, 58, 57, 56, 55,
+ 54, 53, 52, 51, 51, 50, 50, 49, 49, 48, 48, 48, 47, 47, 47, 47, 59, 61,
+ 62, 64, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 52, 51,
+ 50, 50, 49, 49, 48, 48, 48, 47, 47, 47, 47, 47, 58, 59, 61, 62, 63, 62,
+ 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 52, 51, 50, 50, 49, 49, 48,
+ 48, 48, 47, 47, 47, 47, 47, 47, 56, 57, 59, 60, 61, 61, 60, 59, 58, 57,
+ 57, 56, 55, 54, 53, 52, 52, 51, 50, 50, 49, 49, 48, 48, 47, 47, 47, 47,
+ 46, 46, 46, 46, 56, 57, 59, 60, 61, 61, 60, 59, 58, 57, 57, 56, 55, 54,
+ 53, 52, 52, 51, 50, 50, 49, 49, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46,
+ 56, 57, 59, 60, 61, 61, 60, 59, 58, 57, 57, 56, 55, 54, 53, 52, 52, 51,
+ 50, 50, 49, 49, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46, 56, 57, 59, 60,
+ 61, 61, 60, 59, 58, 57, 57, 56, 55, 54, 53, 52, 52, 51, 50, 50, 49, 49,
+ 48, 48, 47, 47, 47, 47, 46, 46, 46, 46 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 45, 42, 36, 45, 40, 38, 35, 42, 38, 32, 30, 36, 35, 30, 27,
+ /* Size 8 */
+ 64, 76, 48, 46, 44, 41, 37, 35, 76, 57, 47, 51, 50, 46, 43, 39, 48, 47,
+ 42, 43, 43, 42, 40, 37, 46, 51, 43, 40, 39, 38, 36, 35, 44, 50, 43, 39,
+ 36, 34, 33, 32, 41, 46, 42, 38, 34, 33, 31, 30, 37, 43, 40, 36, 33, 31,
+ 30, 29, 35, 39, 37, 35, 32, 30, 29, 28,
+ /* Size 16 */
+ 64, 70, 76, 62, 48, 47, 46, 45, 44, 42, 41, 39, 37, 36, 35, 35, 70, 68,
+ 66, 57, 48, 48, 48, 48, 47, 45, 44, 42, 40, 39, 37, 37, 76, 66, 57, 52,
+ 47, 49, 51, 50, 50, 48, 46, 45, 43, 41, 39, 39, 62, 57, 52, 48, 45, 46,
+ 47, 47, 47, 45, 44, 43, 41, 40, 38, 38, 48, 48, 47, 45, 42, 43, 43, 43,
+ 43, 43, 42, 41, 40, 38, 37, 37, 47, 48, 49, 46, 43, 42, 42, 41, 41, 40,
+ 40, 39, 38, 37, 36, 36, 46, 48, 51, 47, 43, 42, 40, 39, 39, 38, 38, 37,
+ 36, 35, 35, 35, 45, 48, 50, 47, 43, 41, 39, 38, 37, 37, 36, 35, 35, 34,
+ 33, 33, 44, 47, 50, 47, 43, 41, 39, 37, 36, 35, 34, 34, 33, 33, 32, 32,
+ 42, 45, 48, 45, 43, 40, 38, 37, 35, 34, 34, 33, 32, 32, 31, 31, 41, 44,
+ 46, 44, 42, 40, 38, 36, 34, 34, 33, 32, 31, 31, 30, 30, 39, 42, 45, 43,
+ 41, 39, 37, 35, 34, 33, 32, 31, 31, 30, 30, 30, 37, 40, 43, 41, 40, 38,
+ 36, 35, 33, 32, 31, 31, 30, 29, 29, 29, 36, 39, 41, 40, 38, 37, 35, 34,
+ 33, 32, 31, 30, 29, 29, 28, 28, 35, 37, 39, 38, 37, 36, 35, 33, 32, 31,
+ 30, 30, 29, 28, 28, 28, 35, 37, 39, 38, 37, 36, 35, 33, 32, 31, 30, 30,
+ 29, 28, 28, 28,
+ /* Size 32 */
+ 64, 67, 70, 73, 76, 69, 62, 55, 48, 47, 47, 46, 46, 45, 45, 44, 44, 43,
+ 42, 41, 41, 40, 39, 38, 37, 37, 36, 35, 35, 35, 35, 35, 67, 68, 69, 70,
+ 71, 65, 60, 54, 48, 48, 47, 47, 47, 47, 46, 46, 45, 44, 44, 43, 42, 41,
+ 40, 40, 39, 38, 37, 37, 36, 36, 36, 36, 70, 69, 68, 67, 66, 62, 57, 52,
+ 48, 48, 48, 48, 48, 48, 48, 47, 47, 46, 45, 44, 44, 43, 42, 41, 40, 39,
+ 39, 38, 37, 37, 37, 37, 73, 70, 67, 65, 62, 58, 55, 51, 47, 48, 49, 49,
+ 50, 49, 49, 49, 48, 47, 47, 46, 45, 44, 43, 42, 41, 41, 40, 39, 38, 38,
+ 38, 38, 76, 71, 66, 62, 57, 54, 52, 50, 47, 48, 49, 50, 51, 51, 50, 50,
+ 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 41, 40, 39, 39, 39, 39, 69, 65,
+ 62, 58, 54, 52, 50, 48, 46, 47, 47, 48, 49, 49, 49, 48, 48, 47, 47, 46,
+ 45, 44, 44, 43, 42, 41, 40, 39, 39, 39, 39, 39, 62, 60, 57, 55, 52, 50,
+ 48, 47, 45, 45, 46, 46, 47, 47, 47, 47, 47, 46, 45, 45, 44, 43, 43, 42,
+ 41, 40, 40, 39, 38, 38, 38, 38, 55, 54, 52, 51, 50, 48, 47, 45, 44, 44,
+ 44, 45, 45, 45, 45, 45, 45, 45, 44, 44, 43, 42, 42, 41, 40, 40, 39, 38,
+ 38, 38, 38, 38, 48, 48, 48, 47, 47, 46, 45, 44, 42, 43, 43, 43, 43, 43,
+ 43, 43, 43, 43, 43, 42, 42, 41, 41, 40, 40, 39, 38, 38, 37, 37, 37, 37,
+ 47, 48, 48, 48, 48, 47, 45, 44, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42,
+ 42, 41, 41, 40, 40, 39, 39, 38, 38, 37, 36, 36, 36, 36, 47, 47, 48, 49,
+ 49, 47, 46, 44, 43, 43, 42, 42, 42, 41, 41, 41, 41, 41, 40, 40, 40, 39,
+ 39, 38, 38, 37, 37, 36, 36, 36, 36, 36, 46, 47, 48, 49, 50, 48, 46, 45,
+ 43, 42, 42, 41, 41, 40, 40, 40, 40, 40, 39, 39, 39, 38, 38, 37, 37, 37,
+ 36, 36, 35, 35, 35, 35, 46, 47, 48, 50, 51, 49, 47, 45, 43, 42, 42, 41,
+ 40, 40, 39, 39, 39, 38, 38, 38, 38, 37, 37, 37, 36, 36, 35, 35, 35, 35,
+ 35, 35, 45, 47, 48, 49, 51, 49, 47, 45, 43, 42, 41, 40, 40, 39, 39, 38,
+ 38, 38, 37, 37, 37, 36, 36, 36, 35, 35, 35, 34, 34, 34, 34, 34, 45, 46,
+ 48, 49, 50, 49, 47, 45, 43, 42, 41, 40, 39, 39, 38, 38, 37, 37, 37, 36,
+ 36, 36, 35, 35, 35, 34, 34, 34, 33, 33, 33, 33, 44, 46, 47, 49, 50, 48,
+ 47, 45, 43, 42, 41, 40, 39, 38, 38, 37, 37, 36, 36, 36, 35, 35, 35, 34,
+ 34, 34, 33, 33, 33, 33, 33, 33, 44, 45, 47, 48, 50, 48, 47, 45, 43, 42,
+ 41, 40, 39, 38, 37, 37, 36, 36, 35, 35, 34, 34, 34, 34, 33, 33, 33, 32,
+ 32, 32, 32, 32, 43, 44, 46, 47, 49, 47, 46, 45, 43, 42, 41, 40, 38, 38,
+ 37, 36, 36, 35, 35, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32,
+ 42, 44, 45, 47, 48, 47, 45, 44, 43, 42, 40, 39, 38, 37, 37, 36, 35, 35,
+ 34, 34, 34, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 41, 43, 44, 46,
+ 47, 46, 45, 44, 42, 41, 40, 39, 38, 37, 36, 36, 35, 34, 34, 33, 33, 33,
+ 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 41, 42, 44, 45, 46, 45, 44, 43,
+ 42, 41, 40, 39, 38, 37, 36, 35, 34, 34, 34, 33, 33, 32, 32, 32, 31, 31,
+ 31, 31, 30, 30, 30, 30, 40, 41, 43, 44, 46, 44, 43, 42, 41, 40, 39, 38,
+ 37, 36, 36, 35, 34, 34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30,
+ 30, 30, 39, 40, 42, 43, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 35,
+ 34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 38, 40,
+ 41, 42, 44, 43, 42, 41, 40, 39, 38, 37, 37, 36, 35, 34, 34, 33, 33, 32,
+ 32, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 37, 39, 40, 41, 43, 42,
+ 41, 40, 40, 39, 38, 37, 36, 35, 35, 34, 33, 33, 32, 32, 31, 31, 31, 30,
+ 30, 30, 29, 29, 29, 29, 29, 29, 37, 38, 39, 41, 42, 41, 40, 40, 39, 38,
+ 37, 37, 36, 35, 34, 34, 33, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29,
+ 29, 29, 29, 29, 36, 37, 39, 40, 41, 40, 40, 39, 38, 38, 37, 36, 35, 35,
+ 34, 33, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28,
+ 35, 37, 38, 39, 40, 39, 39, 38, 38, 37, 36, 36, 35, 34, 34, 33, 32, 32,
+ 32, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 35, 36, 37, 38,
+ 39, 39, 38, 38, 37, 36, 36, 35, 35, 34, 33, 33, 32, 32, 31, 31, 30, 30,
+ 30, 29, 29, 29, 28, 28, 28, 28, 28, 28, 35, 36, 37, 38, 39, 39, 38, 38,
+ 37, 36, 36, 35, 35, 34, 33, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29,
+ 28, 28, 28, 28, 28, 28, 35, 36, 37, 38, 39, 39, 38, 38, 37, 36, 36, 35,
+ 35, 34, 33, 33, 32, 32, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28,
+ 28, 28, 35, 36, 37, 38, 39, 39, 38, 38, 37, 36, 36, 35, 35, 34, 33, 33,
+ 32, 32, 31, 31, 30, 30, 30, 29, 29, 29, 28, 28, 28, 28, 28, 28 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 117, 81, 76, 64, 81, 71, 67, 61, 76, 67, 57, 52, 64, 61, 52, 47,
+ /* Size 8 */
+ 110, 131, 81, 77, 74, 68, 62, 57, 131, 97, 80, 86, 84, 78, 72, 65, 81,
+ 80, 71, 73, 73, 70, 66, 62, 77, 86, 73, 67, 64, 63, 60, 57, 74, 84, 73,
+ 64, 60, 57, 55, 53, 68, 78, 70, 63, 57, 54, 51, 50, 62, 72, 66, 60, 55,
+ 51, 49, 47, 57, 65, 62, 57, 53, 50, 47, 45,
+ /* Size 16 */
+ 112, 123, 134, 108, 83, 81, 79, 77, 75, 72, 69, 66, 64, 61, 59, 59, 123,
+ 120, 116, 99, 82, 83, 83, 82, 81, 78, 75, 71, 68, 65, 63, 63, 134, 116,
+ 99, 90, 81, 85, 88, 87, 86, 83, 80, 77, 73, 70, 67, 67, 108, 99, 90, 84,
+ 77, 79, 81, 81, 80, 78, 76, 73, 70, 68, 65, 65, 83, 82, 81, 77, 72, 73,
+ 74, 74, 74, 73, 72, 70, 67, 65, 63, 63, 81, 83, 85, 79, 73, 72, 71, 71,
+ 70, 69, 68, 66, 64, 62, 61, 61, 79, 83, 88, 81, 74, 71, 68, 67, 66, 65,
+ 64, 62, 61, 60, 58, 58, 77, 82, 87, 81, 74, 71, 67, 65, 63, 62, 61, 60,
+ 59, 57, 56, 56, 75, 81, 86, 80, 74, 70, 66, 63, 61, 59, 58, 57, 56, 55,
+ 54, 54, 72, 78, 83, 78, 73, 69, 65, 62, 59, 58, 56, 55, 54, 53, 52, 52,
+ 69, 75, 80, 76, 72, 68, 64, 61, 58, 56, 55, 54, 52, 51, 51, 51, 66, 71,
+ 77, 73, 70, 66, 62, 60, 57, 55, 54, 52, 51, 50, 49, 49, 64, 68, 73, 70,
+ 67, 64, 61, 59, 56, 54, 52, 51, 50, 49, 48, 48, 61, 65, 70, 68, 65, 62,
+ 60, 57, 55, 53, 51, 50, 49, 48, 47, 47, 59, 63, 67, 65, 63, 61, 58, 56,
+ 54, 52, 51, 49, 48, 47, 46, 46, 59, 63, 67, 65, 63, 61, 58, 56, 54, 52,
+ 51, 49, 48, 47, 46, 46,
+ /* Size 32 */
+ 113, 119, 124, 130, 135, 122, 109, 96, 83, 82, 82, 81, 80, 79, 78, 77,
+ 76, 74, 73, 71, 70, 69, 67, 66, 64, 63, 62, 60, 59, 59, 59, 59, 119,
+ 121, 123, 124, 126, 116, 105, 94, 83, 83, 83, 82, 82, 81, 80, 79, 79,
+ 77, 76, 74, 73, 71, 70, 68, 67, 65, 64, 63, 61, 61, 61, 61, 124, 123,
+ 121, 119, 118, 109, 100, 91, 83, 83, 84, 84, 84, 84, 83, 82, 81, 80, 78,
+ 77, 75, 74, 72, 71, 69, 68, 66, 65, 63, 63, 63, 63, 130, 124, 119, 114,
+ 109, 102, 96, 89, 83, 84, 84, 85, 86, 86, 85, 85, 84, 83, 81, 80, 78,
+ 76, 75, 73, 71, 70, 68, 67, 65, 65, 65, 65, 135, 126, 118, 109, 100, 95,
+ 91, 87, 82, 84, 85, 87, 89, 88, 88, 87, 87, 85, 84, 82, 81, 79, 77, 76,
+ 74, 72, 71, 69, 67, 67, 67, 67, 122, 116, 109, 102, 95, 92, 88, 84, 80,
+ 81, 83, 84, 85, 85, 85, 84, 84, 83, 81, 80, 79, 77, 76, 74, 72, 71, 69,
+ 68, 66, 66, 66, 66, 109, 105, 100, 96, 91, 88, 84, 81, 78, 79, 80, 81,
+ 82, 82, 81, 81, 81, 80, 79, 78, 77, 75, 74, 72, 71, 70, 68, 67, 65, 65,
+ 65, 65, 96, 94, 91, 89, 87, 84, 81, 78, 75, 76, 77, 78, 78, 78, 78, 78,
+ 78, 77, 76, 75, 74, 73, 72, 71, 70, 68, 67, 66, 65, 65, 65, 65, 83, 83,
+ 83, 83, 82, 80, 78, 75, 73, 74, 74, 75, 75, 75, 75, 75, 75, 74, 74, 73,
+ 72, 71, 70, 69, 68, 67, 66, 65, 64, 64, 64, 64, 82, 83, 83, 84, 84, 81,
+ 79, 76, 74, 74, 73, 73, 73, 73, 73, 73, 73, 72, 72, 71, 70, 69, 68, 68,
+ 67, 66, 64, 63, 62, 62, 62, 62, 82, 83, 84, 84, 85, 83, 80, 77, 74, 73,
+ 73, 72, 72, 72, 71, 71, 71, 70, 70, 69, 68, 68, 67, 66, 65, 64, 63, 62,
+ 61, 61, 61, 61, 81, 82, 84, 85, 87, 84, 81, 78, 75, 73, 72, 71, 70, 70,
+ 69, 69, 69, 68, 67, 67, 66, 66, 65, 64, 63, 63, 62, 61, 60, 60, 60, 60,
+ 80, 82, 84, 86, 89, 85, 82, 78, 75, 73, 72, 70, 69, 68, 67, 67, 66, 66,
+ 65, 65, 64, 64, 63, 62, 62, 61, 60, 60, 59, 59, 59, 59, 79, 81, 84, 86,
+ 88, 85, 82, 78, 75, 73, 72, 70, 68, 67, 67, 66, 65, 65, 64, 63, 63, 62,
+ 62, 61, 60, 60, 59, 58, 58, 58, 58, 58, 78, 80, 83, 85, 88, 85, 81, 78,
+ 75, 73, 71, 69, 67, 67, 66, 65, 64, 63, 63, 62, 62, 61, 60, 60, 59, 59,
+ 58, 57, 57, 57, 57, 57, 77, 79, 82, 85, 87, 84, 81, 78, 75, 73, 71, 69,
+ 67, 66, 65, 64, 63, 62, 61, 61, 60, 60, 59, 58, 58, 57, 57, 56, 56, 56,
+ 56, 56, 76, 79, 81, 84, 87, 84, 81, 78, 75, 73, 71, 69, 66, 65, 64, 63,
+ 61, 61, 60, 59, 59, 58, 58, 57, 57, 56, 56, 55, 54, 54, 54, 54, 74, 77,
+ 80, 83, 85, 83, 80, 77, 74, 72, 70, 68, 66, 65, 63, 62, 61, 60, 59, 59,
+ 58, 57, 57, 56, 56, 55, 55, 54, 54, 54, 54, 54, 73, 76, 78, 81, 84, 81,
+ 79, 76, 74, 72, 70, 67, 65, 64, 63, 61, 60, 59, 59, 58, 57, 56, 56, 55,
+ 55, 54, 54, 53, 53, 53, 53, 53, 71, 74, 77, 80, 82, 80, 78, 75, 73, 71,
+ 69, 67, 65, 63, 62, 61, 59, 59, 58, 57, 56, 56, 55, 54, 54, 53, 53, 52,
+ 52, 52, 52, 52, 70, 73, 75, 78, 81, 79, 77, 74, 72, 70, 68, 66, 64, 63,
+ 62, 60, 59, 58, 57, 56, 55, 55, 54, 53, 53, 52, 52, 51, 51, 51, 51, 51,
+ 69, 71, 74, 76, 79, 77, 75, 73, 71, 69, 68, 66, 64, 62, 61, 60, 58, 57,
+ 56, 56, 55, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50, 50, 67, 70, 72, 75,
+ 77, 76, 74, 72, 70, 68, 67, 65, 63, 62, 60, 59, 58, 57, 56, 55, 54, 53,
+ 53, 52, 52, 51, 51, 50, 50, 50, 50, 50, 66, 68, 71, 73, 76, 74, 72, 71,
+ 69, 68, 66, 64, 62, 61, 60, 58, 57, 56, 55, 54, 53, 53, 52, 52, 51, 51,
+ 50, 50, 49, 49, 49, 49, 64, 67, 69, 71, 74, 72, 71, 70, 68, 67, 65, 63,
+ 62, 60, 59, 58, 57, 56, 55, 54, 53, 52, 52, 51, 50, 50, 49, 49, 49, 49,
+ 49, 49, 63, 65, 68, 70, 72, 71, 70, 68, 67, 66, 64, 63, 61, 60, 59, 57,
+ 56, 55, 54, 53, 52, 52, 51, 51, 50, 49, 49, 49, 48, 48, 48, 48, 62, 64,
+ 66, 68, 71, 69, 68, 67, 66, 64, 63, 62, 60, 59, 58, 57, 56, 55, 54, 53,
+ 52, 51, 51, 50, 49, 49, 49, 48, 48, 48, 48, 48, 60, 63, 65, 67, 69, 68,
+ 67, 66, 65, 63, 62, 61, 60, 58, 57, 56, 55, 54, 53, 52, 51, 51, 50, 50,
+ 49, 49, 48, 48, 47, 47, 47, 47, 59, 61, 63, 65, 67, 66, 65, 65, 64, 62,
+ 61, 60, 59, 58, 57, 56, 54, 54, 53, 52, 51, 50, 50, 49, 49, 48, 48, 47,
+ 47, 47, 47, 47, 59, 61, 63, 65, 67, 66, 65, 65, 64, 62, 61, 60, 59, 58,
+ 57, 56, 54, 54, 53, 52, 51, 50, 50, 49, 49, 48, 48, 47, 47, 47, 47, 47,
+ 59, 61, 63, 65, 67, 66, 65, 65, 64, 62, 61, 60, 59, 58, 57, 56, 54, 54,
+ 53, 52, 51, 50, 50, 49, 49, 48, 48, 47, 47, 47, 47, 47, 59, 61, 63, 65,
+ 67, 66, 65, 65, 64, 62, 61, 60, 59, 58, 57, 56, 54, 54, 53, 52, 51, 50,
+ 50, 49, 49, 48, 48, 47, 47, 47, 47, 47 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 60, 43, 33, 60, 45, 37, 32, 43, 37, 31, 29, 33, 32, 29, 27,
+ /* Size 8 */
+ 64, 79, 74, 61, 50, 43, 38, 35, 79, 71, 73, 66, 55, 47, 42, 38, 74, 73,
+ 58, 52, 48, 43, 39, 36, 61, 66, 52, 45, 42, 39, 37, 35, 50, 55, 48, 42,
+ 38, 36, 34, 33, 43, 47, 43, 39, 36, 34, 33, 32, 38, 42, 39, 37, 34, 33,
+ 32, 31, 35, 38, 36, 35, 33, 32, 31, 31,
+ /* Size 16 */
+ 64, 71, 79, 77, 74, 68, 61, 56, 50, 47, 43, 41, 38, 37, 35, 35, 71, 73,
+ 75, 74, 74, 69, 63, 58, 53, 49, 45, 43, 40, 38, 37, 37, 79, 75, 71, 72,
+ 73, 70, 66, 60, 55, 51, 47, 45, 42, 40, 38, 38, 77, 74, 72, 69, 66, 62,
+ 59, 55, 51, 48, 45, 43, 40, 39, 37, 37, 74, 74, 73, 66, 58, 55, 52, 50,
+ 48, 45, 43, 41, 39, 38, 36, 36, 68, 69, 70, 62, 55, 52, 49, 47, 45, 43,
+ 41, 39, 38, 37, 36, 36, 61, 63, 66, 59, 52, 49, 45, 43, 42, 40, 39, 38,
+ 37, 36, 35, 35, 56, 58, 60, 55, 50, 47, 43, 42, 40, 39, 37, 36, 35, 35,
+ 34, 34, 50, 53, 55, 51, 48, 45, 42, 40, 38, 37, 36, 35, 34, 34, 33, 33,
+ 47, 49, 51, 48, 45, 43, 40, 39, 37, 36, 35, 34, 34, 33, 33, 33, 43, 45,
+ 47, 45, 43, 41, 39, 37, 36, 35, 34, 33, 33, 32, 32, 32, 41, 43, 45, 43,
+ 41, 39, 38, 36, 35, 34, 33, 33, 32, 32, 32, 32, 38, 40, 42, 40, 39, 38,
+ 37, 35, 34, 34, 33, 32, 32, 31, 31, 31, 37, 38, 40, 39, 38, 37, 36, 35,
+ 34, 33, 32, 32, 31, 31, 31, 31, 35, 37, 38, 37, 36, 36, 35, 34, 33, 33,
+ 32, 32, 31, 31, 31, 31, 35, 37, 38, 37, 36, 36, 35, 34, 33, 33, 32, 32,
+ 31, 31, 31, 31,
+ /* Size 32 */
+ 64, 68, 71, 75, 79, 78, 77, 75, 74, 71, 68, 64, 61, 58, 56, 53, 50, 49,
+ 47, 45, 43, 42, 41, 40, 38, 38, 37, 36, 35, 35, 35, 35, 68, 70, 72, 75,
+ 77, 76, 76, 75, 74, 71, 68, 65, 62, 59, 57, 54, 52, 50, 48, 46, 44, 43,
+ 42, 40, 39, 38, 38, 37, 36, 36, 36, 36, 71, 72, 73, 74, 75, 75, 74, 74,
+ 74, 71, 69, 66, 63, 61, 58, 55, 53, 51, 49, 47, 45, 44, 43, 41, 40, 39,
+ 38, 37, 37, 37, 37, 37, 75, 75, 74, 74, 73, 73, 73, 74, 74, 71, 69, 67,
+ 64, 62, 59, 57, 54, 52, 50, 48, 46, 45, 44, 42, 41, 40, 39, 38, 37, 37,
+ 37, 37, 79, 77, 75, 73, 71, 72, 72, 73, 73, 72, 70, 68, 66, 63, 60, 58,
+ 55, 53, 51, 49, 47, 46, 45, 43, 42, 41, 40, 39, 38, 38, 38, 38, 78, 76,
+ 75, 73, 72, 71, 71, 70, 70, 68, 66, 64, 62, 60, 58, 56, 53, 52, 50, 48,
+ 46, 45, 44, 42, 41, 40, 39, 38, 37, 37, 37, 37, 77, 76, 74, 73, 72, 71,
+ 69, 67, 66, 64, 62, 61, 59, 57, 55, 53, 51, 50, 48, 47, 45, 44, 43, 42,
+ 40, 40, 39, 38, 37, 37, 37, 37, 75, 75, 74, 74, 73, 70, 67, 65, 62, 60,
+ 59, 57, 56, 54, 53, 51, 49, 48, 47, 45, 44, 43, 42, 41, 40, 39, 38, 37,
+ 37, 37, 37, 37, 74, 74, 74, 74, 73, 70, 66, 62, 58, 56, 55, 54, 52, 51,
+ 50, 49, 48, 46, 45, 44, 43, 42, 41, 40, 39, 39, 38, 37, 36, 36, 36, 36,
+ 71, 71, 71, 71, 72, 68, 64, 60, 56, 55, 54, 52, 51, 49, 48, 47, 46, 45,
+ 44, 43, 42, 41, 40, 39, 39, 38, 37, 37, 36, 36, 36, 36, 68, 68, 69, 69,
+ 70, 66, 62, 59, 55, 54, 52, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40,
+ 39, 39, 38, 37, 37, 36, 36, 36, 36, 36, 64, 65, 66, 67, 68, 64, 61, 57,
+ 54, 52, 50, 49, 47, 46, 45, 44, 43, 42, 41, 41, 40, 39, 39, 38, 37, 37,
+ 36, 36, 35, 35, 35, 35, 61, 62, 63, 64, 66, 62, 59, 56, 52, 51, 49, 47,
+ 45, 44, 43, 42, 42, 41, 40, 39, 39, 38, 38, 37, 37, 36, 36, 35, 35, 35,
+ 35, 35, 58, 59, 61, 62, 63, 60, 57, 54, 51, 49, 48, 46, 44, 43, 42, 42,
+ 41, 40, 39, 39, 38, 38, 37, 37, 36, 36, 35, 35, 34, 34, 34, 34, 56, 57,
+ 58, 59, 60, 58, 55, 53, 50, 48, 47, 45, 43, 42, 42, 41, 40, 39, 39, 38,
+ 37, 37, 36, 36, 35, 35, 35, 34, 34, 34, 34, 34, 53, 54, 55, 57, 58, 56,
+ 53, 51, 49, 47, 46, 44, 42, 42, 41, 40, 39, 38, 38, 37, 37, 36, 36, 35,
+ 35, 35, 34, 34, 34, 34, 34, 34, 50, 52, 53, 54, 55, 53, 51, 49, 48, 46,
+ 45, 43, 42, 41, 40, 39, 38, 38, 37, 36, 36, 36, 35, 35, 34, 34, 34, 33,
+ 33, 33, 33, 33, 49, 50, 51, 52, 53, 52, 50, 48, 46, 45, 44, 42, 41, 40,
+ 39, 38, 38, 37, 36, 36, 35, 35, 35, 34, 34, 34, 33, 33, 33, 33, 33, 33,
+ 47, 48, 49, 50, 51, 50, 48, 47, 45, 44, 43, 41, 40, 39, 39, 38, 37, 36,
+ 36, 36, 35, 35, 34, 34, 34, 33, 33, 33, 33, 33, 33, 33, 45, 46, 47, 48,
+ 49, 48, 47, 45, 44, 43, 42, 41, 39, 39, 38, 37, 36, 36, 36, 35, 35, 34,
+ 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 43, 44, 45, 46, 47, 46, 45, 44,
+ 43, 42, 41, 40, 39, 38, 37, 37, 36, 35, 35, 35, 34, 34, 33, 33, 33, 33,
+ 32, 32, 32, 32, 32, 32, 42, 43, 44, 45, 46, 45, 44, 43, 42, 41, 40, 39,
+ 38, 38, 37, 36, 36, 35, 35, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32,
+ 32, 32, 41, 42, 43, 44, 45, 44, 43, 42, 41, 40, 39, 39, 38, 37, 36, 36,
+ 35, 35, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40,
+ 41, 42, 43, 42, 42, 41, 40, 39, 39, 38, 37, 37, 36, 35, 35, 34, 34, 34,
+ 33, 33, 33, 32, 32, 32, 32, 32, 31, 31, 31, 31, 38, 39, 40, 41, 42, 41,
+ 40, 40, 39, 39, 38, 37, 37, 36, 35, 35, 34, 34, 34, 33, 33, 33, 32, 32,
+ 32, 32, 31, 31, 31, 31, 31, 31, 38, 38, 39, 40, 41, 40, 40, 39, 39, 38,
+ 37, 37, 36, 36, 35, 35, 34, 34, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31,
+ 31, 31, 31, 31, 37, 38, 38, 39, 40, 39, 39, 38, 38, 37, 37, 36, 36, 35,
+ 35, 34, 34, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31,
+ 36, 37, 37, 38, 39, 38, 38, 37, 37, 37, 36, 36, 35, 35, 34, 34, 33, 33,
+ 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 35, 36, 37, 37,
+ 38, 37, 37, 37, 36, 36, 36, 35, 35, 34, 34, 34, 33, 33, 33, 32, 32, 32,
+ 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, 35, 36, 37, 37, 38, 37, 37, 37,
+ 36, 36, 36, 35, 35, 34, 34, 34, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31,
+ 31, 31, 31, 31, 31, 31, 35, 36, 37, 37, 38, 37, 37, 37, 36, 36, 36, 35,
+ 35, 34, 34, 34, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31,
+ 31, 31, 35, 36, 37, 37, 38, 37, 37, 37, 36, 36, 36, 35, 35, 34, 34, 34,
+ 33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 31, 31 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 118, 110, 77, 59, 110, 82, 67, 57, 77, 67, 56, 51, 59, 57, 51, 47,
+ /* Size 8 */
+ 103, 128, 121, 98, 80, 68, 60, 55, 128, 115, 119, 106, 89, 75, 65, 59,
+ 121, 119, 93, 83, 75, 68, 61, 56, 98, 106, 83, 71, 65, 61, 57, 54, 80,
+ 89, 75, 65, 59, 56, 53, 51, 68, 75, 68, 61, 56, 53, 51, 49, 60, 65, 61,
+ 57, 53, 51, 49, 48, 55, 59, 56, 54, 51, 49, 48, 47,
+ /* Size 16 */
+ 106, 118, 131, 127, 123, 112, 100, 91, 82, 76, 69, 65, 61, 59, 56, 56,
+ 118, 122, 125, 124, 123, 113, 104, 95, 86, 80, 73, 69, 64, 61, 58, 58,
+ 131, 125, 118, 120, 122, 115, 108, 99, 91, 84, 77, 72, 67, 64, 60, 60,
+ 127, 124, 120, 114, 108, 103, 97, 90, 84, 78, 73, 69, 65, 62, 59, 59,
+ 123, 123, 122, 108, 95, 90, 85, 81, 77, 73, 69, 66, 63, 60, 58, 58, 112,
+ 113, 115, 103, 90, 85, 79, 76, 72, 69, 66, 63, 60, 58, 56, 56, 100, 104,
+ 108, 97, 85, 79, 73, 70, 67, 64, 62, 60, 58, 56, 55, 55, 91, 95, 99, 90,
+ 81, 76, 70, 67, 64, 62, 60, 58, 56, 55, 54, 54, 82, 86, 91, 84, 77, 72,
+ 67, 64, 61, 59, 57, 56, 54, 53, 52, 52, 76, 80, 84, 78, 73, 69, 64, 62,
+ 59, 57, 55, 54, 53, 52, 51, 51, 69, 73, 77, 73, 69, 66, 62, 60, 57, 55,
+ 54, 53, 52, 51, 50, 50, 65, 69, 72, 69, 66, 63, 60, 58, 56, 54, 53, 52,
+ 51, 50, 49, 49, 61, 64, 67, 65, 63, 60, 58, 56, 54, 53, 52, 51, 50, 49,
+ 49, 49, 59, 61, 64, 62, 60, 58, 56, 55, 53, 52, 51, 50, 49, 49, 48, 48,
+ 56, 58, 60, 59, 58, 56, 55, 54, 52, 51, 50, 49, 49, 48, 48, 48, 56, 58,
+ 60, 59, 58, 56, 55, 54, 52, 51, 50, 49, 49, 48, 48, 48,
+ /* Size 32 */
+ 107, 113, 120, 126, 133, 131, 129, 127, 125, 119, 113, 107, 102, 97, 92,
+ 88, 83, 80, 77, 73, 70, 68, 66, 64, 62, 61, 59, 58, 57, 57, 57, 57, 113,
+ 117, 121, 125, 129, 128, 127, 126, 125, 119, 114, 109, 104, 99, 94, 90,
+ 85, 82, 79, 75, 72, 70, 68, 66, 64, 62, 61, 59, 58, 58, 58, 58, 120,
+ 121, 123, 125, 126, 126, 125, 125, 124, 119, 115, 110, 105, 101, 96, 92,
+ 87, 84, 81, 77, 74, 72, 69, 67, 65, 63, 62, 60, 59, 59, 59, 59, 126,
+ 125, 125, 124, 123, 123, 123, 124, 124, 120, 116, 112, 107, 103, 98, 94,
+ 90, 86, 83, 79, 76, 73, 71, 69, 66, 65, 63, 61, 60, 60, 60, 60, 133,
+ 129, 126, 123, 119, 120, 121, 122, 123, 120, 116, 113, 109, 105, 101,
+ 96, 92, 88, 85, 81, 78, 75, 73, 70, 68, 66, 64, 63, 61, 61, 61, 61, 131,
+ 128, 126, 123, 120, 119, 118, 118, 117, 113, 110, 107, 104, 100, 96, 92,
+ 88, 85, 82, 79, 76, 74, 71, 69, 67, 65, 64, 62, 60, 60, 60, 60, 129,
+ 127, 125, 123, 121, 118, 116, 113, 110, 107, 104, 101, 98, 95, 91, 88,
+ 85, 82, 79, 77, 74, 72, 70, 68, 66, 64, 63, 61, 60, 60, 60, 60, 127,
+ 126, 125, 124, 122, 118, 113, 108, 103, 100, 98, 95, 92, 89, 87, 84, 81,
+ 79, 77, 74, 72, 70, 68, 66, 65, 63, 62, 60, 59, 59, 59, 59, 125, 125,
+ 124, 124, 123, 117, 110, 103, 96, 94, 91, 89, 86, 84, 82, 80, 78, 76,
+ 74, 72, 70, 68, 67, 65, 63, 62, 61, 60, 58, 58, 58, 58, 119, 119, 119,
+ 120, 120, 113, 107, 100, 94, 91, 88, 86, 83, 81, 79, 77, 75, 74, 72, 70,
+ 68, 67, 65, 64, 62, 61, 60, 59, 58, 58, 58, 58, 113, 114, 115, 116, 116,
+ 110, 104, 98, 91, 88, 86, 83, 80, 78, 76, 75, 73, 71, 70, 68, 66, 65,
+ 64, 62, 61, 60, 59, 58, 57, 57, 57, 57, 107, 109, 110, 112, 113, 107,
+ 101, 95, 89, 86, 83, 80, 77, 75, 74, 72, 70, 69, 67, 66, 65, 63, 62, 61,
+ 60, 59, 58, 57, 56, 56, 56, 56, 102, 104, 105, 107, 109, 104, 98, 92,
+ 86, 83, 80, 77, 74, 72, 71, 69, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58,
+ 57, 56, 55, 55, 55, 55, 97, 99, 101, 103, 105, 100, 95, 89, 84, 81, 78,
+ 75, 72, 71, 69, 68, 66, 65, 64, 63, 61, 61, 60, 59, 58, 57, 56, 56, 55,
+ 55, 55, 55, 92, 94, 96, 98, 101, 96, 91, 87, 82, 79, 76, 74, 71, 69, 68,
+ 66, 64, 63, 62, 61, 60, 59, 59, 58, 57, 56, 56, 55, 54, 54, 54, 54, 88,
+ 90, 92, 94, 96, 92, 88, 84, 80, 77, 75, 72, 69, 68, 66, 64, 63, 62, 61,
+ 60, 59, 58, 57, 57, 56, 55, 55, 54, 53, 53, 53, 53, 83, 85, 87, 90, 92,
+ 88, 85, 81, 78, 75, 73, 70, 67, 66, 64, 63, 61, 60, 60, 59, 58, 57, 56,
+ 56, 55, 54, 54, 53, 53, 53, 53, 53, 80, 82, 84, 86, 88, 85, 82, 79, 76,
+ 74, 71, 69, 66, 65, 63, 62, 60, 60, 59, 58, 57, 56, 56, 55, 54, 54, 53,
+ 53, 52, 52, 52, 52, 77, 79, 81, 83, 85, 82, 79, 77, 74, 72, 70, 67, 65,
+ 64, 62, 61, 60, 59, 58, 57, 56, 55, 55, 54, 54, 53, 53, 52, 52, 52, 52,
+ 52, 73, 75, 77, 79, 81, 79, 77, 74, 72, 70, 68, 66, 64, 63, 61, 60, 59,
+ 58, 57, 56, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 51, 51, 70, 72, 74,
+ 76, 78, 76, 74, 72, 70, 68, 66, 65, 63, 61, 60, 59, 58, 57, 56, 55, 54,
+ 54, 53, 53, 52, 52, 52, 51, 51, 51, 51, 51, 68, 70, 72, 73, 75, 74, 72,
+ 70, 68, 67, 65, 63, 62, 61, 59, 58, 57, 56, 55, 55, 54, 53, 53, 52, 52,
+ 52, 51, 51, 50, 50, 50, 50, 66, 68, 69, 71, 73, 71, 70, 68, 67, 65, 64,
+ 62, 61, 60, 59, 57, 56, 56, 55, 54, 53, 53, 52, 52, 51, 51, 51, 50, 50,
+ 50, 50, 50, 64, 66, 67, 69, 70, 69, 68, 66, 65, 64, 62, 61, 60, 59, 58,
+ 57, 56, 55, 54, 54, 53, 52, 52, 51, 51, 51, 50, 50, 50, 50, 50, 50, 62,
+ 64, 65, 66, 68, 67, 66, 65, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 54,
+ 53, 52, 52, 51, 51, 51, 50, 50, 50, 49, 49, 49, 49, 61, 62, 63, 65, 66,
+ 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 54, 53, 53, 52, 52, 51,
+ 51, 50, 50, 50, 49, 49, 49, 49, 49, 59, 61, 62, 63, 64, 64, 63, 62, 61,
+ 60, 59, 58, 57, 56, 56, 55, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50, 49,
+ 49, 49, 49, 49, 49, 58, 59, 60, 61, 63, 62, 61, 60, 60, 59, 58, 57, 56,
+ 56, 55, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50, 49, 49, 49, 49, 49, 49,
+ 49, 57, 58, 59, 60, 61, 60, 60, 59, 58, 58, 57, 56, 55, 55, 54, 53, 53,
+ 52, 52, 51, 51, 50, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 57, 58, 59,
+ 60, 61, 60, 60, 59, 58, 58, 57, 56, 55, 55, 54, 53, 53, 52, 52, 51, 51,
+ 50, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 57, 58, 59, 60, 61, 60, 60,
+ 59, 58, 58, 57, 56, 55, 55, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50, 49,
+ 49, 49, 49, 48, 48, 48, 48, 57, 58, 59, 60, 61, 60, 60, 59, 58, 58, 57,
+ 56, 55, 55, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50, 49, 49, 49, 49, 48,
+ 48, 48, 48 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 46, 44, 38, 46, 41, 40, 37, 44, 40, 35, 32, 38, 37, 32, 30,
+ /* Size 8 */
+ 64, 75, 49, 47, 45, 42, 40, 37, 75, 57, 49, 52, 51, 48, 44, 41, 49, 49,
+ 44, 45, 45, 44, 42, 39, 47, 52, 45, 42, 41, 40, 38, 37, 45, 51, 45, 41,
+ 38, 37, 36, 35, 42, 48, 44, 40, 37, 35, 34, 33, 40, 44, 42, 38, 36, 34,
+ 33, 32, 37, 41, 39, 37, 35, 33, 32, 31,
+ /* Size 16 */
+ 64, 70, 75, 62, 49, 48, 47, 46, 45, 44, 42, 41, 40, 38, 37, 37, 70, 68,
+ 66, 58, 49, 49, 50, 49, 48, 47, 45, 44, 42, 40, 39, 39, 75, 66, 57, 53,
+ 49, 50, 52, 51, 51, 49, 48, 46, 44, 43, 41, 41, 62, 58, 53, 50, 46, 47,
+ 48, 48, 48, 47, 46, 44, 43, 42, 40, 40, 49, 49, 49, 46, 44, 44, 45, 45,
+ 45, 44, 44, 43, 42, 40, 39, 39, 48, 49, 50, 47, 44, 44, 43, 43, 43, 42,
+ 42, 41, 40, 39, 38, 38, 47, 50, 52, 48, 45, 43, 42, 41, 41, 40, 40, 39,
+ 38, 38, 37, 37, 46, 49, 51, 48, 45, 43, 41, 40, 39, 39, 38, 38, 37, 36,
+ 36, 36, 45, 48, 51, 48, 45, 43, 41, 39, 38, 37, 37, 36, 36, 35, 35, 35,
+ 44, 47, 49, 47, 44, 42, 40, 39, 37, 37, 36, 35, 35, 34, 34, 34, 42, 45,
+ 48, 46, 44, 42, 40, 38, 37, 36, 35, 34, 34, 33, 33, 33, 41, 44, 46, 44,
+ 43, 41, 39, 38, 36, 35, 34, 34, 33, 33, 32, 32, 40, 42, 44, 43, 42, 40,
+ 38, 37, 36, 35, 34, 33, 33, 32, 32, 32, 38, 40, 43, 42, 40, 39, 38, 36,
+ 35, 34, 33, 33, 32, 32, 31, 31, 37, 39, 41, 40, 39, 38, 37, 36, 35, 34,
+ 33, 32, 32, 31, 31, 31, 37, 39, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
+ 32, 31, 31, 31,
+ /* Size 32 */
+ 64, 67, 70, 72, 75, 69, 62, 56, 49, 49, 48, 48, 47, 47, 46, 46, 45, 45,
+ 44, 43, 42, 42, 41, 40, 40, 39, 38, 38, 37, 37, 37, 37, 67, 68, 69, 70,
+ 71, 65, 60, 54, 49, 49, 49, 49, 48, 48, 48, 47, 47, 46, 45, 45, 44, 43,
+ 42, 41, 41, 40, 39, 39, 38, 38, 38, 38, 70, 69, 68, 67, 66, 62, 58, 53,
+ 49, 49, 49, 49, 50, 49, 49, 48, 48, 47, 47, 46, 45, 44, 44, 43, 42, 41,
+ 40, 40, 39, 39, 39, 39, 72, 70, 67, 64, 62, 59, 55, 52, 49, 49, 50, 50,
+ 51, 50, 50, 50, 49, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40, 40,
+ 40, 40, 75, 71, 66, 62, 57, 55, 53, 51, 49, 49, 50, 51, 52, 52, 51, 51,
+ 51, 50, 49, 49, 48, 47, 46, 45, 44, 44, 43, 42, 41, 41, 41, 41, 69, 65,
+ 62, 59, 55, 53, 51, 49, 47, 48, 49, 49, 50, 50, 50, 50, 49, 49, 48, 47,
+ 47, 46, 45, 44, 44, 43, 42, 41, 41, 41, 41, 41, 62, 60, 58, 55, 53, 51,
+ 50, 48, 46, 47, 47, 48, 48, 48, 48, 48, 48, 47, 47, 46, 46, 45, 44, 44,
+ 43, 42, 42, 41, 40, 40, 40, 40, 56, 54, 53, 52, 51, 49, 48, 47, 45, 46,
+ 46, 46, 47, 47, 47, 47, 46, 46, 46, 45, 45, 44, 43, 43, 42, 42, 41, 40,
+ 40, 40, 40, 40, 49, 49, 49, 49, 49, 47, 46, 45, 44, 44, 44, 45, 45, 45,
+ 45, 45, 45, 45, 44, 44, 44, 43, 43, 42, 42, 41, 40, 40, 39, 39, 39, 39,
+ 49, 49, 49, 49, 49, 48, 47, 46, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+ 43, 43, 43, 42, 42, 41, 41, 40, 40, 39, 39, 39, 39, 39, 48, 49, 49, 50,
+ 50, 49, 47, 46, 44, 44, 44, 44, 43, 43, 43, 43, 43, 43, 42, 42, 42, 41,
+ 41, 40, 40, 39, 39, 38, 38, 38, 38, 38, 48, 49, 49, 50, 51, 49, 48, 46,
+ 45, 44, 44, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 40, 40, 39, 39, 39,
+ 38, 38, 37, 37, 37, 37, 47, 48, 50, 51, 52, 50, 48, 47, 45, 44, 43, 43,
+ 42, 41, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 38, 38, 38, 37, 37, 37,
+ 37, 37, 47, 48, 49, 50, 52, 50, 48, 47, 45, 44, 43, 42, 41, 41, 41, 40,
+ 40, 40, 39, 39, 39, 39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 36, 46, 48,
+ 49, 50, 51, 50, 48, 47, 45, 44, 43, 42, 41, 41, 40, 40, 39, 39, 39, 38,
+ 38, 38, 38, 37, 37, 37, 36, 36, 36, 36, 36, 36, 46, 47, 48, 50, 51, 50,
+ 48, 47, 45, 44, 43, 42, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37,
+ 36, 36, 36, 35, 35, 35, 35, 35, 45, 47, 48, 49, 51, 49, 48, 46, 45, 44,
+ 43, 42, 41, 40, 39, 39, 38, 38, 37, 37, 37, 37, 36, 36, 36, 35, 35, 35,
+ 35, 35, 35, 35, 45, 46, 47, 49, 50, 49, 47, 46, 45, 44, 43, 41, 40, 40,
+ 39, 38, 38, 37, 37, 37, 36, 36, 36, 36, 35, 35, 35, 34, 34, 34, 34, 34,
+ 44, 45, 47, 48, 49, 48, 47, 46, 44, 43, 42, 41, 40, 39, 39, 38, 37, 37,
+ 37, 36, 36, 36, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34, 43, 45, 46, 47,
+ 49, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 38, 37, 37, 36, 36, 35, 35,
+ 35, 35, 34, 34, 34, 34, 33, 33, 33, 33, 42, 44, 45, 46, 48, 47, 46, 45,
+ 44, 43, 42, 41, 40, 39, 38, 37, 37, 36, 36, 35, 35, 35, 34, 34, 34, 34,
+ 33, 33, 33, 33, 33, 33, 42, 43, 44, 46, 47, 46, 45, 44, 43, 42, 41, 40,
+ 39, 39, 38, 37, 37, 36, 36, 35, 35, 34, 34, 34, 34, 33, 33, 33, 33, 33,
+ 33, 33, 41, 42, 44, 45, 46, 45, 44, 43, 43, 42, 41, 40, 39, 38, 38, 37,
+ 36, 36, 35, 35, 34, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 40, 41,
+ 43, 44, 45, 44, 44, 43, 42, 41, 40, 39, 39, 38, 37, 37, 36, 36, 35, 35,
+ 34, 34, 34, 33, 33, 33, 32, 32, 32, 32, 32, 32, 40, 41, 42, 43, 44, 44,
+ 43, 42, 42, 41, 40, 39, 38, 38, 37, 36, 36, 35, 35, 34, 34, 34, 33, 33,
+ 33, 32, 32, 32, 32, 32, 32, 32, 39, 40, 41, 42, 44, 43, 42, 42, 41, 40,
+ 39, 39, 38, 37, 37, 36, 35, 35, 35, 34, 34, 33, 33, 33, 32, 32, 32, 32,
+ 31, 31, 31, 31, 38, 39, 40, 42, 43, 42, 42, 41, 40, 40, 39, 38, 38, 37,
+ 36, 36, 35, 35, 34, 34, 33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 31,
+ 38, 39, 40, 41, 42, 41, 41, 40, 40, 39, 38, 38, 37, 37, 36, 35, 35, 34,
+ 34, 34, 33, 33, 33, 32, 32, 32, 31, 31, 31, 31, 31, 31, 37, 38, 39, 40,
+ 41, 41, 40, 40, 39, 39, 38, 37, 37, 36, 36, 35, 35, 34, 34, 33, 33, 33,
+ 32, 32, 32, 31, 31, 31, 31, 31, 31, 31, 37, 38, 39, 40, 41, 41, 40, 40,
+ 39, 39, 38, 37, 37, 36, 36, 35, 35, 34, 34, 33, 33, 33, 32, 32, 32, 31,
+ 31, 31, 31, 31, 31, 31, 37, 38, 39, 40, 41, 41, 40, 40, 39, 39, 38, 37,
+ 37, 36, 36, 35, 35, 34, 34, 33, 33, 33, 32, 32, 32, 31, 31, 31, 31, 31,
+ 31, 31, 37, 38, 39, 40, 41, 41, 40, 40, 39, 39, 38, 37, 37, 36, 36, 35,
+ 35, 34, 34, 33, 33, 33, 32, 32, 32, 31, 31, 31, 31, 31, 31, 31 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 110, 78, 74, 64, 78, 69, 66, 61, 74, 66, 57, 53, 64, 61, 53, 49,
+ /* Size 8 */
+ 103, 122, 78, 75, 72, 67, 62, 58, 122, 92, 77, 83, 81, 76, 70, 65, 78,
+ 77, 70, 71, 71, 69, 66, 62, 75, 83, 71, 66, 64, 62, 60, 58, 72, 81, 71,
+ 64, 60, 58, 56, 54, 67, 76, 69, 62, 58, 55, 53, 51, 62, 70, 66, 60, 56,
+ 53, 50, 49, 58, 65, 62, 58, 54, 51, 49, 47,
+ /* Size 16 */
+ 105, 115, 124, 102, 80, 78, 77, 75, 73, 71, 68, 66, 63, 61, 59, 59, 115,
+ 112, 109, 94, 79, 80, 80, 79, 78, 75, 73, 70, 67, 65, 62, 62, 124, 109,
+ 94, 86, 79, 82, 84, 83, 83, 80, 77, 74, 72, 69, 66, 66, 102, 94, 86, 81,
+ 75, 77, 78, 78, 78, 76, 74, 71, 69, 67, 64, 64, 80, 79, 79, 75, 71, 72,
+ 73, 73, 73, 71, 70, 68, 67, 65, 63, 63, 78, 80, 82, 77, 72, 71, 70, 69,
+ 69, 68, 67, 65, 64, 62, 61, 61, 77, 80, 84, 78, 73, 70, 67, 66, 65, 64,
+ 63, 62, 61, 60, 59, 59, 75, 79, 83, 78, 73, 69, 66, 64, 63, 62, 61, 60,
+ 59, 58, 57, 57, 73, 78, 83, 78, 73, 69, 65, 63, 61, 60, 59, 58, 57, 56,
+ 55, 55, 71, 75, 80, 76, 71, 68, 64, 62, 60, 58, 57, 56, 55, 54, 53, 53,
+ 68, 73, 77, 74, 70, 67, 63, 61, 59, 57, 55, 54, 53, 53, 52, 52, 66, 70,
+ 74, 71, 68, 65, 62, 60, 58, 56, 54, 53, 52, 52, 51, 51, 63, 67, 72, 69,
+ 67, 64, 61, 59, 57, 55, 53, 52, 51, 51, 50, 50, 61, 65, 69, 67, 65, 62,
+ 60, 58, 56, 54, 53, 52, 51, 50, 49, 49, 59, 62, 66, 64, 63, 61, 59, 57,
+ 55, 53, 52, 51, 50, 49, 48, 48, 59, 62, 66, 64, 63, 61, 59, 57, 55, 53,
+ 52, 51, 50, 49, 48, 48,
+ /* Size 32 */
+ 106, 111, 116, 121, 126, 114, 103, 92, 80, 80, 79, 78, 77, 76, 76, 75,
+ 74, 73, 71, 70, 69, 68, 66, 65, 64, 63, 62, 60, 59, 59, 59, 59, 111,
+ 113, 114, 116, 118, 108, 99, 90, 80, 80, 80, 79, 79, 79, 78, 77, 76, 75,
+ 74, 72, 71, 70, 69, 67, 66, 65, 64, 62, 61, 61, 61, 61, 116, 114, 113,
+ 112, 110, 103, 95, 87, 80, 80, 81, 81, 81, 81, 80, 79, 79, 77, 76, 75,
+ 73, 72, 71, 69, 68, 67, 65, 64, 63, 63, 63, 63, 121, 116, 112, 107, 102,
+ 97, 91, 85, 80, 81, 81, 82, 83, 83, 82, 82, 81, 80, 78, 77, 76, 74, 73,
+ 71, 70, 69, 67, 66, 65, 65, 65, 65, 126, 118, 110, 102, 95, 91, 87, 83,
+ 79, 81, 82, 84, 85, 85, 84, 84, 83, 82, 81, 79, 78, 77, 75, 74, 72, 71,
+ 69, 68, 67, 67, 67, 67, 114, 108, 103, 97, 91, 87, 84, 81, 77, 79, 80,
+ 81, 82, 82, 81, 81, 81, 80, 79, 77, 76, 75, 74, 72, 71, 70, 68, 67, 66,
+ 66, 66, 66, 103, 99, 95, 91, 87, 84, 81, 78, 75, 76, 77, 78, 79, 79, 79,
+ 79, 78, 77, 76, 75, 75, 73, 72, 71, 70, 68, 67, 66, 65, 65, 65, 65, 92,
+ 90, 87, 85, 83, 81, 78, 76, 74, 74, 75, 75, 76, 76, 76, 76, 76, 75, 74,
+ 73, 73, 72, 71, 70, 68, 67, 66, 65, 64, 64, 64, 64, 80, 80, 80, 80, 79,
+ 77, 75, 74, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, 72, 72, 71, 70, 69,
+ 68, 67, 66, 65, 64, 63, 63, 63, 63, 80, 80, 80, 81, 81, 79, 76, 74, 72,
+ 72, 72, 72, 72, 72, 72, 71, 71, 71, 70, 70, 69, 68, 68, 67, 66, 65, 64,
+ 63, 62, 62, 62, 62, 79, 80, 81, 81, 82, 80, 77, 75, 72, 72, 71, 71, 70,
+ 70, 70, 70, 69, 69, 68, 68, 67, 67, 66, 65, 64, 64, 63, 62, 61, 61, 61,
+ 61, 78, 79, 81, 82, 84, 81, 78, 75, 73, 72, 71, 70, 69, 69, 68, 68, 68,
+ 67, 67, 66, 66, 65, 64, 64, 63, 62, 62, 61, 60, 60, 60, 60, 77, 79, 81,
+ 83, 85, 82, 79, 76, 73, 72, 70, 69, 68, 67, 67, 66, 66, 65, 65, 64, 64,
+ 63, 63, 62, 62, 61, 60, 60, 59, 59, 59, 59, 76, 79, 81, 83, 85, 82, 79,
+ 76, 73, 72, 70, 69, 67, 66, 66, 65, 65, 64, 64, 63, 63, 62, 62, 61, 61,
+ 60, 59, 59, 58, 58, 58, 58, 76, 78, 80, 82, 84, 81, 79, 76, 73, 72, 70,
+ 68, 67, 66, 65, 64, 64, 63, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58, 57,
+ 57, 57, 57, 75, 77, 79, 82, 84, 81, 79, 76, 73, 71, 70, 68, 66, 65, 64,
+ 63, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58, 57, 57, 56, 56, 56, 56, 74,
+ 76, 79, 81, 83, 81, 78, 76, 73, 71, 69, 68, 66, 65, 64, 62, 61, 61, 60,
+ 60, 59, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 55, 73, 75, 77, 80, 82,
+ 80, 77, 75, 73, 71, 69, 67, 65, 64, 63, 62, 61, 60, 60, 59, 58, 58, 57,
+ 57, 56, 56, 55, 55, 55, 55, 55, 55, 71, 74, 76, 78, 81, 79, 76, 74, 72,
+ 70, 68, 67, 65, 64, 62, 61, 60, 60, 59, 58, 58, 57, 57, 56, 56, 55, 55,
+ 54, 54, 54, 54, 54, 70, 72, 75, 77, 79, 77, 75, 73, 72, 70, 68, 66, 64,
+ 63, 62, 61, 60, 59, 58, 57, 57, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53,
+ 53, 69, 71, 73, 76, 78, 76, 75, 73, 71, 69, 67, 66, 64, 63, 61, 60, 59,
+ 58, 58, 57, 56, 55, 55, 54, 54, 54, 53, 53, 52, 52, 52, 52, 68, 70, 72,
+ 74, 77, 75, 73, 72, 70, 68, 67, 65, 63, 62, 61, 60, 59, 58, 57, 56, 55,
+ 55, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 66, 69, 71, 73, 75, 74, 72,
+ 71, 69, 68, 66, 64, 63, 62, 60, 59, 58, 57, 57, 56, 55, 54, 54, 53, 53,
+ 52, 52, 52, 51, 51, 51, 51, 65, 67, 69, 71, 74, 72, 71, 70, 68, 67, 65,
+ 64, 62, 61, 60, 59, 58, 57, 56, 55, 54, 54, 53, 53, 52, 52, 52, 51, 51,
+ 51, 51, 51, 64, 66, 68, 70, 72, 71, 70, 68, 67, 66, 64, 63, 62, 61, 59,
+ 58, 57, 56, 56, 55, 54, 53, 53, 52, 52, 51, 51, 51, 50, 50, 50, 50, 63,
+ 65, 67, 69, 71, 70, 68, 67, 66, 65, 64, 62, 61, 60, 59, 58, 57, 56, 55,
+ 54, 54, 53, 52, 52, 51, 51, 51, 50, 50, 50, 50, 50, 62, 64, 65, 67, 69,
+ 68, 67, 66, 65, 64, 63, 62, 60, 59, 58, 57, 56, 55, 55, 54, 53, 53, 52,
+ 52, 51, 51, 50, 50, 49, 49, 49, 49, 60, 62, 64, 66, 68, 67, 66, 65, 64,
+ 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 54, 53, 52, 52, 51, 51, 50, 50,
+ 49, 49, 49, 49, 49, 59, 61, 63, 65, 67, 66, 65, 64, 63, 62, 61, 60, 59,
+ 58, 57, 56, 55, 55, 54, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 49, 49,
+ 49, 59, 61, 63, 65, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55,
+ 55, 54, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 49, 49, 49, 59, 61, 63,
+ 65, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 55, 54, 53, 52,
+ 52, 51, 51, 50, 50, 49, 49, 49, 49, 49, 49, 59, 61, 63, 65, 67, 66, 65,
+ 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 55, 54, 53, 52, 52, 51, 51, 50,
+ 50, 49, 49, 49, 49, 49, 49 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 60, 45, 36, 60, 47, 40, 35, 45, 40, 34, 32, 36, 35, 32, 31,
+ /* Size 8 */
+ 64, 77, 73, 61, 52, 45, 41, 38, 77, 70, 73, 65, 56, 49, 44, 40, 73, 73,
+ 58, 53, 49, 45, 42, 39, 61, 65, 53, 47, 44, 41, 39, 38, 52, 56, 49, 44,
+ 41, 39, 37, 36, 45, 49, 45, 41, 39, 37, 36, 35, 41, 44, 42, 39, 37, 36,
+ 35, 34, 38, 40, 39, 38, 36, 35, 34, 34,
+ /* Size 16 */
+ 64, 71, 77, 75, 73, 67, 61, 56, 52, 48, 45, 43, 41, 40, 38, 38, 71, 72,
+ 74, 73, 73, 68, 63, 59, 54, 51, 47, 45, 42, 41, 39, 39, 77, 74, 70, 71,
+ 73, 69, 65, 61, 56, 53, 49, 46, 44, 42, 40, 40, 75, 73, 71, 69, 66, 62,
+ 59, 56, 53, 50, 47, 45, 43, 41, 40, 40, 73, 73, 73, 66, 58, 56, 53, 51,
+ 49, 47, 45, 43, 42, 40, 39, 39, 67, 68, 69, 62, 56, 53, 50, 48, 46, 45,
+ 43, 42, 41, 39, 38, 38, 61, 63, 65, 59, 53, 50, 47, 45, 44, 43, 41, 40,
+ 39, 38, 38, 38, 56, 59, 61, 56, 51, 48, 45, 44, 42, 41, 40, 39, 38, 38,
+ 37, 37, 52, 54, 56, 53, 49, 46, 44, 42, 41, 40, 39, 38, 37, 37, 36, 36,
+ 48, 51, 53, 50, 47, 45, 43, 41, 40, 39, 38, 37, 37, 36, 36, 36, 45, 47,
+ 49, 47, 45, 43, 41, 40, 39, 38, 37, 37, 36, 36, 35, 35, 43, 45, 46, 45,
+ 43, 42, 40, 39, 38, 37, 37, 36, 36, 35, 35, 35, 41, 42, 44, 43, 42, 41,
+ 39, 38, 37, 37, 36, 36, 35, 35, 34, 34, 40, 41, 42, 41, 40, 39, 38, 38,
+ 37, 36, 36, 35, 35, 34, 34, 34, 38, 39, 40, 40, 39, 38, 38, 37, 36, 36,
+ 35, 35, 34, 34, 34, 34, 38, 39, 40, 40, 39, 38, 38, 37, 36, 36, 35, 35,
+ 34, 34, 34, 34,
+ /* Size 32 */
+ 64, 67, 71, 74, 77, 76, 75, 74, 73, 70, 67, 64, 61, 59, 56, 54, 52, 50,
+ 48, 47, 45, 44, 43, 42, 41, 40, 40, 39, 38, 38, 38, 38, 67, 69, 71, 74,
+ 76, 75, 74, 74, 73, 70, 68, 65, 62, 60, 58, 55, 53, 51, 50, 48, 46, 45,
+ 44, 43, 42, 41, 40, 40, 39, 39, 39, 39, 71, 71, 72, 73, 74, 74, 73, 73,
+ 73, 71, 68, 66, 63, 61, 59, 56, 54, 52, 51, 49, 47, 46, 45, 44, 42, 42,
+ 41, 40, 39, 39, 39, 39, 74, 74, 73, 73, 72, 72, 72, 73, 73, 71, 69, 66,
+ 64, 62, 60, 57, 55, 53, 52, 50, 48, 47, 46, 44, 43, 42, 42, 41, 40, 40,
+ 40, 40, 77, 76, 74, 72, 70, 71, 71, 72, 73, 71, 69, 67, 65, 63, 61, 59,
+ 56, 54, 53, 51, 49, 48, 46, 45, 44, 43, 42, 41, 40, 40, 40, 40, 76, 75,
+ 74, 72, 71, 70, 70, 70, 69, 67, 66, 64, 62, 60, 58, 56, 54, 53, 51, 50,
+ 48, 47, 46, 45, 43, 43, 42, 41, 40, 40, 40, 40, 75, 74, 73, 72, 71, 70,
+ 69, 67, 66, 64, 62, 61, 59, 58, 56, 54, 53, 51, 50, 48, 47, 46, 45, 44,
+ 43, 42, 41, 41, 40, 40, 40, 40, 74, 74, 73, 73, 72, 70, 67, 65, 62, 61,
+ 59, 58, 56, 55, 54, 52, 51, 50, 49, 47, 46, 45, 44, 43, 42, 42, 41, 40,
+ 39, 39, 39, 39, 73, 73, 73, 73, 73, 69, 66, 62, 58, 57, 56, 55, 53, 52,
+ 51, 50, 49, 48, 47, 46, 45, 44, 43, 43, 42, 41, 40, 40, 39, 39, 39, 39,
+ 70, 70, 71, 71, 71, 67, 64, 61, 57, 56, 55, 53, 52, 51, 50, 49, 48, 47,
+ 46, 45, 44, 43, 43, 42, 41, 41, 40, 39, 39, 39, 39, 39, 67, 68, 68, 69,
+ 69, 66, 62, 59, 56, 55, 53, 52, 50, 49, 48, 47, 46, 46, 45, 44, 43, 43,
+ 42, 41, 41, 40, 39, 39, 38, 38, 38, 38, 64, 65, 66, 66, 67, 64, 61, 58,
+ 55, 53, 52, 50, 49, 48, 47, 46, 45, 44, 44, 43, 42, 42, 41, 41, 40, 39,
+ 39, 38, 38, 38, 38, 38, 61, 62, 63, 64, 65, 62, 59, 56, 53, 52, 50, 49,
+ 47, 46, 45, 45, 44, 43, 43, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38, 38,
+ 38, 38, 59, 60, 61, 62, 63, 60, 58, 55, 52, 51, 49, 48, 46, 45, 45, 44,
+ 43, 42, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37, 56, 58,
+ 59, 60, 61, 58, 56, 54, 51, 50, 48, 47, 45, 45, 44, 43, 42, 42, 41, 41,
+ 40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37, 37, 54, 55, 56, 57, 59, 56,
+ 54, 52, 50, 49, 47, 46, 45, 44, 43, 42, 41, 41, 40, 40, 39, 39, 39, 38,
+ 38, 38, 37, 37, 37, 37, 37, 37, 52, 53, 54, 55, 56, 54, 53, 51, 49, 48,
+ 46, 45, 44, 43, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37,
+ 36, 36, 36, 36, 50, 51, 52, 53, 54, 53, 51, 50, 48, 47, 46, 44, 43, 42,
+ 42, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37, 36, 36, 36, 36, 36,
+ 48, 50, 51, 52, 53, 51, 50, 49, 47, 46, 45, 44, 43, 42, 41, 40, 40, 39,
+ 39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 36, 36, 36, 36, 47, 48, 49, 50,
+ 51, 50, 48, 47, 46, 45, 44, 43, 42, 41, 41, 40, 39, 39, 38, 38, 38, 37,
+ 37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 45, 46, 47, 48, 49, 48, 47, 46,
+ 45, 44, 43, 42, 41, 41, 40, 39, 39, 38, 38, 38, 37, 37, 37, 36, 36, 36,
+ 36, 35, 35, 35, 35, 35, 44, 45, 46, 47, 48, 47, 46, 45, 44, 43, 43, 42,
+ 41, 40, 40, 39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35,
+ 35, 35, 43, 44, 45, 46, 46, 46, 45, 44, 43, 43, 42, 41, 40, 40, 39, 39,
+ 38, 38, 37, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 35, 35, 35, 42, 43,
+ 44, 44, 45, 45, 44, 43, 43, 42, 41, 41, 40, 39, 39, 38, 38, 37, 37, 37,
+ 36, 36, 36, 36, 35, 35, 35, 35, 35, 35, 35, 35, 41, 42, 42, 43, 44, 43,
+ 43, 42, 42, 41, 41, 40, 39, 39, 38, 38, 37, 37, 37, 36, 36, 36, 36, 35,
+ 35, 35, 35, 35, 34, 34, 34, 34, 40, 41, 42, 42, 43, 43, 42, 42, 41, 41,
+ 40, 39, 39, 38, 38, 38, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 35, 34,
+ 34, 34, 34, 34, 40, 40, 41, 42, 42, 42, 41, 41, 40, 40, 39, 39, 38, 38,
+ 38, 37, 37, 37, 36, 36, 36, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34,
+ 39, 40, 40, 41, 41, 41, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 36,
+ 36, 36, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34, 34, 38, 39, 39, 40,
+ 40, 40, 40, 39, 39, 39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 35, 35, 35,
+ 35, 35, 34, 34, 34, 34, 34, 34, 34, 34, 38, 39, 39, 40, 40, 40, 40, 39,
+ 39, 39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 35, 35, 35, 35, 35, 34, 34,
+ 34, 34, 34, 34, 34, 34, 38, 39, 39, 40, 40, 40, 40, 39, 39, 39, 38, 38,
+ 38, 37, 37, 37, 36, 36, 36, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34,
+ 34, 34, 38, 39, 39, 40, 40, 40, 40, 39, 39, 39, 38, 38, 38, 37, 37, 37,
+ 36, 36, 36, 35, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34, 34, 34 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 109, 102, 75, 59, 102, 79, 66, 57, 75, 66, 56, 52, 59, 57, 52, 49,
+ /* Size 8 */
+ 96, 117, 111, 92, 77, 67, 60, 55, 117, 106, 110, 98, 84, 73, 64, 59,
+ 111, 110, 87, 80, 73, 66, 61, 57, 92, 98, 80, 69, 64, 60, 57, 55, 77,
+ 84, 73, 64, 59, 56, 54, 52, 67, 73, 66, 60, 56, 54, 52, 51, 60, 64, 61,
+ 57, 54, 52, 51, 49, 55, 59, 57, 55, 52, 51, 49, 49,
+ /* Size 16 */
+ 98, 109, 120, 116, 113, 103, 94, 86, 78, 73, 68, 65, 61, 59, 57, 57,
+ 109, 111, 114, 113, 112, 105, 97, 89, 82, 76, 71, 67, 63, 61, 58, 58,
+ 120, 114, 108, 110, 112, 106, 100, 93, 86, 80, 74, 70, 66, 63, 60, 60,
+ 116, 113, 110, 105, 101, 96, 91, 85, 80, 75, 71, 67, 64, 62, 59, 59,
+ 113, 112, 112, 101, 89, 85, 81, 78, 74, 71, 68, 65, 62, 60, 58, 58, 103,
+ 105, 106, 96, 85, 81, 76, 73, 70, 67, 65, 62, 60, 59, 57, 57, 94, 97,
+ 100, 91, 81, 76, 71, 68, 66, 64, 62, 60, 58, 57, 56, 56, 86, 89, 93, 85,
+ 78, 73, 68, 66, 63, 61, 60, 58, 57, 56, 55, 55, 78, 82, 86, 80, 74, 70,
+ 66, 63, 61, 59, 57, 56, 55, 54, 53, 53, 73, 76, 80, 75, 71, 67, 64, 61,
+ 59, 58, 56, 55, 54, 53, 53, 53, 68, 71, 74, 71, 68, 65, 62, 60, 57, 56,
+ 55, 54, 53, 52, 52, 52, 65, 67, 70, 67, 65, 62, 60, 58, 56, 55, 54, 53,
+ 52, 52, 51, 51, 61, 63, 66, 64, 62, 60, 58, 57, 55, 54, 53, 52, 52, 51,
+ 51, 51, 59, 61, 63, 62, 60, 59, 57, 56, 54, 53, 52, 52, 51, 51, 50, 50,
+ 57, 58, 60, 59, 58, 57, 56, 55, 53, 53, 52, 51, 51, 50, 50, 50, 57, 58,
+ 60, 59, 58, 57, 56, 55, 53, 53, 52, 51, 51, 50, 50, 50,
+ /* Size 32 */
+ 99, 105, 110, 115, 121, 119, 118, 116, 114, 109, 104, 100, 95, 91, 87,
+ 83, 79, 76, 74, 71, 69, 67, 65, 63, 62, 61, 59, 58, 57, 57, 57, 57, 105,
+ 108, 111, 115, 118, 117, 116, 115, 114, 110, 105, 101, 96, 93, 89, 85,
+ 81, 78, 76, 73, 70, 68, 67, 65, 63, 62, 61, 59, 58, 58, 58, 58, 110,
+ 111, 113, 114, 115, 115, 114, 114, 114, 110, 106, 102, 98, 94, 90, 87,
+ 83, 80, 77, 74, 72, 70, 68, 66, 64, 63, 62, 60, 59, 59, 59, 59, 115,
+ 115, 114, 113, 112, 113, 113, 113, 113, 110, 106, 103, 100, 96, 92, 88,
+ 85, 82, 79, 76, 73, 71, 69, 67, 65, 64, 63, 61, 60, 60, 60, 60, 121,
+ 118, 115, 112, 110, 110, 111, 112, 113, 110, 107, 104, 101, 98, 94, 90,
+ 87, 84, 81, 78, 75, 73, 71, 69, 67, 65, 64, 62, 61, 61, 61, 61, 119,
+ 117, 115, 113, 110, 110, 109, 108, 107, 105, 102, 99, 96, 93, 90, 87,
+ 84, 81, 78, 76, 73, 71, 69, 68, 66, 64, 63, 62, 60, 60, 60, 60, 118,
+ 116, 114, 113, 111, 109, 106, 104, 102, 99, 97, 94, 92, 89, 86, 83, 81,
+ 78, 76, 74, 72, 70, 68, 66, 65, 63, 62, 61, 60, 60, 60, 60, 116, 115,
+ 114, 113, 112, 108, 104, 100, 96, 94, 91, 89, 87, 85, 82, 80, 78, 76,
+ 74, 72, 70, 68, 67, 65, 64, 63, 62, 60, 59, 59, 59, 59, 114, 114, 114,
+ 113, 113, 107, 102, 96, 90, 88, 86, 84, 82, 80, 79, 77, 75, 73, 72, 70,
+ 68, 67, 66, 64, 63, 62, 61, 60, 59, 59, 59, 59, 109, 110, 110, 110, 110,
+ 105, 99, 94, 88, 86, 84, 82, 79, 78, 76, 74, 73, 71, 70, 68, 67, 66, 64,
+ 63, 62, 61, 60, 59, 58, 58, 58, 58, 104, 105, 106, 106, 107, 102, 97,
+ 91, 86, 84, 81, 79, 77, 75, 74, 72, 71, 69, 68, 67, 65, 64, 63, 62, 61,
+ 60, 59, 58, 57, 57, 57, 57, 100, 101, 102, 103, 104, 99, 94, 89, 84, 82,
+ 79, 77, 74, 73, 71, 70, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 58,
+ 57, 57, 57, 57, 95, 96, 98, 100, 101, 96, 92, 87, 82, 79, 77, 74, 72,
+ 70, 69, 68, 66, 65, 64, 63, 62, 61, 61, 60, 59, 58, 58, 57, 56, 56, 56,
+ 56, 91, 93, 94, 96, 98, 93, 89, 85, 80, 78, 75, 73, 70, 69, 68, 66, 65,
+ 64, 63, 62, 61, 60, 60, 59, 58, 58, 57, 56, 56, 56, 56, 56, 87, 89, 90,
+ 92, 94, 90, 86, 82, 79, 76, 74, 71, 69, 68, 66, 65, 64, 63, 62, 61, 60,
+ 59, 59, 58, 57, 57, 56, 56, 55, 55, 55, 55, 83, 85, 87, 88, 90, 87, 83,
+ 80, 77, 74, 72, 70, 68, 66, 65, 64, 62, 62, 61, 60, 59, 59, 58, 57, 57,
+ 56, 56, 55, 55, 55, 55, 55, 79, 81, 83, 85, 87, 84, 81, 78, 75, 73, 71,
+ 68, 66, 65, 64, 62, 61, 60, 60, 59, 58, 58, 57, 56, 56, 55, 55, 54, 54,
+ 54, 54, 54, 76, 78, 80, 82, 84, 81, 78, 76, 73, 71, 69, 67, 65, 64, 63,
+ 62, 60, 60, 59, 58, 57, 57, 56, 56, 55, 55, 54, 54, 54, 54, 54, 54, 74,
+ 76, 77, 79, 81, 78, 76, 74, 72, 70, 68, 66, 64, 63, 62, 61, 60, 59, 58,
+ 57, 57, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53, 53, 71, 73, 74, 76, 78,
+ 76, 74, 72, 70, 68, 67, 65, 63, 62, 61, 60, 59, 58, 57, 57, 56, 56, 55,
+ 55, 54, 54, 53, 53, 53, 53, 53, 53, 69, 70, 72, 73, 75, 73, 72, 70, 68,
+ 67, 65, 64, 62, 61, 60, 59, 58, 57, 57, 56, 55, 55, 55, 54, 54, 53, 53,
+ 53, 52, 52, 52, 52, 67, 68, 70, 71, 73, 71, 70, 68, 67, 66, 64, 63, 61,
+ 60, 59, 59, 58, 57, 56, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52,
+ 52, 65, 67, 68, 69, 71, 69, 68, 67, 66, 64, 63, 62, 61, 60, 59, 58, 57,
+ 56, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 52, 63, 65, 66,
+ 67, 69, 68, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 56, 55, 55, 54,
+ 54, 53, 53, 52, 52, 52, 52, 51, 51, 51, 51, 62, 63, 64, 65, 67, 66, 65,
+ 64, 63, 62, 61, 60, 59, 58, 57, 57, 56, 55, 55, 54, 54, 53, 53, 52, 52,
+ 52, 52, 51, 51, 51, 51, 51, 61, 62, 63, 64, 65, 64, 63, 63, 62, 61, 60,
+ 59, 58, 58, 57, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 51, 51, 51,
+ 51, 51, 51, 59, 61, 62, 63, 64, 63, 62, 62, 61, 60, 59, 58, 58, 57, 56,
+ 56, 55, 54, 54, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 58,
+ 59, 60, 61, 62, 62, 61, 60, 60, 59, 58, 58, 57, 56, 56, 55, 54, 54, 54,
+ 53, 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 57, 58, 59, 60, 61,
+ 60, 60, 59, 59, 58, 57, 57, 56, 56, 55, 55, 54, 54, 53, 53, 52, 52, 52,
+ 51, 51, 51, 51, 50, 50, 50, 50, 50, 57, 58, 59, 60, 61, 60, 60, 59, 59,
+ 58, 57, 57, 56, 56, 55, 55, 54, 54, 53, 53, 52, 52, 52, 51, 51, 51, 51,
+ 50, 50, 50, 50, 50, 57, 58, 59, 60, 61, 60, 60, 59, 59, 58, 57, 57, 56,
+ 56, 55, 55, 54, 54, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50,
+ 50, 57, 58, 59, 60, 61, 60, 60, 59, 59, 58, 57, 57, 56, 56, 55, 55, 54,
+ 54, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 50 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 48, 46, 40, 48, 43, 42, 39, 46, 42, 37, 35, 40, 39, 35, 33,
+ /* Size 8 */
+ 64, 74, 50, 49, 47, 44, 42, 39, 74, 58, 50, 53, 52, 49, 46, 43, 50, 50,
+ 46, 47, 47, 45, 44, 41, 49, 53, 47, 44, 43, 42, 41, 39, 47, 52, 47, 43,
+ 40, 39, 38, 37, 44, 49, 45, 42, 39, 38, 37, 36, 42, 46, 44, 41, 38, 37,
+ 35, 35, 39, 43, 41, 39, 37, 36, 35, 34,
+ /* Size 16 */
+ 64, 69, 74, 62, 50, 50, 49, 48, 47, 46, 44, 43, 42, 41, 39, 39, 69, 68,
+ 66, 58, 50, 51, 51, 50, 50, 48, 47, 45, 44, 43, 41, 41, 74, 66, 58, 54,
+ 50, 51, 53, 52, 52, 51, 49, 48, 46, 45, 43, 43, 62, 58, 54, 51, 48, 49,
+ 50, 50, 49, 48, 47, 46, 45, 44, 42, 42, 50, 50, 50, 48, 46, 46, 47, 47,
+ 47, 46, 45, 45, 44, 43, 41, 41, 50, 51, 51, 49, 46, 46, 45, 45, 45, 44,
+ 44, 43, 42, 41, 40, 40, 49, 51, 53, 50, 47, 45, 44, 43, 43, 42, 42, 41,
+ 41, 40, 39, 39, 48, 50, 52, 50, 47, 45, 43, 42, 42, 41, 41, 40, 39, 39,
+ 38, 38, 47, 50, 52, 49, 47, 45, 43, 42, 40, 40, 39, 39, 38, 38, 37, 37,
+ 46, 48, 51, 48, 46, 44, 42, 41, 40, 39, 38, 38, 37, 37, 37, 37, 44, 47,
+ 49, 47, 45, 44, 42, 41, 39, 38, 38, 37, 37, 36, 36, 36, 43, 45, 48, 46,
+ 45, 43, 41, 40, 39, 38, 37, 37, 36, 36, 35, 35, 42, 44, 46, 45, 44, 42,
+ 41, 39, 38, 37, 37, 36, 35, 35, 35, 35, 41, 43, 45, 44, 43, 41, 40, 39,
+ 38, 37, 36, 36, 35, 35, 34, 34, 39, 41, 43, 42, 41, 40, 39, 38, 37, 37,
+ 36, 35, 35, 34, 34, 34, 39, 41, 43, 42, 41, 40, 39, 38, 37, 37, 36, 35,
+ 35, 34, 34, 34,
+ /* Size 32 */
+ 64, 67, 69, 72, 74, 68, 62, 56, 50, 50, 50, 49, 49, 48, 48, 48, 47, 46,
+ 46, 45, 44, 44, 43, 42, 42, 41, 41, 40, 39, 39, 39, 39, 67, 67, 68, 69,
+ 70, 65, 60, 55, 50, 50, 50, 50, 50, 49, 49, 49, 48, 48, 47, 46, 46, 45,
+ 44, 44, 43, 42, 42, 41, 40, 40, 40, 40, 69, 68, 68, 67, 66, 62, 58, 54,
+ 50, 50, 51, 51, 51, 51, 50, 50, 50, 49, 48, 48, 47, 46, 45, 45, 44, 43,
+ 43, 42, 41, 41, 41, 41, 72, 69, 67, 64, 62, 59, 56, 53, 50, 51, 51, 51,
+ 52, 52, 51, 51, 51, 50, 49, 49, 48, 47, 47, 46, 45, 44, 44, 43, 42, 42,
+ 42, 42, 74, 70, 66, 62, 58, 56, 54, 52, 50, 51, 51, 52, 53, 53, 52, 52,
+ 52, 51, 51, 50, 49, 48, 48, 47, 46, 45, 45, 44, 43, 43, 43, 43, 68, 65,
+ 62, 59, 56, 54, 52, 51, 49, 50, 50, 51, 51, 51, 51, 51, 51, 50, 50, 49,
+ 48, 48, 47, 46, 45, 45, 44, 43, 43, 43, 43, 43, 62, 60, 58, 56, 54, 52,
+ 51, 49, 48, 48, 49, 49, 50, 50, 50, 49, 49, 49, 48, 48, 47, 47, 46, 45,
+ 45, 44, 44, 43, 42, 42, 42, 42, 56, 55, 54, 53, 52, 51, 49, 48, 47, 47,
+ 48, 48, 48, 48, 48, 48, 48, 48, 47, 47, 46, 46, 45, 45, 44, 44, 43, 42,
+ 42, 42, 42, 42, 50, 50, 50, 50, 50, 49, 48, 47, 46, 46, 46, 46, 47, 47,
+ 47, 47, 47, 46, 46, 46, 45, 45, 45, 44, 44, 43, 43, 42, 41, 41, 41, 41,
+ 50, 50, 50, 51, 51, 50, 48, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 45,
+ 45, 45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 41, 41, 41, 50, 50, 51, 51,
+ 51, 50, 49, 48, 46, 46, 46, 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, 43,
+ 43, 42, 42, 42, 41, 41, 40, 40, 40, 40, 49, 50, 51, 51, 52, 51, 49, 48,
+ 46, 46, 45, 45, 44, 44, 44, 44, 44, 43, 43, 43, 43, 42, 42, 42, 41, 41,
+ 41, 40, 40, 40, 40, 40, 49, 50, 51, 52, 53, 51, 50, 48, 47, 46, 45, 44,
+ 44, 43, 43, 43, 43, 42, 42, 42, 42, 42, 41, 41, 41, 40, 40, 40, 39, 39,
+ 39, 39, 48, 49, 51, 52, 53, 51, 50, 48, 47, 46, 45, 44, 43, 43, 43, 42,
+ 42, 42, 42, 41, 41, 41, 41, 40, 40, 40, 39, 39, 39, 39, 39, 39, 48, 49,
+ 50, 51, 52, 51, 50, 48, 47, 46, 45, 44, 43, 43, 42, 42, 42, 41, 41, 41,
+ 41, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 48, 49, 50, 51, 52, 51,
+ 49, 48, 47, 46, 45, 44, 43, 42, 42, 42, 41, 41, 40, 40, 40, 40, 39, 39,
+ 39, 39, 38, 38, 38, 38, 38, 38, 47, 48, 50, 51, 52, 51, 49, 48, 47, 46,
+ 45, 44, 43, 42, 42, 41, 40, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38,
+ 37, 37, 37, 37, 46, 48, 49, 50, 51, 50, 49, 48, 46, 45, 44, 43, 42, 42,
+ 41, 41, 40, 40, 40, 39, 39, 39, 38, 38, 38, 38, 37, 37, 37, 37, 37, 37,
+ 46, 47, 48, 49, 51, 50, 48, 47, 46, 45, 44, 43, 42, 42, 41, 40, 40, 40,
+ 39, 39, 38, 38, 38, 38, 37, 37, 37, 37, 37, 37, 37, 37, 45, 46, 48, 49,
+ 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38,
+ 38, 37, 37, 37, 37, 36, 36, 36, 36, 36, 44, 46, 47, 48, 49, 48, 47, 46,
+ 45, 45, 44, 43, 42, 41, 41, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37, 36,
+ 36, 36, 36, 36, 36, 36, 44, 45, 46, 47, 48, 48, 47, 46, 45, 44, 43, 42,
+ 42, 41, 40, 40, 39, 39, 38, 38, 37, 37, 37, 37, 36, 36, 36, 36, 35, 35,
+ 35, 35, 43, 44, 45, 47, 48, 47, 46, 45, 45, 44, 43, 42, 41, 41, 40, 39,
+ 39, 38, 38, 38, 37, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 35, 42, 44,
+ 45, 46, 47, 46, 45, 45, 44, 43, 42, 42, 41, 40, 40, 39, 39, 38, 38, 37,
+ 37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 35, 35, 42, 43, 44, 45, 46, 45,
+ 45, 44, 44, 43, 42, 41, 41, 40, 39, 39, 38, 38, 37, 37, 37, 36, 36, 36,
+ 35, 35, 35, 35, 35, 35, 35, 35, 41, 42, 43, 44, 45, 45, 44, 44, 43, 42,
+ 42, 41, 40, 40, 39, 39, 38, 38, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35,
+ 34, 34, 34, 34, 41, 42, 43, 44, 45, 44, 44, 43, 43, 42, 41, 41, 40, 39,
+ 39, 38, 38, 37, 37, 37, 36, 36, 36, 35, 35, 35, 35, 34, 34, 34, 34, 34,
+ 40, 41, 42, 43, 44, 43, 43, 42, 42, 41, 41, 40, 40, 39, 39, 38, 38, 37,
+ 37, 36, 36, 36, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34, 39, 40, 41, 42,
+ 43, 43, 42, 42, 41, 41, 40, 40, 39, 39, 38, 38, 37, 37, 37, 36, 36, 35,
+ 35, 35, 35, 34, 34, 34, 34, 34, 34, 34, 39, 40, 41, 42, 43, 43, 42, 42,
+ 41, 41, 40, 40, 39, 39, 38, 38, 37, 37, 37, 36, 36, 35, 35, 35, 35, 34,
+ 34, 34, 34, 34, 34, 34, 39, 40, 41, 42, 43, 43, 42, 42, 41, 41, 40, 40,
+ 39, 39, 38, 38, 37, 37, 37, 36, 36, 35, 35, 35, 35, 34, 34, 34, 34, 34,
+ 34, 34, 39, 40, 41, 42, 43, 43, 42, 42, 41, 41, 40, 40, 39, 39, 38, 38,
+ 37, 37, 37, 36, 36, 35, 35, 35, 35, 34, 34, 34, 34, 34, 34, 34 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 103, 76, 72, 63, 76, 68, 66, 61, 72, 66, 58, 54, 63, 61, 54, 51,
+ /* Size 8 */
+ 98, 114, 76, 73, 71, 66, 62, 58, 114, 88, 75, 80, 78, 74, 69, 64, 76,
+ 75, 69, 70, 70, 68, 65, 62, 73, 80, 70, 65, 64, 62, 60, 58, 71, 78, 70,
+ 64, 60, 58, 57, 55, 66, 74, 68, 62, 58, 56, 54, 53, 62, 69, 65, 60, 57,
+ 54, 52, 51, 58, 64, 62, 58, 55, 53, 51, 49,
+ /* Size 16 */
+ 99, 107, 115, 96, 77, 76, 74, 73, 72, 69, 67, 65, 63, 61, 59, 59, 107,
+ 105, 102, 90, 77, 77, 78, 77, 76, 73, 71, 69, 67, 64, 62, 62, 115, 102,
+ 89, 83, 76, 79, 81, 80, 80, 77, 75, 73, 70, 68, 65, 65, 96, 90, 83, 78,
+ 73, 74, 76, 76, 75, 74, 72, 70, 68, 66, 64, 64, 77, 77, 76, 73, 70, 70,
+ 71, 71, 71, 70, 69, 68, 66, 64, 63, 63, 76, 77, 79, 74, 70, 69, 69, 68,
+ 68, 67, 66, 65, 64, 62, 61, 61, 74, 78, 81, 76, 71, 69, 66, 65, 65, 64,
+ 63, 62, 61, 60, 59, 59, 73, 77, 80, 76, 71, 68, 65, 64, 63, 62, 61, 60,
+ 59, 58, 57, 57, 72, 76, 80, 75, 71, 68, 65, 63, 61, 60, 59, 58, 57, 57,
+ 56, 56, 69, 73, 77, 74, 70, 67, 64, 62, 60, 59, 58, 57, 56, 55, 55, 55,
+ 67, 71, 75, 72, 69, 66, 63, 61, 59, 58, 56, 56, 55, 54, 53, 53, 65, 69,
+ 73, 70, 68, 65, 62, 60, 58, 57, 56, 55, 54, 53, 52, 52, 63, 67, 70, 68,
+ 66, 64, 61, 59, 57, 56, 55, 54, 53, 52, 51, 51, 61, 64, 68, 66, 64, 62,
+ 60, 58, 57, 55, 54, 53, 52, 51, 51, 51, 59, 62, 65, 64, 63, 61, 59, 57,
+ 56, 55, 53, 52, 51, 51, 50, 50, 59, 62, 65, 64, 63, 61, 59, 57, 56, 55,
+ 53, 52, 51, 51, 50, 50,
+ /* Size 32 */
+ 100, 104, 108, 112, 116, 107, 97, 87, 78, 77, 76, 76, 75, 74, 74, 73,
+ 72, 71, 70, 69, 68, 67, 66, 65, 63, 63, 62, 61, 60, 60, 60, 60, 104,
+ 105, 107, 108, 110, 102, 94, 86, 78, 77, 77, 77, 77, 76, 75, 75, 74, 73,
+ 72, 71, 70, 69, 68, 66, 65, 64, 63, 62, 61, 61, 61, 61, 108, 107, 106,
+ 104, 103, 97, 90, 84, 77, 78, 78, 78, 78, 78, 77, 77, 76, 75, 74, 73,
+ 72, 71, 69, 68, 67, 66, 65, 64, 63, 63, 63, 63, 112, 108, 104, 100, 96,
+ 92, 87, 82, 77, 78, 79, 79, 80, 80, 79, 79, 78, 77, 76, 75, 74, 73, 71,
+ 70, 69, 68, 67, 65, 64, 64, 64, 64, 116, 110, 103, 96, 90, 87, 83, 80,
+ 77, 78, 79, 81, 82, 81, 81, 81, 80, 79, 78, 77, 76, 74, 73, 72, 71, 69,
+ 68, 67, 66, 66, 66, 66, 107, 102, 97, 92, 87, 84, 81, 78, 75, 76, 77,
+ 78, 79, 79, 79, 78, 78, 77, 76, 75, 74, 73, 72, 71, 70, 68, 67, 66, 65,
+ 65, 65, 65, 97, 94, 90, 87, 83, 81, 78, 76, 74, 74, 75, 76, 77, 76, 76,
+ 76, 76, 75, 74, 73, 73, 72, 71, 70, 69, 68, 67, 65, 64, 64, 64, 64, 87,
+ 86, 84, 82, 80, 78, 76, 74, 72, 72, 73, 73, 74, 74, 74, 74, 74, 73, 72,
+ 72, 71, 70, 69, 68, 67, 67, 66, 65, 64, 64, 64, 64, 78, 78, 77, 77, 77,
+ 75, 74, 72, 70, 70, 71, 71, 72, 72, 72, 72, 72, 71, 71, 70, 70, 69, 68,
+ 67, 66, 66, 65, 64, 63, 63, 63, 63, 77, 77, 78, 78, 78, 76, 74, 72, 70,
+ 70, 70, 70, 70, 70, 70, 70, 70, 70, 69, 69, 68, 67, 67, 66, 65, 64, 64,
+ 63, 62, 62, 62, 62, 76, 77, 78, 79, 79, 77, 75, 73, 71, 70, 70, 70, 69,
+ 69, 69, 69, 68, 68, 67, 67, 67, 66, 65, 65, 64, 63, 63, 62, 61, 61, 61,
+ 61, 76, 77, 78, 79, 81, 78, 76, 73, 71, 70, 70, 69, 68, 68, 67, 67, 67,
+ 66, 66, 66, 65, 65, 64, 63, 63, 62, 62, 61, 60, 60, 60, 60, 75, 77, 78,
+ 80, 82, 79, 77, 74, 72, 70, 69, 68, 67, 66, 66, 65, 65, 65, 64, 64, 64,
+ 63, 63, 62, 62, 61, 61, 60, 60, 60, 60, 60, 74, 76, 78, 80, 81, 79, 76,
+ 74, 72, 70, 69, 68, 66, 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 61, 61,
+ 60, 60, 59, 59, 59, 59, 59, 74, 75, 77, 79, 81, 79, 76, 74, 72, 70, 69,
+ 67, 66, 65, 65, 64, 63, 63, 62, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58,
+ 58, 58, 58, 73, 75, 77, 79, 81, 78, 76, 74, 72, 70, 69, 67, 65, 65, 64,
+ 63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 72,
+ 74, 76, 78, 80, 78, 76, 74, 72, 70, 68, 67, 65, 64, 63, 62, 61, 61, 60,
+ 60, 59, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 56, 71, 73, 75, 77, 79,
+ 77, 75, 73, 71, 70, 68, 66, 65, 64, 63, 62, 61, 60, 60, 59, 59, 58, 58,
+ 58, 57, 57, 56, 56, 56, 56, 56, 56, 70, 72, 74, 76, 78, 76, 74, 72, 71,
+ 69, 67, 66, 64, 63, 62, 61, 60, 60, 59, 59, 58, 58, 57, 57, 56, 56, 56,
+ 55, 55, 55, 55, 55, 69, 71, 73, 75, 77, 75, 73, 72, 70, 69, 67, 66, 64,
+ 63, 62, 61, 60, 59, 59, 58, 57, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54,
+ 54, 68, 70, 72, 74, 76, 74, 73, 71, 70, 68, 67, 65, 64, 63, 62, 60, 59,
+ 59, 58, 57, 57, 56, 56, 56, 55, 55, 54, 54, 54, 54, 54, 54, 67, 69, 71,
+ 73, 74, 73, 72, 70, 69, 67, 66, 65, 63, 62, 61, 60, 59, 58, 58, 57, 56,
+ 56, 56, 55, 55, 54, 54, 54, 53, 53, 53, 53, 66, 68, 69, 71, 73, 72, 71,
+ 69, 68, 67, 65, 64, 63, 62, 61, 60, 59, 58, 57, 57, 56, 56, 55, 55, 54,
+ 54, 53, 53, 53, 53, 53, 53, 65, 66, 68, 70, 72, 71, 70, 68, 67, 66, 65,
+ 63, 62, 61, 60, 59, 58, 58, 57, 56, 56, 55, 55, 54, 54, 53, 53, 53, 52,
+ 52, 52, 52, 63, 65, 67, 69, 71, 70, 69, 67, 66, 65, 64, 63, 62, 61, 60,
+ 59, 58, 57, 56, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 63,
+ 64, 66, 68, 69, 68, 68, 67, 66, 64, 63, 62, 61, 60, 59, 58, 57, 57, 56,
+ 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 52, 62, 63, 65, 67, 68,
+ 67, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 56, 55, 54, 54, 53,
+ 53, 53, 52, 52, 52, 51, 51, 51, 51, 61, 62, 64, 65, 67, 66, 65, 65, 64,
+ 63, 62, 61, 60, 59, 58, 58, 57, 56, 55, 55, 54, 54, 53, 53, 52, 52, 52,
+ 51, 51, 51, 51, 51, 60, 61, 63, 64, 66, 65, 64, 64, 63, 62, 61, 60, 60,
+ 59, 58, 57, 56, 56, 55, 54, 54, 53, 53, 52, 52, 52, 51, 51, 51, 51, 51,
+ 51, 60, 61, 63, 64, 66, 65, 64, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56,
+ 56, 55, 54, 54, 53, 53, 52, 52, 52, 51, 51, 51, 51, 51, 51, 60, 61, 63,
+ 64, 66, 65, 64, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 56, 55, 54, 54,
+ 53, 53, 52, 52, 52, 51, 51, 51, 51, 51, 51, 60, 61, 63, 64, 66, 65, 64,
+ 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 56, 55, 54, 54, 53, 53, 52, 52,
+ 52, 51, 51, 51, 51, 51, 51 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 61, 47, 39, 61, 49, 42, 38, 47, 42, 38, 35, 39, 38, 35, 34,
+ /* Size 8 */
+ 64, 76, 72, 62, 53, 47, 44, 41, 76, 70, 72, 65, 57, 51, 46, 43, 72, 72,
+ 59, 55, 51, 47, 44, 42, 62, 65, 55, 49, 46, 44, 42, 41, 53, 57, 51, 46,
+ 43, 42, 41, 40, 47, 51, 47, 44, 42, 40, 39, 39, 44, 46, 44, 42, 41, 39,
+ 39, 38, 41, 43, 42, 41, 40, 39, 38, 37,
+ /* Size 16 */
+ 64, 70, 76, 74, 72, 67, 62, 57, 53, 50, 47, 46, 44, 43, 41, 41, 70, 71,
+ 73, 72, 72, 68, 63, 59, 55, 52, 49, 47, 45, 44, 42, 42, 76, 73, 70, 71,
+ 72, 68, 65, 61, 57, 54, 51, 49, 46, 45, 43, 43, 74, 72, 71, 68, 65, 63,
+ 60, 57, 54, 52, 49, 47, 45, 44, 43, 43, 72, 72, 72, 65, 59, 57, 55, 53,
+ 51, 49, 47, 46, 44, 43, 42, 42, 67, 68, 68, 63, 57, 54, 52, 50, 49, 47,
+ 46, 44, 43, 42, 41, 41, 62, 63, 65, 60, 55, 52, 49, 48, 46, 45, 44, 43,
+ 42, 41, 41, 41, 57, 59, 61, 57, 53, 50, 48, 46, 45, 44, 43, 42, 41, 41,
+ 40, 40, 53, 55, 57, 54, 51, 49, 46, 45, 43, 43, 42, 41, 41, 40, 40, 40,
+ 50, 52, 54, 52, 49, 47, 45, 44, 43, 42, 41, 40, 40, 40, 39, 39, 47, 49,
+ 51, 49, 47, 46, 44, 43, 42, 41, 40, 40, 39, 39, 39, 39, 46, 47, 49, 47,
+ 46, 44, 43, 42, 41, 40, 40, 39, 39, 39, 38, 38, 44, 45, 46, 45, 44, 43,
+ 42, 41, 41, 40, 39, 39, 39, 38, 38, 38, 43, 44, 45, 44, 43, 42, 41, 41,
+ 40, 40, 39, 39, 38, 38, 38, 38, 41, 42, 43, 43, 42, 41, 41, 40, 40, 39,
+ 39, 38, 38, 38, 37, 37, 41, 42, 43, 43, 42, 41, 41, 40, 40, 39, 39, 38,
+ 38, 38, 37, 37,
+ /* Size 32 */
+ 64, 67, 70, 73, 76, 75, 74, 73, 72, 70, 67, 64, 62, 60, 57, 55, 53, 52,
+ 50, 49, 47, 47, 46, 45, 44, 43, 43, 42, 41, 41, 41, 41, 67, 69, 71, 72,
+ 74, 74, 73, 73, 72, 70, 67, 65, 63, 60, 58, 56, 54, 53, 51, 50, 48, 47,
+ 46, 45, 44, 44, 43, 42, 42, 42, 42, 42, 70, 71, 71, 72, 73, 73, 72, 72,
+ 72, 70, 68, 66, 63, 61, 59, 57, 55, 54, 52, 51, 49, 48, 47, 46, 45, 44,
+ 44, 43, 42, 42, 42, 42, 73, 72, 72, 72, 71, 71, 71, 72, 72, 70, 68, 66,
+ 64, 62, 60, 58, 56, 55, 53, 52, 50, 49, 48, 47, 46, 45, 44, 43, 43, 43,
+ 43, 43, 76, 74, 73, 71, 70, 70, 71, 71, 72, 70, 68, 67, 65, 63, 61, 59,
+ 57, 56, 54, 52, 51, 50, 49, 47, 46, 46, 45, 44, 43, 43, 43, 43, 75, 74,
+ 73, 71, 70, 70, 69, 69, 68, 67, 66, 64, 63, 61, 59, 57, 56, 54, 53, 51,
+ 50, 49, 48, 47, 46, 45, 44, 44, 43, 43, 43, 43, 74, 73, 72, 71, 71, 69,
+ 68, 67, 65, 64, 63, 61, 60, 58, 57, 56, 54, 53, 52, 50, 49, 48, 47, 46,
+ 45, 45, 44, 43, 43, 43, 43, 43, 73, 73, 72, 72, 71, 69, 67, 64, 62, 61,
+ 60, 59, 57, 56, 55, 54, 52, 51, 50, 49, 48, 47, 47, 46, 45, 44, 44, 43,
+ 42, 42, 42, 42, 72, 72, 72, 72, 72, 68, 65, 62, 59, 58, 57, 56, 55, 54,
+ 53, 52, 51, 50, 49, 48, 47, 47, 46, 45, 44, 44, 43, 43, 42, 42, 42, 42,
+ 70, 70, 70, 70, 70, 67, 64, 61, 58, 57, 56, 55, 53, 52, 52, 51, 50, 49,
+ 48, 47, 47, 46, 45, 45, 44, 43, 43, 42, 42, 42, 42, 42, 67, 67, 68, 68,
+ 68, 66, 63, 60, 57, 56, 54, 53, 52, 51, 50, 49, 49, 48, 47, 46, 46, 45,
+ 44, 44, 43, 43, 42, 42, 41, 41, 41, 41, 64, 65, 66, 66, 67, 64, 61, 59,
+ 56, 55, 53, 52, 51, 50, 49, 48, 47, 47, 46, 45, 45, 44, 44, 43, 43, 42,
+ 42, 42, 41, 41, 41, 41, 62, 63, 63, 64, 65, 63, 60, 57, 55, 53, 52, 51,
+ 49, 48, 48, 47, 46, 46, 45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 41, 41,
+ 41, 41, 60, 60, 61, 62, 63, 61, 58, 56, 54, 52, 51, 50, 48, 48, 47, 46,
+ 46, 45, 44, 44, 43, 43, 43, 42, 42, 41, 41, 41, 40, 40, 40, 40, 57, 58,
+ 59, 60, 61, 59, 57, 55, 53, 52, 50, 49, 48, 47, 46, 46, 45, 44, 44, 43,
+ 43, 43, 42, 42, 41, 41, 41, 40, 40, 40, 40, 40, 55, 56, 57, 58, 59, 57,
+ 56, 54, 52, 51, 49, 48, 47, 46, 46, 45, 44, 44, 43, 43, 42, 42, 42, 41,
+ 41, 41, 40, 40, 40, 40, 40, 40, 53, 54, 55, 56, 57, 56, 54, 52, 51, 50,
+ 49, 47, 46, 46, 45, 44, 43, 43, 43, 42, 42, 41, 41, 41, 41, 40, 40, 40,
+ 40, 40, 40, 40, 52, 53, 54, 55, 56, 54, 53, 51, 50, 49, 48, 47, 46, 45,
+ 44, 44, 43, 43, 42, 42, 41, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39,
+ 50, 51, 52, 53, 54, 53, 52, 50, 49, 48, 47, 46, 45, 44, 44, 43, 43, 42,
+ 42, 41, 41, 41, 40, 40, 40, 40, 40, 39, 39, 39, 39, 39, 49, 50, 51, 52,
+ 52, 51, 50, 49, 48, 47, 46, 45, 45, 44, 43, 43, 42, 42, 41, 41, 41, 40,
+ 40, 40, 40, 39, 39, 39, 39, 39, 39, 39, 47, 48, 49, 50, 51, 50, 49, 48,
+ 47, 47, 46, 45, 44, 43, 43, 42, 42, 41, 41, 41, 40, 40, 40, 40, 39, 39,
+ 39, 39, 39, 39, 39, 39, 47, 47, 48, 49, 50, 49, 48, 47, 47, 46, 45, 44,
+ 44, 43, 43, 42, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 39, 38, 38,
+ 38, 38, 46, 46, 47, 48, 49, 48, 47, 47, 46, 45, 44, 44, 43, 43, 42, 42,
+ 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 39, 38, 38, 38, 38, 38, 45, 45,
+ 46, 47, 47, 47, 46, 46, 45, 45, 44, 43, 43, 42, 42, 41, 41, 41, 40, 40,
+ 40, 39, 39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 44, 44, 45, 46, 46, 46,
+ 45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 41, 40, 40, 40, 39, 39, 39, 39,
+ 39, 38, 38, 38, 38, 38, 38, 38, 43, 44, 44, 45, 46, 45, 45, 44, 44, 43,
+ 43, 42, 42, 41, 41, 41, 40, 40, 40, 39, 39, 39, 39, 39, 38, 38, 38, 38,
+ 38, 38, 38, 38, 43, 43, 44, 44, 45, 44, 44, 44, 43, 43, 42, 42, 41, 41,
+ 41, 40, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 38, 38, 38,
+ 42, 42, 43, 43, 44, 44, 43, 43, 43, 42, 42, 42, 41, 41, 40, 40, 40, 40,
+ 39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 41, 42, 42, 43,
+ 43, 43, 43, 42, 42, 42, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 38,
+ 38, 38, 38, 38, 38, 38, 37, 37, 37, 37, 41, 42, 42, 43, 43, 43, 43, 42,
+ 42, 42, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38,
+ 38, 38, 37, 37, 37, 37, 41, 42, 42, 43, 43, 43, 43, 42, 42, 42, 41, 41,
+ 41, 40, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 38, 37, 37,
+ 37, 37, 41, 42, 42, 43, 43, 43, 43, 42, 42, 42, 41, 41, 41, 40, 40, 40,
+ 40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 38, 37, 37, 37, 37 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 100, 95, 72, 60, 95, 76, 65, 58, 72, 65, 57, 54, 60, 58, 54, 51,
+ /* Size 8 */
+ 90, 107, 102, 86, 74, 65, 60, 56, 107, 98, 101, 92, 80, 70, 64, 59, 102,
+ 101, 83, 76, 71, 65, 61, 58, 86, 92, 76, 68, 64, 60, 58, 56, 74, 80, 71,
+ 64, 60, 57, 55, 54, 65, 70, 65, 60, 57, 55, 53, 52, 60, 64, 61, 58, 55,
+ 53, 52, 51, 56, 59, 58, 56, 54, 52, 51, 51,
+ /* Size 16 */
+ 92, 100, 109, 107, 104, 96, 88, 82, 75, 71, 67, 64, 61, 59, 57, 57, 100,
+ 102, 105, 104, 103, 97, 91, 84, 78, 74, 69, 66, 63, 61, 59, 59, 109,
+ 105, 100, 101, 103, 98, 93, 87, 81, 76, 72, 68, 65, 63, 60, 60, 107,
+ 104, 101, 97, 94, 89, 85, 81, 77, 73, 69, 66, 63, 61, 59, 59, 104, 103,
+ 103, 94, 84, 81, 78, 75, 72, 69, 66, 64, 62, 60, 59, 59, 96, 97, 98, 89,
+ 81, 77, 73, 71, 68, 66, 64, 62, 60, 59, 58, 58, 88, 91, 93, 85, 78, 73,
+ 69, 67, 65, 63, 62, 60, 59, 58, 57, 57, 82, 84, 87, 81, 75, 71, 67, 65,
+ 63, 61, 60, 59, 58, 57, 56, 56, 75, 78, 81, 77, 72, 68, 65, 63, 61, 59,
+ 58, 57, 56, 55, 55, 55, 71, 74, 76, 73, 69, 66, 63, 61, 59, 58, 57, 56,
+ 55, 55, 54, 54, 67, 69, 72, 69, 66, 64, 62, 60, 58, 57, 56, 55, 54, 54,
+ 53, 53, 64, 66, 68, 66, 64, 62, 60, 59, 57, 56, 55, 54, 54, 53, 53, 53,
+ 61, 63, 65, 63, 62, 60, 59, 58, 56, 55, 54, 54, 53, 53, 52, 52, 59, 61,
+ 63, 61, 60, 59, 58, 57, 55, 55, 54, 53, 53, 52, 52, 52, 57, 59, 60, 59,
+ 59, 58, 57, 56, 55, 54, 53, 53, 52, 52, 52, 52, 57, 59, 60, 59, 59, 58,
+ 57, 56, 55, 54, 53, 53, 52, 52, 52, 52,
+ /* Size 32 */
+ 92, 97, 101, 106, 110, 109, 107, 106, 105, 101, 97, 93, 89, 86, 82, 79,
+ 76, 74, 72, 69, 67, 66, 64, 63, 62, 61, 60, 59, 58, 58, 58, 58, 97, 100,
+ 102, 105, 108, 107, 106, 105, 104, 101, 97, 94, 90, 87, 84, 81, 77, 75,
+ 73, 71, 69, 67, 66, 64, 63, 62, 61, 60, 59, 59, 59, 59, 101, 102, 103,
+ 105, 106, 105, 105, 105, 104, 101, 98, 95, 91, 88, 85, 82, 79, 77, 74,
+ 72, 70, 68, 67, 65, 64, 63, 61, 60, 59, 59, 59, 59, 106, 105, 105, 104,
+ 103, 103, 104, 104, 104, 101, 98, 96, 93, 90, 87, 84, 80, 78, 76, 73,
+ 71, 69, 68, 66, 65, 63, 62, 61, 60, 60, 60, 60, 110, 108, 106, 103, 101,
+ 102, 102, 103, 104, 101, 99, 97, 94, 91, 88, 85, 82, 80, 77, 75, 72, 71,
+ 69, 67, 66, 64, 63, 62, 61, 61, 61, 61, 109, 107, 105, 103, 102, 101,
+ 100, 100, 99, 97, 95, 92, 90, 88, 85, 82, 80, 77, 75, 73, 71, 69, 68,
+ 66, 65, 64, 63, 61, 60, 60, 60, 60, 107, 106, 105, 104, 102, 100, 98,
+ 96, 94, 92, 90, 88, 86, 84, 82, 79, 77, 75, 73, 72, 70, 68, 67, 65, 64,
+ 63, 62, 61, 60, 60, 60, 60, 106, 105, 105, 104, 103, 100, 96, 93, 90,
+ 88, 86, 84, 82, 80, 79, 77, 75, 73, 72, 70, 68, 67, 66, 65, 63, 62, 61,
+ 60, 60, 60, 60, 60, 105, 104, 104, 104, 104, 99, 94, 90, 85, 83, 82, 80,
+ 78, 77, 75, 74, 73, 71, 70, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 59,
+ 59, 59, 101, 101, 101, 101, 101, 97, 92, 88, 83, 82, 80, 78, 76, 75, 73,
+ 72, 71, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 59, 59, 59, 59, 97,
+ 97, 98, 98, 99, 95, 90, 86, 82, 80, 78, 76, 74, 73, 71, 70, 69, 68, 67,
+ 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 58, 58, 58, 93, 94, 95, 96, 97,
+ 92, 88, 84, 80, 78, 76, 74, 72, 71, 70, 68, 67, 66, 65, 64, 63, 63, 62,
+ 61, 60, 59, 59, 58, 58, 58, 58, 58, 89, 90, 91, 93, 94, 90, 86, 82, 78,
+ 76, 74, 72, 70, 69, 68, 66, 65, 65, 64, 63, 62, 61, 61, 60, 59, 59, 58,
+ 58, 57, 57, 57, 57, 86, 87, 88, 90, 91, 88, 84, 80, 77, 75, 73, 71, 69,
+ 68, 66, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 58, 58, 57, 57, 57, 57,
+ 57, 82, 84, 85, 87, 88, 85, 82, 79, 75, 73, 71, 70, 68, 66, 65, 64, 63,
+ 63, 62, 61, 60, 60, 59, 59, 58, 58, 57, 57, 56, 56, 56, 56, 79, 81, 82,
+ 84, 85, 82, 79, 77, 74, 72, 70, 68, 66, 65, 64, 63, 62, 62, 61, 60, 59,
+ 59, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 76, 77, 79, 80, 82, 80, 77,
+ 75, 73, 71, 69, 67, 65, 64, 63, 62, 61, 61, 60, 59, 59, 58, 58, 57, 57,
+ 56, 56, 56, 55, 55, 55, 55, 74, 75, 77, 78, 80, 77, 75, 73, 71, 69, 68,
+ 66, 65, 64, 63, 62, 61, 60, 59, 59, 58, 58, 57, 57, 56, 56, 56, 55, 55,
+ 55, 55, 55, 72, 73, 74, 76, 77, 75, 73, 72, 70, 68, 67, 65, 64, 63, 62,
+ 61, 60, 59, 59, 58, 57, 57, 57, 56, 56, 55, 55, 55, 55, 55, 55, 55, 69,
+ 71, 72, 73, 75, 73, 72, 70, 68, 67, 66, 64, 63, 62, 61, 60, 59, 59, 58,
+ 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54, 54, 54, 67, 69, 70, 71, 72,
+ 71, 70, 68, 67, 66, 65, 63, 62, 61, 60, 59, 59, 58, 57, 57, 56, 56, 56,
+ 55, 55, 55, 54, 54, 54, 54, 54, 54, 66, 67, 68, 69, 71, 69, 68, 67, 66,
+ 65, 64, 63, 61, 61, 60, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54,
+ 54, 54, 54, 54, 54, 64, 66, 67, 68, 69, 68, 67, 66, 65, 64, 63, 62, 61,
+ 60, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54, 54, 53, 53, 53,
+ 53, 63, 64, 65, 66, 67, 66, 65, 65, 64, 63, 62, 61, 60, 59, 59, 58, 57,
+ 57, 56, 56, 55, 55, 55, 54, 54, 54, 54, 53, 53, 53, 53, 53, 62, 63, 64,
+ 65, 66, 65, 64, 63, 63, 62, 61, 60, 59, 59, 58, 57, 57, 56, 56, 55, 55,
+ 55, 54, 54, 54, 53, 53, 53, 53, 53, 53, 53, 61, 62, 63, 63, 64, 64, 63,
+ 62, 62, 61, 60, 59, 59, 58, 58, 57, 56, 56, 55, 55, 55, 54, 54, 54, 53,
+ 53, 53, 53, 53, 53, 53, 53, 60, 61, 61, 62, 63, 63, 62, 61, 61, 60, 60,
+ 59, 58, 58, 57, 57, 56, 56, 55, 55, 54, 54, 54, 54, 53, 53, 53, 53, 52,
+ 52, 52, 52, 59, 60, 60, 61, 62, 61, 61, 60, 60, 59, 59, 58, 58, 57, 57,
+ 56, 56, 55, 55, 54, 54, 54, 54, 53, 53, 53, 53, 52, 52, 52, 52, 52, 58,
+ 59, 59, 60, 61, 60, 60, 60, 59, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55,
+ 54, 54, 54, 53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 58, 59, 59, 60, 61,
+ 60, 60, 60, 59, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54, 53,
+ 53, 53, 53, 52, 52, 52, 52, 52, 52, 58, 59, 59, 60, 61, 60, 60, 60, 59,
+ 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54, 53, 53, 53, 53, 52,
+ 52, 52, 52, 52, 52, 58, 59, 59, 60, 61, 60, 60, 60, 59, 59, 58, 58, 57,
+ 57, 56, 56, 55, 55, 55, 54, 54, 54, 53, 53, 53, 53, 52, 52, 52, 52, 52,
+ 52 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 50, 48, 43, 50, 45, 44, 42, 48, 44, 40, 38, 43, 42, 38, 36,
+ /* Size 8 */
+ 64, 73, 52, 50, 49, 47, 44, 42, 73, 59, 51, 54, 53, 51, 48, 45, 52, 51,
+ 48, 49, 49, 47, 46, 44, 50, 54, 49, 46, 45, 44, 43, 42, 49, 53, 49, 45,
+ 43, 42, 41, 40, 47, 51, 47, 44, 42, 40, 40, 39, 44, 48, 46, 43, 41, 40,
+ 39, 38, 42, 45, 44, 42, 40, 39, 38, 37,
+ /* Size 16 */
+ 64, 69, 73, 63, 52, 51, 50, 50, 49, 48, 47, 45, 44, 43, 42, 42, 69, 67,
+ 66, 59, 52, 52, 52, 52, 51, 50, 49, 47, 46, 45, 44, 44, 73, 66, 59, 55,
+ 51, 53, 54, 54, 53, 52, 51, 49, 48, 47, 45, 45, 63, 59, 55, 52, 50, 50,
+ 51, 51, 51, 50, 49, 48, 47, 46, 45, 45, 52, 52, 51, 50, 48, 48, 49, 49,
+ 49, 48, 47, 47, 46, 45, 44, 44, 51, 52, 53, 50, 48, 48, 47, 47, 47, 46,
+ 46, 45, 44, 44, 43, 43, 50, 52, 54, 51, 49, 47, 46, 45, 45, 45, 44, 44,
+ 43, 43, 42, 42, 50, 52, 54, 51, 49, 47, 45, 45, 44, 44, 43, 43, 42, 42,
+ 41, 41, 49, 51, 53, 51, 49, 47, 45, 44, 43, 42, 42, 41, 41, 41, 40, 40,
+ 48, 50, 52, 50, 48, 46, 45, 44, 42, 42, 41, 41, 40, 40, 39, 39, 47, 49,
+ 51, 49, 47, 46, 44, 43, 42, 41, 40, 40, 40, 39, 39, 39, 45, 47, 49, 48,
+ 47, 45, 44, 43, 41, 41, 40, 40, 39, 39, 38, 38, 44, 46, 48, 47, 46, 44,
+ 43, 42, 41, 40, 40, 39, 39, 38, 38, 38, 43, 45, 47, 46, 45, 44, 43, 42,
+ 41, 40, 39, 39, 38, 38, 37, 37, 42, 44, 45, 45, 44, 43, 42, 41, 40, 39,
+ 39, 38, 38, 37, 37, 37, 42, 44, 45, 45, 44, 43, 42, 41, 40, 39, 39, 38,
+ 38, 37, 37, 37,
+ /* Size 32 */
+ 64, 66, 69, 71, 73, 68, 63, 57, 52, 52, 51, 51, 50, 50, 50, 49, 49, 48,
+ 48, 47, 47, 46, 45, 45, 44, 44, 43, 43, 42, 42, 42, 42, 66, 67, 68, 69,
+ 69, 65, 61, 56, 52, 52, 52, 52, 51, 51, 51, 50, 50, 49, 49, 48, 48, 47,
+ 46, 46, 45, 45, 44, 43, 43, 43, 43, 43, 69, 68, 67, 67, 66, 62, 59, 55,
+ 52, 52, 52, 52, 52, 52, 52, 51, 51, 51, 50, 49, 49, 48, 47, 47, 46, 46,
+ 45, 44, 44, 44, 44, 44, 71, 69, 67, 64, 62, 60, 57, 54, 52, 52, 52, 53,
+ 53, 53, 53, 52, 52, 52, 51, 50, 50, 49, 48, 48, 47, 46, 46, 45, 45, 45,
+ 45, 45, 73, 69, 66, 62, 59, 57, 55, 53, 51, 52, 53, 53, 54, 54, 54, 54,
+ 53, 53, 52, 51, 51, 50, 49, 49, 48, 47, 47, 46, 45, 45, 45, 45, 68, 65,
+ 62, 60, 57, 55, 54, 52, 51, 51, 52, 52, 53, 53, 52, 52, 52, 52, 51, 51,
+ 50, 49, 49, 48, 47, 47, 46, 46, 45, 45, 45, 45, 63, 61, 59, 57, 55, 54,
+ 52, 51, 50, 50, 50, 51, 51, 51, 51, 51, 51, 51, 50, 50, 49, 49, 48, 47,
+ 47, 46, 46, 45, 45, 45, 45, 45, 57, 56, 55, 54, 53, 52, 51, 50, 49, 49,
+ 49, 50, 50, 50, 50, 50, 50, 49, 49, 49, 48, 48, 47, 47, 46, 46, 45, 45,
+ 44, 44, 44, 44, 52, 52, 52, 52, 51, 51, 50, 49, 48, 48, 48, 48, 49, 49,
+ 49, 49, 49, 48, 48, 48, 47, 47, 47, 46, 46, 45, 45, 44, 44, 44, 44, 44,
+ 52, 52, 52, 52, 52, 51, 50, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 47,
+ 47, 47, 47, 46, 46, 46, 45, 45, 44, 44, 43, 43, 43, 43, 51, 52, 52, 52,
+ 53, 52, 50, 49, 48, 48, 48, 47, 47, 47, 47, 47, 47, 47, 46, 46, 46, 46,
+ 45, 45, 44, 44, 44, 43, 43, 43, 43, 43, 51, 52, 52, 53, 53, 52, 51, 50,
+ 48, 48, 47, 47, 47, 46, 46, 46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 43,
+ 43, 43, 42, 42, 42, 42, 50, 51, 52, 53, 54, 53, 51, 50, 49, 48, 47, 47,
+ 46, 46, 45, 45, 45, 45, 45, 44, 44, 44, 44, 43, 43, 43, 43, 42, 42, 42,
+ 42, 42, 50, 51, 52, 53, 54, 53, 51, 50, 49, 48, 47, 46, 46, 45, 45, 45,
+ 45, 44, 44, 44, 44, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 50, 51,
+ 52, 53, 54, 52, 51, 50, 49, 48, 47, 46, 45, 45, 45, 44, 44, 44, 44, 43,
+ 43, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 41, 49, 50, 51, 52, 54, 52,
+ 51, 50, 49, 48, 47, 46, 45, 45, 44, 44, 44, 43, 43, 43, 42, 42, 42, 42,
+ 42, 41, 41, 41, 41, 41, 41, 41, 49, 50, 51, 52, 53, 52, 51, 50, 49, 48,
+ 47, 46, 45, 45, 44, 44, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 41, 40,
+ 40, 40, 40, 40, 48, 49, 51, 52, 53, 52, 51, 49, 48, 47, 47, 46, 45, 44,
+ 44, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 40, 40, 40, 40, 40, 40, 40,
+ 48, 49, 50, 51, 52, 51, 50, 49, 48, 47, 46, 45, 45, 44, 44, 43, 42, 42,
+ 42, 42, 41, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 47, 48, 49, 50,
+ 51, 51, 50, 49, 48, 47, 46, 45, 44, 44, 43, 43, 42, 42, 42, 41, 41, 41,
+ 40, 40, 40, 40, 40, 39, 39, 39, 39, 39, 47, 48, 49, 50, 51, 50, 49, 48,
+ 47, 47, 46, 45, 44, 44, 43, 42, 42, 42, 41, 41, 40, 40, 40, 40, 40, 39,
+ 39, 39, 39, 39, 39, 39, 46, 47, 48, 49, 50, 49, 49, 48, 47, 46, 46, 45,
+ 44, 43, 43, 42, 42, 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 39, 39,
+ 39, 39, 45, 46, 47, 48, 49, 49, 48, 47, 47, 46, 45, 44, 44, 43, 43, 42,
+ 41, 41, 41, 40, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 45, 46,
+ 47, 48, 49, 48, 47, 47, 46, 46, 45, 44, 43, 43, 42, 42, 41, 41, 41, 40,
+ 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 38, 44, 45, 46, 47, 48, 47,
+ 47, 46, 46, 45, 44, 44, 43, 43, 42, 42, 41, 41, 40, 40, 40, 39, 39, 39,
+ 39, 38, 38, 38, 38, 38, 38, 38, 44, 45, 46, 46, 47, 47, 46, 46, 45, 45,
+ 44, 43, 43, 42, 42, 41, 41, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38,
+ 38, 38, 38, 38, 43, 44, 45, 46, 47, 46, 46, 45, 45, 44, 44, 43, 43, 42,
+ 42, 41, 41, 40, 40, 40, 39, 39, 39, 38, 38, 38, 38, 38, 37, 37, 37, 37,
+ 43, 43, 44, 45, 46, 46, 45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 40, 40,
+ 40, 39, 39, 39, 38, 38, 38, 38, 38, 37, 37, 37, 37, 37, 42, 43, 44, 45,
+ 45, 45, 45, 44, 44, 43, 43, 42, 42, 42, 41, 41, 40, 40, 39, 39, 39, 39,
+ 38, 38, 38, 38, 37, 37, 37, 37, 37, 37, 42, 43, 44, 45, 45, 45, 45, 44,
+ 44, 43, 43, 42, 42, 42, 41, 41, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38,
+ 37, 37, 37, 37, 37, 37, 42, 43, 44, 45, 45, 45, 45, 44, 44, 43, 43, 42,
+ 42, 42, 41, 41, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 37, 37, 37, 37,
+ 37, 37, 42, 43, 44, 45, 45, 45, 45, 44, 44, 43, 43, 42, 42, 42, 41, 41,
+ 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 37, 37, 37, 37, 37, 37 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 97, 74, 71, 63, 74, 67, 65, 61, 71, 65, 58, 56, 63, 61, 56, 52,
+ /* Size 8 */
+ 92, 106, 74, 72, 69, 66, 62, 59, 106, 84, 73, 77, 76, 72, 68, 64, 74,
+ 73, 68, 69, 69, 67, 65, 62, 72, 77, 69, 65, 63, 62, 61, 59, 69, 76, 69,
+ 63, 60, 59, 57, 56, 66, 72, 67, 62, 59, 57, 55, 54, 62, 68, 65, 61, 57,
+ 55, 54, 53, 59, 64, 62, 59, 56, 54, 53, 51,
+ /* Size 16 */
+ 93, 100, 107, 91, 75, 74, 73, 71, 70, 68, 67, 65, 63, 61, 60, 60, 100,
+ 98, 96, 85, 74, 75, 75, 74, 74, 72, 70, 68, 66, 64, 62, 62, 107, 96, 85,
+ 79, 74, 76, 78, 77, 77, 75, 73, 71, 69, 67, 65, 65, 91, 85, 79, 75, 71,
+ 73, 74, 74, 73, 72, 71, 69, 67, 65, 64, 64, 75, 74, 74, 71, 68, 69, 70,
+ 70, 70, 69, 68, 67, 65, 64, 63, 63, 74, 75, 76, 73, 69, 68, 68, 67, 67,
+ 66, 65, 64, 63, 62, 61, 61, 73, 75, 78, 74, 70, 68, 66, 65, 64, 64, 63,
+ 62, 61, 60, 60, 60, 71, 74, 77, 74, 70, 67, 65, 64, 63, 62, 61, 61, 60,
+ 59, 58, 58, 70, 74, 77, 73, 70, 67, 64, 63, 61, 60, 59, 59, 58, 58, 57,
+ 57, 68, 72, 75, 72, 69, 66, 64, 62, 60, 59, 58, 58, 57, 56, 56, 56, 67,
+ 70, 73, 71, 68, 65, 63, 61, 59, 58, 57, 57, 56, 55, 55, 55, 65, 68, 71,
+ 69, 67, 64, 62, 61, 59, 58, 57, 56, 55, 55, 54, 54, 63, 66, 69, 67, 65,
+ 63, 61, 60, 58, 57, 56, 55, 54, 54, 53, 53, 61, 64, 67, 65, 64, 62, 60,
+ 59, 58, 56, 55, 55, 54, 53, 53, 53, 60, 62, 65, 64, 63, 61, 60, 58, 57,
+ 56, 55, 54, 53, 53, 52, 52, 60, 62, 65, 64, 63, 61, 60, 58, 57, 56, 55,
+ 54, 53, 53, 52, 52,
+ /* Size 32 */
+ 94, 97, 101, 104, 108, 100, 91, 83, 75, 75, 74, 74, 73, 72, 72, 71, 71,
+ 70, 69, 68, 67, 66, 65, 64, 63, 63, 62, 61, 60, 60, 60, 60, 97, 98, 100,
+ 101, 102, 95, 89, 82, 75, 75, 75, 75, 74, 74, 73, 73, 72, 71, 70, 70,
+ 69, 68, 67, 66, 65, 64, 63, 62, 61, 61, 61, 61, 101, 100, 99, 98, 97,
+ 91, 86, 80, 75, 75, 75, 76, 76, 75, 75, 74, 74, 73, 72, 71, 70, 69, 68,
+ 67, 66, 65, 65, 64, 63, 63, 63, 63, 104, 101, 98, 94, 91, 87, 83, 79,
+ 75, 75, 76, 77, 77, 77, 76, 76, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67,
+ 66, 65, 64, 64, 64, 64, 108, 102, 97, 91, 85, 83, 80, 77, 75, 76, 77,
+ 78, 79, 78, 78, 78, 77, 76, 75, 75, 74, 73, 71, 70, 69, 68, 67, 66, 65,
+ 65, 65, 65, 100, 95, 91, 87, 83, 80, 78, 76, 73, 74, 75, 76, 76, 76, 76,
+ 76, 76, 75, 74, 73, 72, 71, 70, 69, 68, 68, 67, 66, 65, 65, 65, 65, 91,
+ 89, 86, 83, 80, 78, 76, 74, 72, 72, 73, 74, 74, 74, 74, 74, 74, 73, 72,
+ 72, 71, 70, 69, 68, 68, 67, 66, 65, 64, 64, 64, 64, 83, 82, 80, 79, 77,
+ 76, 74, 72, 70, 71, 71, 72, 72, 72, 72, 72, 72, 71, 71, 70, 70, 69, 68,
+ 67, 67, 66, 65, 64, 64, 64, 64, 64, 75, 75, 75, 75, 75, 73, 72, 70, 69,
+ 69, 69, 70, 70, 70, 70, 70, 70, 70, 69, 69, 68, 68, 67, 66, 66, 65, 64,
+ 64, 63, 63, 63, 63, 75, 75, 75, 75, 76, 74, 72, 71, 69, 69, 69, 69, 69,
+ 69, 69, 69, 69, 68, 68, 68, 67, 67, 66, 65, 65, 64, 63, 63, 62, 62, 62,
+ 62, 74, 75, 75, 76, 77, 75, 73, 71, 69, 69, 69, 68, 68, 68, 68, 68, 67,
+ 67, 67, 66, 66, 65, 65, 64, 64, 63, 63, 62, 61, 61, 61, 61, 74, 75, 76,
+ 77, 78, 76, 74, 72, 70, 69, 68, 68, 67, 67, 67, 66, 66, 66, 65, 65, 65,
+ 64, 64, 63, 63, 62, 62, 61, 61, 61, 61, 61, 73, 74, 76, 77, 79, 76, 74,
+ 72, 70, 69, 68, 67, 66, 66, 65, 65, 65, 64, 64, 64, 63, 63, 63, 62, 62,
+ 61, 61, 60, 60, 60, 60, 60, 72, 74, 75, 77, 78, 76, 74, 72, 70, 69, 68,
+ 67, 66, 65, 65, 64, 64, 64, 63, 63, 63, 62, 62, 61, 61, 61, 60, 60, 59,
+ 59, 59, 59, 72, 73, 75, 76, 78, 76, 74, 72, 70, 69, 68, 67, 65, 65, 64,
+ 64, 63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 59, 59, 59, 59, 59, 59, 71,
+ 73, 74, 76, 78, 76, 74, 72, 70, 69, 68, 66, 65, 64, 64, 63, 62, 62, 62,
+ 61, 61, 60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 58, 71, 72, 74, 76, 77,
+ 76, 74, 72, 70, 69, 67, 66, 65, 64, 63, 62, 62, 61, 61, 60, 60, 60, 59,
+ 59, 59, 58, 58, 58, 57, 57, 57, 57, 70, 71, 73, 75, 76, 75, 73, 71, 70,
+ 68, 67, 66, 64, 64, 63, 62, 61, 61, 60, 60, 59, 59, 59, 58, 58, 58, 57,
+ 57, 57, 57, 57, 57, 69, 70, 72, 74, 75, 74, 72, 71, 69, 68, 67, 65, 64,
+ 63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56,
+ 56, 68, 70, 71, 73, 75, 73, 72, 70, 69, 68, 66, 65, 64, 63, 62, 61, 60,
+ 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 67, 69, 70,
+ 72, 74, 72, 71, 70, 68, 67, 66, 65, 63, 63, 62, 61, 60, 59, 59, 58, 58,
+ 57, 57, 57, 56, 56, 56, 55, 55, 55, 55, 55, 66, 68, 69, 71, 73, 71, 70,
+ 69, 68, 67, 65, 64, 63, 62, 61, 60, 60, 59, 58, 58, 57, 57, 57, 56, 56,
+ 56, 55, 55, 55, 55, 55, 55, 65, 67, 68, 70, 71, 70, 69, 68, 67, 66, 65,
+ 64, 63, 62, 61, 60, 59, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 55, 54,
+ 54, 54, 54, 64, 66, 67, 69, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 61,
+ 60, 59, 58, 58, 57, 57, 56, 56, 55, 55, 55, 55, 54, 54, 54, 54, 54, 63,
+ 65, 66, 68, 69, 68, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 59, 58, 57,
+ 57, 56, 56, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 63, 64, 65, 67, 68,
+ 68, 67, 66, 65, 64, 63, 62, 61, 61, 60, 59, 58, 58, 57, 57, 56, 56, 55,
+ 55, 54, 54, 54, 54, 53, 53, 53, 53, 62, 63, 65, 66, 67, 67, 66, 65, 64,
+ 63, 63, 62, 61, 60, 59, 59, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54,
+ 53, 53, 53, 53, 53, 61, 62, 64, 65, 66, 66, 65, 64, 64, 63, 62, 61, 60,
+ 60, 59, 58, 58, 57, 56, 56, 55, 55, 55, 54, 54, 54, 53, 53, 53, 53, 53,
+ 53, 60, 61, 63, 64, 65, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 58, 57,
+ 57, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 60, 61, 63,
+ 64, 65, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 58, 57, 57, 56, 56, 55,
+ 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 60, 61, 63, 64, 65, 65, 64,
+ 64, 63, 62, 61, 61, 60, 59, 59, 58, 57, 57, 56, 56, 55, 55, 54, 54, 54,
+ 53, 53, 53, 52, 52, 52, 52, 60, 61, 63, 64, 65, 65, 64, 64, 63, 62, 61,
+ 61, 60, 59, 59, 58, 57, 57, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53, 52,
+ 52, 52, 52 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 61, 49, 42, 61, 51, 45, 41, 49, 45, 41, 39, 42, 41, 39, 38,
+ /* Size 8 */
+ 64, 74, 71, 62, 55, 50, 47, 45, 74, 69, 70, 65, 58, 53, 49, 46, 71, 70,
+ 60, 56, 53, 50, 47, 45, 62, 65, 56, 51, 49, 47, 45, 44, 55, 58, 53, 49,
+ 46, 45, 44, 43, 50, 53, 50, 47, 45, 44, 43, 42, 47, 49, 47, 45, 44, 43,
+ 42, 42, 45, 46, 45, 44, 43, 42, 42, 41,
+ /* Size 16 */
+ 64, 69, 74, 73, 71, 66, 62, 58, 55, 52, 50, 48, 47, 46, 45, 45, 69, 70,
+ 71, 71, 71, 67, 63, 60, 56, 54, 51, 49, 48, 47, 45, 45, 74, 71, 69, 70,
+ 70, 68, 65, 62, 58, 55, 53, 51, 49, 48, 46, 46, 73, 71, 70, 67, 65, 63,
+ 61, 58, 55, 53, 51, 50, 48, 47, 46, 46, 71, 71, 70, 65, 60, 58, 56, 54,
+ 53, 51, 50, 48, 47, 46, 45, 45, 66, 67, 68, 63, 58, 56, 54, 52, 51, 50,
+ 48, 47, 46, 45, 45, 45, 62, 63, 65, 61, 56, 54, 51, 50, 49, 48, 47, 46,
+ 45, 45, 44, 44, 58, 60, 62, 58, 54, 52, 50, 49, 48, 47, 46, 45, 45, 44,
+ 44, 44, 55, 56, 58, 55, 53, 51, 49, 48, 46, 46, 45, 44, 44, 43, 43, 43,
+ 52, 54, 55, 53, 51, 50, 48, 47, 46, 45, 44, 44, 43, 43, 43, 43, 50, 51,
+ 53, 51, 50, 48, 47, 46, 45, 44, 44, 43, 43, 43, 42, 42, 48, 49, 51, 50,
+ 48, 47, 46, 45, 44, 44, 43, 43, 42, 42, 42, 42, 47, 48, 49, 48, 47, 46,
+ 45, 45, 44, 43, 43, 42, 42, 42, 42, 42, 46, 47, 48, 47, 46, 45, 45, 44,
+ 43, 43, 43, 42, 42, 42, 41, 41, 45, 45, 46, 46, 45, 45, 44, 44, 43, 43,
+ 42, 42, 42, 41, 41, 41, 45, 45, 46, 46, 45, 45, 44, 44, 43, 43, 42, 42,
+ 42, 41, 41, 41,
+ /* Size 32 */
+ 64, 67, 69, 72, 74, 73, 73, 72, 71, 69, 66, 64, 62, 60, 58, 57, 55, 53,
+ 52, 51, 50, 49, 48, 47, 47, 46, 46, 45, 45, 45, 45, 45, 67, 68, 70, 71,
+ 73, 72, 72, 71, 71, 69, 67, 65, 63, 61, 59, 57, 56, 54, 53, 52, 51, 50,
+ 49, 48, 47, 47, 46, 45, 45, 45, 45, 45, 69, 70, 70, 71, 71, 71, 71, 71,
+ 71, 69, 67, 65, 63, 62, 60, 58, 56, 55, 54, 53, 51, 50, 49, 49, 48, 47,
+ 47, 46, 45, 45, 45, 45, 72, 71, 71, 71, 70, 70, 70, 70, 71, 69, 67, 66,
+ 64, 63, 61, 59, 57, 56, 55, 53, 52, 51, 50, 49, 48, 48, 47, 46, 46, 46,
+ 46, 46, 74, 73, 71, 70, 69, 69, 70, 70, 70, 69, 68, 66, 65, 63, 62, 60,
+ 58, 57, 55, 54, 53, 52, 51, 50, 49, 48, 48, 47, 46, 46, 46, 46, 73, 72,
+ 71, 70, 69, 69, 69, 68, 68, 67, 65, 64, 63, 61, 60, 58, 57, 56, 54, 53,
+ 52, 51, 50, 49, 48, 48, 47, 47, 46, 46, 46, 46, 73, 72, 71, 70, 70, 69,
+ 67, 66, 65, 64, 63, 62, 61, 59, 58, 57, 55, 54, 53, 52, 51, 50, 50, 49,
+ 48, 47, 47, 46, 46, 46, 46, 46, 72, 71, 71, 70, 70, 68, 66, 64, 62, 61,
+ 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 50, 49, 48, 48, 47, 47, 46,
+ 45, 45, 45, 45, 71, 71, 71, 71, 70, 68, 65, 62, 60, 59, 58, 57, 56, 55,
+ 54, 54, 53, 52, 51, 50, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45,
+ 69, 69, 69, 69, 69, 67, 64, 61, 59, 58, 57, 56, 55, 54, 53, 53, 52, 51,
+ 50, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45, 45, 66, 67, 67, 67,
+ 68, 65, 63, 60, 58, 57, 56, 55, 54, 53, 52, 51, 51, 50, 50, 49, 48, 48,
+ 47, 47, 46, 46, 45, 45, 45, 45, 45, 45, 64, 65, 65, 66, 66, 64, 62, 59,
+ 57, 56, 55, 54, 52, 52, 51, 50, 50, 49, 49, 48, 48, 47, 47, 46, 46, 45,
+ 45, 45, 44, 44, 44, 44, 62, 63, 63, 64, 65, 63, 61, 58, 56, 55, 54, 52,
+ 51, 51, 50, 49, 49, 48, 48, 47, 47, 46, 46, 46, 45, 45, 45, 44, 44, 44,
+ 44, 44, 60, 61, 62, 63, 63, 61, 59, 57, 55, 54, 53, 52, 51, 50, 49, 49,
+ 48, 48, 47, 47, 46, 46, 46, 45, 45, 45, 44, 44, 44, 44, 44, 44, 58, 59,
+ 60, 61, 62, 60, 58, 56, 54, 53, 52, 51, 50, 49, 49, 48, 48, 47, 47, 46,
+ 46, 46, 45, 45, 45, 44, 44, 44, 44, 44, 44, 44, 57, 57, 58, 59, 60, 58,
+ 57, 55, 54, 53, 51, 50, 49, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45,
+ 44, 44, 44, 44, 43, 43, 43, 43, 55, 56, 56, 57, 58, 57, 55, 54, 53, 52,
+ 51, 50, 49, 48, 48, 47, 46, 46, 46, 45, 45, 45, 44, 44, 44, 44, 43, 43,
+ 43, 43, 43, 43, 53, 54, 55, 56, 57, 56, 54, 53, 52, 51, 50, 49, 48, 48,
+ 47, 47, 46, 46, 45, 45, 45, 44, 44, 44, 44, 43, 43, 43, 43, 43, 43, 43,
+ 52, 53, 54, 55, 55, 54, 53, 52, 51, 50, 50, 49, 48, 47, 47, 46, 46, 45,
+ 45, 45, 44, 44, 44, 44, 43, 43, 43, 43, 43, 43, 43, 43, 51, 52, 53, 53,
+ 54, 53, 52, 51, 50, 50, 49, 48, 47, 47, 46, 46, 45, 45, 45, 44, 44, 44,
+ 44, 43, 43, 43, 43, 43, 42, 42, 42, 42, 50, 51, 51, 52, 53, 52, 51, 50,
+ 50, 49, 48, 48, 47, 46, 46, 45, 45, 45, 44, 44, 44, 43, 43, 43, 43, 43,
+ 43, 42, 42, 42, 42, 42, 49, 50, 50, 51, 52, 51, 50, 50, 49, 48, 48, 47,
+ 46, 46, 46, 45, 45, 44, 44, 44, 43, 43, 43, 43, 43, 43, 42, 42, 42, 42,
+ 42, 42, 48, 49, 49, 50, 51, 50, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45,
+ 44, 44, 44, 44, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 42, 47, 48,
+ 49, 49, 50, 49, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 44, 44, 44, 43,
+ 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 42, 47, 47, 48, 48, 49, 48,
+ 48, 48, 47, 47, 46, 46, 45, 45, 45, 44, 44, 44, 43, 43, 43, 43, 42, 42,
+ 42, 42, 42, 42, 42, 42, 42, 42, 46, 47, 47, 48, 48, 48, 47, 47, 47, 46,
+ 46, 45, 45, 45, 44, 44, 44, 43, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42,
+ 42, 42, 42, 42, 46, 46, 47, 47, 48, 47, 47, 47, 46, 46, 45, 45, 45, 44,
+ 44, 44, 43, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 41, 41, 41, 41,
+ 45, 45, 46, 46, 47, 47, 46, 46, 46, 45, 45, 45, 44, 44, 44, 44, 43, 43,
+ 43, 43, 42, 42, 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, 45, 45, 45, 46,
+ 46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 44, 43, 43, 43, 43, 42, 42, 42,
+ 42, 42, 42, 42, 41, 41, 41, 41, 41, 41, 45, 45, 45, 46, 46, 46, 46, 45,
+ 45, 45, 45, 44, 44, 44, 44, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42,
+ 41, 41, 41, 41, 41, 41, 45, 45, 45, 46, 46, 46, 46, 45, 45, 45, 45, 44,
+ 44, 44, 44, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 41, 41, 41, 41,
+ 41, 41, 45, 45, 45, 46, 46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 44, 43,
+ 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, 41 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 93, 88, 70, 60, 88, 73, 64, 58, 70, 64, 58, 55, 60, 58, 55, 53,
+ /* Size 8 */
+ 84, 98, 94, 82, 71, 65, 60, 57, 98, 91, 93, 86, 76, 69, 63, 60, 94, 93,
+ 79, 73, 69, 65, 61, 58, 82, 86, 73, 67, 63, 61, 58, 57, 71, 76, 69, 63,
+ 60, 58, 56, 55, 65, 69, 65, 61, 58, 56, 55, 54, 60, 63, 61, 58, 56, 55,
+ 54, 53, 57, 60, 58, 57, 55, 54, 53, 53,
+ /* Size 16 */
+ 86, 93, 100, 98, 96, 89, 83, 78, 73, 69, 66, 63, 61, 60, 58, 58, 93, 94,
+ 96, 96, 95, 90, 85, 80, 75, 71, 68, 65, 63, 61, 59, 59, 100, 96, 93, 94,
+ 95, 91, 87, 82, 77, 74, 70, 67, 64, 62, 61, 61, 98, 96, 94, 90, 87, 84,
+ 81, 77, 74, 71, 68, 65, 63, 61, 60, 60, 96, 95, 95, 87, 80, 77, 74, 72,
+ 70, 68, 66, 64, 62, 61, 59, 59, 89, 90, 91, 84, 77, 74, 71, 69, 67, 65,
+ 64, 62, 61, 59, 58, 58, 83, 85, 87, 81, 74, 71, 68, 66, 64, 63, 62, 60,
+ 59, 58, 58, 58, 78, 80, 82, 77, 72, 69, 66, 64, 62, 61, 60, 59, 58, 58,
+ 57, 57, 73, 75, 77, 74, 70, 67, 64, 62, 61, 60, 59, 58, 57, 57, 56, 56,
+ 69, 71, 74, 71, 68, 65, 63, 61, 60, 59, 58, 57, 57, 56, 56, 56, 66, 68,
+ 70, 68, 66, 64, 62, 60, 59, 58, 57, 56, 56, 55, 55, 55, 63, 65, 67, 65,
+ 64, 62, 60, 59, 58, 57, 56, 56, 55, 55, 55, 55, 61, 63, 64, 63, 62, 61,
+ 59, 58, 57, 57, 56, 55, 55, 55, 54, 54, 60, 61, 62, 61, 61, 59, 58, 58,
+ 57, 56, 55, 55, 55, 54, 54, 54, 58, 59, 61, 60, 59, 58, 58, 57, 56, 56,
+ 55, 55, 54, 54, 54, 54, 58, 59, 61, 60, 59, 58, 58, 57, 56, 56, 55, 55,
+ 54, 54, 54, 54,
+ /* Size 32 */
+ 86, 90, 93, 97, 101, 100, 98, 97, 96, 93, 90, 87, 83, 81, 78, 76, 73,
+ 71, 70, 68, 66, 65, 64, 63, 62, 61, 60, 59, 59, 59, 59, 59, 90, 92, 94,
+ 97, 99, 98, 97, 97, 96, 93, 90, 87, 84, 82, 79, 77, 74, 73, 71, 69, 67,
+ 66, 65, 64, 62, 62, 61, 60, 59, 59, 59, 59, 93, 94, 95, 96, 97, 97, 96,
+ 96, 96, 93, 91, 88, 86, 83, 81, 78, 76, 74, 72, 70, 68, 67, 66, 64, 63,
+ 62, 62, 61, 60, 60, 60, 60, 97, 97, 96, 96, 95, 95, 95, 96, 96, 93, 91,
+ 89, 87, 84, 82, 79, 77, 75, 73, 71, 69, 68, 67, 65, 64, 63, 62, 61, 60,
+ 60, 60, 60, 101, 99, 97, 95, 93, 94, 94, 95, 95, 94, 92, 90, 88, 85, 83,
+ 80, 78, 76, 74, 72, 70, 69, 68, 66, 65, 64, 63, 62, 61, 61, 61, 61, 100,
+ 98, 97, 95, 94, 93, 93, 92, 92, 90, 88, 86, 85, 82, 80, 78, 76, 74, 73,
+ 71, 69, 68, 67, 65, 64, 63, 62, 62, 61, 61, 61, 61, 98, 97, 96, 95, 94,
+ 93, 91, 90, 88, 86, 85, 83, 81, 80, 78, 76, 74, 73, 71, 70, 68, 67, 66,
+ 65, 64, 63, 62, 61, 60, 60, 60, 60, 97, 97, 96, 96, 95, 92, 90, 87, 84,
+ 83, 81, 80, 78, 77, 75, 74, 72, 71, 70, 68, 67, 66, 65, 64, 63, 62, 61,
+ 61, 60, 60, 60, 60, 96, 96, 96, 96, 95, 92, 88, 84, 80, 79, 78, 76, 75,
+ 74, 73, 72, 70, 69, 68, 67, 66, 65, 64, 63, 62, 62, 61, 60, 60, 60, 60,
+ 60, 93, 93, 93, 93, 94, 90, 86, 83, 79, 78, 76, 75, 73, 72, 71, 70, 69,
+ 68, 67, 66, 65, 64, 63, 63, 62, 61, 60, 60, 59, 59, 59, 59, 90, 90, 91,
+ 91, 92, 88, 85, 81, 78, 76, 75, 73, 72, 71, 70, 69, 67, 67, 66, 65, 64,
+ 63, 63, 62, 61, 61, 60, 59, 59, 59, 59, 59, 87, 87, 88, 89, 90, 86, 83,
+ 80, 76, 75, 73, 72, 70, 69, 68, 67, 66, 65, 65, 64, 63, 62, 62, 61, 60,
+ 60, 59, 59, 58, 58, 58, 58, 83, 84, 86, 87, 88, 85, 81, 78, 75, 73, 72,
+ 70, 68, 67, 66, 66, 65, 64, 63, 63, 62, 61, 61, 60, 60, 59, 59, 58, 58,
+ 58, 58, 58, 81, 82, 83, 84, 85, 82, 80, 77, 74, 72, 71, 69, 67, 66, 66,
+ 65, 64, 63, 63, 62, 61, 61, 60, 60, 59, 59, 58, 58, 58, 58, 58, 58, 78,
+ 79, 81, 82, 83, 80, 78, 75, 73, 71, 70, 68, 66, 66, 65, 64, 63, 62, 62,
+ 61, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 76, 77, 78, 79, 80,
+ 78, 76, 74, 72, 70, 69, 67, 66, 65, 64, 63, 62, 62, 61, 60, 60, 59, 59,
+ 59, 58, 58, 58, 57, 57, 57, 57, 57, 73, 74, 76, 77, 78, 76, 74, 72, 70,
+ 69, 67, 66, 65, 64, 63, 62, 61, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57,
+ 57, 57, 57, 57, 57, 71, 73, 74, 75, 76, 74, 73, 71, 69, 68, 67, 65, 64,
+ 63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56,
+ 56, 70, 71, 72, 73, 74, 73, 71, 70, 68, 67, 66, 65, 63, 63, 62, 61, 60,
+ 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 68, 69, 70,
+ 71, 72, 71, 70, 68, 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 59, 58, 58,
+ 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 66, 67, 68, 69, 70, 69, 68,
+ 67, 66, 65, 64, 63, 62, 61, 61, 60, 59, 59, 58, 58, 57, 57, 57, 57, 56,
+ 56, 56, 56, 55, 55, 55, 55, 65, 66, 67, 68, 69, 68, 67, 66, 65, 64, 63,
+ 62, 61, 61, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 55, 55,
+ 55, 55, 55, 64, 65, 66, 67, 68, 67, 66, 65, 64, 63, 63, 62, 61, 60, 60,
+ 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 63,
+ 64, 64, 65, 66, 65, 65, 64, 63, 63, 62, 61, 60, 60, 59, 59, 58, 58, 57,
+ 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 62, 62, 63, 64, 65,
+ 64, 64, 63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56,
+ 56, 55, 55, 55, 55, 55, 55, 55, 55, 61, 62, 62, 63, 64, 63, 63, 62, 62,
+ 61, 61, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55,
+ 55, 54, 54, 54, 54, 60, 61, 62, 62, 63, 62, 62, 61, 61, 60, 60, 59, 59,
+ 58, 58, 58, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 54,
+ 54, 59, 60, 61, 61, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57,
+ 57, 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 59, 59, 60,
+ 60, 61, 61, 60, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 55,
+ 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 59, 59, 60, 60, 61, 61, 60,
+ 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 55, 55,
+ 54, 54, 54, 54, 54, 54, 54, 59, 59, 60, 60, 61, 61, 60, 60, 60, 59, 59,
+ 58, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 54,
+ 54, 54, 54, 59, 59, 60, 60, 61, 61, 60, 60, 60, 59, 59, 58, 58, 58, 57,
+ 57, 57, 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 51, 50, 45, 51, 48, 47, 44, 50, 47, 43, 41, 45, 44, 41, 40,
+ /* Size 8 */
+ 64, 72, 54, 52, 51, 49, 47, 45, 72, 59, 53, 55, 55, 53, 50, 48, 54, 53,
+ 50, 51, 51, 50, 48, 47, 52, 55, 51, 48, 48, 47, 46, 45, 51, 55, 51, 48,
+ 46, 45, 44, 43, 49, 53, 50, 47, 45, 44, 43, 42, 47, 50, 48, 46, 44, 43,
+ 42, 41, 45, 48, 47, 45, 43, 42, 41, 41,
+ /* Size 16 */
+ 64, 68, 72, 63, 54, 53, 52, 52, 51, 50, 49, 48, 47, 46, 45, 45, 68, 67,
+ 66, 59, 53, 54, 54, 53, 53, 52, 51, 50, 48, 47, 46, 46, 72, 66, 59, 56,
+ 53, 54, 55, 55, 55, 54, 53, 51, 50, 49, 48, 48, 63, 59, 56, 54, 52, 52,
+ 53, 53, 53, 52, 51, 50, 49, 48, 47, 47, 54, 53, 53, 52, 50, 50, 51, 51,
+ 51, 50, 50, 49, 48, 47, 47, 47, 53, 54, 54, 52, 50, 50, 49, 49, 49, 49,
+ 48, 48, 47, 46, 46, 46, 52, 54, 55, 53, 51, 49, 48, 48, 48, 47, 47, 46,
+ 46, 45, 45, 45, 52, 53, 55, 53, 51, 49, 48, 47, 47, 46, 46, 45, 45, 45,
+ 44, 44, 51, 53, 55, 53, 51, 49, 48, 47, 46, 45, 45, 44, 44, 44, 43, 43,
+ 50, 52, 54, 52, 50, 49, 47, 46, 45, 45, 44, 44, 43, 43, 43, 43, 49, 51,
+ 53, 51, 50, 48, 47, 46, 45, 44, 44, 43, 43, 42, 42, 42, 48, 50, 51, 50,
+ 49, 48, 46, 45, 44, 44, 43, 43, 42, 42, 42, 42, 47, 48, 50, 49, 48, 47,
+ 46, 45, 44, 43, 43, 42, 42, 42, 41, 41, 46, 47, 49, 48, 47, 46, 45, 45,
+ 44, 43, 42, 42, 42, 41, 41, 41, 45, 46, 48, 47, 47, 46, 45, 44, 43, 43,
+ 42, 42, 41, 41, 41, 41, 45, 46, 48, 47, 47, 46, 45, 44, 43, 43, 42, 42,
+ 41, 41, 41, 41,
+ /* Size 32 */
+ 64, 66, 68, 70, 72, 67, 63, 58, 54, 53, 53, 53, 52, 52, 52, 51, 51, 50,
+ 50, 49, 49, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45, 45, 66, 67, 67, 68,
+ 69, 65, 61, 57, 53, 53, 53, 53, 53, 53, 52, 52, 52, 51, 51, 50, 50, 49,
+ 49, 48, 48, 47, 47, 46, 46, 46, 46, 46, 68, 67, 67, 66, 66, 63, 59, 56,
+ 53, 53, 54, 54, 54, 54, 53, 53, 53, 52, 52, 51, 51, 50, 50, 49, 48, 48,
+ 47, 47, 46, 46, 46, 46, 70, 68, 66, 64, 62, 60, 58, 56, 53, 54, 54, 54,
+ 55, 54, 54, 54, 54, 53, 53, 52, 52, 51, 50, 50, 49, 49, 48, 48, 47, 47,
+ 47, 47, 72, 69, 66, 62, 59, 58, 56, 55, 53, 54, 54, 55, 55, 55, 55, 55,
+ 55, 54, 54, 53, 53, 52, 51, 51, 50, 50, 49, 48, 48, 48, 48, 48, 67, 65,
+ 63, 60, 58, 56, 55, 54, 52, 53, 53, 54, 54, 54, 54, 54, 54, 53, 53, 52,
+ 52, 51, 51, 50, 50, 49, 49, 48, 48, 48, 48, 48, 63, 61, 59, 58, 56, 55,
+ 54, 53, 52, 52, 52, 53, 53, 53, 53, 53, 53, 52, 52, 52, 51, 51, 50, 50,
+ 49, 49, 48, 48, 47, 47, 47, 47, 58, 57, 56, 56, 55, 54, 53, 52, 51, 51,
+ 51, 52, 52, 52, 52, 52, 52, 51, 51, 51, 50, 50, 50, 49, 49, 48, 48, 47,
+ 47, 47, 47, 47, 54, 53, 53, 53, 53, 52, 52, 51, 50, 50, 50, 50, 51, 51,
+ 51, 51, 51, 50, 50, 50, 50, 49, 49, 49, 48, 48, 47, 47, 47, 47, 47, 47,
+ 53, 53, 53, 54, 54, 53, 52, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 49, 49, 49, 49, 48, 48, 48, 47, 47, 46, 46, 46, 46, 46, 53, 53, 54, 54,
+ 54, 53, 52, 51, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 48, 48, 48,
+ 48, 47, 47, 47, 46, 46, 46, 46, 46, 46, 53, 53, 54, 54, 55, 54, 53, 52,
+ 50, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 48, 48, 47, 47, 47, 46, 46,
+ 46, 46, 45, 45, 45, 45, 52, 53, 54, 55, 55, 54, 53, 52, 51, 50, 49, 49,
+ 48, 48, 48, 48, 48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 45,
+ 45, 45, 52, 53, 54, 54, 55, 54, 53, 52, 51, 50, 49, 49, 48, 48, 48, 47,
+ 47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 44, 52, 52,
+ 53, 54, 55, 54, 53, 52, 51, 50, 49, 49, 48, 48, 47, 47, 47, 46, 46, 46,
+ 46, 46, 45, 45, 45, 45, 45, 44, 44, 44, 44, 44, 51, 52, 53, 54, 55, 54,
+ 53, 52, 51, 50, 49, 48, 48, 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 45,
+ 45, 44, 44, 44, 44, 44, 44, 44, 51, 52, 53, 54, 55, 54, 53, 52, 51, 50,
+ 49, 48, 48, 47, 47, 46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 44, 44, 44,
+ 43, 43, 43, 43, 50, 51, 52, 53, 54, 53, 52, 51, 50, 50, 49, 48, 47, 47,
+ 46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 44, 44, 43, 43, 43, 43, 43, 43,
+ 50, 51, 52, 53, 54, 53, 52, 51, 50, 49, 49, 48, 47, 47, 46, 46, 45, 45,
+ 45, 44, 44, 44, 44, 44, 43, 43, 43, 43, 43, 43, 43, 43, 49, 50, 51, 52,
+ 53, 52, 52, 51, 50, 49, 48, 48, 47, 47, 46, 46, 45, 45, 44, 44, 44, 44,
+ 43, 43, 43, 43, 43, 43, 42, 42, 42, 42, 49, 50, 51, 52, 53, 52, 51, 50,
+ 50, 49, 48, 48, 47, 46, 46, 45, 45, 45, 44, 44, 44, 43, 43, 43, 43, 43,
+ 42, 42, 42, 42, 42, 42, 48, 49, 50, 51, 52, 51, 51, 50, 49, 49, 48, 47,
+ 47, 46, 46, 45, 45, 44, 44, 44, 43, 43, 43, 43, 43, 42, 42, 42, 42, 42,
+ 42, 42, 48, 49, 50, 50, 51, 51, 50, 50, 49, 48, 48, 47, 46, 46, 45, 45,
+ 44, 44, 44, 43, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42, 42, 42, 47, 48,
+ 49, 50, 51, 50, 50, 49, 49, 48, 47, 47, 46, 46, 45, 45, 44, 44, 44, 43,
+ 43, 43, 43, 42, 42, 42, 42, 42, 41, 41, 41, 41, 47, 48, 48, 49, 50, 50,
+ 49, 49, 48, 48, 47, 46, 46, 45, 45, 45, 44, 44, 43, 43, 43, 43, 42, 42,
+ 42, 42, 42, 41, 41, 41, 41, 41, 46, 47, 48, 49, 50, 49, 49, 48, 48, 47,
+ 47, 46, 46, 45, 45, 44, 44, 44, 43, 43, 43, 42, 42, 42, 42, 42, 41, 41,
+ 41, 41, 41, 41, 46, 47, 47, 48, 49, 49, 48, 48, 47, 47, 46, 46, 45, 45,
+ 45, 44, 44, 43, 43, 43, 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, 41, 41,
+ 45, 46, 47, 48, 48, 48, 48, 47, 47, 46, 46, 46, 45, 45, 44, 44, 44, 43,
+ 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, 45, 46, 46, 47,
+ 48, 48, 47, 47, 47, 46, 46, 45, 45, 44, 44, 44, 43, 43, 43, 42, 42, 42,
+ 42, 41, 41, 41, 41, 41, 41, 41, 41, 41, 45, 46, 46, 47, 48, 48, 47, 47,
+ 47, 46, 46, 45, 45, 44, 44, 44, 43, 43, 43, 42, 42, 42, 42, 41, 41, 41,
+ 41, 41, 41, 41, 41, 41, 45, 46, 46, 47, 48, 48, 47, 47, 47, 46, 46, 45,
+ 45, 44, 44, 44, 43, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 41, 41, 41,
+ 41, 41, 45, 46, 46, 47, 48, 48, 47, 47, 47, 46, 46, 45, 45, 44, 44, 44,
+ 43, 43, 43, 42, 42, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, 41 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 90, 72, 69, 63, 72, 66, 65, 62, 69, 65, 59, 57, 63, 62, 57, 54,
+ /* Size 8 */
+ 87, 98, 72, 70, 68, 65, 62, 60, 98, 80, 71, 74, 74, 70, 67, 64, 72, 71,
+ 67, 68, 68, 66, 64, 62, 70, 74, 68, 64, 63, 62, 61, 59, 68, 74, 68, 63,
+ 61, 59, 58, 57, 65, 70, 66, 62, 59, 58, 57, 56, 62, 67, 64, 61, 58, 57,
+ 55, 54, 60, 64, 62, 59, 57, 56, 54, 53,
+ /* Size 16 */
+ 88, 93, 99, 86, 73, 72, 71, 70, 69, 67, 66, 64, 63, 62, 60, 60, 93, 92,
+ 90, 81, 72, 73, 73, 72, 72, 70, 69, 67, 65, 64, 62, 62, 99, 90, 81, 76,
+ 72, 74, 75, 75, 74, 73, 71, 69, 68, 66, 65, 65, 86, 81, 76, 73, 70, 71,
+ 72, 72, 71, 70, 69, 68, 66, 65, 64, 64, 73, 72, 72, 70, 67, 68, 68, 68,
+ 68, 68, 67, 66, 65, 64, 63, 63, 72, 73, 74, 71, 68, 67, 67, 66, 66, 66,
+ 65, 64, 63, 62, 61, 61, 71, 73, 75, 72, 68, 67, 65, 65, 64, 63, 63, 62,
+ 62, 61, 60, 60, 70, 72, 75, 72, 68, 66, 65, 64, 63, 62, 62, 61, 60, 60,
+ 59, 59, 69, 72, 74, 71, 68, 66, 64, 63, 61, 61, 60, 60, 59, 58, 58, 58,
+ 67, 70, 73, 70, 68, 66, 63, 62, 61, 60, 59, 59, 58, 58, 57, 57, 66, 69,
+ 71, 69, 67, 65, 63, 62, 60, 59, 58, 58, 57, 57, 56, 56, 64, 67, 69, 68,
+ 66, 64, 62, 61, 60, 59, 58, 57, 56, 56, 56, 56, 63, 65, 68, 66, 65, 63,
+ 62, 60, 59, 58, 57, 56, 56, 55, 55, 55, 62, 64, 66, 65, 64, 62, 61, 60,
+ 58, 58, 57, 56, 55, 55, 54, 54, 60, 62, 65, 64, 63, 61, 60, 59, 58, 57,
+ 56, 56, 55, 54, 54, 54, 60, 62, 65, 64, 63, 61, 60, 59, 58, 57, 56, 56,
+ 55, 54, 54, 54,
+ /* Size 32 */
+ 88, 91, 94, 97, 100, 93, 86, 80, 73, 73, 72, 72, 71, 71, 70, 70, 69, 68,
+ 68, 67, 66, 65, 65, 64, 63, 63, 62, 61, 61, 61, 61, 61, 91, 92, 93, 94,
+ 95, 89, 84, 78, 73, 73, 73, 72, 72, 72, 71, 71, 71, 70, 69, 68, 68, 67,
+ 66, 65, 64, 64, 63, 62, 62, 62, 62, 62, 94, 93, 92, 91, 90, 86, 82, 77,
+ 73, 73, 73, 73, 73, 73, 73, 72, 72, 71, 70, 70, 69, 68, 67, 66, 66, 65,
+ 64, 63, 63, 63, 63, 63, 97, 94, 91, 89, 86, 83, 79, 76, 73, 73, 74, 74,
+ 75, 74, 74, 74, 73, 73, 72, 71, 70, 69, 69, 68, 67, 66, 65, 65, 64, 64,
+ 64, 64, 100, 95, 90, 86, 81, 79, 77, 75, 72, 73, 74, 75, 76, 75, 75, 75,
+ 75, 74, 73, 72, 72, 71, 70, 69, 68, 67, 66, 66, 65, 65, 65, 65, 93, 89,
+ 86, 83, 79, 77, 75, 73, 71, 72, 73, 73, 74, 74, 74, 73, 73, 73, 72, 71,
+ 71, 70, 69, 68, 67, 67, 66, 65, 64, 64, 64, 64, 86, 84, 82, 79, 77, 75,
+ 73, 72, 70, 71, 71, 72, 72, 72, 72, 72, 72, 71, 71, 70, 70, 69, 68, 67,
+ 67, 66, 65, 65, 64, 64, 64, 64, 80, 78, 77, 76, 75, 73, 72, 70, 69, 69,
+ 70, 70, 70, 70, 70, 70, 70, 70, 69, 69, 68, 68, 67, 67, 66, 65, 65, 64,
+ 63, 63, 63, 63, 73, 73, 73, 73, 72, 71, 70, 69, 68, 68, 68, 68, 69, 69,
+ 69, 69, 69, 68, 68, 68, 67, 67, 66, 66, 65, 65, 64, 63, 63, 63, 63, 63,
+ 73, 73, 73, 73, 73, 72, 71, 69, 68, 68, 68, 68, 68, 68, 68, 68, 68, 67,
+ 67, 67, 66, 66, 65, 65, 64, 64, 63, 63, 62, 62, 62, 62, 72, 73, 73, 74,
+ 74, 73, 71, 70, 68, 68, 68, 67, 67, 67, 67, 67, 67, 66, 66, 66, 65, 65,
+ 64, 64, 64, 63, 63, 62, 62, 62, 62, 62, 72, 72, 73, 74, 75, 73, 72, 70,
+ 68, 68, 67, 67, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 63, 63, 62,
+ 62, 62, 61, 61, 61, 61, 71, 72, 73, 75, 76, 74, 72, 70, 69, 68, 67, 66,
+ 65, 65, 65, 65, 64, 64, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 60, 60,
+ 60, 60, 71, 72, 73, 74, 75, 74, 72, 70, 69, 68, 67, 66, 65, 65, 64, 64,
+ 64, 63, 63, 63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 60, 70, 71,
+ 73, 74, 75, 74, 72, 70, 69, 68, 67, 66, 65, 64, 64, 64, 63, 63, 62, 62,
+ 62, 62, 61, 61, 61, 60, 60, 60, 59, 59, 59, 59, 70, 71, 72, 74, 75, 73,
+ 72, 70, 69, 68, 67, 66, 65, 64, 64, 63, 62, 62, 62, 61, 61, 61, 61, 60,
+ 60, 60, 59, 59, 59, 59, 59, 59, 69, 71, 72, 73, 75, 73, 72, 70, 69, 68,
+ 67, 65, 64, 64, 63, 62, 62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59,
+ 58, 58, 58, 58, 68, 70, 71, 73, 74, 73, 71, 70, 68, 67, 66, 65, 64, 63,
+ 63, 62, 61, 61, 61, 60, 60, 60, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58,
+ 68, 69, 70, 72, 73, 72, 71, 69, 68, 67, 66, 65, 64, 63, 62, 62, 61, 61,
+ 60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 67, 68, 70, 71,
+ 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 61, 60, 60, 60, 59, 59,
+ 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 66, 68, 69, 70, 72, 71, 70, 68,
+ 67, 66, 65, 64, 63, 63, 62, 61, 60, 60, 60, 59, 59, 58, 58, 58, 57, 57,
+ 57, 57, 57, 57, 57, 57, 65, 67, 68, 69, 71, 70, 69, 68, 67, 66, 65, 64,
+ 63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56,
+ 56, 56, 65, 66, 67, 69, 70, 69, 68, 67, 66, 65, 64, 64, 63, 62, 61, 61,
+ 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 64, 65,
+ 66, 68, 69, 68, 67, 67, 66, 65, 64, 63, 62, 62, 61, 60, 60, 59, 59, 58,
+ 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 63, 64, 66, 67, 68, 67,
+ 67, 66, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 58, 58, 57, 57, 57, 56,
+ 56, 56, 56, 55, 55, 55, 55, 55, 63, 64, 65, 66, 67, 67, 66, 65, 65, 64,
+ 63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55,
+ 55, 55, 55, 55, 62, 63, 64, 65, 66, 66, 65, 65, 64, 63, 63, 62, 61, 61,
+ 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55,
+ 61, 62, 63, 65, 66, 65, 65, 64, 63, 63, 62, 62, 61, 60, 60, 59, 59, 58,
+ 58, 57, 57, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 61, 62, 63, 64,
+ 65, 64, 64, 63, 63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 57, 57, 57, 56,
+ 56, 56, 55, 55, 55, 55, 54, 54, 54, 54, 61, 62, 63, 64, 65, 64, 64, 63,
+ 63, 62, 62, 61, 60, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55,
+ 55, 55, 54, 54, 54, 54, 61, 62, 63, 64, 65, 64, 64, 63, 63, 62, 62, 61,
+ 60, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 55, 54, 54,
+ 54, 54, 61, 62, 63, 64, 65, 64, 64, 63, 63, 62, 62, 61, 60, 60, 59, 59,
+ 58, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 55, 54, 54, 54, 54 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 62, 51, 46, 62, 53, 48, 45, 51, 48, 45, 43, 46, 45, 43, 42,
+ /* Size 8 */
+ 64, 72, 70, 62, 56, 52, 50, 48, 72, 68, 69, 65, 59, 55, 52, 49, 70, 69,
+ 61, 57, 55, 52, 50, 49, 62, 65, 57, 53, 51, 50, 49, 48, 56, 59, 55, 51,
+ 49, 48, 47, 47, 52, 55, 52, 50, 48, 47, 47, 46, 50, 52, 50, 49, 47, 47,
+ 46, 46, 48, 49, 49, 48, 47, 46, 46, 45,
+ /* Size 16 */
+ 64, 68, 72, 71, 70, 66, 62, 59, 56, 54, 52, 51, 50, 49, 48, 48, 68, 69,
+ 70, 70, 70, 67, 64, 61, 58, 56, 54, 52, 51, 50, 49, 49, 72, 70, 68, 69,
+ 69, 67, 65, 62, 59, 57, 55, 53, 52, 50, 49, 49, 71, 70, 69, 67, 65, 63,
+ 61, 59, 57, 55, 53, 52, 51, 50, 49, 49, 70, 70, 69, 65, 61, 59, 57, 56,
+ 55, 53, 52, 51, 50, 49, 49, 49, 66, 67, 67, 63, 59, 57, 55, 54, 53, 52,
+ 51, 50, 49, 49, 48, 48, 62, 64, 65, 61, 57, 55, 53, 52, 51, 51, 50, 49,
+ 49, 48, 48, 48, 59, 61, 62, 59, 56, 54, 52, 51, 50, 50, 49, 49, 48, 48,
+ 47, 47, 56, 58, 59, 57, 55, 53, 51, 50, 49, 49, 48, 48, 47, 47, 47, 47,
+ 54, 56, 57, 55, 53, 52, 51, 50, 49, 48, 48, 47, 47, 47, 46, 46, 52, 54,
+ 55, 53, 52, 51, 50, 49, 48, 48, 47, 47, 47, 46, 46, 46, 51, 52, 53, 52,
+ 51, 50, 49, 49, 48, 47, 47, 47, 46, 46, 46, 46, 50, 51, 52, 51, 50, 49,
+ 49, 48, 47, 47, 47, 46, 46, 46, 46, 46, 49, 50, 50, 50, 49, 49, 48, 48,
+ 47, 47, 46, 46, 46, 46, 45, 45, 48, 49, 49, 49, 49, 48, 48, 47, 47, 46,
+ 46, 46, 46, 45, 45, 45, 48, 49, 49, 49, 49, 48, 48, 47, 47, 46, 46, 46,
+ 46, 45, 45, 45,
+ /* Size 32 */
+ 64, 66, 68, 70, 72, 72, 71, 70, 70, 68, 66, 64, 62, 61, 59, 58, 56, 55,
+ 54, 53, 52, 52, 51, 50, 50, 49, 49, 48, 48, 48, 48, 48, 66, 67, 69, 70,
+ 71, 71, 70, 70, 70, 68, 66, 65, 63, 61, 60, 59, 57, 56, 55, 54, 53, 52,
+ 52, 51, 50, 50, 49, 49, 48, 48, 48, 48, 68, 69, 69, 70, 70, 70, 70, 70,
+ 70, 68, 67, 65, 64, 62, 61, 59, 58, 57, 56, 55, 54, 53, 52, 51, 51, 50,
+ 50, 49, 49, 49, 49, 49, 70, 70, 70, 69, 69, 69, 69, 69, 69, 68, 67, 66,
+ 64, 63, 61, 60, 58, 57, 56, 55, 54, 53, 53, 52, 51, 51, 50, 49, 49, 49,
+ 49, 49, 72, 71, 70, 69, 68, 68, 69, 69, 69, 68, 67, 66, 65, 63, 62, 61,
+ 59, 58, 57, 56, 55, 54, 53, 52, 52, 51, 50, 50, 49, 49, 49, 49, 72, 71,
+ 70, 69, 68, 68, 68, 67, 67, 66, 65, 64, 63, 62, 61, 59, 58, 57, 56, 55,
+ 54, 53, 53, 52, 51, 51, 50, 50, 49, 49, 49, 49, 71, 70, 70, 69, 69, 68,
+ 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 53, 52, 51,
+ 51, 50, 50, 49, 49, 49, 49, 49, 70, 70, 70, 69, 69, 67, 66, 64, 63, 62,
+ 61, 60, 59, 58, 58, 57, 56, 55, 54, 54, 53, 52, 52, 51, 50, 50, 50, 49,
+ 49, 49, 49, 49, 70, 70, 70, 69, 69, 67, 65, 63, 61, 60, 59, 58, 57, 57,
+ 56, 55, 55, 54, 53, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 49, 49, 49,
+ 68, 68, 68, 68, 68, 66, 64, 62, 60, 59, 58, 57, 56, 56, 55, 55, 54, 53,
+ 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 48, 48, 48, 48, 66, 66, 67, 67,
+ 67, 65, 63, 61, 59, 58, 57, 56, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51,
+ 50, 50, 49, 49, 49, 48, 48, 48, 48, 48, 64, 65, 65, 66, 66, 64, 62, 60,
+ 58, 57, 56, 55, 54, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50, 49, 49, 49,
+ 48, 48, 48, 48, 48, 48, 62, 63, 64, 64, 65, 63, 61, 59, 57, 56, 55, 54,
+ 53, 53, 52, 52, 51, 51, 51, 50, 50, 50, 49, 49, 49, 48, 48, 48, 48, 48,
+ 48, 48, 61, 61, 62, 63, 63, 62, 60, 58, 57, 56, 55, 54, 53, 52, 52, 51,
+ 51, 51, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 59, 60,
+ 61, 61, 62, 61, 59, 58, 56, 55, 54, 53, 52, 52, 51, 51, 50, 50, 50, 49,
+ 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 47, 58, 59, 59, 60, 61, 59,
+ 58, 57, 55, 55, 54, 53, 52, 51, 51, 50, 50, 50, 49, 49, 49, 48, 48, 48,
+ 48, 48, 47, 47, 47, 47, 47, 47, 56, 57, 58, 58, 59, 58, 57, 56, 55, 54,
+ 53, 52, 51, 51, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47,
+ 47, 47, 47, 47, 55, 56, 57, 57, 58, 57, 56, 55, 54, 53, 53, 52, 51, 51,
+ 50, 50, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+ 54, 55, 56, 56, 57, 56, 55, 54, 53, 53, 52, 51, 51, 50, 50, 49, 49, 49,
+ 48, 48, 48, 48, 47, 47, 47, 47, 47, 47, 46, 46, 46, 46, 53, 54, 55, 55,
+ 56, 55, 54, 54, 53, 52, 52, 51, 50, 50, 49, 49, 49, 48, 48, 48, 48, 47,
+ 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 52, 53, 54, 54, 55, 54, 53, 53,
+ 52, 52, 51, 50, 50, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 47, 46,
+ 46, 46, 46, 46, 46, 46, 52, 52, 53, 53, 54, 53, 53, 52, 52, 51, 51, 50,
+ 50, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46,
+ 46, 46, 51, 52, 52, 53, 53, 53, 52, 52, 51, 51, 50, 50, 49, 49, 49, 48,
+ 48, 48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 50, 51,
+ 51, 52, 52, 52, 51, 51, 51, 50, 50, 49, 49, 49, 48, 48, 48, 47, 47, 47,
+ 47, 47, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 50, 50, 51, 51, 52, 51,
+ 51, 50, 50, 50, 49, 49, 49, 48, 48, 48, 47, 47, 47, 47, 47, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 46, 46, 49, 50, 50, 51, 51, 51, 50, 50, 50, 49,
+ 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 46, 46, 49, 49, 50, 50, 50, 50, 50, 50, 49, 49, 49, 48, 48, 48,
+ 48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46, 45, 45, 45, 45,
+ 48, 49, 49, 49, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47,
+ 47, 46, 46, 46, 46, 46, 46, 46, 46, 45, 45, 45, 45, 45, 48, 48, 49, 49,
+ 49, 49, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47, 47, 47, 46, 46, 46, 46,
+ 46, 46, 46, 46, 45, 45, 45, 45, 45, 45, 48, 48, 49, 49, 49, 49, 49, 49,
+ 49, 48, 48, 48, 48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46,
+ 45, 45, 45, 45, 45, 45, 48, 48, 49, 49, 49, 49, 49, 49, 49, 48, 48, 48,
+ 48, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46, 45, 45, 45, 45,
+ 45, 45, 48, 48, 49, 49, 49, 49, 49, 49, 49, 48, 48, 48, 48, 47, 47, 47,
+ 47, 47, 46, 46, 46, 46, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 86, 83, 68, 60, 83, 70, 64, 59, 68, 64, 59, 57, 60, 59, 57, 55,
+ /* Size 8 */
+ 79, 90, 87, 77, 69, 64, 61, 58, 90, 85, 86, 80, 73, 67, 63, 60, 87, 86,
+ 75, 71, 67, 64, 61, 59, 77, 80, 71, 66, 63, 61, 59, 58, 69, 73, 67, 63,
+ 60, 59, 58, 57, 64, 67, 64, 61, 59, 57, 57, 56, 61, 63, 61, 59, 58, 57,
+ 56, 55, 58, 60, 59, 58, 57, 56, 55, 55,
+ /* Size 16 */
+ 80, 86, 91, 90, 88, 83, 78, 74, 70, 68, 65, 63, 61, 60, 59, 59, 86, 87,
+ 89, 88, 88, 84, 80, 76, 72, 69, 66, 65, 63, 61, 60, 60, 91, 89, 86, 87,
+ 87, 84, 82, 78, 74, 71, 68, 66, 64, 62, 61, 61, 90, 88, 87, 84, 82, 79,
+ 77, 74, 71, 69, 66, 65, 63, 62, 60, 60, 88, 88, 87, 82, 76, 74, 72, 70,
+ 68, 66, 65, 63, 62, 61, 60, 60, 83, 84, 84, 79, 74, 71, 69, 68, 66, 65,
+ 63, 62, 61, 60, 59, 59, 78, 80, 82, 77, 72, 69, 66, 65, 64, 63, 62, 61,
+ 60, 59, 59, 59, 74, 76, 78, 74, 70, 68, 65, 64, 62, 62, 61, 60, 59, 59,
+ 58, 58, 70, 72, 74, 71, 68, 66, 64, 62, 61, 60, 60, 59, 58, 58, 57, 57,
+ 68, 69, 71, 69, 66, 65, 63, 62, 60, 60, 59, 58, 58, 57, 57, 57, 65, 66,
+ 68, 66, 65, 63, 62, 61, 60, 59, 58, 58, 57, 57, 57, 57, 63, 65, 66, 65,
+ 63, 62, 61, 60, 59, 58, 58, 57, 57, 57, 56, 56, 61, 63, 64, 63, 62, 61,
+ 60, 59, 58, 58, 57, 57, 57, 56, 56, 56, 60, 61, 62, 62, 61, 60, 59, 59,
+ 58, 57, 57, 57, 56, 56, 56, 56, 59, 60, 61, 60, 60, 59, 59, 58, 57, 57,
+ 57, 56, 56, 56, 56, 56, 59, 60, 61, 60, 60, 59, 59, 58, 57, 57, 57, 56,
+ 56, 56, 56, 56,
+ /* Size 32 */
+ 81, 84, 86, 89, 92, 91, 90, 89, 89, 86, 84, 81, 79, 77, 75, 73, 71, 69,
+ 68, 67, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 59, 59, 84, 85, 87, 89,
+ 91, 90, 89, 89, 88, 86, 84, 82, 79, 78, 76, 74, 72, 70, 69, 67, 66, 65,
+ 64, 63, 62, 62, 61, 61, 60, 60, 60, 60, 86, 87, 88, 88, 89, 89, 89, 88,
+ 88, 86, 84, 82, 80, 78, 76, 75, 73, 71, 70, 68, 67, 66, 65, 64, 63, 62,
+ 62, 61, 60, 60, 60, 60, 89, 89, 88, 88, 88, 88, 88, 88, 88, 86, 85, 83,
+ 81, 79, 77, 75, 74, 72, 71, 69, 68, 67, 66, 65, 64, 63, 62, 62, 61, 61,
+ 61, 61, 92, 91, 89, 88, 86, 87, 87, 88, 88, 86, 85, 83, 82, 80, 78, 76,
+ 74, 73, 71, 70, 68, 67, 66, 65, 64, 64, 63, 62, 61, 61, 61, 61, 91, 90,
+ 89, 88, 87, 86, 86, 85, 85, 84, 82, 81, 80, 78, 76, 75, 73, 72, 70, 69,
+ 68, 67, 66, 65, 64, 63, 62, 62, 61, 61, 61, 61, 90, 89, 89, 88, 87, 86,
+ 85, 83, 82, 81, 80, 78, 77, 76, 74, 73, 72, 70, 69, 68, 67, 66, 65, 64,
+ 63, 63, 62, 61, 61, 61, 61, 61, 89, 89, 88, 88, 88, 85, 83, 81, 79, 78,
+ 77, 76, 75, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 64, 63, 62, 62, 61,
+ 61, 61, 61, 61, 89, 88, 88, 88, 88, 85, 82, 79, 76, 75, 74, 73, 72, 71,
+ 70, 69, 69, 68, 67, 66, 65, 64, 64, 63, 62, 62, 61, 61, 60, 60, 60, 60,
+ 86, 86, 86, 86, 86, 84, 81, 78, 75, 74, 73, 72, 71, 70, 69, 68, 67, 67,
+ 66, 65, 64, 64, 63, 63, 62, 61, 61, 60, 60, 60, 60, 60, 84, 84, 84, 85,
+ 85, 82, 80, 77, 74, 73, 72, 71, 70, 69, 68, 67, 66, 66, 65, 64, 64, 63,
+ 63, 62, 61, 61, 61, 60, 60, 60, 60, 60, 81, 82, 82, 83, 83, 81, 78, 76,
+ 73, 72, 71, 69, 68, 67, 67, 66, 65, 65, 64, 63, 63, 62, 62, 61, 61, 60,
+ 60, 60, 59, 59, 59, 59, 79, 79, 80, 81, 82, 80, 77, 75, 72, 71, 70, 68,
+ 67, 66, 65, 65, 64, 64, 63, 63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 59,
+ 59, 59, 77, 78, 78, 79, 80, 78, 76, 73, 71, 70, 69, 67, 66, 66, 65, 64,
+ 63, 63, 63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 59, 59, 59, 59, 75, 76,
+ 76, 77, 78, 76, 74, 72, 70, 69, 68, 67, 65, 65, 64, 63, 63, 62, 62, 61,
+ 61, 61, 60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 73, 74, 75, 75, 76, 75,
+ 73, 71, 69, 68, 67, 66, 65, 64, 63, 63, 62, 62, 61, 61, 60, 60, 60, 60,
+ 59, 59, 59, 58, 58, 58, 58, 58, 71, 72, 73, 74, 74, 73, 72, 70, 69, 67,
+ 66, 65, 64, 63, 63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 59, 59, 58, 58,
+ 58, 58, 58, 58, 69, 70, 71, 72, 73, 72, 70, 69, 68, 67, 66, 65, 64, 63,
+ 62, 62, 61, 61, 60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58,
+ 68, 69, 70, 71, 71, 70, 69, 68, 67, 66, 65, 64, 63, 63, 62, 61, 61, 60,
+ 60, 60, 59, 59, 59, 58, 58, 58, 58, 58, 57, 57, 57, 57, 67, 67, 68, 69,
+ 70, 69, 68, 67, 66, 65, 64, 63, 63, 62, 61, 61, 60, 60, 60, 59, 59, 59,
+ 58, 58, 58, 58, 58, 57, 57, 57, 57, 57, 65, 66, 67, 68, 68, 68, 67, 66,
+ 65, 64, 64, 63, 62, 62, 61, 60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57,
+ 57, 57, 57, 57, 57, 57, 64, 65, 66, 67, 67, 67, 66, 65, 64, 64, 63, 62,
+ 62, 61, 61, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57,
+ 57, 57, 64, 64, 65, 66, 66, 66, 65, 64, 64, 63, 63, 62, 61, 61, 60, 60,
+ 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 57, 57, 57, 63, 63,
+ 64, 65, 65, 65, 64, 64, 63, 63, 62, 61, 61, 60, 60, 60, 59, 59, 58, 58,
+ 58, 58, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 62, 62, 63, 64, 64, 64,
+ 63, 63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 58, 58, 58, 58, 57, 57, 57,
+ 57, 57, 57, 56, 56, 56, 56, 56, 61, 62, 62, 63, 64, 63, 63, 62, 62, 61,
+ 61, 60, 60, 60, 59, 59, 59, 58, 58, 58, 57, 57, 57, 57, 57, 57, 56, 56,
+ 56, 56, 56, 56, 61, 61, 62, 62, 63, 62, 62, 62, 61, 61, 61, 60, 60, 59,
+ 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56,
+ 60, 61, 61, 62, 62, 62, 61, 61, 61, 60, 60, 60, 59, 59, 59, 58, 58, 58,
+ 58, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 59, 60, 60, 61,
+ 61, 61, 61, 61, 60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57,
+ 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 59, 60, 60, 61, 61, 61, 61, 61,
+ 60, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 56, 56,
+ 56, 56, 56, 56, 56, 56, 59, 60, 60, 61, 61, 61, 61, 61, 60, 60, 60, 59,
+ 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56,
+ 56, 56, 59, 60, 60, 61, 61, 61, 61, 61, 60, 60, 60, 59, 59, 59, 58, 58,
+ 58, 58, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 53, 52, 48, 53, 50, 49, 48, 52, 49, 46, 45, 48, 48, 45, 43,
+ /* Size 8 */
+ 64, 71, 55, 54, 53, 51, 50, 48, 71, 60, 55, 57, 56, 54, 52, 51, 55, 55,
+ 52, 53, 53, 52, 51, 49, 54, 57, 53, 51, 50, 50, 49, 48, 53, 56, 53, 50,
+ 49, 48, 47, 47, 51, 54, 52, 50, 48, 47, 46, 46, 50, 52, 51, 49, 47, 46,
+ 46, 45, 48, 51, 49, 48, 47, 46, 45, 44,
+ /* Size 16 */
+ 64, 67, 71, 63, 55, 55, 54, 54, 53, 52, 51, 50, 50, 49, 48, 48, 67, 66,
+ 65, 60, 55, 55, 56, 55, 55, 54, 53, 52, 51, 50, 49, 49, 71, 65, 60, 58,
+ 55, 56, 57, 57, 56, 55, 54, 53, 52, 51, 51, 51, 63, 60, 58, 56, 54, 54,
+ 55, 55, 55, 54, 53, 52, 52, 51, 50, 50, 55, 55, 55, 54, 52, 53, 53, 53,
+ 53, 52, 52, 51, 51, 50, 49, 49, 55, 55, 56, 54, 53, 52, 52, 52, 52, 51,
+ 51, 50, 50, 49, 49, 49, 54, 56, 57, 55, 53, 52, 51, 51, 50, 50, 50, 49,
+ 49, 48, 48, 48, 54, 55, 57, 55, 53, 52, 51, 50, 50, 49, 49, 48, 48, 48,
+ 47, 47, 53, 55, 56, 55, 53, 52, 50, 50, 49, 48, 48, 48, 47, 47, 47, 47,
+ 52, 54, 55, 54, 52, 51, 50, 49, 48, 48, 47, 47, 47, 47, 46, 46, 51, 53,
+ 54, 53, 52, 51, 50, 49, 48, 47, 47, 47, 46, 46, 46, 46, 50, 52, 53, 52,
+ 51, 50, 49, 48, 48, 47, 47, 46, 46, 46, 45, 45, 50, 51, 52, 52, 51, 50,
+ 49, 48, 47, 47, 46, 46, 46, 45, 45, 45, 49, 50, 51, 51, 50, 49, 48, 48,
+ 47, 47, 46, 46, 45, 45, 45, 45, 48, 49, 51, 50, 49, 49, 48, 47, 47, 46,
+ 46, 45, 45, 45, 44, 44, 48, 49, 51, 50, 49, 49, 48, 47, 47, 46, 46, 45,
+ 45, 45, 44, 44,
+ /* Size 32 */
+ 64, 66, 67, 69, 71, 67, 63, 59, 55, 55, 55, 54, 54, 54, 54, 53, 53, 53,
+ 52, 52, 51, 51, 50, 50, 50, 49, 49, 48, 48, 48, 48, 48, 66, 66, 67, 67,
+ 68, 65, 62, 58, 55, 55, 55, 55, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52,
+ 51, 51, 50, 50, 50, 49, 49, 49, 49, 49, 67, 67, 66, 66, 65, 63, 60, 58,
+ 55, 55, 55, 55, 56, 55, 55, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 51,
+ 50, 50, 49, 49, 49, 49, 69, 67, 66, 64, 63, 61, 59, 57, 55, 55, 56, 56,
+ 56, 56, 56, 56, 55, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 50, 50, 50,
+ 50, 50, 71, 68, 65, 63, 60, 59, 58, 56, 55, 55, 56, 56, 57, 57, 57, 56,
+ 56, 56, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 51, 51, 51, 51, 67, 65,
+ 63, 61, 59, 58, 57, 55, 54, 55, 55, 55, 56, 56, 56, 56, 55, 55, 55, 54,
+ 54, 53, 53, 52, 52, 52, 51, 51, 50, 50, 50, 50, 63, 62, 60, 59, 58, 57,
+ 56, 55, 54, 54, 54, 54, 55, 55, 55, 55, 55, 54, 54, 54, 53, 53, 52, 52,
+ 52, 51, 51, 50, 50, 50, 50, 50, 59, 58, 58, 57, 56, 55, 55, 54, 53, 53,
+ 53, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 52, 52, 52, 51, 51, 50, 50,
+ 50, 50, 50, 50, 55, 55, 55, 55, 55, 54, 54, 53, 52, 52, 53, 53, 53, 53,
+ 53, 53, 53, 53, 52, 52, 52, 52, 51, 51, 51, 50, 50, 50, 49, 49, 49, 49,
+ 55, 55, 55, 55, 55, 55, 54, 53, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
+ 52, 52, 51, 51, 51, 51, 50, 50, 50, 49, 49, 49, 49, 49, 55, 55, 55, 56,
+ 56, 55, 54, 53, 53, 52, 52, 52, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51,
+ 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 54, 55, 55, 56, 56, 55, 54, 54,
+ 53, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 49, 49,
+ 49, 49, 48, 48, 48, 48, 54, 55, 56, 56, 57, 56, 55, 54, 53, 52, 52, 51,
+ 51, 51, 51, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 48, 48, 48, 48,
+ 48, 48, 54, 55, 55, 56, 57, 56, 55, 54, 53, 52, 52, 51, 51, 51, 50, 50,
+ 50, 50, 50, 49, 49, 49, 49, 49, 49, 48, 48, 48, 48, 48, 48, 48, 54, 54,
+ 55, 56, 57, 56, 55, 54, 53, 52, 52, 51, 51, 50, 50, 50, 50, 49, 49, 49,
+ 49, 49, 48, 48, 48, 48, 48, 48, 47, 47, 47, 47, 53, 54, 55, 56, 56, 56,
+ 55, 54, 53, 52, 52, 51, 50, 50, 50, 49, 49, 49, 49, 49, 48, 48, 48, 48,
+ 48, 48, 47, 47, 47, 47, 47, 47, 53, 54, 55, 55, 56, 55, 55, 54, 53, 52,
+ 52, 51, 50, 50, 50, 49, 49, 49, 48, 48, 48, 48, 48, 48, 47, 47, 47, 47,
+ 47, 47, 47, 47, 53, 53, 54, 55, 56, 55, 54, 53, 53, 52, 51, 51, 50, 50,
+ 49, 49, 49, 48, 48, 48, 48, 48, 47, 47, 47, 47, 47, 47, 46, 46, 46, 46,
+ 52, 53, 54, 55, 55, 55, 54, 53, 52, 52, 51, 51, 50, 50, 49, 49, 48, 48,
+ 48, 48, 47, 47, 47, 47, 47, 47, 47, 46, 46, 46, 46, 46, 52, 53, 53, 54,
+ 55, 54, 54, 53, 52, 52, 51, 50, 50, 49, 49, 49, 48, 48, 48, 47, 47, 47,
+ 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 51, 52, 53, 54, 54, 54, 53, 53,
+ 52, 51, 51, 50, 50, 49, 49, 48, 48, 48, 47, 47, 47, 47, 47, 46, 46, 46,
+ 46, 46, 46, 46, 46, 46, 51, 52, 52, 53, 54, 53, 53, 52, 52, 51, 51, 50,
+ 49, 49, 49, 48, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 46, 46,
+ 46, 46, 50, 51, 52, 53, 53, 53, 52, 52, 51, 51, 50, 50, 49, 49, 48, 48,
+ 48, 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, 45, 45, 45, 45, 45, 50, 51,
+ 51, 52, 53, 52, 52, 52, 51, 51, 50, 50, 49, 49, 48, 48, 48, 47, 47, 47,
+ 46, 46, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45, 50, 50, 51, 52, 52, 52,
+ 52, 51, 51, 50, 50, 49, 49, 49, 48, 48, 47, 47, 47, 47, 46, 46, 46, 46,
+ 46, 45, 45, 45, 45, 45, 45, 45, 49, 50, 51, 51, 52, 52, 51, 51, 50, 50,
+ 50, 49, 49, 48, 48, 48, 47, 47, 47, 46, 46, 46, 46, 46, 45, 45, 45, 45,
+ 45, 45, 45, 45, 49, 50, 50, 51, 51, 51, 51, 50, 50, 50, 49, 49, 48, 48,
+ 48, 47, 47, 47, 47, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45, 45, 45, 45,
+ 48, 49, 50, 50, 51, 51, 50, 50, 50, 49, 49, 49, 48, 48, 48, 47, 47, 47,
+ 46, 46, 46, 46, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 48, 49, 49, 50,
+ 51, 50, 50, 50, 49, 49, 49, 48, 48, 48, 47, 47, 47, 46, 46, 46, 46, 46,
+ 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, 48, 49, 49, 50, 51, 50, 50, 50,
+ 49, 49, 49, 48, 48, 48, 47, 47, 47, 46, 46, 46, 46, 46, 45, 45, 45, 45,
+ 45, 45, 44, 44, 44, 44, 48, 49, 49, 50, 51, 50, 50, 50, 49, 49, 49, 48,
+ 48, 48, 47, 47, 47, 46, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45, 44, 44,
+ 44, 44, 48, 49, 49, 50, 51, 50, 50, 50, 49, 49, 49, 48, 48, 48, 47, 47,
+ 47, 46, 46, 46, 46, 46, 45, 45, 45, 45, 45, 45, 44, 44, 44, 44 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 85, 70, 68, 63, 70, 66, 64, 62, 68, 64, 60, 58, 63, 62, 58, 56,
+ /* Size 8 */
+ 82, 91, 70, 69, 67, 65, 62, 60, 91, 76, 70, 72, 71, 69, 66, 64, 70, 70,
+ 66, 67, 67, 66, 64, 62, 69, 72, 67, 64, 63, 62, 61, 60, 67, 71, 67, 63,
+ 61, 60, 59, 59, 65, 69, 66, 62, 60, 59, 58, 57, 62, 66, 64, 61, 59, 58,
+ 57, 56, 60, 64, 62, 60, 59, 57, 56, 55,
+ /* Size 16 */
+ 82, 87, 91, 81, 71, 70, 69, 68, 68, 66, 65, 64, 63, 62, 61, 61, 87, 86,
+ 84, 77, 70, 71, 71, 70, 70, 69, 67, 66, 65, 64, 63, 63, 91, 84, 77, 74,
+ 70, 71, 73, 72, 72, 71, 70, 68, 67, 65, 64, 64, 81, 77, 74, 71, 68, 69,
+ 70, 70, 70, 69, 68, 67, 66, 65, 63, 63, 71, 70, 70, 68, 67, 67, 67, 67,
+ 67, 67, 66, 65, 65, 64, 63, 63, 70, 71, 71, 69, 67, 66, 66, 66, 66, 65,
+ 65, 64, 63, 62, 62, 62, 69, 71, 73, 70, 67, 66, 65, 64, 64, 63, 63, 62,
+ 62, 61, 61, 61, 68, 70, 72, 70, 67, 66, 64, 64, 63, 62, 62, 61, 61, 60,
+ 60, 60, 68, 70, 72, 70, 67, 66, 64, 63, 62, 61, 61, 60, 60, 59, 59, 59,
+ 66, 69, 71, 69, 67, 65, 63, 62, 61, 61, 60, 60, 59, 59, 58, 58, 65, 67,
+ 70, 68, 66, 65, 63, 62, 61, 60, 59, 59, 58, 58, 58, 58, 64, 66, 68, 67,
+ 65, 64, 62, 61, 60, 60, 59, 58, 58, 58, 57, 57, 63, 65, 67, 66, 65, 63,
+ 62, 61, 60, 59, 58, 58, 57, 57, 57, 57, 62, 64, 65, 65, 64, 62, 61, 60,
+ 59, 59, 58, 58, 57, 57, 56, 56, 61, 63, 64, 63, 63, 62, 61, 60, 59, 58,
+ 58, 57, 57, 56, 56, 56, 61, 63, 64, 63, 63, 62, 61, 60, 59, 58, 58, 57,
+ 57, 56, 56, 56,
+ /* Size 32 */
+ 83, 85, 87, 90, 92, 87, 81, 76, 71, 71, 70, 70, 69, 69, 69, 68, 68, 67,
+ 67, 66, 66, 65, 64, 64, 63, 63, 62, 62, 61, 61, 61, 61, 85, 86, 87, 87,
+ 88, 84, 79, 75, 71, 71, 71, 70, 70, 70, 70, 69, 69, 68, 68, 67, 67, 66,
+ 65, 65, 64, 64, 63, 63, 62, 62, 62, 62, 87, 87, 86, 85, 85, 81, 78, 74,
+ 71, 71, 71, 71, 71, 71, 71, 70, 70, 69, 69, 68, 68, 67, 66, 66, 65, 65,
+ 64, 63, 63, 63, 63, 63, 90, 87, 85, 83, 81, 78, 76, 73, 71, 71, 71, 72,
+ 72, 72, 72, 71, 71, 71, 70, 69, 69, 68, 67, 67, 66, 66, 65, 64, 64, 64,
+ 64, 64, 92, 88, 85, 81, 77, 76, 74, 72, 70, 71, 72, 72, 73, 73, 73, 72,
+ 72, 72, 71, 70, 70, 69, 68, 68, 67, 66, 66, 65, 65, 65, 65, 65, 87, 84,
+ 81, 78, 76, 74, 73, 71, 70, 70, 71, 71, 72, 72, 71, 71, 71, 71, 70, 70,
+ 69, 68, 68, 67, 67, 66, 65, 65, 64, 64, 64, 64, 81, 79, 78, 76, 74, 73,
+ 71, 70, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 69, 69, 68, 68, 67, 66,
+ 66, 65, 65, 64, 64, 64, 64, 64, 76, 75, 74, 73, 72, 71, 70, 69, 68, 68,
+ 68, 69, 69, 69, 69, 69, 69, 68, 68, 68, 67, 67, 66, 66, 65, 65, 64, 64,
+ 63, 63, 63, 63, 71, 71, 71, 71, 70, 70, 69, 68, 67, 67, 67, 67, 68, 68,
+ 68, 68, 68, 67, 67, 67, 67, 66, 66, 65, 65, 64, 64, 63, 63, 63, 63, 63,
+ 71, 71, 71, 71, 71, 70, 69, 68, 67, 67, 67, 67, 67, 67, 67, 67, 67, 66,
+ 66, 66, 66, 65, 65, 65, 64, 64, 63, 63, 62, 62, 62, 62, 70, 71, 71, 71,
+ 72, 71, 69, 68, 67, 67, 67, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65,
+ 64, 64, 64, 63, 63, 62, 62, 62, 62, 62, 70, 70, 71, 72, 72, 71, 70, 69,
+ 67, 67, 66, 66, 66, 65, 65, 65, 65, 65, 65, 64, 64, 64, 63, 63, 63, 63,
+ 62, 62, 62, 62, 62, 62, 69, 70, 71, 72, 73, 72, 70, 69, 68, 67, 66, 66,
+ 65, 65, 65, 64, 64, 64, 64, 63, 63, 63, 63, 63, 62, 62, 62, 61, 61, 61,
+ 61, 61, 69, 70, 71, 72, 73, 72, 70, 69, 68, 67, 66, 65, 65, 64, 64, 64,
+ 64, 63, 63, 63, 63, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 69, 70,
+ 71, 72, 73, 71, 70, 69, 68, 67, 66, 65, 65, 64, 64, 63, 63, 63, 63, 62,
+ 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 68, 69, 70, 71, 72, 71,
+ 70, 69, 68, 67, 66, 65, 64, 64, 63, 63, 63, 62, 62, 62, 62, 61, 61, 61,
+ 61, 60, 60, 60, 60, 60, 60, 60, 68, 69, 70, 71, 72, 71, 70, 69, 68, 67,
+ 66, 65, 64, 64, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60,
+ 59, 59, 59, 59, 67, 68, 69, 71, 72, 71, 70, 68, 67, 66, 66, 65, 64, 63,
+ 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+ 67, 68, 69, 70, 71, 70, 69, 68, 67, 66, 65, 65, 64, 63, 63, 62, 62, 61,
+ 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 66, 67, 68, 69,
+ 70, 70, 69, 68, 67, 66, 65, 64, 63, 63, 62, 62, 61, 61, 61, 60, 60, 60,
+ 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 66, 67, 68, 69, 70, 69, 68, 67,
+ 67, 66, 65, 64, 63, 63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 59, 59, 58,
+ 58, 58, 58, 58, 58, 58, 65, 66, 67, 68, 69, 68, 68, 67, 66, 65, 65, 64,
+ 63, 62, 62, 61, 61, 60, 60, 60, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58,
+ 58, 58, 64, 65, 66, 67, 68, 68, 67, 66, 66, 65, 64, 63, 63, 62, 62, 61,
+ 61, 60, 60, 59, 59, 59, 59, 58, 58, 58, 58, 58, 57, 57, 57, 57, 64, 65,
+ 66, 67, 68, 67, 66, 66, 65, 65, 64, 63, 63, 62, 61, 61, 60, 60, 60, 59,
+ 59, 59, 58, 58, 58, 58, 58, 57, 57, 57, 57, 57, 63, 64, 65, 66, 67, 67,
+ 66, 65, 65, 64, 64, 63, 62, 62, 61, 61, 60, 60, 59, 59, 59, 58, 58, 58,
+ 58, 57, 57, 57, 57, 57, 57, 57, 63, 64, 65, 66, 66, 66, 65, 65, 64, 64,
+ 63, 63, 62, 61, 61, 60, 60, 60, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57,
+ 57, 57, 57, 57, 62, 63, 64, 65, 66, 65, 65, 64, 64, 63, 63, 62, 62, 61,
+ 61, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 57, 57,
+ 62, 63, 63, 64, 65, 65, 64, 64, 63, 63, 62, 62, 61, 61, 60, 60, 60, 59,
+ 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 56, 56, 56, 56, 61, 62, 63, 64,
+ 65, 64, 64, 63, 63, 62, 62, 62, 61, 61, 60, 60, 59, 59, 59, 58, 58, 58,
+ 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 61, 62, 63, 64, 65, 64, 64, 63,
+ 63, 62, 62, 62, 61, 61, 60, 60, 59, 59, 59, 58, 58, 58, 57, 57, 57, 57,
+ 57, 56, 56, 56, 56, 56, 61, 62, 63, 64, 65, 64, 64, 63, 63, 62, 62, 62,
+ 61, 61, 60, 60, 59, 59, 59, 58, 58, 58, 57, 57, 57, 57, 57, 56, 56, 56,
+ 56, 56, 61, 62, 63, 64, 65, 64, 64, 63, 63, 62, 62, 62, 61, 61, 60, 60,
+ 59, 59, 59, 58, 58, 58, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 62, 54, 50, 62, 55, 52, 49, 54, 52, 49, 48, 50, 49, 48, 47,
+ /* Size 8 */
+ 64, 70, 68, 63, 58, 55, 53, 52, 70, 67, 68, 65, 60, 57, 54, 53, 68, 68,
+ 61, 59, 57, 55, 53, 52, 63, 65, 59, 56, 54, 53, 52, 51, 58, 60, 57, 54,
+ 53, 52, 51, 51, 55, 57, 55, 53, 52, 51, 51, 50, 53, 54, 53, 52, 51, 51,
+ 50, 50, 52, 53, 52, 51, 51, 50, 50, 50,
+ /* Size 16 */
+ 64, 67, 70, 69, 68, 66, 63, 60, 58, 57, 55, 54, 53, 52, 52, 52, 67, 68,
+ 69, 69, 68, 66, 64, 61, 59, 58, 56, 55, 54, 53, 52, 52, 70, 69, 67, 68,
+ 68, 66, 65, 62, 60, 59, 57, 56, 54, 53, 53, 53, 69, 69, 68, 66, 65, 63,
+ 62, 60, 59, 57, 56, 55, 54, 53, 52, 52, 68, 68, 68, 65, 61, 60, 59, 58,
+ 57, 56, 55, 54, 53, 53, 52, 52, 66, 66, 66, 63, 60, 59, 57, 56, 56, 55,
+ 54, 53, 53, 52, 52, 52, 63, 64, 65, 62, 59, 57, 56, 55, 54, 54, 53, 53,
+ 52, 52, 51, 51, 60, 61, 62, 60, 58, 56, 55, 54, 54, 53, 52, 52, 52, 51,
+ 51, 51, 58, 59, 60, 59, 57, 56, 54, 54, 53, 52, 52, 52, 51, 51, 51, 51,
+ 57, 58, 59, 57, 56, 55, 54, 53, 52, 52, 51, 51, 51, 51, 50, 50, 55, 56,
+ 57, 56, 55, 54, 53, 52, 52, 51, 51, 51, 51, 50, 50, 50, 54, 55, 56, 55,
+ 54, 53, 53, 52, 52, 51, 51, 51, 50, 50, 50, 50, 53, 54, 54, 54, 53, 53,
+ 52, 52, 51, 51, 51, 50, 50, 50, 50, 50, 52, 53, 53, 53, 53, 52, 52, 51,
+ 51, 51, 50, 50, 50, 50, 50, 50, 52, 52, 53, 52, 52, 52, 51, 51, 51, 50,
+ 50, 50, 50, 50, 50, 50, 52, 52, 53, 52, 52, 52, 51, 51, 51, 50, 50, 50,
+ 50, 50, 50, 50,
+ /* Size 32 */
+ 64, 66, 67, 69, 70, 70, 69, 69, 68, 67, 66, 64, 63, 62, 60, 59, 58, 57,
+ 57, 56, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 52, 66, 67, 68, 69,
+ 70, 69, 69, 69, 68, 67, 66, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 55,
+ 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 67, 68, 68, 68, 69, 69, 69, 68,
+ 68, 67, 66, 65, 64, 63, 61, 60, 59, 58, 58, 57, 56, 55, 55, 54, 54, 53,
+ 53, 52, 52, 52, 52, 52, 69, 69, 68, 68, 68, 68, 68, 68, 68, 67, 66, 65,
+ 64, 63, 62, 61, 60, 59, 58, 57, 56, 56, 55, 55, 54, 54, 53, 53, 52, 52,
+ 52, 52, 70, 70, 69, 68, 67, 67, 68, 68, 68, 67, 66, 66, 65, 64, 62, 61,
+ 60, 59, 59, 58, 57, 56, 56, 55, 54, 54, 53, 53, 53, 53, 53, 53, 70, 69,
+ 69, 68, 67, 67, 67, 67, 66, 66, 65, 64, 63, 62, 61, 60, 59, 59, 58, 57,
+ 56, 56, 55, 55, 54, 54, 53, 53, 52, 52, 52, 52, 69, 69, 69, 68, 68, 67,
+ 66, 65, 65, 64, 63, 63, 62, 61, 60, 59, 59, 58, 57, 57, 56, 55, 55, 54,
+ 54, 53, 53, 53, 52, 52, 52, 52, 69, 69, 68, 68, 68, 67, 65, 64, 63, 62,
+ 62, 61, 60, 60, 59, 58, 58, 57, 57, 56, 55, 55, 54, 54, 54, 53, 53, 53,
+ 52, 52, 52, 52, 68, 68, 68, 68, 68, 66, 65, 63, 61, 61, 60, 60, 59, 58,
+ 58, 57, 57, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52,
+ 67, 67, 67, 67, 67, 66, 64, 62, 61, 60, 59, 59, 58, 58, 57, 57, 56, 56,
+ 55, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 52, 66, 66, 66, 66,
+ 66, 65, 63, 62, 60, 59, 59, 58, 57, 57, 56, 56, 56, 55, 55, 54, 54, 54,
+ 53, 53, 53, 52, 52, 52, 52, 52, 52, 52, 64, 64, 65, 65, 66, 64, 63, 61,
+ 60, 59, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52,
+ 52, 52, 51, 51, 51, 51, 63, 63, 64, 64, 65, 63, 62, 60, 59, 58, 57, 57,
+ 56, 55, 55, 55, 54, 54, 54, 53, 53, 53, 53, 52, 52, 52, 52, 51, 51, 51,
+ 51, 51, 62, 62, 63, 63, 64, 62, 61, 60, 58, 58, 57, 56, 55, 55, 55, 54,
+ 54, 54, 53, 53, 53, 53, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 60, 61,
+ 61, 62, 62, 61, 60, 59, 58, 57, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53,
+ 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 59, 60, 60, 61, 61, 60,
+ 59, 58, 57, 57, 56, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52,
+ 51, 51, 51, 51, 51, 51, 51, 51, 58, 59, 59, 60, 60, 59, 59, 58, 57, 56,
+ 56, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 57, 58, 58, 59, 59, 59, 58, 57, 56, 56, 55, 55, 54, 54,
+ 53, 53, 53, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 57, 57, 58, 58, 59, 58, 57, 57, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52,
+ 52, 52, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 56, 56, 57, 57,
+ 58, 57, 57, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52, 51, 51, 51,
+ 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 55, 55, 56, 56, 57, 56, 56, 55,
+ 55, 54, 54, 54, 53, 53, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 50,
+ 50, 50, 50, 50, 50, 50, 54, 55, 55, 56, 56, 56, 55, 55, 54, 54, 54, 53,
+ 53, 53, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50,
+ 50, 50, 54, 54, 55, 55, 56, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52,
+ 52, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 53, 54,
+ 54, 55, 55, 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 51, 51, 51, 51,
+ 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 53, 53, 54, 54, 54, 54,
+ 54, 54, 53, 53, 53, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 53, 53, 53, 54, 54, 54, 53, 53, 53, 53,
+ 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 52, 53, 53, 53, 53, 53, 53, 53, 53, 52, 52, 52, 52, 52,
+ 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 52, 52, 52, 53, 53, 53, 53, 53, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 52, 52, 52, 52,
+ 53, 52, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 52, 52, 52, 52, 53, 52, 52, 52,
+ 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 52, 52, 52, 52, 53, 52, 52, 52, 52, 52, 52, 51,
+ 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 52, 52, 52, 52, 53, 52, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51,
+ 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 80, 77, 67, 61, 77, 68, 63, 60, 67, 63, 60, 58, 61, 60, 58, 57,
+ /* Size 8 */
+ 75, 83, 80, 73, 68, 64, 61, 60, 83, 79, 80, 76, 70, 66, 63, 61, 80, 80,
+ 72, 69, 66, 64, 62, 60, 73, 76, 69, 65, 63, 61, 60, 59, 68, 70, 66, 63,
+ 61, 60, 59, 58, 64, 66, 64, 61, 60, 59, 58, 58, 61, 63, 62, 60, 59, 58,
+ 58, 57, 60, 61, 60, 59, 58, 58, 57, 57,
+ /* Size 16 */
+ 76, 80, 84, 82, 81, 78, 74, 71, 68, 66, 64, 63, 62, 61, 60, 60, 80, 81,
+ 82, 81, 81, 78, 75, 72, 70, 68, 66, 64, 63, 62, 61, 61, 84, 82, 80, 80,
+ 81, 79, 76, 74, 71, 69, 67, 65, 64, 63, 61, 61, 82, 81, 80, 78, 77, 75,
+ 73, 71, 69, 67, 65, 64, 63, 62, 61, 61, 81, 81, 81, 77, 72, 71, 69, 68,
+ 67, 66, 64, 63, 62, 61, 61, 61, 78, 78, 79, 75, 71, 69, 67, 66, 65, 64,
+ 63, 62, 62, 61, 60, 60, 74, 75, 76, 73, 69, 67, 66, 65, 64, 63, 62, 61,
+ 61, 60, 60, 60, 71, 72, 74, 71, 68, 66, 65, 64, 63, 62, 61, 61, 60, 60,
+ 59, 59, 68, 70, 71, 69, 67, 65, 64, 63, 62, 61, 60, 60, 60, 59, 59, 59,
+ 66, 68, 69, 67, 66, 64, 63, 62, 61, 61, 60, 60, 59, 59, 59, 59, 64, 66,
+ 67, 65, 64, 63, 62, 61, 60, 60, 59, 59, 59, 59, 58, 58, 63, 64, 65, 64,
+ 63, 62, 61, 61, 60, 60, 59, 59, 59, 58, 58, 58, 62, 63, 64, 63, 62, 62,
+ 61, 60, 60, 59, 59, 59, 58, 58, 58, 58, 61, 62, 63, 62, 61, 61, 60, 60,
+ 59, 59, 59, 58, 58, 58, 58, 58, 60, 61, 61, 61, 61, 60, 60, 59, 59, 59,
+ 58, 58, 58, 58, 58, 58, 60, 61, 61, 61, 61, 60, 60, 59, 59, 59, 58, 58,
+ 58, 58, 58, 58,
+ /* Size 32 */
+ 76, 78, 80, 82, 84, 83, 83, 82, 82, 80, 78, 76, 74, 73, 71, 70, 69, 68,
+ 67, 66, 65, 64, 63, 63, 62, 62, 61, 61, 60, 60, 60, 60, 78, 79, 81, 82,
+ 83, 83, 82, 82, 82, 80, 78, 77, 75, 74, 72, 71, 69, 68, 67, 66, 65, 65,
+ 64, 63, 63, 62, 62, 61, 61, 61, 61, 61, 80, 81, 81, 82, 82, 82, 82, 82,
+ 81, 80, 79, 77, 76, 74, 73, 71, 70, 69, 68, 67, 66, 65, 64, 64, 63, 63,
+ 62, 62, 61, 61, 61, 61, 82, 82, 82, 81, 81, 81, 81, 81, 81, 80, 79, 77,
+ 76, 75, 73, 72, 71, 70, 69, 67, 66, 66, 65, 64, 63, 63, 62, 62, 61, 61,
+ 61, 61, 84, 83, 82, 81, 80, 80, 81, 81, 81, 80, 79, 78, 77, 75, 74, 73,
+ 71, 70, 69, 68, 67, 66, 65, 65, 64, 63, 63, 62, 62, 62, 62, 62, 83, 83,
+ 82, 81, 80, 80, 80, 79, 79, 78, 77, 76, 75, 74, 73, 71, 70, 69, 68, 67,
+ 66, 66, 65, 64, 64, 63, 63, 62, 62, 62, 62, 62, 83, 82, 82, 81, 81, 80,
+ 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 67, 66, 65, 65, 64,
+ 63, 63, 62, 62, 61, 61, 61, 61, 82, 82, 82, 81, 81, 79, 78, 76, 75, 74,
+ 73, 72, 71, 71, 70, 69, 68, 67, 67, 66, 65, 65, 64, 63, 63, 62, 62, 62,
+ 61, 61, 61, 61, 82, 82, 81, 81, 81, 79, 77, 75, 73, 72, 71, 70, 70, 69,
+ 68, 68, 67, 66, 66, 65, 65, 64, 64, 63, 63, 62, 62, 61, 61, 61, 61, 61,
+ 80, 80, 80, 80, 80, 78, 76, 74, 72, 71, 70, 70, 69, 68, 67, 67, 66, 66,
+ 65, 65, 64, 64, 63, 63, 62, 62, 61, 61, 61, 61, 61, 61, 78, 78, 79, 79,
+ 79, 77, 75, 73, 71, 70, 69, 69, 68, 67, 67, 66, 65, 65, 64, 64, 63, 63,
+ 63, 62, 62, 62, 61, 61, 61, 61, 61, 61, 76, 77, 77, 77, 78, 76, 74, 72,
+ 70, 70, 69, 68, 67, 66, 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 61, 61,
+ 61, 61, 60, 60, 60, 60, 74, 75, 76, 76, 77, 75, 73, 71, 70, 69, 68, 67,
+ 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60,
+ 60, 60, 73, 74, 74, 75, 75, 74, 72, 71, 69, 68, 67, 66, 65, 65, 64, 64,
+ 63, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 71, 72,
+ 73, 73, 74, 73, 71, 70, 68, 67, 67, 66, 65, 64, 64, 63, 63, 63, 62, 62,
+ 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 70, 71, 71, 72, 73, 71,
+ 70, 69, 68, 67, 66, 65, 64, 64, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60,
+ 60, 60, 60, 60, 59, 59, 59, 59, 69, 69, 70, 71, 71, 70, 69, 68, 67, 66,
+ 65, 65, 64, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 59,
+ 59, 59, 59, 59, 68, 68, 69, 70, 70, 69, 68, 67, 66, 66, 65, 64, 63, 63,
+ 63, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+ 67, 67, 68, 69, 69, 68, 67, 67, 66, 65, 64, 64, 63, 63, 62, 62, 61, 61,
+ 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 66, 66, 67, 67,
+ 68, 67, 67, 66, 65, 65, 64, 63, 63, 62, 62, 61, 61, 61, 61, 60, 60, 60,
+ 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 65, 65, 66, 66, 67, 66, 66, 65,
+ 65, 64, 63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 64, 65, 65, 66, 66, 66, 65, 65, 64, 64, 63, 63,
+ 62, 62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 58, 58,
+ 58, 58, 63, 64, 64, 65, 65, 65, 65, 64, 64, 63, 63, 62, 62, 61, 61, 61,
+ 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 63, 63,
+ 64, 64, 65, 64, 64, 63, 63, 63, 62, 62, 61, 61, 61, 60, 60, 60, 60, 59,
+ 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 62, 63, 63, 63, 64, 64,
+ 63, 63, 63, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59,
+ 59, 58, 58, 58, 58, 58, 58, 58, 62, 62, 63, 63, 63, 63, 63, 62, 62, 62,
+ 62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58,
+ 58, 58, 58, 58, 61, 62, 62, 62, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60,
+ 60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+ 61, 61, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 59, 59,
+ 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 60, 61, 61, 61,
+ 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 58,
+ 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 60, 61, 61, 61, 62, 62, 61, 61,
+ 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58,
+ 58, 58, 58, 58, 58, 58, 60, 61, 61, 61, 62, 62, 61, 61, 61, 61, 61, 60,
+ 60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58, 58,
+ 58, 58, 60, 61, 61, 61, 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 59,
+ 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 56, 54, 52, 56, 53, 52, 51, 54, 52, 50, 49, 52, 51, 49, 48,
+ /* Size 8 */
+ 64, 69, 57, 56, 55, 54, 53, 52, 69, 61, 57, 58, 58, 57, 55, 53, 57, 57,
+ 55, 55, 55, 55, 54, 53, 56, 58, 55, 54, 53, 53, 52, 51, 55, 58, 55, 53,
+ 52, 51, 51, 50, 54, 57, 55, 53, 51, 51, 50, 50, 53, 55, 54, 52, 51, 50,
+ 49, 49, 52, 53, 53, 51, 50, 50, 49, 49,
+ /* Size 16 */
+ 64, 67, 69, 63, 57, 57, 56, 56, 55, 55, 54, 53, 53, 52, 52, 52, 67, 66,
+ 65, 61, 57, 57, 57, 57, 57, 56, 55, 55, 54, 53, 52, 52, 69, 65, 61, 59,
+ 57, 58, 58, 58, 58, 57, 57, 56, 55, 54, 53, 53, 63, 61, 59, 57, 56, 56,
+ 57, 57, 57, 56, 56, 55, 54, 54, 53, 53, 57, 57, 57, 56, 55, 55, 55, 55,
+ 55, 55, 55, 54, 54, 53, 53, 53, 57, 57, 58, 56, 55, 55, 54, 54, 54, 54,
+ 54, 53, 53, 52, 52, 52, 56, 57, 58, 57, 55, 54, 54, 53, 53, 53, 53, 52,
+ 52, 52, 51, 51, 56, 57, 58, 57, 55, 54, 53, 53, 53, 52, 52, 52, 52, 51,
+ 51, 51, 55, 57, 58, 57, 55, 54, 53, 53, 52, 52, 51, 51, 51, 51, 50, 50,
+ 55, 56, 57, 56, 55, 54, 53, 52, 52, 51, 51, 51, 51, 50, 50, 50, 54, 55,
+ 57, 56, 55, 54, 53, 52, 51, 51, 51, 50, 50, 50, 50, 50, 53, 55, 56, 55,
+ 54, 53, 52, 52, 51, 51, 50, 50, 50, 50, 49, 49, 53, 54, 55, 54, 54, 53,
+ 52, 52, 51, 51, 50, 50, 49, 49, 49, 49, 52, 53, 54, 54, 53, 52, 52, 51,
+ 51, 50, 50, 50, 49, 49, 49, 49, 52, 52, 53, 53, 53, 52, 51, 51, 50, 50,
+ 50, 49, 49, 49, 49, 49, 52, 52, 53, 53, 53, 52, 51, 51, 50, 50, 50, 49,
+ 49, 49, 49, 49,
+ /* Size 32 */
+ 64, 65, 67, 68, 69, 66, 63, 60, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55,
+ 55, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 52, 52, 65, 66, 66, 67,
+ 67, 65, 62, 60, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 54,
+ 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 67, 66, 66, 65, 65, 63, 61, 59,
+ 57, 57, 57, 57, 57, 57, 57, 57, 57, 56, 56, 56, 55, 55, 55, 54, 54, 53,
+ 53, 53, 52, 52, 52, 52, 68, 67, 65, 64, 63, 61, 60, 58, 57, 57, 57, 58,
+ 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 55, 55, 54, 54, 54, 53, 53, 53,
+ 53, 53, 69, 67, 65, 63, 61, 60, 59, 58, 57, 57, 58, 58, 58, 58, 58, 58,
+ 58, 58, 57, 57, 57, 56, 56, 55, 55, 55, 54, 54, 53, 53, 53, 53, 66, 65,
+ 63, 61, 60, 59, 58, 57, 56, 57, 57, 57, 58, 58, 57, 57, 57, 57, 57, 56,
+ 56, 56, 55, 55, 55, 54, 54, 54, 53, 53, 53, 53, 63, 62, 61, 60, 59, 58,
+ 57, 57, 56, 56, 56, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55,
+ 54, 54, 54, 53, 53, 53, 53, 53, 60, 60, 59, 58, 58, 57, 57, 56, 55, 55,
+ 56, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 53, 53,
+ 53, 53, 53, 53, 57, 57, 57, 57, 57, 56, 56, 55, 55, 55, 55, 55, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 53, 53, 53, 53, 53, 53, 53,
+ 57, 57, 57, 57, 57, 57, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
+ 54, 54, 54, 54, 54, 53, 53, 53, 53, 53, 52, 52, 52, 52, 57, 57, 57, 57,
+ 58, 57, 56, 56, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 53,
+ 53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 57, 57, 57, 58, 58, 57, 57, 56,
+ 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53, 53, 53, 52,
+ 52, 52, 52, 52, 52, 52, 56, 57, 57, 58, 58, 58, 57, 56, 55, 55, 54, 54,
+ 54, 54, 53, 53, 53, 53, 53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 51, 51,
+ 51, 51, 56, 57, 57, 58, 58, 58, 57, 56, 55, 55, 54, 54, 54, 53, 53, 53,
+ 53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 56, 56,
+ 57, 58, 58, 57, 57, 56, 55, 55, 54, 54, 53, 53, 53, 53, 53, 52, 52, 52,
+ 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 56, 56, 57, 57, 58, 57,
+ 57, 56, 55, 55, 54, 54, 53, 53, 53, 53, 52, 52, 52, 52, 52, 52, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 55, 56, 57, 57, 58, 57, 57, 56, 55, 55,
+ 54, 54, 53, 53, 53, 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 51,
+ 50, 50, 50, 50, 55, 56, 56, 57, 58, 57, 56, 56, 55, 55, 54, 54, 53, 53,
+ 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50,
+ 55, 55, 56, 57, 57, 57, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 52,
+ 51, 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 54, 55, 56, 56,
+ 57, 56, 56, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 51,
+ 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 54, 55, 55, 56, 57, 56, 56, 55,
+ 55, 54, 54, 53, 53, 52, 52, 52, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50,
+ 50, 50, 50, 50, 50, 50, 54, 54, 55, 56, 56, 56, 55, 55, 54, 54, 53, 53,
+ 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50,
+ 50, 50, 53, 54, 55, 55, 56, 55, 55, 55, 54, 54, 53, 53, 52, 52, 52, 51,
+ 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 53, 54,
+ 54, 55, 55, 55, 55, 54, 54, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50,
+ 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 53, 53, 54, 54, 55, 55,
+ 54, 54, 54, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 50,
+ 49, 49, 49, 49, 49, 49, 49, 49, 52, 53, 53, 54, 55, 54, 54, 54, 53, 53,
+ 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49,
+ 49, 49, 49, 49, 52, 53, 53, 54, 54, 54, 54, 53, 53, 53, 52, 52, 52, 52,
+ 51, 51, 51, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 49, 49,
+ 52, 52, 53, 53, 54, 54, 53, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50,
+ 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 52, 52, 52, 53,
+ 53, 53, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50,
+ 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 52, 52, 52, 53, 53, 53, 53, 53,
+ 53, 52, 52, 52, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49,
+ 49, 49, 49, 49, 49, 49, 52, 52, 52, 53, 53, 53, 53, 53, 53, 52, 52, 52,
+ 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 49,
+ 49, 49, 52, 52, 52, 53, 53, 53, 53, 53, 53, 52, 52, 52, 51, 51, 51, 51,
+ 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 79, 68, 67, 63, 68, 65, 64, 62, 67, 64, 61, 60, 63, 62, 60, 58,
+ /* Size 8 */
+ 77, 84, 68, 67, 66, 64, 63, 61, 84, 73, 68, 70, 69, 67, 65, 64, 68, 68,
+ 65, 66, 66, 65, 64, 62, 67, 70, 66, 64, 63, 63, 62, 61, 66, 69, 66, 63,
+ 62, 61, 60, 60, 64, 67, 65, 63, 61, 60, 59, 59, 63, 65, 64, 62, 60, 59,
+ 59, 58, 61, 64, 62, 61, 60, 59, 58, 58,
+ /* Size 16 */
+ 77, 81, 84, 76, 69, 68, 68, 67, 67, 66, 65, 64, 63, 62, 62, 62, 81, 80,
+ 79, 74, 69, 69, 69, 69, 68, 67, 66, 65, 64, 64, 63, 63, 84, 79, 74, 71,
+ 68, 69, 70, 70, 70, 69, 68, 67, 66, 65, 64, 64, 76, 74, 71, 69, 67, 68,
+ 68, 68, 68, 67, 67, 66, 65, 64, 63, 63, 69, 69, 68, 67, 66, 66, 66, 66,
+ 66, 66, 66, 65, 64, 64, 63, 63, 68, 69, 69, 68, 66, 66, 65, 65, 65, 65,
+ 64, 64, 63, 63, 62, 62, 68, 69, 70, 68, 66, 65, 64, 64, 64, 63, 63, 63,
+ 62, 62, 61, 61, 67, 69, 70, 68, 66, 65, 64, 64, 63, 63, 62, 62, 62, 61,
+ 61, 61, 67, 68, 70, 68, 66, 65, 64, 63, 62, 62, 61, 61, 61, 61, 60, 60,
+ 66, 67, 69, 67, 66, 65, 63, 63, 62, 61, 61, 61, 60, 60, 60, 60, 65, 66,
+ 68, 67, 66, 64, 63, 62, 61, 61, 60, 60, 60, 59, 59, 59, 64, 65, 67, 66,
+ 65, 64, 63, 62, 61, 61, 60, 60, 59, 59, 59, 59, 63, 64, 66, 65, 64, 63,
+ 62, 62, 61, 60, 60, 59, 59, 59, 58, 58, 62, 64, 65, 64, 64, 63, 62, 61,
+ 61, 60, 59, 59, 59, 58, 58, 58, 62, 63, 64, 63, 63, 62, 61, 61, 60, 60,
+ 59, 59, 58, 58, 58, 58, 62, 63, 64, 63, 63, 62, 61, 61, 60, 60, 59, 59,
+ 58, 58, 58, 58,
+ /* Size 32 */
+ 78, 79, 81, 83, 84, 81, 77, 73, 69, 69, 68, 68, 68, 68, 67, 67, 67, 66,
+ 66, 65, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 79, 80, 81, 81,
+ 82, 79, 75, 72, 69, 69, 69, 69, 69, 68, 68, 68, 68, 67, 67, 66, 66, 65,
+ 65, 64, 64, 64, 63, 63, 62, 62, 62, 62, 81, 81, 80, 80, 79, 77, 74, 71,
+ 69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 67, 67, 67, 66, 66, 65, 65, 64,
+ 64, 63, 63, 63, 63, 63, 83, 81, 80, 78, 76, 75, 73, 71, 69, 69, 69, 70,
+ 70, 70, 70, 69, 69, 69, 68, 68, 67, 67, 66, 66, 65, 65, 65, 64, 64, 64,
+ 64, 64, 84, 82, 79, 76, 74, 73, 71, 70, 69, 69, 70, 70, 71, 70, 70, 70,
+ 70, 70, 69, 69, 68, 68, 67, 67, 66, 66, 65, 65, 64, 64, 64, 64, 81, 79,
+ 77, 75, 73, 71, 70, 69, 68, 68, 69, 69, 70, 69, 69, 69, 69, 69, 68, 68,
+ 68, 67, 67, 66, 66, 65, 65, 64, 64, 64, 64, 64, 77, 75, 74, 73, 71, 70,
+ 69, 68, 67, 68, 68, 68, 69, 68, 68, 68, 68, 68, 68, 67, 67, 67, 66, 66,
+ 65, 65, 64, 64, 64, 64, 64, 64, 73, 72, 71, 71, 70, 69, 68, 67, 67, 67,
+ 67, 67, 68, 67, 67, 67, 67, 67, 67, 67, 66, 66, 66, 65, 65, 65, 64, 64,
+ 63, 63, 63, 63, 69, 69, 69, 69, 69, 68, 67, 67, 66, 66, 66, 66, 66, 67,
+ 67, 67, 67, 66, 66, 66, 66, 65, 65, 65, 64, 64, 64, 63, 63, 63, 63, 63,
+ 69, 69, 69, 69, 69, 68, 68, 67, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
+ 66, 65, 65, 65, 65, 64, 64, 64, 63, 63, 63, 63, 63, 63, 68, 69, 69, 69,
+ 70, 69, 68, 67, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 64,
+ 64, 64, 64, 63, 63, 63, 62, 62, 62, 62, 68, 69, 69, 70, 70, 69, 68, 67,
+ 66, 66, 66, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63,
+ 63, 62, 62, 62, 62, 62, 68, 69, 69, 70, 71, 70, 69, 68, 66, 66, 66, 65,
+ 65, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62,
+ 62, 62, 68, 68, 69, 70, 70, 69, 68, 67, 67, 66, 65, 65, 64, 64, 64, 64,
+ 64, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 67, 68,
+ 69, 70, 70, 69, 68, 67, 67, 66, 65, 65, 64, 64, 64, 63, 63, 63, 63, 63,
+ 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 67, 68, 69, 69, 70, 69,
+ 68, 67, 67, 66, 65, 65, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62,
+ 61, 61, 61, 61, 61, 61, 61, 61, 67, 68, 68, 69, 70, 69, 68, 67, 67, 66,
+ 65, 65, 64, 64, 63, 63, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61,
+ 60, 60, 60, 60, 66, 67, 68, 69, 70, 69, 68, 67, 66, 66, 65, 64, 64, 63,
+ 63, 63, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60,
+ 66, 67, 67, 68, 69, 68, 68, 67, 66, 66, 65, 64, 64, 63, 63, 62, 62, 62,
+ 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 65, 66, 67, 68,
+ 69, 68, 67, 67, 66, 65, 65, 64, 63, 63, 63, 62, 62, 62, 61, 61, 61, 61,
+ 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 65, 66, 67, 67, 68, 68, 67, 66,
+ 66, 65, 65, 64, 63, 63, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60,
+ 60, 60, 59, 59, 59, 59, 65, 65, 66, 67, 68, 67, 67, 66, 65, 65, 64, 64,
+ 63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59,
+ 59, 59, 64, 65, 66, 66, 67, 67, 66, 66, 65, 65, 64, 63, 63, 63, 62, 62,
+ 61, 61, 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 64, 64,
+ 65, 66, 67, 66, 66, 65, 65, 64, 64, 63, 63, 62, 62, 62, 61, 61, 61, 60,
+ 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 63, 64, 65, 65, 66, 66,
+ 65, 65, 64, 64, 64, 63, 63, 62, 62, 61, 61, 61, 60, 60, 60, 60, 60, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 63, 64, 64, 65, 66, 65, 65, 65, 64, 64,
+ 63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 63, 63, 64, 65, 65, 65, 64, 64, 64, 63, 63, 63, 62, 62,
+ 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58,
+ 62, 63, 63, 64, 65, 64, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 61, 60,
+ 60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 62, 62, 63, 64,
+ 64, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 59, 59,
+ 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 62, 62, 63, 64, 64, 64, 64, 63,
+ 63, 63, 62, 62, 62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+ 58, 58, 58, 58, 58, 58, 62, 62, 63, 64, 64, 64, 64, 63, 63, 63, 62, 62,
+ 62, 61, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58,
+ 58, 58, 62, 62, 63, 64, 64, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 61,
+ 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 63, 57, 54, 63, 58, 55, 54, 57, 55, 53, 53, 54, 54, 53, 52,
+ /* Size 8 */
+ 64, 68, 67, 63, 60, 58, 56, 55, 68, 66, 67, 64, 61, 59, 57, 56, 67, 67,
+ 62, 61, 59, 58, 57, 56, 63, 64, 61, 58, 57, 56, 56, 55, 60, 61, 59, 57,
+ 56, 56, 55, 55, 58, 59, 58, 56, 56, 55, 55, 54, 56, 57, 57, 56, 55, 55,
+ 54, 54, 55, 56, 56, 55, 55, 54, 54, 54,
+ /* Size 16 */
+ 64, 66, 68, 68, 67, 65, 63, 62, 60, 59, 58, 57, 56, 56, 55, 55, 66, 67,
+ 67, 67, 67, 65, 64, 62, 61, 60, 58, 58, 57, 56, 56, 56, 68, 67, 66, 66,
+ 67, 66, 64, 63, 61, 60, 59, 58, 57, 57, 56, 56, 68, 67, 66, 65, 65, 63,
+ 62, 61, 60, 59, 58, 58, 57, 56, 56, 56, 67, 67, 67, 65, 62, 61, 61, 60,
+ 59, 58, 58, 57, 57, 56, 56, 56, 65, 65, 66, 63, 61, 60, 59, 59, 58, 58,
+ 57, 57, 56, 56, 56, 56, 63, 64, 64, 62, 61, 59, 58, 58, 57, 57, 56, 56,
+ 56, 56, 55, 55, 62, 62, 63, 61, 60, 59, 58, 57, 57, 56, 56, 56, 55, 55,
+ 55, 55, 60, 61, 61, 60, 59, 58, 57, 57, 56, 56, 56, 55, 55, 55, 55, 55,
+ 59, 60, 60, 59, 58, 58, 57, 56, 56, 56, 55, 55, 55, 55, 55, 55, 58, 58,
+ 59, 58, 58, 57, 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 57, 58, 58, 58,
+ 57, 57, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 56, 57, 57, 57, 57, 56,
+ 56, 55, 55, 55, 55, 55, 54, 54, 54, 54, 56, 56, 57, 56, 56, 56, 56, 55,
+ 55, 55, 55, 54, 54, 54, 54, 54, 55, 56, 56, 56, 56, 56, 55, 55, 55, 55,
+ 54, 54, 54, 54, 54, 54, 55, 56, 56, 56, 56, 56, 55, 55, 55, 55, 54, 54,
+ 54, 54, 54, 54,
+ /* Size 32 */
+ 64, 65, 66, 67, 68, 68, 68, 67, 67, 66, 65, 64, 63, 62, 62, 61, 60, 59,
+ 59, 58, 58, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 65, 66, 66, 67,
+ 68, 68, 67, 67, 67, 66, 65, 64, 63, 63, 62, 61, 60, 60, 59, 59, 58, 58,
+ 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 66, 66, 67, 67, 67, 67, 67, 67,
+ 67, 66, 65, 65, 64, 63, 62, 61, 61, 60, 60, 59, 58, 58, 58, 57, 57, 57,
+ 56, 56, 56, 56, 56, 56, 67, 67, 67, 67, 67, 67, 67, 67, 67, 66, 66, 65,
+ 64, 63, 63, 62, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56,
+ 56, 56, 68, 68, 67, 67, 66, 66, 66, 67, 67, 66, 66, 65, 64, 64, 63, 62,
+ 61, 61, 60, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 68, 68,
+ 67, 67, 66, 66, 66, 66, 66, 65, 65, 64, 63, 63, 62, 62, 61, 60, 60, 59,
+ 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 68, 67, 67, 67, 66, 66,
+ 65, 65, 65, 64, 63, 63, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58, 58, 57,
+ 57, 57, 56, 56, 56, 56, 56, 56, 67, 67, 67, 67, 67, 66, 65, 64, 63, 63,
+ 62, 62, 62, 61, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56,
+ 56, 56, 56, 56, 67, 67, 67, 67, 67, 66, 65, 63, 62, 62, 61, 61, 61, 60,
+ 60, 59, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56,
+ 66, 66, 66, 66, 66, 65, 64, 63, 62, 61, 61, 60, 60, 60, 59, 59, 59, 58,
+ 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 65, 65, 65, 66,
+ 66, 65, 63, 62, 61, 61, 60, 60, 59, 59, 59, 59, 58, 58, 58, 57, 57, 57,
+ 57, 56, 56, 56, 56, 56, 56, 56, 56, 56, 64, 64, 65, 65, 65, 64, 63, 62,
+ 61, 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56,
+ 56, 56, 55, 55, 55, 55, 63, 63, 64, 64, 64, 63, 62, 62, 61, 60, 59, 59,
+ 58, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55,
+ 55, 55, 62, 63, 63, 63, 64, 63, 62, 61, 60, 60, 59, 59, 58, 58, 58, 57,
+ 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 62, 62,
+ 62, 63, 63, 62, 61, 61, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56,
+ 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 61, 61, 61, 62, 62, 62,
+ 61, 60, 59, 59, 59, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55, 60, 60, 61, 61, 61, 61, 60, 60, 59, 59,
+ 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55,
+ 55, 55, 55, 55, 59, 60, 60, 60, 61, 60, 60, 59, 59, 58, 58, 58, 57, 57,
+ 57, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
+ 59, 59, 60, 60, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56,
+ 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 58, 59, 59, 59,
+ 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 55, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 58, 58, 58, 59, 59, 59, 58, 58,
+ 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55,
+ 55, 55, 54, 54, 54, 54, 57, 58, 58, 58, 59, 58, 58, 58, 57, 57, 57, 57,
+ 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54,
+ 54, 54, 57, 57, 58, 58, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56,
+ 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 57, 57,
+ 57, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55,
+ 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 56, 57, 57, 57, 57, 57,
+ 57, 57, 57, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54,
+ 54, 54, 54, 54, 54, 54, 54, 54, 56, 56, 57, 57, 57, 57, 57, 57, 56, 56,
+ 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54,
+ 54, 54, 54, 54, 56, 56, 56, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
+ 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55,
+ 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 55, 56, 56, 56,
+ 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54,
+ 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 55, 56, 56, 56, 56, 56, 56, 56,
+ 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54,
+ 54, 54, 54, 54, 54, 54, 55, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
+ 54, 54, 55, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55,
+ 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 74, 72, 66, 62, 72, 67, 63, 61, 66, 63, 61, 60, 62, 61, 60, 59,
+ /* Size 8 */
+ 71, 76, 75, 70, 66, 64, 62, 61, 76, 73, 74, 71, 68, 65, 63, 62, 75, 74,
+ 69, 67, 65, 64, 62, 61, 70, 71, 67, 64, 63, 62, 61, 61, 66, 68, 65, 63,
+ 62, 61, 61, 60, 64, 65, 64, 62, 61, 60, 60, 60, 62, 63, 62, 61, 61, 60,
+ 60, 59, 61, 62, 61, 61, 60, 60, 59, 59,
+ /* Size 16 */
+ 71, 74, 77, 76, 75, 73, 70, 68, 67, 65, 64, 63, 62, 62, 61, 61, 74, 75,
+ 75, 75, 75, 73, 71, 69, 67, 66, 65, 64, 63, 62, 62, 62, 77, 75, 74, 74,
+ 75, 73, 72, 70, 68, 67, 66, 65, 64, 63, 62, 62, 76, 75, 74, 73, 72, 71,
+ 70, 68, 67, 66, 65, 64, 63, 63, 62, 62, 75, 75, 75, 72, 69, 68, 67, 66,
+ 66, 65, 64, 63, 63, 62, 62, 62, 73, 73, 73, 71, 68, 67, 66, 65, 65, 64,
+ 63, 63, 62, 62, 61, 61, 70, 71, 72, 70, 67, 66, 65, 64, 63, 63, 63, 62,
+ 62, 61, 61, 61, 68, 69, 70, 68, 66, 65, 64, 64, 63, 62, 62, 62, 61, 61,
+ 61, 61, 67, 67, 68, 67, 66, 65, 63, 63, 62, 62, 62, 61, 61, 61, 61, 61,
+ 65, 66, 67, 66, 65, 64, 63, 62, 62, 62, 61, 61, 61, 61, 60, 60, 64, 65,
+ 66, 65, 64, 63, 63, 62, 62, 61, 61, 61, 60, 60, 60, 60, 63, 64, 65, 64,
+ 63, 63, 62, 62, 61, 61, 61, 60, 60, 60, 60, 60, 62, 63, 64, 63, 63, 62,
+ 62, 61, 61, 61, 60, 60, 60, 60, 60, 60, 62, 62, 63, 63, 62, 62, 61, 61,
+ 61, 61, 60, 60, 60, 60, 60, 60, 61, 62, 62, 62, 62, 61, 61, 61, 61, 60,
+ 60, 60, 60, 60, 60, 60, 61, 62, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60,
+ 60, 60, 60, 60,
+ /* Size 32 */
+ 72, 73, 74, 76, 77, 76, 76, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66,
+ 66, 65, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 73, 74, 75, 75,
+ 76, 76, 76, 75, 75, 74, 73, 72, 71, 70, 69, 68, 67, 67, 66, 65, 65, 64,
+ 64, 63, 63, 63, 62, 62, 62, 62, 62, 62, 74, 75, 75, 75, 76, 75, 75, 75,
+ 75, 74, 73, 72, 71, 70, 70, 69, 68, 67, 66, 66, 65, 65, 64, 64, 63, 63,
+ 63, 62, 62, 62, 62, 62, 76, 75, 75, 75, 75, 75, 75, 75, 75, 74, 73, 73,
+ 72, 71, 70, 69, 68, 67, 67, 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62,
+ 62, 62, 77, 76, 76, 75, 74, 74, 75, 75, 75, 74, 74, 73, 72, 71, 70, 69,
+ 69, 68, 67, 66, 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62, 62, 76, 76,
+ 75, 75, 74, 74, 74, 74, 74, 73, 72, 72, 71, 70, 69, 69, 68, 67, 67, 66,
+ 65, 65, 64, 64, 64, 63, 63, 63, 62, 62, 62, 62, 76, 76, 75, 75, 75, 74,
+ 73, 73, 72, 72, 71, 70, 70, 69, 69, 68, 67, 67, 66, 66, 65, 65, 64, 64,
+ 63, 63, 63, 62, 62, 62, 62, 62, 76, 75, 75, 75, 75, 74, 73, 72, 71, 70,
+ 70, 69, 69, 68, 68, 67, 66, 66, 66, 65, 65, 64, 64, 63, 63, 63, 63, 62,
+ 62, 62, 62, 62, 75, 75, 75, 75, 75, 74, 72, 71, 69, 69, 68, 68, 67, 67,
+ 67, 66, 66, 65, 65, 65, 64, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 62,
+ 74, 74, 74, 74, 74, 73, 72, 70, 69, 68, 68, 67, 67, 66, 66, 66, 65, 65,
+ 65, 64, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 73, 73, 73, 73,
+ 74, 72, 71, 70, 68, 68, 67, 67, 66, 66, 65, 65, 65, 64, 64, 64, 63, 63,
+ 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 72, 72, 72, 73, 73, 72, 70, 69,
+ 68, 67, 67, 66, 66, 65, 65, 65, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62,
+ 62, 62, 61, 61, 61, 61, 71, 71, 71, 72, 72, 71, 70, 69, 67, 67, 66, 66,
+ 65, 65, 64, 64, 64, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61,
+ 61, 61, 70, 70, 70, 71, 71, 70, 69, 68, 67, 66, 66, 65, 65, 64, 64, 64,
+ 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 69, 69,
+ 70, 70, 70, 69, 69, 68, 67, 66, 65, 65, 64, 64, 64, 63, 63, 63, 63, 62,
+ 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 68, 68, 69, 69, 69, 69,
+ 68, 67, 66, 66, 65, 65, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62,
+ 61, 61, 61, 61, 61, 61, 61, 61, 67, 67, 68, 68, 69, 68, 67, 66, 66, 65,
+ 65, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61,
+ 61, 61, 61, 61, 66, 67, 67, 67, 68, 67, 67, 66, 65, 65, 64, 64, 63, 63,
+ 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+ 66, 66, 66, 67, 67, 67, 66, 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62,
+ 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 65, 65, 66, 66,
+ 66, 66, 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61,
+ 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 64, 65, 65, 65, 66, 65, 65, 65,
+ 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61,
+ 60, 60, 60, 60, 60, 60, 64, 64, 65, 65, 65, 65, 65, 64, 64, 64, 63, 63,
+ 63, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60,
+ 60, 60, 63, 64, 64, 64, 65, 64, 64, 64, 64, 63, 63, 63, 62, 62, 62, 62,
+ 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 63, 63,
+ 64, 64, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61,
+ 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 63, 63, 63, 63, 64, 64,
+ 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 62, 63, 63, 63, 63, 63, 63, 63, 63, 62,
+ 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 62, 62, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 61,
+ 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 62, 62, 62, 62, 63, 63, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61,
+ 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 61,
+ 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61,
+ 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 58, 57, 55, 58, 56, 56, 55, 57, 56, 54, 53, 55, 55, 53, 53,
+ /* Size 8 */
+ 64, 68, 59, 59, 58, 57, 56, 55, 68, 62, 59, 60, 60, 59, 58, 57, 59, 59,
+ 58, 58, 58, 57, 57, 56, 59, 60, 58, 57, 56, 56, 56, 55, 58, 60, 58, 56,
+ 56, 55, 55, 55, 57, 59, 57, 56, 55, 55, 54, 54, 56, 58, 57, 56, 55, 54,
+ 54, 54, 55, 57, 56, 55, 55, 54, 54, 53,
+ /* Size 16 */
+ 64, 66, 68, 63, 59, 59, 59, 58, 58, 58, 57, 57, 56, 56, 55, 55, 66, 65,
+ 65, 62, 59, 59, 59, 59, 59, 58, 58, 57, 57, 56, 56, 56, 68, 65, 62, 60,
+ 59, 60, 60, 60, 60, 59, 59, 58, 58, 57, 57, 57, 63, 62, 60, 59, 58, 59,
+ 59, 59, 59, 58, 58, 58, 57, 57, 56, 56, 59, 59, 59, 58, 58, 58, 58, 58,
+ 58, 58, 57, 57, 57, 56, 56, 56, 59, 59, 60, 59, 58, 58, 57, 57, 57, 57,
+ 57, 57, 56, 56, 56, 56, 59, 59, 60, 59, 58, 57, 57, 57, 56, 56, 56, 56,
+ 56, 55, 55, 55, 58, 59, 60, 59, 58, 57, 57, 56, 56, 56, 56, 55, 55, 55,
+ 55, 55, 58, 59, 60, 59, 58, 57, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55,
+ 58, 58, 59, 58, 58, 57, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 57, 58,
+ 59, 58, 57, 57, 56, 56, 55, 55, 55, 54, 54, 54, 54, 54, 57, 57, 58, 58,
+ 57, 57, 56, 55, 55, 55, 54, 54, 54, 54, 54, 54, 56, 57, 58, 57, 57, 56,
+ 56, 55, 55, 55, 54, 54, 54, 54, 54, 54, 56, 56, 57, 57, 56, 56, 55, 55,
+ 55, 54, 54, 54, 54, 54, 53, 53, 55, 56, 57, 56, 56, 56, 55, 55, 55, 54,
+ 54, 54, 54, 53, 53, 53, 55, 56, 57, 56, 56, 56, 55, 55, 55, 54, 54, 54,
+ 54, 53, 53, 53,
+ /* Size 32 */
+ 64, 65, 66, 67, 68, 66, 63, 61, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58,
+ 58, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 65, 65, 66, 66,
+ 66, 64, 63, 61, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 57, 57,
+ 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 66, 66, 65, 65, 65, 63, 62, 61,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57,
+ 56, 56, 56, 56, 56, 56, 67, 66, 65, 64, 63, 62, 61, 60, 59, 59, 59, 60,
+ 60, 60, 60, 59, 59, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 56, 56,
+ 56, 56, 68, 66, 65, 63, 62, 61, 60, 60, 59, 59, 60, 60, 60, 60, 60, 60,
+ 60, 60, 59, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 57, 66, 64,
+ 63, 62, 61, 61, 60, 59, 59, 59, 59, 59, 60, 59, 59, 59, 59, 59, 59, 59,
+ 58, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 63, 63, 62, 61, 60, 60,
+ 59, 59, 58, 58, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 57,
+ 57, 57, 57, 57, 56, 56, 56, 56, 61, 61, 61, 60, 60, 59, 59, 58, 58, 58,
+ 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 57, 57, 57, 57, 57, 56,
+ 56, 56, 56, 56, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58,
+ 58, 58, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56,
+ 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 57,
+ 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 59, 59, 59, 59,
+ 60, 59, 59, 58, 58, 58, 58, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57,
+ 57, 56, 56, 56, 56, 56, 56, 56, 56, 56, 59, 59, 59, 60, 60, 59, 59, 58,
+ 58, 58, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 56, 56, 56, 56, 56, 56,
+ 56, 56, 55, 55, 55, 55, 59, 59, 59, 60, 60, 60, 59, 58, 58, 58, 57, 57,
+ 57, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55,
+ 55, 55, 58, 59, 59, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 57, 56, 56,
+ 56, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 58, 59,
+ 59, 60, 60, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56,
+ 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 58, 59, 59, 59, 60, 59,
+ 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 55, 58, 58, 59, 59, 60, 59, 59, 58, 58, 58,
+ 57, 57, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55,
+ 55, 55, 55, 55, 58, 58, 59, 59, 60, 59, 59, 58, 58, 57, 57, 57, 56, 56,
+ 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54,
+ 58, 58, 58, 59, 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 57, 58, 58, 59,
+ 59, 59, 58, 58, 58, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55,
+ 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 57, 57, 58, 58, 59, 58, 58, 58,
+ 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54,
+ 54, 54, 54, 54, 54, 54, 57, 57, 58, 58, 58, 58, 58, 58, 57, 57, 57, 56,
+ 56, 56, 56, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54,
+ 54, 54, 57, 57, 57, 58, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 55, 55,
+ 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 56, 57,
+ 57, 58, 58, 58, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55,
+ 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 56, 56, 57, 57, 58, 57,
+ 57, 57, 57, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54,
+ 54, 54, 54, 54, 54, 54, 54, 54, 56, 56, 57, 57, 57, 57, 57, 57, 57, 56,
+ 56, 56, 56, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54,
+ 53, 53, 53, 53, 56, 56, 56, 57, 57, 57, 57, 57, 56, 56, 56, 56, 55, 55,
+ 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53,
+ 55, 56, 56, 57, 57, 57, 57, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 54,
+ 54, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53, 53, 55, 56, 56, 56,
+ 57, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54,
+ 54, 54, 54, 53, 53, 53, 53, 53, 53, 53, 55, 56, 56, 56, 57, 56, 56, 56,
+ 56, 56, 56, 55, 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 53,
+ 53, 53, 53, 53, 53, 53, 55, 56, 56, 56, 57, 56, 56, 56, 56, 56, 56, 55,
+ 55, 55, 55, 55, 55, 54, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53,
+ 53, 53, 55, 56, 56, 56, 57, 56, 56, 56, 56, 56, 56, 55, 55, 55, 55, 55,
+ 55, 54, 54, 54, 54, 54, 54, 54, 54, 53, 53, 53, 53, 53, 53, 53 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 74, 67, 66, 63, 67, 65, 64, 63, 66, 64, 62, 61, 63, 63, 61, 60,
+ /* Size 8 */
+ 72, 77, 67, 66, 65, 64, 63, 62, 77, 70, 66, 68, 67, 66, 65, 64, 67, 66,
+ 65, 65, 65, 65, 64, 63, 66, 68, 65, 64, 63, 63, 63, 62, 65, 67, 65, 63,
+ 62, 62, 62, 61, 64, 66, 65, 63, 62, 61, 61, 60, 63, 65, 64, 63, 62, 61,
+ 60, 60, 62, 64, 63, 62, 61, 60, 60, 60,
+ /* Size 16 */
+ 73, 75, 77, 72, 67, 67, 66, 66, 66, 65, 64, 64, 63, 63, 62, 62, 75, 74,
+ 74, 70, 67, 67, 67, 67, 67, 66, 65, 65, 64, 64, 63, 63, 77, 74, 70, 69,
+ 67, 67, 68, 68, 68, 67, 67, 66, 65, 65, 64, 64, 72, 70, 69, 67, 66, 66,
+ 67, 67, 67, 66, 66, 65, 65, 64, 64, 64, 67, 67, 67, 66, 65, 65, 65, 65,
+ 65, 65, 65, 64, 64, 64, 63, 63, 67, 67, 67, 66, 65, 65, 65, 65, 65, 64,
+ 64, 64, 63, 63, 63, 63, 66, 67, 68, 67, 65, 65, 64, 64, 64, 64, 63, 63,
+ 63, 63, 62, 62, 66, 67, 68, 67, 65, 65, 64, 64, 63, 63, 63, 63, 62, 62,
+ 62, 62, 66, 67, 68, 67, 65, 65, 64, 63, 63, 62, 62, 62, 62, 62, 61, 61,
+ 65, 66, 67, 66, 65, 64, 64, 63, 62, 62, 62, 62, 61, 61, 61, 61, 64, 65,
+ 67, 66, 65, 64, 63, 63, 62, 62, 62, 61, 61, 61, 61, 61, 64, 65, 66, 65,
+ 64, 64, 63, 63, 62, 62, 61, 61, 61, 61, 60, 60, 63, 64, 65, 65, 64, 63,
+ 63, 62, 62, 61, 61, 61, 61, 60, 60, 60, 63, 64, 65, 64, 64, 63, 63, 62,
+ 62, 61, 61, 61, 60, 60, 60, 60, 62, 63, 64, 64, 63, 63, 62, 62, 61, 61,
+ 61, 60, 60, 60, 60, 60, 62, 63, 64, 64, 63, 63, 62, 62, 61, 61, 61, 60,
+ 60, 60, 60, 60,
+ /* Size 32 */
+ 73, 74, 75, 76, 77, 75, 72, 70, 67, 67, 67, 67, 66, 66, 66, 66, 66, 65,
+ 65, 65, 65, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 74, 74, 75, 75,
+ 76, 73, 71, 69, 67, 67, 67, 67, 67, 67, 67, 66, 66, 66, 66, 65, 65, 65,
+ 65, 64, 64, 64, 63, 63, 63, 63, 63, 63, 75, 75, 74, 74, 74, 72, 70, 69,
+ 67, 67, 67, 67, 67, 67, 67, 67, 67, 66, 66, 66, 66, 65, 65, 65, 64, 64,
+ 64, 64, 63, 63, 63, 63, 76, 75, 74, 73, 72, 71, 70, 68, 67, 67, 67, 68,
+ 68, 68, 68, 67, 67, 67, 67, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64,
+ 64, 64, 77, 76, 74, 72, 70, 70, 69, 68, 67, 67, 68, 68, 68, 68, 68, 68,
+ 68, 68, 67, 67, 67, 66, 66, 66, 65, 65, 65, 64, 64, 64, 64, 64, 75, 73,
+ 72, 71, 70, 69, 68, 67, 67, 67, 67, 67, 68, 67, 67, 67, 67, 67, 67, 67,
+ 66, 66, 66, 65, 65, 65, 64, 64, 64, 64, 64, 64, 72, 71, 70, 70, 69, 68,
+ 67, 67, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 66, 66, 66, 66, 65, 65,
+ 65, 65, 64, 64, 64, 64, 64, 64, 70, 69, 69, 68, 68, 67, 67, 66, 66, 66,
+ 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64,
+ 64, 64, 64, 64, 67, 67, 67, 67, 67, 67, 66, 66, 65, 65, 65, 65, 66, 66,
+ 66, 66, 66, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63,
+ 67, 67, 67, 67, 67, 67, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 65, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 67, 67, 67, 67,
+ 68, 67, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64,
+ 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 67, 67, 67, 68, 68, 67, 67, 66,
+ 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 66, 67, 67, 68, 68, 68, 67, 66, 66, 65, 65, 65,
+ 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62,
+ 62, 62, 66, 67, 67, 68, 68, 67, 67, 66, 66, 65, 65, 65, 64, 64, 64, 64,
+ 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 66, 67,
+ 67, 68, 68, 67, 67, 66, 66, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63,
+ 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 66, 66, 67, 67, 68, 67,
+ 67, 66, 66, 65, 65, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 66, 66, 67, 67, 68, 67, 67, 66, 66, 65,
+ 65, 64, 64, 64, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 65, 66, 66, 67, 68, 67, 67, 66, 65, 65, 65, 64, 64, 64,
+ 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61,
+ 65, 66, 66, 67, 67, 67, 66, 66, 65, 65, 64, 64, 64, 63, 63, 63, 63, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 65, 65, 66, 66,
+ 67, 67, 66, 66, 65, 65, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62,
+ 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 65, 65, 66, 66, 67, 66, 66, 65,
+ 65, 65, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61,
+ 61, 61, 61, 61, 61, 61, 64, 65, 65, 66, 66, 66, 66, 65, 65, 64, 64, 64,
+ 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+ 61, 61, 64, 65, 65, 65, 66, 66, 65, 65, 65, 64, 64, 64, 63, 63, 63, 62,
+ 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 64, 64,
+ 65, 65, 66, 65, 65, 65, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62,
+ 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 63, 64, 64, 65, 65, 65,
+ 65, 64, 64, 64, 64, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61,
+ 61, 61, 61, 60, 60, 60, 60, 60, 63, 64, 64, 65, 65, 65, 65, 64, 64, 64,
+ 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60,
+ 60, 60, 60, 60, 63, 63, 64, 64, 65, 64, 64, 64, 64, 64, 63, 63, 63, 62,
+ 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60,
+ 63, 63, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 61,
+ 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 62, 63, 63, 64,
+ 64, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61,
+ 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 62, 63, 63, 64, 64, 64, 64, 64,
+ 63, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60,
+ 60, 60, 60, 60, 60, 60, 62, 63, 63, 64, 64, 64, 64, 64, 63, 63, 63, 63,
+ 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60,
+ 60, 60, 62, 63, 63, 64, 64, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62,
+ 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 63, 60, 59, 63, 61, 59, 59, 60, 59, 58, 58, 59, 59, 58, 58,
+ /* Size 8 */
+ 64, 66, 66, 64, 62, 61, 60, 60, 66, 65, 65, 64, 63, 61, 61, 60, 66, 65,
+ 63, 62, 61, 61, 60, 60, 64, 64, 62, 61, 61, 60, 60, 59, 62, 63, 61, 61,
+ 60, 60, 59, 59, 61, 61, 61, 60, 60, 59, 59, 59, 60, 61, 60, 60, 59, 59,
+ 59, 59, 60, 60, 60, 59, 59, 59, 59, 59,
+ /* Size 16 */
+ 64, 65, 66, 66, 66, 65, 64, 63, 62, 61, 61, 60, 60, 60, 60, 60, 65, 65,
+ 66, 66, 66, 65, 64, 63, 62, 62, 61, 61, 60, 60, 60, 60, 66, 66, 65, 65,
+ 65, 65, 64, 63, 63, 62, 61, 61, 61, 60, 60, 60, 66, 66, 65, 65, 64, 64,
+ 63, 63, 62, 62, 61, 61, 60, 60, 60, 60, 66, 66, 65, 64, 63, 63, 62, 62,
+ 61, 61, 61, 60, 60, 60, 60, 60, 65, 65, 65, 64, 63, 62, 62, 61, 61, 61,
+ 60, 60, 60, 60, 60, 60, 64, 64, 64, 63, 62, 62, 61, 61, 61, 60, 60, 60,
+ 60, 60, 59, 59, 63, 63, 63, 63, 62, 61, 61, 61, 60, 60, 60, 60, 60, 59,
+ 59, 59, 62, 62, 63, 62, 61, 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59,
+ 61, 62, 62, 62, 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 61, 61,
+ 61, 61, 61, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 60, 61, 61, 61,
+ 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 60, 60, 61, 60, 60, 60,
+ 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59,
+ /* Size 32 */
+ 64, 65, 65, 66, 66, 66, 66, 66, 66, 65, 65, 64, 64, 63, 63, 62, 62, 62,
+ 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 65, 65, 65, 66,
+ 66, 66, 66, 66, 66, 65, 65, 64, 64, 63, 63, 62, 62, 62, 62, 61, 61, 61,
+ 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 65, 65, 65, 66, 66, 66, 66, 66,
+ 66, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62, 61, 61, 61, 61, 61, 60, 60,
+ 60, 60, 60, 60, 60, 60, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 64,
+ 64, 64, 63, 63, 62, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60,
+ 60, 60, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64, 64, 63, 63,
+ 63, 62, 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 66, 66,
+ 66, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 63, 63, 63, 62, 62, 62, 62,
+ 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 66, 66, 66, 65, 65, 65,
+ 65, 65, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 61, 61, 61, 61, 61,
+ 60, 60, 60, 60, 60, 60, 60, 60, 66, 66, 66, 65, 65, 65, 65, 64, 64, 63,
+ 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 66, 66, 66, 65, 65, 65, 64, 64, 63, 63, 63, 62, 62, 62,
+ 62, 62, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 65, 65, 65, 65, 65, 65, 64, 63, 63, 63, 62, 62, 62, 62, 62, 61, 61, 61,
+ 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 65, 65, 65, 65,
+ 65, 64, 64, 63, 63, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 64, 64, 64, 64, 65, 64, 63, 63,
+ 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 64, 64, 64, 64, 64, 64, 63, 63, 62, 62, 62, 61,
+ 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59,
+ 59, 59, 63, 63, 63, 64, 64, 63, 63, 62, 62, 62, 61, 61, 61, 61, 61, 61,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 63, 63,
+ 63, 63, 63, 63, 63, 62, 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 62, 62, 63, 63, 63, 63,
+ 62, 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 59, 59, 59, 59, 59, 59, 59, 62, 62, 62, 62, 63, 62, 62, 62, 61, 61,
+ 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59,
+ 59, 59, 59, 59, 62, 62, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 61, 62, 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 61, 61, 61, 62,
+ 62, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 61, 61, 61, 61, 61, 61, 61, 61,
+ 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 61, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 60, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60,
+ 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 61, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 69, 68, 65, 63, 68, 65, 64, 63, 65, 64, 62, 62, 63, 63, 62, 62,
+ /* Size 8 */
+ 67, 70, 69, 67, 65, 64, 63, 62, 70, 69, 69, 68, 66, 64, 63, 63, 69, 69,
+ 66, 65, 64, 64, 63, 63, 67, 68, 65, 64, 63, 63, 63, 62, 65, 66, 64, 63,
+ 63, 62, 62, 62, 64, 64, 64, 63, 62, 62, 62, 62, 63, 63, 63, 63, 62, 62,
+ 62, 62, 62, 63, 63, 62, 62, 62, 62, 62,
+ /* Size 16 */
+ 68, 69, 70, 70, 69, 68, 67, 66, 65, 65, 64, 64, 63, 63, 63, 63, 69, 69,
+ 69, 69, 69, 68, 67, 66, 66, 65, 64, 64, 63, 63, 63, 63, 70, 69, 69, 69,
+ 69, 68, 68, 67, 66, 65, 65, 64, 64, 63, 63, 63, 70, 69, 69, 68, 68, 67,
+ 67, 66, 65, 65, 64, 64, 63, 63, 63, 63, 69, 69, 69, 68, 66, 66, 65, 65,
+ 65, 64, 64, 64, 63, 63, 63, 63, 68, 68, 68, 67, 66, 65, 65, 65, 64, 64,
+ 64, 63, 63, 63, 63, 63, 67, 67, 68, 67, 65, 65, 64, 64, 64, 63, 63, 63,
+ 63, 63, 62, 62, 66, 66, 67, 66, 65, 65, 64, 64, 63, 63, 63, 63, 63, 62,
+ 62, 62, 65, 66, 66, 65, 65, 64, 64, 63, 63, 63, 63, 63, 62, 62, 62, 62,
+ 65, 65, 65, 65, 64, 64, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 64, 64,
+ 65, 64, 64, 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 64, 64, 64, 64,
+ 64, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 63, 63, 64, 63, 63, 63,
+ 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62,
+ /* Size 32 */
+ 68, 68, 69, 70, 70, 70, 70, 70, 69, 69, 68, 68, 67, 67, 66, 66, 65, 65,
+ 65, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 68, 69, 69, 69,
+ 70, 70, 70, 69, 69, 69, 68, 68, 67, 67, 66, 66, 65, 65, 65, 65, 64, 64,
+ 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 69, 69, 69, 69, 70, 69, 69, 69,
+ 69, 69, 68, 68, 67, 67, 67, 66, 66, 65, 65, 65, 64, 64, 64, 64, 63, 63,
+ 63, 63, 63, 63, 63, 63, 70, 69, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68,
+ 68, 67, 67, 66, 66, 66, 65, 65, 65, 64, 64, 64, 64, 63, 63, 63, 63, 63,
+ 63, 63, 70, 70, 70, 69, 69, 69, 69, 69, 69, 69, 69, 68, 68, 67, 67, 67,
+ 66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 70, 70,
+ 69, 69, 69, 69, 69, 69, 69, 68, 68, 68, 67, 67, 67, 66, 66, 65, 65, 65,
+ 65, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 70, 70, 69, 69, 69, 69,
+ 68, 68, 68, 68, 67, 67, 67, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64,
+ 64, 63, 63, 63, 63, 63, 63, 63, 70, 69, 69, 69, 69, 69, 68, 68, 67, 67,
+ 67, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63,
+ 63, 63, 63, 63, 69, 69, 69, 69, 69, 69, 68, 67, 67, 66, 66, 66, 66, 65,
+ 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63,
+ 69, 69, 69, 69, 69, 68, 68, 67, 66, 66, 66, 66, 65, 65, 65, 65, 65, 64,
+ 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 68, 68, 68, 68,
+ 69, 68, 67, 67, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64,
+ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 68, 68, 68, 68, 68, 68, 67, 66,
+ 66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 67, 67, 67, 68, 68, 67, 67, 66, 66, 65, 65, 65,
+ 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 67, 67, 67, 67, 67, 67, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64,
+ 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 66, 66,
+ 67, 67, 67, 67, 66, 66, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 66, 66, 66, 66, 67, 66,
+ 66, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 62, 62, 62, 62, 62, 62, 65, 65, 66, 66, 66, 66, 65, 65, 65, 65,
+ 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62,
+ 62, 62, 62, 62, 65, 65, 65, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62,
+ 65, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 64, 65, 65, 65,
+ 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 64, 64, 64, 65, 65, 65, 64, 64,
+ 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 64, 64, 64, 64, 65, 64, 64, 64, 64, 64, 64, 63,
+ 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 64,
+ 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 64, 64, 64,
+ 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 61, 60, 59, 61, 60, 60, 59, 60, 60, 59, 58, 59, 59, 58, 58,
+ /* Size 8 */
+ 64, 66, 61, 61, 61, 60, 60, 59, 66, 63, 61, 62, 62, 61, 61, 60, 61, 61,
+ 61, 61, 61, 61, 60, 60, 61, 62, 61, 60, 60, 60, 60, 59, 61, 62, 61, 60,
+ 60, 59, 59, 59, 60, 61, 61, 60, 59, 59, 59, 59, 60, 61, 60, 60, 59, 59,
+ 59, 59, 59, 60, 60, 59, 59, 59, 59, 58,
+ /* Size 16 */
+ 64, 65, 66, 64, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 59, 59, 65, 65,
+ 64, 63, 61, 61, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60, 66, 64, 63, 62,
+ 61, 62, 62, 62, 62, 62, 61, 61, 61, 60, 60, 60, 64, 63, 62, 62, 61, 61,
+ 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61,
+ 61, 61, 61, 60, 60, 60, 60, 60, 61, 61, 62, 61, 61, 61, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 61, 62, 62, 61, 61, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 59, 59, 61, 61, 62, 61, 61, 60, 60, 60, 60, 60, 60, 60, 59, 59,
+ 59, 59, 61, 61, 62, 61, 61, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59,
+ 61, 61, 62, 61, 61, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 60, 61,
+ 61, 61, 61, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 60, 61, 61, 61,
+ 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 61, 60, 60, 60,
+ 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 59,
+ 59, 59, 59, 59, 59, 59, 58, 58, 59, 60, 60, 60, 60, 60, 59, 59, 59, 59,
+ 59, 59, 59, 58, 58, 58, 59, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+ 59, 58, 58, 58,
+ /* Size 32 */
+ 64, 64, 65, 65, 66, 65, 64, 63, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+ 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 64, 65, 65, 65,
+ 65, 64, 63, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 65, 65, 65, 65, 64, 64, 63, 62,
+ 61, 61, 61, 62, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 65, 65, 65, 64, 64, 63, 63, 62, 61, 61, 62, 62,
+ 62, 62, 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60,
+ 60, 60, 66, 65, 64, 64, 63, 63, 62, 62, 61, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 65, 64,
+ 64, 63, 63, 62, 62, 62, 61, 61, 61, 62, 62, 62, 62, 62, 62, 61, 61, 61,
+ 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 64, 63, 63, 63, 62, 62,
+ 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+ 60, 60, 60, 60, 60, 60, 60, 60, 63, 62, 62, 62, 62, 62, 61, 61, 61, 61,
+ 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+ 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 61, 61, 61, 61, 62, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 62,
+ 62, 61, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 62, 62, 62, 62, 61, 61,
+ 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 59, 59, 59, 59, 61, 61, 62, 62, 62, 62, 61, 61, 61, 61, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59,
+ 59, 59, 61, 61, 61, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 61, 61,
+ 61, 62, 62, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 61, 61, 61, 62, 62, 62,
+ 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 61, 61, 61, 62, 62, 62, 61, 61, 61, 61,
+ 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 61, 61, 61, 61, 62, 61, 61, 61, 61, 61, 60, 60, 60, 60,
+ 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 61, 61, 61, 61, 62, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 61, 61, 61,
+ 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 61, 61, 61, 61, 61, 61, 61,
+ 61, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 60, 60, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60,
+ 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 60, 60, 61, 61, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60,
+ 60, 61, 61, 61, 61, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 61, 61,
+ 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 60, 61, 60, 60, 60, 60, 60,
+ 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 58, 58, 58, 58, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58,
+ 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 59, 60, 60, 60,
+ 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
+ 59, 59, 59, 58, 58, 58, 58, 58, 58, 58, 59, 60, 60, 60, 60, 60, 60, 60,
+ 60, 60, 60, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 58,
+ 58, 58, 58, 58, 58, 58, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58,
+ 58, 58, 59, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 59, 59, 59, 59, 59,
+ 59, 59, 59, 59, 59, 59, 59, 59, 59, 58, 58, 58, 58, 58, 58, 58 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 69, 65, 65, 64, 65, 64, 64, 63, 65, 64, 63, 62, 64, 63, 62, 62,
+ /* Size 8 */
+ 68, 70, 65, 65, 65, 64, 63, 63, 70, 67, 65, 66, 66, 65, 64, 64, 65, 65,
+ 64, 64, 65, 64, 64, 63, 65, 66, 64, 64, 64, 63, 63, 63, 65, 66, 65, 64,
+ 63, 63, 63, 63, 64, 65, 64, 63, 63, 63, 62, 62, 63, 64, 64, 63, 63, 62,
+ 62, 62, 63, 64, 63, 63, 63, 62, 62, 62,
+ /* Size 16 */
+ 68, 69, 70, 68, 65, 65, 65, 65, 65, 64, 64, 64, 64, 63, 63, 63, 69, 69,
+ 69, 67, 65, 65, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 70, 69, 67, 66,
+ 65, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 68, 67, 66, 66, 65, 65,
+ 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 65, 65, 65, 65, 64, 65, 65, 65,
+ 65, 65, 64, 64, 64, 64, 64, 64, 65, 65, 66, 65, 65, 64, 64, 64, 64, 64,
+ 64, 64, 64, 63, 63, 63, 65, 66, 66, 65, 65, 64, 64, 64, 64, 64, 64, 63,
+ 63, 63, 63, 63, 65, 65, 66, 65, 65, 64, 64, 64, 64, 63, 63, 63, 63, 63,
+ 63, 63, 65, 65, 66, 65, 65, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63,
+ 64, 65, 65, 65, 65, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 65,
+ 65, 65, 64, 64, 64, 63, 63, 63, 63, 63, 63, 62, 62, 62, 64, 64, 65, 65,
+ 64, 64, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 64, 64, 65, 64, 64, 64,
+ 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 63, 64, 64, 64, 64, 63, 63, 63,
+ 63, 63, 62, 62, 62, 62, 62, 62, 63, 64, 64, 64, 64, 63, 63, 63, 63, 63,
+ 62, 62, 62, 62, 62, 62, 63, 64, 64, 64, 64, 63, 63, 63, 63, 63, 62, 62,
+ 62, 62, 62, 62,
+ /* Size 32 */
+ 68, 69, 69, 70, 71, 69, 68, 67, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 69, 69, 69, 69,
+ 70, 69, 68, 67, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64, 64,
+ 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 69, 69, 69, 69, 69, 68, 67, 66,
+ 65, 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 70, 69, 69, 68, 68, 67, 67, 66, 65, 66, 66, 66,
+ 66, 66, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64,
+ 64, 64, 71, 70, 69, 68, 67, 67, 66, 66, 65, 66, 66, 66, 66, 66, 66, 66,
+ 66, 66, 66, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 69, 69,
+ 68, 67, 67, 66, 66, 66, 65, 65, 65, 66, 66, 66, 66, 66, 66, 65, 65, 65,
+ 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 68, 68, 67, 67, 66, 66,
+ 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 67, 67, 66, 66, 66, 66, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
+ 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 65, 65, 66, 66, 66, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 66, 66,
+ 66, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 65, 65, 66, 66, 66, 66, 65, 65,
+ 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 63, 63, 63, 63, 63, 63, 65, 65, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63,
+ 63, 63, 65, 65, 66, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 65, 65,
+ 65, 66, 66, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 65, 65, 65, 66, 66, 66,
+ 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, 65, 65, 65, 66, 66, 66, 65, 65, 65, 65,
+ 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 65, 65, 65, 65, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64,
+ 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 65, 65, 65, 65, 66, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64, 65, 65, 65,
+ 65, 65, 65, 65, 65, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 64, 64, 65, 65, 65, 65, 65, 65,
+ 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 62, 62, 62, 62, 62, 64, 64, 65, 65, 65, 65, 65, 65, 64, 64, 64, 64,
+ 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62,
+ 62, 62, 64, 64, 64, 65, 65, 65, 65, 64, 64, 64, 64, 64, 64, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 64, 64,
+ 64, 65, 65, 65, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 64, 64, 64, 64, 65, 64,
+ 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 64, 64,
+ 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 64, 64, 64, 64, 64, 64,
+ 64, 64, 63, 63, 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 62, 62, 62, 62, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
+ 62, 62, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62 } } },
+ { { /* Luma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 8 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 16 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64,
+ /* Size 32 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 8 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 16 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64,
+ /* Size 32 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 } },
+ { /* Chroma matrices */
+ { /* Inter matrices */
+ /* Size 4 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 8 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 16 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64,
+ /* Size 32 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
+ { /* Intra matrices */
+ /* Size 4 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 8 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ /* Size 16 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64,
+ /* Size 32 */
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 } } }
+};
+
+#endif
diff --git a/av1/common/quant_common.h b/av1/common/quant_common.h
new file mode 100644
index 0000000..6ceed49
--- /dev/null
+++ b/av1/common/quant_common.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_QUANT_COMMON_H_
+#define VP10_COMMON_QUANT_COMMON_H_
+
+#include "aom/vpx_codec.h"
+#include "av1/common/seg_common.h"
+#include "av1/common/enums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MINQ 0
+#define MAXQ 255
+#define QINDEX_RANGE (MAXQ - MINQ + 1)
+#define QINDEX_BITS 8
+#if CONFIG_AOM_QM
+// Total number of QM sets stored
+#define QM_LEVEL_BITS 4
+#define NUM_QM_LEVELS (1 << QM_LEVEL_BITS)
+/* Offset into the list of QMs. With the defaults below, the number of levels
+   used is (DEFAULT_QM_LAST + 1 - DEFAULT_QM_FIRST) = NUM_QM_LEVELS / 2.
+   A lower first level presumably implies more heavily weighted matrices.*/
+#define DEFAULT_QM_FIRST (NUM_QM_LEVELS / 2)
+#define DEFAULT_QM_LAST (NUM_QM_LEVELS - 1)
+#endif
+
+struct VP10Common;
+
+int16_t vp10_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
+int16_t vp10_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
+
+int vp10_get_qindex(const struct segmentation *seg, int segment_id,
+ int base_qindex);
+#if CONFIG_AOM_QM
+// Reduce the large number of quantizers to a smaller number of QM levels:
+// scale qindex to (last + 1 - first) steps, rounded, add first, then cap.
+static INLINE int aom_get_qmlevel(int qindex, int first, int last) {
+  int qmlevel = (qindex * (last + 1 - first) + QINDEX_RANGE / 2) / QINDEX_RANGE;
+  qmlevel = VPXMIN(qmlevel + first, NUM_QM_LEVELS - 1);
+  return qmlevel;
+}
+void aom_qm_init(struct VP10Common *cm);
+qm_val_t *aom_iqmatrix(struct VP10Common *cm, int qindex, int comp,
+ int log2sizem2, int is_intra);
+qm_val_t *aom_qmatrix(struct VP10Common *cm, int qindex, int comp,
+ int log2sizem2, int is_intra);
+#endif
+
+#if CONFIG_NEW_QUANT
+
+#define QUANT_PROFILES 3
+#define QUANT_RANGES 2
+#define NUQ_KNOTS 3
+
+typedef tran_low_t dequant_val_type_nuq[NUQ_KNOTS + 1];
+typedef tran_low_t cuml_bins_type_nuq[NUQ_KNOTS];
+void vp10_get_dequant_val_nuq(int q, int qindex, int band, tran_low_t *dq,
+ tran_low_t *cuml_bins, int dq_off_index);
+tran_low_t vp10_dequant_abscoeff_nuq(int v, int q, const tran_low_t *dq);
+tran_low_t vp10_dequant_coeff_nuq(int v, int q, const tran_low_t *dq);
+
+static INLINE int get_dq_profile_from_ctx(int q_ctx) {
+ return VPXMIN(q_ctx, QUANT_PROFILES - 1);
+}
+#endif // CONFIG_NEW_QUANT
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_QUANT_COMMON_H_
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
new file mode 100644
index 0000000..0c3b93a
--- /dev/null
+++ b/av1/common/reconinter.c
@@ -0,0 +1,2044 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vpx_scale_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "./vpx_config.h"
+
+#include "aom/vpx_integer.h"
+#include "aom_dsp/blend.h"
+
+#include "av1/common/blockd.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/reconintra.h"
+#if CONFIG_OBMC
+#include "av1/common/onyxc_int.h"
+#endif // CONFIG_OBMC
+#if CONFIG_GLOBAL_MOTION
+#include "av1/common/warped_motion.h"
+#endif // CONFIG_GLOBAL_MOTION
+
+#if CONFIG_EXT_INTER
+
+#define NSMOOTHERS 1
+static int get_masked_weight(int m, int smoothness) {
+#define SMOOTHER_LEN 32
+  static const uint8_t smoothfn[NSMOOTHERS][2 * SMOOTHER_LEN + 1] = { {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 1, 2, 4, 7, 13, 21, 32, 43, 51, 57, 60, 62, 63, 64, 64, 64, 64, 64, 64,
+      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+      64, 64,
+  } };
+  // Outside the tabulated ramp the weight saturates at 0 or at full weight.
+  if (m < -SMOOTHER_LEN) return 0;
+  if (m > SMOOTHER_LEN) return (1 << WEDGE_WEIGHT_BITS);
+
+  // Within [-SMOOTHER_LEN, SMOOTHER_LEN] read the ramp of this smoother.
+  return smoothfn[smoothness][m + SMOOTHER_LEN];
+}
+
+// [smoother][negative][direction]
+DECLARE_ALIGNED(
+ 16, static uint8_t,
+ wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE *
+ MASK_MASTER_SIZE]);
+
+DECLARE_ALIGNED(16, static uint8_t,
+ wedge_signflip_lookup[BLOCK_SIZES][MAX_WEDGE_TYPES]);
+
+// 3 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
+// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
+DECLARE_ALIGNED(16, static uint8_t,
+ wedge_mask_buf[2 * MAX_WEDGE_TYPES * 3 * MAX_WEDGE_SQUARE]);
+
+static wedge_masks_type wedge_masks[BLOCK_SIZES][2];
+
+// Some unused wedge codebooks left temporarily to facilitate experiments.
+// To be removed when settled.
+static const wedge_code_type wedge_codebook_8_hgtw[8] = {
+  { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
+  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+  { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
+  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
+};
+
+static const wedge_code_type wedge_codebook_8_hltw[8] = {
+  { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
+  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+  { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
+  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
+};
+
+static const wedge_code_type wedge_codebook_8_heqw[8] = {
+  { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
+  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
+  { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
+};
+
+#if !USE_LARGE_WEDGE_CODEBOOK
+static const wedge_code_type wedge_codebook_16_hgtw[16] = {
+ { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
+ { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+ { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
+ { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
+ { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
+ { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
+ { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
+ { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
+};
+
+static const wedge_code_type wedge_codebook_16_hltw[16] = {
+ { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
+ { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+ { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 },
+ { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
+ { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
+ { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
+ { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
+ { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
+};
+
+static const wedge_code_type wedge_codebook_16_heqw[16] = {
+ { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
+ { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+ { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
+ { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
+ { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
+ { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
+ { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
+ { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
+};
+
+const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
+ { 0, NULL, NULL, 0, NULL },
+ { 0, NULL, NULL, 0, NULL },
+ { 0, NULL, NULL, 0, NULL },
+ { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[3], 0, wedge_masks[3] },
+ { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[4], 0, wedge_masks[4] },
+ { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[5], 0, wedge_masks[5] },
+ { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[6], 0, wedge_masks[6] },
+ { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[7], 0, wedge_masks[7] },
+ { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[8], 0, wedge_masks[8] },
+ { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[9], 0, wedge_masks[9] },
+ { 0, wedge_codebook_8_hgtw, wedge_signflip_lookup[10], 0, wedge_masks[10] },
+ { 0, wedge_codebook_8_hltw, wedge_signflip_lookup[11], 0, wedge_masks[11] },
+ { 0, wedge_codebook_8_heqw, wedge_signflip_lookup[12], 0, wedge_masks[12] },
+#if CONFIG_EXT_PARTITION
+ { 0, NULL, NULL, 0, NULL },
+ { 0, NULL, NULL, 0, NULL },
+ { 0, NULL, NULL, 0, NULL },
+#endif // CONFIG_EXT_PARTITION
+};
+
+#else
+
+static const wedge_code_type wedge_codebook_32_hgtw[32] = {
+ { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
+ { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+ { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
+ { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
+ { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 },
+ { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 },
+ { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 },
+ { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
+ { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
+ { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
+ { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 },
+ { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 },
+ { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 },
+ { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
+ { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
+ { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
+};
+
+static const wedge_code_type wedge_codebook_32_hltw[32] = {
+ { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
+ { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+ { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 },
+ { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
+ { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 },
+ { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 },
+ { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 },
+ { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
+ { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
+ { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
+ { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 },
+ { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 },
+ { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 },
+ { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
+ { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
+ { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
+};
+
+static const wedge_code_type wedge_codebook_32_heqw[32] = {
+ { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
+ { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+ { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
+ { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
+ { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 },
+ { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 },
+ { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 },
+ { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
+ { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
+ { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
+ { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 },
+ { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 },
+ { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 },
+ { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
+ { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
+ { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
+};
+
+const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
+ { 0, NULL, NULL, 0, NULL },
+ { 0, NULL, NULL, 0, NULL },
+ { 0, NULL, NULL, 0, NULL },
+ { 5, wedge_codebook_32_heqw, wedge_signflip_lookup[3], 0, wedge_masks[3] },
+ { 5, wedge_codebook_32_hgtw, wedge_signflip_lookup[4], 0, wedge_masks[4] },
+ { 5, wedge_codebook_32_hltw, wedge_signflip_lookup[5], 0, wedge_masks[5] },
+ { 5, wedge_codebook_32_heqw, wedge_signflip_lookup[6], 0, wedge_masks[6] },
+ { 5, wedge_codebook_32_hgtw, wedge_signflip_lookup[7], 0, wedge_masks[7] },
+ { 5, wedge_codebook_32_hltw, wedge_signflip_lookup[8], 0, wedge_masks[8] },
+ { 5, wedge_codebook_32_heqw, wedge_signflip_lookup[9], 0, wedge_masks[9] },
+ { 0, wedge_codebook_8_hgtw, wedge_signflip_lookup[10], 0, wedge_masks[10] },
+ { 0, wedge_codebook_8_hltw, wedge_signflip_lookup[11], 0, wedge_masks[11] },
+ { 0, wedge_codebook_8_heqw, wedge_signflip_lookup[12], 0, wedge_masks[12] },
+#if CONFIG_EXT_PARTITION
+ { 0, NULL, NULL, 0, NULL },
+ { 0, NULL, NULL, 0, NULL },
+ { 0, NULL, NULL, 0, NULL },
+#endif // CONFIG_EXT_PARTITION
+};
+#endif // USE_LARGE_WEDGE_CODEBOOK
+
+static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
+                                             BLOCK_SIZE sb_type) {
+  // Wedge parameters (codebook, sign flips, smoother) for this block size.
+  const wedge_params_type *const params = &wedge_params_lookup[sb_type];
+  const wedge_code_type *const code = params->codebook + wedge_index;
+  const uint8_t flip = params->signflip[wedge_index];
+  const int block_h = 4 << b_height_log2_lookup[sb_type];
+  const int block_w = 4 << b_width_log2_lookup[sb_type];
+  // Codebook offsets are expressed in eighths of the block dimensions.
+  const int col_off = (code->x_offset * block_w) >> 3;
+  const int row_off = (code->y_offset * block_h) >> 3;
+
+  assert(wedge_index >= 0 &&
+         wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
+  // Address inside the master mask: start at its centre and step back by
+  // the wedge offset so the block's origin lines up with the mask.
+  return wedge_mask_obl[params->smoother][neg ^ flip][code->direction] +
+         MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - row_off) +
+         MASK_MASTER_SIZE / 2 - col_off;
+}
+
+const uint8_t *vp10_get_soft_mask(int wedge_index, int wedge_sign,
+                                  BLOCK_SIZE sb_type, int offset_x,
+                                  int offset_y) {
+  const uint8_t *const base =
+      get_wedge_mask_inplace(wedge_index, wedge_sign, sb_type);
+  // Step the origin back by the requested offset; a NULL mask stays NULL.
+  return base ? base - (offset_x + offset_y * MASK_MASTER_STRIDE) : base;
+}
+
+// Fills the master masks; mirrored/transposed masks reuse one weight lookup.
+static void init_wedge_master_masks(void) {
+  int i, j, s;
+  const int w = MASK_MASTER_SIZE;
+  const int h = MASK_MASTER_SIZE;
+  const int stride = MASK_MASTER_STRIDE;
+  const int max_wt = 1 << WEDGE_WEIGHT_BITS;
+  const int a[2] = { 2, 1 };
+  const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
+  for (s = 0; s < NSMOOTHERS; s++) {
+    for (i = 0; i < h; ++i)
+      for (j = 0; j < w; ++j) {
+        const int x = (2 * j + 1 - w);
+        const int y = (2 * i + 1 - h);
+        const int m = (int)rint((a[0] * x + a[1] * y) / asqrt);
+        const int obl = get_masked_weight(m, s);
+        const int ver = get_masked_weight(x, s);
+        wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] =
+            wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] = obl;
+        wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
+            wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
+                max_wt - obl;
+        wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] =
+            wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] = max_wt - obl;
+        wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
+            wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
+                obl;
+        wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] =
+            wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] = ver;
+        wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] =
+            wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] = max_wt - ver;
+      }
+  }
+}
+
+// If the signs for the wedges for various blocksizes are inconsistent, flip
+// the sign flag: a wedge is flipped when the rounded mean of its mask over
+// the block's top row and left column is below 32.
+static void init_wedge_signs(void) {
+  BLOCK_SIZE sb_type;
+  memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
+  for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES; ++sb_type) {
+    const int bw = 4 * num_4x4_blocks_wide_lookup[sb_type];
+    const int bh = 4 * num_4x4_blocks_high_lookup[sb_type];
+    const wedge_params_type *wedge_params = &wedge_params_lookup[sb_type];
+    const int wbits = wedge_params->bits;
+    const int wtypes = 1 << wbits;
+    int i, w;
+    if (wbits == 0) continue;
+    for (w = 0; w < wtypes; ++w) {
+      const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
+      int sum = 0;
+      for (i = 0; i < bw; ++i) sum += mask[i];
+      for (i = 0; i < bh; ++i) sum += mask[i * MASK_MASTER_STRIDE];
+      sum = (sum + (bw + bh) / 2) / (bw + bh);
+      wedge_params->signflip[w] = (sum < 32);
+    }
+  }
+}
+
+// Copies each wedge mask (both signs) out of the master masks into
+// wedge_mask_buf as packed bw x bh blocks and records where each one starts.
+static void init_wedge_masks(void) {
+  uint8_t *dst = wedge_mask_buf;
+  BLOCK_SIZE bsize;
+  memset(wedge_masks, 0, sizeof(wedge_masks));
+  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES; ++bsize) {
+    const int bw = 4 * num_4x4_blocks_wide_lookup[bsize];
+    const int bh = 4 * num_4x4_blocks_high_lookup[bsize];
+    const wedge_params_type *wedge_params = &wedge_params_lookup[bsize];
+    const int wbits = wedge_params->bits;
+    const int wtypes = 1 << wbits;
+    int w, sign;
+    // Block sizes with zero wedge bits get no packed masks.
+    if (wbits == 0) continue;
+    for (w = 0; w < wtypes; ++w) {
+      for (sign = 0; sign < 2; ++sign) {
+        const uint8_t *mask = get_wedge_mask_inplace(w, sign, bsize);
+        // Source rows are MASK_MASTER_STRIDE apart; destination rows are bw.
+        vpx_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0,
+                          bw, bh);
+        wedge_params->masks[sign][w] = dst;
+        dst += bw * bh;
+      }
+    }
+    // Everything written so far must fit inside wedge_mask_buf.
+    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
+  }
+}
+
+// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
+void vp10_init_wedge_masks() {
+ init_wedge_master_masks();
+ init_wedge_signs();
+ init_wedge_masks();
+}
+
+#if CONFIG_SUPERTX
+static void build_masked_compound_wedge_extend(
+ uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
+ const uint8_t *src1, int src1_stride, int wedge_index, int wedge_sign,
+ BLOCK_SIZE sb_type, int wedge_offset_x, int wedge_offset_y, int h, int w) {
+ const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
+ const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
+ const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign, sb_type,
+ wedge_offset_x, wedge_offset_y);
+ vpx_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
+ mask, MASK_MASTER_STRIDE, h, w, subh, subw);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void build_masked_compound_wedge_extend_highbd(
+ uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
+ const uint8_t *src1_8, int src1_stride, int wedge_index, int wedge_sign,
+ BLOCK_SIZE sb_type, int wedge_offset_x, int wedge_offset_y, int h, int w,
+ int bd) {
+ const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
+ const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
+ const uint8_t *mask = vp10_get_soft_mask(wedge_index, wedge_sign, sb_type,
+ wedge_offset_x, wedge_offset_y);
+ vpx_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
+ src1_stride, mask, MASK_MASTER_STRIDE, h, w, subh,
+ subw, bd);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_SUPERTX
+
+static void build_masked_compound_wedge(uint8_t *dst, int dst_stride,
+ const uint8_t *src0, int src0_stride,
+ const uint8_t *src1, int src1_stride,
+ int wedge_index, int wedge_sign,
+ BLOCK_SIZE sb_type, int h, int w) {
+ // Derive subsampling from h and w passed in. May be refactored to
+ // pass in subsampling factors directly.
+ const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
+ const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
+ const uint8_t *mask =
+ vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
+ vpx_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
+ mask, 4 * num_4x4_blocks_wide_lookup[sb_type], h, w, subh,
+ subw);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void build_masked_compound_wedge_highbd(
+ uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
+ const uint8_t *src1_8, int src1_stride, int wedge_index, int wedge_sign,
+ BLOCK_SIZE sb_type, int h, int w, int bd) {
+ // Derive subsampling from h and w passed in. May be refactored to
+ // pass in subsampling factors directly.
+ const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
+ const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
+ const uint8_t *mask =
+ vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
+ vpx_highbd_blend_a64_mask(
+ dst_8, dst_stride, src0_8, src0_stride, src1_8, src1_stride, mask,
+ 4 * num_4x4_blocks_wide_lookup[sb_type], h, w, subh, subw, bd);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+void vp10_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
+ uint8_t *dst, int dst_stride,
+ const int subpel_x, const int subpel_y,
+ const struct scale_factors *sf, int w,
+ int h,
+#if CONFIG_DUAL_FILTER
+ const INTERP_FILTER *interp_filter,
+#else
+ const INTERP_FILTER interp_filter,
+#endif
+ int xs, int ys,
+#if CONFIG_SUPERTX
+ int wedge_offset_x, int wedge_offset_y,
+#endif // CONFIG_SUPERTX
+ const MACROBLOCKD *xd) {
+ const MODE_INFO *mi = xd->mi[0];
+// The prediction filter types used here should be those for
+// the second reference block.
+#if CONFIG_DUAL_FILTER
+ INTERP_FILTER tmp_ipf[4] = {
+ interp_filter[2], interp_filter[3], interp_filter[2], interp_filter[3],
+ };
+#else
+ INTERP_FILTER tmp_ipf = interp_filter;
+#endif // CONFIG_DUAL_FILTER
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
+ uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ ? CONVERT_TO_BYTEPTR(tmp_dst_)
+ : tmp_dst_;
+ vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
+ subpel_y, sf, w, h, 0, tmp_ipf, xs, ys, xd);
+#if CONFIG_SUPERTX
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ build_masked_compound_wedge_extend_highbd(
+ dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+ mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_sign,
+ mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd);
+ else
+ build_masked_compound_wedge_extend(
+ dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+ mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_sign,
+ mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w);
+#else
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ build_masked_compound_wedge_highbd(
+ dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+ mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_sign,
+ mi->mbmi.sb_type, h, w, xd->bd);
+ else
+ build_masked_compound_wedge(dst, dst_stride, dst, dst_stride, tmp_dst,
+ MAX_SB_SIZE, mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign,
+ mi->mbmi.sb_type, h, w);
+#endif // CONFIG_SUPERTX
+#else // CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]);
+ vp10_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
+ subpel_y, sf, w, h, 0, tmp_ipf, xs, ys, xd);
+#if CONFIG_SUPERTX
+ build_masked_compound_wedge_extend(
+ dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
+ mi->mbmi.interinter_wedge_index, mi->mbmi.interinter_wedge_sign,
+ mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w);
+#else
+ build_masked_compound_wedge(dst, dst_stride, dst, dst_stride, tmp_dst,
+ MAX_SB_SIZE, mi->mbmi.interinter_wedge_index,
+ mi->mbmi.interinter_wedge_sign, mi->mbmi.sb_type,
+ h, w);
+#endif // CONFIG_SUPERTX
+#endif // CONFIG_VP9_HIGHBITDEPTH
+}
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_build_inter_predictor(
+ const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
+ const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref,
+#if CONFIG_DUAL_FILTER
+ const INTERP_FILTER *interp_filter,
+#else
+ const INTERP_FILTER interp_filter,
+#endif
+ enum mv_precision precision, int x, int y, int bd) {
+ const int is_q4 = precision == MV_PRECISION_Q4;
+ const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
+ is_q4 ? src_mv->col : src_mv->col * 2 };
+ MV32 mv = vp10_scale_mv(&mv_q4, x, y, sf);
+ const int subpel_x = mv.col & SUBPEL_MASK;
+ const int subpel_y = mv.row & SUBPEL_MASK;
+
+ src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
+
+ highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
+ sf, w, h, ref, interp_filter, sf->x_step_q4,
+ sf->y_step_q4, bd);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+void vp10_build_inter_predictor(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride, const MV *src_mv,
+ const struct scale_factors *sf, int w, int h,
+ int ref,
+#if CONFIG_DUAL_FILTER
+ const INTERP_FILTER *interp_filter,
+#else
+ const INTERP_FILTER interp_filter,
+#endif
+ enum mv_precision precision, int x, int y) {
+ const int is_q4 = precision == MV_PRECISION_Q4;
+ const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
+ is_q4 ? src_mv->col : src_mv->col * 2 };
+ MV32 mv = vp10_scale_mv(&mv_q4, x, y, sf);
+ const int subpel_x = mv.col & SUBPEL_MASK;
+ const int subpel_y = mv.row & SUBPEL_MASK;
+
+ src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
+
+ inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
+ h, ref, interp_filter, sf->x_step_q4, sf->y_step_q4);
+}
+
+void build_inter_predictors(MACROBLOCKD *xd, int plane,
+#if CONFIG_OBMC
+ int mi_col_offset, int mi_row_offset,
+#endif // CONFIG_OBMC
+ int block, int bw, int bh, int x, int y, int w,
+ int h,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ int wedge_offset_x, int wedge_offset_y,
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+ int mi_x, int mi_y) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+#if CONFIG_OBMC
+ const MODE_INFO *mi = xd->mi[mi_col_offset + xd->mi_stride * mi_row_offset];
+#else
+ const MODE_INFO *mi = xd->mi[0];
+#endif // CONFIG_OBMC
+ const int is_compound = has_second_ref(&mi->mbmi);
+ int ref;
+#if CONFIG_GLOBAL_MOTION
+ Global_Motion_Params *gm[2];
+ int is_global[2];
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ gm[ref] = &xd->global_motion[mi->mbmi.ref_frame[ref]];
+ is_global[ref] =
+ (get_y_mode(mi, block) == ZEROMV && get_gmtype(gm[ref]) > GLOBAL_ZERO);
+ }
+ // TODO(sarahparker) remove these once gm works with all experiments
+ (void)gm;
+ (void)is_global;
+#endif // CONFIG_GLOBAL_MOTION
+
+// TODO(sarahparker) enable the use of DUAL_FILTER in warped motion functions
+// in order to allow GLOBAL_MOTION and DUAL_FILTER to work together
+#if CONFIG_DUAL_FILTER
+ if (mi->mbmi.sb_type < BLOCK_8X8 && plane > 0) {
+ // block size in log2
+ const int b4_wl = b_width_log2_lookup[mi->mbmi.sb_type];
+ const int b4_hl = b_height_log2_lookup[mi->mbmi.sb_type];
+ const int b8_sl = b_width_log2_lookup[BLOCK_8X8];
+
+ // block size
+ const int b4_w = 1 << b4_wl;
+ const int b4_h = 1 << b4_hl;
+ const int b8_s = 1 << b8_sl;
+ int idx, idy;
+
+ const int x_base = x;
+ const int y_base = y;
+
+ // processing unit size
+ const int x_step = w >> (b8_sl - b4_wl);
+ const int y_step = h >> (b8_sl - b4_hl);
+
+ for (idy = 0; idy < b8_s; idy += b4_h) {
+ for (idx = 0; idx < b8_s; idx += b4_w) {
+ const int chr_idx = (idy * 2) + idx;
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+ struct buf_2d *const pre_buf = &pd->pre[ref];
+ struct buf_2d *const dst_buf = &pd->dst;
+ uint8_t *dst = dst_buf->buf;
+ const MV mv = mi->bmi[chr_idx].as_mv[ref].as_mv;
+ const MV mv_q4 = clamp_mv_to_umv_border_sb(
+ xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
+ uint8_t *pre;
+ MV32 scaled_mv;
+ int xs, ys, subpel_x, subpel_y;
+ const int is_scaled = vp10_is_scaled(sf);
+
+ x = x_base + idx * x_step;
+ y = y_base + idy * y_step;
+
+ dst += dst_buf->stride * y + x;
+
+ if (is_scaled) {
+ pre =
+ pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
+ scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+ xs = sf->x_step_q4;
+ ys = sf->y_step_q4;
+ } else {
+ pre = pre_buf->buf + y * pre_buf->stride + x;
+ scaled_mv.row = mv_q4.row;
+ scaled_mv.col = mv_q4.col;
+ xs = ys = 16;
+ }
+
+ subpel_x = scaled_mv.col & SUBPEL_MASK;
+ subpel_y = scaled_mv.row & SUBPEL_MASK;
+ pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
+ (scaled_mv.col >> SUBPEL_BITS);
+
+#if CONFIG_EXT_INTER
+ if (ref && is_interinter_wedge_used(mi->mbmi.sb_type) &&
+ mi->mbmi.use_wedge_interinter)
+ vp10_make_masked_inter_predictor(
+ pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
+ sf, w, h, mi->mbmi.interp_filter, xs, ys,
+#if CONFIG_SUPERTX
+ wedge_offset_x, wedge_offset_y,
+#endif // CONFIG_SUPERTX
+ xd);
+ else
+#endif // CONFIG_EXT_INTER
+ vp10_make_inter_predictor(
+ pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
+ sf, x_step, y_step, ref, mi->mbmi.interp_filter, xs, ys, xd);
+ }
+ }
+ }
+ return;
+ }
+#endif
+
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+ struct buf_2d *const pre_buf = &pd->pre[ref];
+ struct buf_2d *const dst_buf = &pd->dst;
+ uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
+ const MV mv = mi->mbmi.sb_type < BLOCK_8X8
+ ? average_split_mvs(pd, mi, ref, block)
+ : mi->mbmi.mv[ref].as_mv;
+
+ // TODO(jkoleszar): This clamping is done in the incorrect place for the
+ // scaling case. It needs to be done on the scaled MV, not the pre-scaling
+ // MV. Note however that it performs the subsampling aware scaling so
+ // that the result is always q4.
+ // mv_precision precision is MV_PRECISION_Q4.
+ const MV mv_q4 = clamp_mv_to_umv_border_sb(
+ xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
+
+ uint8_t *pre;
+ MV32 scaled_mv;
+ int xs, ys, subpel_x, subpel_y;
+ const int is_scaled = vp10_is_scaled(sf);
+
+ if (is_scaled) {
+ pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
+ scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+ xs = sf->x_step_q4;
+ ys = sf->y_step_q4;
+ } else {
+ pre = pre_buf->buf + (y * pre_buf->stride + x);
+ scaled_mv.row = mv_q4.row;
+ scaled_mv.col = mv_q4.col;
+ xs = ys = 16;
+ }
+
+ subpel_x = scaled_mv.col & SUBPEL_MASK;
+ subpel_y = scaled_mv.row & SUBPEL_MASK;
+ pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
+ (scaled_mv.col >> SUBPEL_BITS);
+
+#if CONFIG_EXT_INTER
+ if (ref && is_interinter_wedge_used(mi->mbmi.sb_type) &&
+ mi->mbmi.use_wedge_interinter)
+ vp10_make_masked_inter_predictor(pre, pre_buf->stride, dst,
+ dst_buf->stride, subpel_x, subpel_y, sf,
+ w, h, mi->mbmi.interp_filter, xs, ys,
+#if CONFIG_SUPERTX
+ wedge_offset_x, wedge_offset_y,
+#endif // CONFIG_SUPERTX
+ xd);
+ else
+#else // CONFIG_EXT_INTER
+#if CONFIG_GLOBAL_MOTION
+ if (is_global[ref])
+ vp10_warp_plane(&(gm[ref]->motion_params),
+#if CONFIG_VP9_HIGHBITDEPTH
+ xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ pre_buf->buf0, pre_buf->width, pre_buf->height,
+ pre_buf->stride, dst, (mi_x >> pd->subsampling_x) + x,
+ (mi_y >> pd->subsampling_y) + y, w, h, dst_buf->stride,
+ pd->subsampling_x, pd->subsampling_y, xs, ys);
+ else
+#endif // CONFIG_GLOBAL_MOTION
+#endif // CONFIG_EXT_INTER
+ vp10_make_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
+ subpel_x, subpel_y, sf, w, h, ref,
+ mi->mbmi.interp_filter, xs, ys, xd);
+ }
+}
+
+void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, int i,  // Inter-predicts into `plane` using the motion vector(s) stored for sub-block i (mi->bmi[i]).
+ int ir, int ic, int mi_row, int mi_col) {  // ir/ic: row/column of the destination, in units of 4 samples.
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ MODE_INFO *const mi = xd->mi[0];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
+ const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];  // size of the predicted area, in samples
+ const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
+
+ uint8_t *const dst = &pd->dst.buf[(ir * pd->dst.stride + ic) << 2];  // 4 * ir rows down, 4 * ic samples right
+ int ref;
+ const int is_compound = has_second_ref(&mi->mbmi);
+
+ for (ref = 0; ref < 1 + is_compound; ++ref) {  // one pass per reference: 1, or 2 for compound
+ const uint8_t *pre =
+ &pd->pre[ref].buf[(ir * pd->pre[ref].stride + ic) << 2];  // same offset rule inside reference `ref`
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {  // frame flagged high bitdepth: highbd predictor, extra xd->bd argument
+ vp10_highbd_build_inter_predictor(
+ pre, pd->pre[ref].stride, dst, pd->dst.stride,
+ &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height,
+ ref, mi->mbmi.interp_filter, MV_PRECISION_Q3,
+ mi_col * MI_SIZE + 4 * ic, mi_row * MI_SIZE + 4 * ir, xd->bd);  // x, y = block origin plus the 4-sample offsets
+ } else {
+ vp10_build_inter_predictor(
+ pre, pd->pre[ref].stride, dst, pd->dst.stride,
+ &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height,
+ ref, mi->mbmi.interp_filter, MV_PRECISION_Q3,
+ mi_col * MI_SIZE + 4 * ic, mi_row * MI_SIZE + 4 * ir);
+ }
+#else
+ vp10_build_inter_predictor(  // high bitdepth compiled out: always the regular predictor
+ pre, pd->pre[ref].stride, dst, pd->dst.stride,
+ &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height,
+ ref, mi->mbmi.interp_filter, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic,
+ mi_row * MI_SIZE + 4 * ir);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+}
+
+static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize,  // Calls build_inter_predictors() for planes plane_from..plane_to (inclusive).
+ int mi_row, int mi_col,
+ int plane_from, int plane_to) {
+ int plane;
+ const int mi_x = mi_col * MI_SIZE;  // block position scaled by MI_SIZE
+ const int mi_y = mi_row * MI_SIZE;
+ for (plane = plane_from; plane <= plane_to; ++plane) {
+ const struct macroblockd_plane *pd = &xd->plane[plane];
+ const int bw = 4 * num_4x4_blocks_wide_lookup[bsize] >> pd->subsampling_x;  // block size in this plane, after subsampling
+ const int bh = 4 * num_4x4_blocks_high_lookup[bsize] >> pd->subsampling_y;
+
+ if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {  // sub8x8 block: one call per sub-block
+ const PARTITION_TYPE bp = bsize - xd->mi[0]->mbmi.sb_type;  // enum difference read as a partition type; bsize is asserted to be BLOCK_8X8 below
+ const int have_vsplit = bp != PARTITION_HORZ;
+ const int have_hsplit = bp != PARTITION_VERT;
+ const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);  // sub-blocks per row in this plane
+ const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);  // sub-blocks per column in this plane
+ const int pw = 8 >> (have_vsplit | pd->subsampling_x);  // width of one sub-block prediction, in samples
+ const int ph = 8 >> (have_hsplit | pd->subsampling_y);  // height of one sub-block prediction, in samples
+ int x, y;
+ assert(bp != PARTITION_NONE && bp < PARTITION_TYPES);
+ assert(bsize == BLOCK_8X8);
+ assert(pw * num_4x4_w == bw && ph * num_4x4_h == bh);  // the sub-blocks tile the plane block exactly
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x)
+ build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+ 0, 0,
+#endif // CONFIG_OBMC
+ y * 2 + x, bw, bh, 4 * x, 4 * y, pw, ph,  // block index y * 2 + x; sub-block placed at (4 * x, 4 * y)
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ 0, 0,
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+ mi_x, mi_y);
+ } else {  // 8x8 or larger: the whole plane block in one call
+ build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+ 0, 0,
+#endif // CONFIG_OBMC
+ 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ 0, 0,
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+ mi_x, mi_y);
+ }
+ }
+}
+
+// Luma-only inter prediction (plane 0) for the block at (mi_row, mi_col).
+void vp10_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0);
+#if CONFIG_EXT_INTER
+ if (!is_interintra_pred(&xd->mi[0]->mbmi)) return;  // plain inter block: done
+ vp10_build_interintra_predictors_sby(xd, xd->plane[0].dst.buf, xd->plane[0].dst.stride, bsize);
+#endif // CONFIG_EXT_INTER
+}
+
+// Single-plane inter prediction: predicts only `plane` of the block at
+// (mi_row, mi_col). With CONFIG_EXT_INTER, an inter-intra block then gets the
+// luma (plane 0) or per-plane chroma inter-intra step on that plane's dst.
+void vp10_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int plane) {
+ build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, plane, plane);
+#if CONFIG_EXT_INTER
+ if (!is_interintra_pred(&xd->mi[0]->mbmi)) return;
+ if (plane != 0)
+ vp10_build_interintra_predictors_sbc(xd, xd->plane[plane].dst.buf,
+ xd->plane[plane].dst.stride, plane, bsize);
+ else
+ vp10_build_interintra_predictors_sby(xd, xd->plane[0].dst.buf,
+ xd->plane[0].dst.stride, bsize);
+#endif // CONFIG_EXT_INTER
+}
+
+// Chroma-only inter prediction: planes 1 .. MAX_MB_PLANE - 1 of the block.
+void vp10_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1,
+ MAX_MB_PLANE - 1);
+#if CONFIG_EXT_INTER
+ if (!is_interintra_pred(&xd->mi[0]->mbmi)) return;  // plain inter block: done
+ vp10_build_interintra_predictors_sbuv(xd, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
+ xd->plane[1].dst.stride, xd->plane[2].dst.stride, bsize);
+#endif // CONFIG_EXT_INTER
+}
+
+// Full inter prediction: every plane (0 .. MAX_MB_PLANE - 1) of the block.
+void vp10_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0,
+ MAX_MB_PLANE - 1);
+#if CONFIG_EXT_INTER
+ if (!is_interintra_pred(&xd->mi[0]->mbmi)) return;  // plain inter block: done
+ vp10_build_interintra_predictors(
+ xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
+ xd->plane[0].dst.stride, xd->plane[1].dst.stride, xd->plane[2].dst.stride, bsize);
+#endif // CONFIG_EXT_INTER
+}
+
+void vp10_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],  // Points each plane's dst buffer at frame `src` for the block at (mi_row, mi_col).
+ const YV12_BUFFER_CONFIG *src, int mi_row,
+ int mi_col) {
+ uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer,  // per-plane arrays, indexed in Y, U, V order
+ src->v_buffer };
+ const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,  // both chroma planes share the uv_* geometry
+ src->uv_crop_width };
+ const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
+ src->uv_crop_height };
+ const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride,
+ src->uv_stride };
+ int i;
+
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ struct macroblockd_plane *const pd = &planes[i];
+ setup_pred_plane(&pd->dst, buffers[i], widths[i], heights[i], strides[i],
+ mi_row, mi_col, NULL, pd->subsampling_x,  // NULL: no scale factors for the destination
+ pd->subsampling_y);
+ }
+}
+
+void vp10_setup_pre_planes(MACROBLOCKD *xd, int idx,  // Points pre[idx] of every plane in xd at reference frame `src`.
+ const YV12_BUFFER_CONFIG *src, int mi_row,
+ int mi_col, const struct scale_factors *sf) {
+ if (src != NULL) {  // a NULL src leaves xd untouched
+ int i;
+ uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer,  // per-plane arrays, indexed in Y, U, V order
+ src->v_buffer };
+ const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,  // both chroma planes share the uv_* geometry
+ src->uv_crop_width };
+ const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
+ src->uv_crop_height };
+ const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride,
+ src->uv_stride };
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ struct macroblockd_plane *const pd = &xd->plane[i];
+ setup_pred_plane(&pd->pre[idx], buffers[i], widths[i], heights[i],
+ strides[i], mi_row, mi_col, sf, pd->subsampling_x,  // sf is forwarded unchanged to setup_pred_plane
+ pd->subsampling_y);
+ }
+ }
+}
+
+#if CONFIG_SUPERTX
+static const uint8_t mask_8[8] = { 64, 64, 62, 52, 12, 2, 0, 0 };  // SUPERTX blend ramps returned by get_supertx_mask(); entries lie in [0, 64]
+
+static const uint8_t mask_16[16] = { 63, 62, 60, 58, 55, 50, 43, 36,  // selected for length 16 when get_supertx_mask()'s second argument is 0
+ 28, 21, 14, 9, 6, 4, 2, 1 };
+
+static const uint8_t mask_32[32] = { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63,  // selected for length 32 when the second argument is 0
+ 61, 57, 52, 45, 36, 28, 19, 12, 7, 3, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+static const uint8_t mask_8_uv[8] = { 64, 64, 62, 52, 12, 2, 0, 0 };  // *_uv tables: selected when the second argument is nonzero; this one equals mask_8
+
+static const uint8_t mask_16_uv[16] = { 64, 64, 64, 64, 61, 53, 45, 36,
+ 28, 19, 11, 3, 0, 0, 0, 0 };
+
+static const uint8_t mask_32_uv[32] = { 64, 64, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 64, 60, 54, 46, 36,
+ 28, 18, 10, 4, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+
+// Returns the SUPERTX blend ramp for a `length`-sample edge (8, 16 or 32);
+// a nonzero `plane` selects the *_uv table. Any other length asserts.
+static const uint8_t *get_supertx_mask(int length, int plane) {
+ if (length == 8) return plane ? mask_8_uv : mask_8;
+ if (length == 16) return plane ? mask_16_uv : mask_16;
+ if (length == 32) return plane ? mask_32_uv : mask_32;
+ assert(0);  // unsupported length; NULL is returned when asserts are compiled out
+ return NULL;
+}
+
+void vp10_build_masked_inter_predictor_complex(  // SUPERTX: merges prediction `pre` into `dst` across a HORZ or VERT partition edge inside a top_bsize block.
+ MACROBLOCKD *xd, uint8_t *dst, int dst_stride, const uint8_t *pre,
+ int pre_stride, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori,
+ BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition,
+ int plane) {
+ const struct macroblockd_plane *pd = &xd->plane[plane];
+ const int ssx = pd->subsampling_x;
+ const int ssy = pd->subsampling_y;
+ const int top_w = (4 << b_width_log2_lookup[top_bsize]) >> ssx;  // top_bsize dimensions in this plane, in samples
+ const int top_h = (4 << b_height_log2_lookup[top_bsize]) >> ssy;
+ const int w = (4 << b_width_log2_lookup[bsize]) >> ssx;  // bsize dimensions in this plane, in samples
+ const int h = (4 << b_height_log2_lookup[bsize]) >> ssy;
+ const int w_offset = ((mi_col - mi_col_ori) * MI_SIZE) >> ssx;  // offset of (mi_row, mi_col) from the *_ori position, in plane samples
+ const int h_offset = ((mi_row - mi_row_ori) * MI_SIZE) >> ssy;
+
+ int w_remain, h_remain;  // size of the region past the blended band; set in every non-returning switch case
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int is_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;  // NOTE(review): "hdb" looks like a typo for "hbd" (high bitdepth)
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ assert(bsize <= BLOCK_32X32);
+ assert(IMPLIES(plane == 0, ssx == 0));
+ assert(IMPLIES(plane == 0, ssy == 0));
+
+ switch (partition) {
+ case PARTITION_HORZ: {
+ const uint8_t *const mask = get_supertx_mask(h, ssy);  // ramp chosen by band height; ssy picks the *_uv variant
+
+ w_remain = top_w;
+ h_remain = top_h - h_offset - h;  // rows left below the blended band
+ dst += h_offset * dst_stride;  // move to the first row of the band
+ pre += h_offset * pre_stride;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_hdb)
+ vpx_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre,
+ pre_stride, mask, h, top_w, xd->bd);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vpx_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre, pre_stride,  // in-place blend of dst with pre over h rows x top_w columns
+ mask, h, top_w);
+
+ dst += h * dst_stride;  // skip past the band to the remaining rows
+ pre += h * pre_stride;
+ break;
+ }
+ case PARTITION_VERT: {
+ const uint8_t *const mask = get_supertx_mask(w, ssx);  // ramp chosen by band width; ssx picks the *_uv variant
+
+ w_remain = top_w - w_offset - w;  // columns left to the right of the blended band
+ h_remain = top_h;
+ dst += w_offset;  // move to the first column of the band
+ pre += w_offset;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_hdb)
+ vpx_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre,
+ pre_stride, mask, top_h, w, xd->bd);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vpx_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre, pre_stride,  // in-place blend of dst with pre over top_h rows x w columns
+ mask, top_h, w);
+
+ dst += w;  // skip past the band to the remaining columns
+ pre += w;
+ break;
+ }
+ default: {  // only HORZ and VERT are handled
+ assert(0);
+ return;
+ }
+ }
+
+ if (w_remain == 0 || h_remain == 0) {  // nothing beyond the band; also keeps the do/while below from starting at zero rows
+ return;
+ }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_hdb) {  // presumably switches to byte addressing of 16-bit samples for the memcpy below (TODO confirm CONVERT_TO_SHORTPTR semantics)
+ dst = (uint8_t *)CONVERT_TO_SHORTPTR(dst);
+ pre = (const uint8_t *)CONVERT_TO_SHORTPTR(pre);
+ dst_stride *= 2;
+ pre_stride *= 2;
+ w_remain *= 2;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ do {  // the region past the band is copied from pre unblended, one row at a time
+ memcpy(dst, pre, w_remain * sizeof(uint8_t));
+ dst += dst_stride;
+ pre += pre_stride;
+ } while (--h_remain);
+}
+
+void vp10_build_inter_predictors_sb_sub8x8_extend(MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+ int mi_row_ori,
+ int mi_col_ori,
+#endif // CONFIG_EXT_INTER
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int block) {
+ // Prediction function used in supertx:
+ // uses the mv of the current block (which is smaller than 8x8) to build the
+ // prediction of a block of size bsize located at (mi_row, mi_col);
+ // bsize can be larger than 8x8.
+ // block (0-3): the sub8x8 location of the current block.
+ int plane;
+ const int mi_x = mi_col * MI_SIZE;
+ const int mi_y = mi_row * MI_SIZE;
+#if CONFIG_EXT_INTER
+ const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;  // offset from (mi_row, mi_col) to the *_ori position, forwarded to build_inter_predictors()
+ const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
+#endif // CONFIG_EXT_INTER
+
+ // For sub8x8 uv:
+ // skip uv prediction in supertx for every block except the first (block == 0).
+ int max_plane = block ? 1 : MAX_MB_PLANE;
+
+ for (plane = 0; plane < max_plane; plane++) {
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(bsize, &xd->plane[plane]);
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+ const int bw = 4 * num_4x4_w;  // plane block size in samples
+ const int bh = 4 * num_4x4_h;
+
+ build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+ 0, 0,
+#endif // CONFIG_OBMC
+ block, bw, bh, 0, 0, bw, bh,  // whole bw x bh area, using sub-block `block`
+#if CONFIG_EXT_INTER
+ wedge_offset_x, wedge_offset_y,
+#endif // CONFIG_EXT_INTER
+ mi_x, mi_y);
+ }
+#if CONFIG_EXT_INTER
+ if (is_interintra_pred(&xd->mi[0]->mbmi))  // NOTE(review): runs on all three planes even when max_plane is 1 - confirm this is intended
+ vp10_build_interintra_predictors(
+ xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
+ xd->plane[0].dst.stride, xd->plane[1].dst.stride,
+ xd->plane[2].dst.stride, bsize);
+#endif // CONFIG_EXT_INTER
+}
+
+void vp10_build_inter_predictors_sb_extend(MACROBLOCKD *xd,  // SUPERTX: inter-predicts all planes of a bsize block at (mi_row, mi_col).
+#if CONFIG_EXT_INTER
+ int mi_row_ori, int mi_col_ori,
+#endif // CONFIG_EXT_INTER
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ int plane;
+ const int mi_x = mi_col * MI_SIZE;
+ const int mi_y = mi_row * MI_SIZE;
+#if CONFIG_EXT_INTER
+ const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;  // offset from (mi_row, mi_col) to the *_ori position, forwarded to build_inter_predictors()
+ const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
+#endif // CONFIG_EXT_INTER
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(bsize, &xd->plane[plane]);
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+ const int bw = 4 * num_4x4_w;  // plane block size in samples
+ const int bh = 4 * num_4x4_h;
+
+ if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {  // sub8x8 block: one 4x4 prediction per 4x4 unit of the plane block
+ int x, y;
+ assert(bsize == BLOCK_8X8);
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x)
+ build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+ 0, 0,
+#endif // CONFIG_OBMC
+ y * 2 + x, bw, bh, 4 * x, 4 * y, 4, 4,  // block index y * 2 + x, 4x4 area at (4 * x, 4 * y)
+#if CONFIG_EXT_INTER
+ wedge_offset_x, wedge_offset_y,
+#endif // CONFIG_EXT_INTER
+ mi_x, mi_y);
+ } else {  // 8x8 or larger: the whole plane block in one call
+ build_inter_predictors(xd, plane,
+#if CONFIG_OBMC
+ 0, 0,
+#endif // CONFIG_OBMC
+ 0, bw, bh, 0, 0, bw, bh,
+#if CONFIG_EXT_INTER
+ wedge_offset_x, wedge_offset_y,
+#endif // CONFIG_EXT_INTER
+ mi_x, mi_y);
+ }
+ }
+}
+#endif // CONFIG_SUPERTX
+
+#if CONFIG_OBMC
+// obmc_mask_N[overlap_position]: OBMC blend weights for an overlap of N samples, looked up through vp10_get_obmc_mask(N); entries do not exceed 64.
+static const uint8_t obmc_mask_1[1] = { 55 };
+
+static const uint8_t obmc_mask_2[2] = { 45, 62 };
+
+static const uint8_t obmc_mask_4[4] = { 39, 50, 59, 64 };
+
+static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 63, 64 };
+
+static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
+ 56, 58, 60, 61, 63, 64, 64, 64 };
+
+static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
+ 45, 47, 48, 50, 51, 52, 53, 55,
+ 56, 57, 58, 59, 60, 60, 61, 62,
+ 62, 63, 63, 64, 64, 64, 64, 64 };
+
+#if CONFIG_EXT_PARTITION
+static const uint8_t obmc_mask_64[64] = {  // only built (and only reachable via length 64) with CONFIG_EXT_PARTITION
+ 33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
+ 45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
+ 56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
+ 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+};
+#endif // CONFIG_EXT_PARTITION
+
+// OBMC mask for a power-of-two overlap `length`; asserts (NULL) if unsupported.
+const uint8_t *vp10_get_obmc_mask(int length) {
+ if (length == 1) return obmc_mask_1;
+ if (length == 2) return obmc_mask_2;
+ if (length == 4) return obmc_mask_4;
+ if (length == 8) return obmc_mask_8;
+ if (length == 16) return obmc_mask_16;
+ if (length == 32) return obmc_mask_32;
+#if CONFIG_EXT_PARTITION
+ if (length == 64) return obmc_mask_64;
+#endif // CONFIG_EXT_PARTITION
+ assert(0);  // no table for this length
+ return NULL;
+}
+
+// This function combines the motion compensated predictions that are generated
+// from the top/left neighboring blocks' inter predictors with the regular
+// inter prediction. We assume the original prediction (bmc) is stored in
+// xd->plane[].dst.buf; the blend is written back in place to that buffer.
+void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *above[MAX_MB_PLANE],  // per-plane predictions built from the above neighbors
+ int above_stride[MAX_MB_PLANE],
+ uint8_t *left[MAX_MB_PLANE],  // per-plane predictions built from the left neighbors
+ int left_stride[MAX_MB_PLANE]) {
+ const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ int plane, i;
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // handle above row
+ if (xd->up_available) {
+ const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;  // half the block height, in luma samples
+ const int miw = VPXMIN(xd->n8_w, cm->mi_cols - mi_col);  // block width in mi units, clipped to the frame
+ const int mi_row_offset = -1;
+
+ assert(miw > 0);
+
+ i = 0;
+ do { // for each mi in the above row
+ const int mi_col_offset = i;
+ const MB_MODE_INFO *const above_mbmi =
+ &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
+ const int mi_step =
+ VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);  // advance by the neighbor's width, capped at this block's width
+
+ if (is_neighbor_overlappable(above_mbmi)) {
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *pd = &xd->plane[plane];
+ const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;  // width covered by this neighbor, in plane samples
+ const int bh = overlap >> pd->subsampling_y;  // overlap depth in plane samples
+ const int dst_stride = pd->dst.stride;
+ uint8_t *const dst = &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x];  // column where this neighbor starts
+ const int tmp_stride = above_stride[plane];
+ const uint8_t *const tmp =
+ &above[plane][(i * MI_SIZE) >> pd->subsampling_x];
+ const uint8_t *const mask = vp10_get_obmc_mask(bh);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_hbd)
+ vpx_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
+ tmp_stride, mask, bh, bw, xd->bd);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vpx_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,  // in-place blend of dst with the above prediction, one mask entry per row
+ tmp_stride, mask, bh, bw);
+ }
+ }
+ i += mi_step;
+ } while (i < miw);
+ }
+
+ // handle left column
+ if (xd->left_available) {
+ const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;  // half the block width, in luma samples
+ const int mih = VPXMIN(xd->n8_h, cm->mi_rows - mi_row);  // block height in mi units, clipped to the frame
+ const int mi_col_offset = -1;
+
+ assert(mih > 0);
+
+ i = 0;
+ do { // for each mi in the left column
+ const int mi_row_offset = i;
+ const MB_MODE_INFO *const left_mbmi =
+ &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
+ const int mi_step =
+ VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[left_mbmi->sb_type]);  // advance by the neighbor's height, capped at this block's height
+
+ if (is_neighbor_overlappable(left_mbmi)) {
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *pd = &xd->plane[plane];
+ const int bw = overlap >> pd->subsampling_x;  // overlap depth in plane samples
+ const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;  // height covered by this neighbor, in plane samples
+ const int dst_stride = pd->dst.stride;
+ uint8_t *const dst =
+ &pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y];  // row where this neighbor starts
+ const int tmp_stride = left_stride[plane];
+ const uint8_t *const tmp =
+ &left[plane][(i * MI_SIZE * tmp_stride) >> pd->subsampling_y];
+ const uint8_t *const mask = vp10_get_obmc_mask(bw);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (is_hbd)
+ vpx_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
+ tmp_stride, mask, bh, bw, xd->bd);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vpx_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,  // in-place blend of dst with the left prediction, one mask entry per column
+ tmp_stride, mask, bh, bw);
+ }
+ }
+ i += mi_step;
+ } while (i < mih);
+ }
+}
+
+#if CONFIG_EXT_INTER
+// Clears ref_frame[1] of an inter-intra or wedge-compound neighbor in place.
+void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
+ const int is_ii = is_interintra_pred(mbmi);
+ if (!is_ii) {  // not inter-intra: only a wedge compound block is altered
+ if (!has_second_ref(mbmi) || !is_interinter_wedge_used(mbmi->sb_type)) return;
+ if (!mbmi->use_wedge_interinter) return;
+ mbmi->use_wedge_interinter = 0;
+ }
+ mbmi->ref_frame[1] = NONE;
+}
+#endif // CONFIG_EXT_INTER
+
+void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, MACROBLOCKD *xd,  // Fills tmp_buf[] with predictions of the current block's top area made with each above neighbor's motion.
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_width[MAX_MB_PLANE],
+ int tmp_height[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]) {
+ const TileInfo *const tile = &xd->tile;
+ BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ int i, j, mi_step, ref;
+
+ if (mi_row <= tile->mi_row_start) return;  // no row above inside this tile
+
+ for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {  // mi_step is assigned before any `continue`, so the increment is always defined
+ int mi_row_offset = -1;
+ int mi_col_offset = i;
+ int mi_x, mi_y, bw, bh;
+ MODE_INFO *above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
+#if CONFIG_EXT_INTER
+ MB_MODE_INFO backup_mbmi;
+#endif // CONFIG_EXT_INTER
+
+ mi_step = VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);  // neighbor width in mi units, capped at this block's width
+
+ if (!is_neighbor_overlappable(above_mbmi)) continue;
+
+#if CONFIG_EXT_INTER
+ backup_mbmi = *above_mbmi;  // the neighbor's mode info is edited in place and restored at the end of the iteration
+ modify_neighbor_predictor_for_obmc(above_mbmi);
+#endif // CONFIG_EXT_INTER
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {  // redirect every plane's dst to the temporary buffer, at mi column i
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst, tmp_buf[j], tmp_width[j], tmp_height[j],
+ tmp_stride[j], 0, i, NULL, pd->subsampling_x,
+ pd->subsampling_y);
+ }
+ for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {  // use the neighbor's reference frame(s)
+ MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
+ RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+ xd->block_refs[ref] = ref_buf;
+ if ((!vp10_is_valid_scale(&ref_buf->sf)))
+ vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i,
+ &ref_buf->sf);
+ }
+
+ xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);  // temporarily moved to this neighbor's column; reset after the loop
+ mi_x = (mi_col + i) << MI_SIZE_LOG2;
+ mi_y = mi_row << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+ bw = (mi_step * 8) >> pd->subsampling_x;  // neighbor width in plane samples
+ bh = VPXMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,  // half the block height in plane samples, at least 4
+ 4);
+
+ if (above_mbmi->sb_type < BLOCK_8X8) {  // sub8x8 neighbor: predict per sub-block
+ const PARTITION_TYPE bp = BLOCK_8X8 - above_mbmi->sb_type;
+ const int have_vsplit = bp != PARTITION_HORZ;
+ const int have_hsplit = bp != PARTITION_VERT;
+ const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+ const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+ const int pw = 8 >> (have_vsplit | pd->subsampling_x);
+ int x, y;
+
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x) {
+ if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT) && y == 0 &&  // with two sub-block rows, skip the upper one (y == 0)
+ !pd->subsampling_y)
+ continue;
+
+ build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+ y * 2 + x, bw, bh, 4 * x, 0, pw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ 0, 0,
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+ mi_x, mi_y);
+ }
+ } else {
+ build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0, bw, bh,
+ 0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ 0, 0,
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+ mi_x, mi_y);
+ }
+ }
+#if CONFIG_EXT_INTER
+ *above_mbmi = backup_mbmi;  // undo modify_neighbor_predictor_for_obmc()
+#endif // CONFIG_EXT_INTER
+ }
+ xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);  // reset to the current block's column; dst/pre planes and block_refs are left as last set
+}
+
+void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, MACROBLOCKD *xd,  // Fills tmp_buf[] with predictions of the current block's left area made with each left neighbor's motion.
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_width[MAX_MB_PLANE],
+ int tmp_height[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]) {
+ const TileInfo *const tile = &xd->tile;
+ BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ int i, j, mi_step, ref;
+
+ if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start)) return;  // no column to the left inside this tile
+
+ for (i = 0; i < VPXMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {  // mi_step is assigned before any `continue`, so the increment is always defined
+ int mi_row_offset = i;
+ int mi_col_offset = -1;
+ int mi_x, mi_y, bw, bh;
+ MODE_INFO *left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+ MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
+#if CONFIG_EXT_INTER
+ MB_MODE_INFO backup_mbmi;
+#endif // CONFIG_EXT_INTER
+
+ mi_step = VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[left_mbmi->sb_type]);  // neighbor height in mi units, capped at this block's height
+
+ if (!is_neighbor_overlappable(left_mbmi)) continue;
+
+#if CONFIG_EXT_INTER
+ backup_mbmi = *left_mbmi;  // the neighbor's mode info is edited in place and restored at the end of the iteration
+ modify_neighbor_predictor_for_obmc(left_mbmi);
+#endif // CONFIG_EXT_INTER
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {  // redirect every plane's dst to the temporary buffer, at mi row i
+ struct macroblockd_plane *const pd = &xd->plane[j];
+ setup_pred_plane(&pd->dst, tmp_buf[j], tmp_width[j], tmp_height[j],
+ tmp_stride[j], i, 0, NULL, pd->subsampling_x,
+ pd->subsampling_y);
+ }
+ for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {  // use the neighbor's reference frame(s)
+ MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
+ RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+ xd->block_refs[ref] = ref_buf;
+ if ((!vp10_is_valid_scale(&ref_buf->sf)))
+ vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col,
+ &ref_buf->sf);
+ }
+
+ xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);  // temporarily moved to this neighbor's row; reset after the loop
+ mi_x = mi_col << MI_SIZE_LOG2;
+ mi_y = (mi_row + i) << MI_SIZE_LOG2;
+
+ for (j = 0; j < MAX_MB_PLANE; ++j) {
+ const struct macroblockd_plane *pd = &xd->plane[j];
+ bw = VPXMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,  // half the block width in plane samples, at least 4
+ 4);
+ bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;  // neighbor height in plane samples
+
+ if (left_mbmi->sb_type < BLOCK_8X8) {  // sub8x8 neighbor: predict per sub-block
+ const PARTITION_TYPE bp = BLOCK_8X8 - left_mbmi->sb_type;
+ const int have_vsplit = bp != PARTITION_HORZ;
+ const int have_hsplit = bp != PARTITION_VERT;
+ const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+ const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+ const int ph = 8 >> (have_hsplit | pd->subsampling_y);
+ int x, y;
+
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x) {
+ if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT) && x == 0 &&  // with two sub-block columns, skip the left one (x == 0)
+ !pd->subsampling_x)
+ continue;
+
+ build_inter_predictors(xd, j, mi_col_offset, mi_row_offset,
+ y * 2 + x, bw, bh, 0, 4 * y, bw, ph,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ 0, 0,
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+ mi_x, mi_y);
+ }
+ } else {
+ build_inter_predictors(xd, j, mi_col_offset, mi_row_offset, 0, bw, bh,
+ 0, 0, bw, bh,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ 0, 0,
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+ mi_x, mi_y);
+ }
+ }
+#if CONFIG_EXT_INTER
+ *left_mbmi = backup_mbmi;  // undo modify_neighbor_predictor_for_obmc()
+#endif // CONFIG_EXT_INTER
+ }
+ xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);  // reset to the current block's row; dst/pre planes and block_refs are left as last set
+}
+#endif // CONFIG_OBMC
+
+#if CONFIG_EXT_INTER
+#if CONFIG_EXT_PARTITION
+static const int ii_weights1d[MAX_SB_SIZE] = {  // inter-intra blend weights by position, read as ii_weights1d[pos * size_scale]; 128 entries listed in this variant
+ 102, 100, 97, 95, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 73, 71, 69, 68, 67,
+ 65, 64, 62, 61, 60, 59, 58, 57, 55, 54, 53, 52, 52, 51, 50, 49, 48, 47, 47,
+ 46, 45, 45, 44, 43, 43, 42, 41, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 36,
+ 36, 36, 35, 35, 35, 34, 34, 34, 33, 33, 33, 33, 32, 32, 32, 32, 32, 31, 31,
+ 31, 31, 31, 30, 30, 30, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 29, 29, 28,
+ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 27, 27, 27,
+ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+};
+static int ii_size_scales[BLOCK_SIZES] = { 32, 16, 16, 16, 8, 8, 8, 4,  // per-block-size index step into ii_weights1d; NOTE(review): only read in the visible code, could probably be const
+ 4, 4, 2, 2, 2, 1, 1, 1 };
+#else
+static const int ii_weights1d[MAX_SB_SIZE] = {  // inter-intra blend weights by position, read as ii_weights1d[pos * size_scale]; 64 entries listed in this variant
+ 102, 100, 97, 95, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 73, 71,
+ 69, 68, 67, 65, 64, 62, 61, 60, 59, 58, 57, 55, 54, 53, 52, 52,
+ 51, 50, 49, 48, 47, 47, 46, 45, 45, 44, 43, 43, 42, 41, 41, 40,
+ 40, 39, 39, 38, 38, 38, 37, 37, 36, 36, 36, 35, 35, 35, 34, 34,
+};
+static int ii_size_scales[BLOCK_SIZES] = { 16, 8, 8, 8, 4, 4, 4,  // per-block-size index step into ii_weights1d; NOTE(review): only read in the visible code, could probably be const
+ 2, 2, 2, 1, 1, 1 };
+#endif // CONFIG_EXT_PARTITION
+
+static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra,  // Blends intrapred and interpred into comppred for one plane block (8-bit samples).
+ int wedge_index, int wedge_sign,
+ BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,  // bsize: full block size (selects the wedge mask); plane_bsize: size of this plane's block
+ uint8_t *comppred, int compstride,
+ const uint8_t *interpred, int interstride,
+ const uint8_t *intrapred, int intrastride) {
+ const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];  // plane block size in samples
+ const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
+ const int size_scale = ii_size_scales[plane_bsize];  // index step into ii_weights1d per sample position
+ int i, j;
+
+ if (use_wedge_interintra) {  // wedge path: blend with the wedge soft mask; comppred is left unwritten if wedges are not used for bsize
+ if (is_interintra_wedge_used(bsize)) {
+ const uint8_t *mask =
+ vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
+ const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;  // 1 when the plane block is half as wide as bsize
+ const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;  // 1 when the plane block is half as tall as bsize
+ vpx_blend_a64_mask(
+ comppred, compstride, intrapred, intrastride, interpred, interstride,
+ mask, 4 * num_4x4_blocks_wide_lookup[bsize], bh, bw, subh, subw);  // the argument after `mask` is the bsize width in samples (presumably the mask stride)
+ }
+ return;
+ }
+
+ switch (mode) {  // smooth path: per-sample weight taken from ii_weights1d according to the mode
+ case II_V_PRED:  // weight depends on the row only
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ int scale = ii_weights1d[i * size_scale];
+ comppred[i * compstride + j] =
+ VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+ interpred[i * interstride + j]);
+ }
+ }
+ break;
+
+ case II_H_PRED:  // weight depends on the column only
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ int scale = ii_weights1d[j * size_scale];
+ comppred[i * compstride + j] =
+ VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+ interpred[i * interstride + j]);
+ }
+ }
+ break;
+
+ case II_D63_PRED:
+ case II_D117_PRED:  // (3 * row weight + column weight) / 4
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ int scale = (ii_weights1d[i * size_scale] * 3 +
+ ii_weights1d[j * size_scale]) >>
+ 2;
+ comppred[i * compstride + j] =
+ VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+ interpred[i * interstride + j]);
+ }
+ }
+ break;
+
+ case II_D207_PRED:
+ case II_D153_PRED:  // (3 * column weight + row weight) / 4
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ int scale = (ii_weights1d[j * size_scale] * 3 +
+ ii_weights1d[i * size_scale]) >>
+ 2;
+ comppred[i * compstride + j] =
+ VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+ interpred[i * interstride + j]);
+ }
+ }
+ break;
+
+ case II_D135_PRED:  // weight indexed by min(row, column)
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ int scale = ii_weights1d[(i < j ? i : j) * size_scale];
+ comppred[i * compstride + j] =
+ VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+ interpred[i * interstride + j]);
+ }
+ }
+ break;
+
+ case II_D45_PRED:  // average of the row and column weights
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ int scale =
+ (ii_weights1d[i * size_scale] + ii_weights1d[j * size_scale]) >>
+ 1;
+ comppred[i * compstride + j] =
+ VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+ interpred[i * interstride + j]);
+ }
+ }
+ break;
+
+ case II_TM_PRED:
+ case II_DC_PRED:
+ default:  // position-independent: VPX_BLEND_AVG of the two predictions
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ comppred[i * compstride + j] = VPX_BLEND_AVG(
+ intrapred[i * intrastride + j], interpred[i * interstride + j]);
+ }
+ }
+ break;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void combine_interintra_highbd(  // High-bitdepth twin of combine_interintra(): blends intrapred8 and interpred8 into comppred8 for one plane block.
+ INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index,
+ int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,  // bsize: full block size (selects the wedge mask); plane_bsize: size of this plane's block
+ uint8_t *comppred8, int compstride, const uint8_t *interpred8,
+ int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
+ const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];  // plane block size in samples
+ const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
+ const int size_scale = ii_size_scales[plane_bsize];  // index step into ii_weights1d per sample position
+ int i, j;
+
+ uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8);  // 16-bit views used by the smooth-weight loops below
+ const uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8);
+ const uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8);
+
+ if (use_wedge_interintra) {  // wedge path: blend with the wedge soft mask; comppred is left unwritten if wedges are not used for bsize
+ if (is_interintra_wedge_used(bsize)) {
+ const uint8_t *mask =
+ vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
+ const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;  // 1 when the plane block is half as tall as bsize
+ const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;  // 1 when the plane block is half as wide as bsize
+ vpx_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
+ interpred8, interstride, mask,  // mask is laid out for bsize, so its stride is the bsize width, not bw
+ 4 * num_4x4_blocks_wide_lookup[bsize], bh, bw, subh, subw, bd);  // matches combine_interintra(); bw was wrong whenever subw == 1
+ }
+ return;
+ }
+
+ switch (mode) {  // smooth path: per-sample weight taken from ii_weights1d according to the mode
+ case II_V_PRED:  // weight depends on the row only
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ int scale = ii_weights1d[i * size_scale];
+ comppred[i * compstride + j] =
+ VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+ interpred[i * interstride + j]);
+ }
+ }
+ break;
+
+ case II_H_PRED:  // weight depends on the column only
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ int scale = ii_weights1d[j * size_scale];
+ comppred[i * compstride + j] =
+ VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+ interpred[i * interstride + j]);
+ }
+ }
+ break;
+
+ case II_D63_PRED:
+ case II_D117_PRED:  // (3 * row weight + column weight) / 4
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ int scale = (ii_weights1d[i * size_scale] * 3 +
+ ii_weights1d[j * size_scale]) >>
+ 2;
+ comppred[i * compstride + j] =
+ VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+ interpred[i * interstride + j]);
+ }
+ }
+ break;
+
+ case II_D207_PRED:
+ case II_D153_PRED:  // (3 * column weight + row weight) / 4
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ int scale = (ii_weights1d[j * size_scale] * 3 +
+ ii_weights1d[i * size_scale]) >>
+ 2;
+ comppred[i * compstride + j] =
+ VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+ interpred[i * interstride + j]);
+ }
+ }
+ break;
+
+ case II_D135_PRED:  // weight indexed by min(row, column)
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ int scale = ii_weights1d[(i < j ? i : j) * size_scale];
+ comppred[i * compstride + j] =
+ VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+ interpred[i * interstride + j]);
+ }
+ }
+ break;
+
+ case II_D45_PRED:  // average of the row and column weights
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ int scale =
+ (ii_weights1d[i * size_scale] + ii_weights1d[j * size_scale]) >>
+ 1;
+ comppred[i * compstride + j] =
+ VPX_BLEND_A256(scale, intrapred[i * intrastride + j],
+ interpred[i * interstride + j]);
+ }
+ }
+ break;
+
+ case II_TM_PRED:
+ case II_DC_PRED:
+ default:  // position-independent: VPX_BLEND_AVG of the two predictions
+ for (i = 0; i < bh; ++i) {
+ for (j = 0; j < bw; ++j) {
+ comppred[i * compstride + j] = VPX_BLEND_AVG(
+ interpred[i * interstride + j], intrapred[i * intrastride + j]);
+ }
+ }
+ break;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// Break down rectangular intra prediction for joint spatio-temporal prediction
+// into two square intra predictions. Note: writes one row/column into `ref`.
+static void build_intra_predictors_for_interintra(MACROBLOCKD *xd, uint8_t *ref,
+ int ref_stride, uint8_t *dst,
+ int dst_stride,
+ PREDICTION_MODE mode,
+ BLOCK_SIZE bsize, int plane) {
+ BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
+ const int bwl = b_width_log2_lookup[plane_bsize];
+ const int bhl = b_height_log2_lookup[plane_bsize];
+ const int pxbw = 4 << bwl;  // plane block width in samples
+ const int pxbh = 4 << bhl;  // plane block height in samples
+ TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+
+ if (bwl == bhl) {  // square block: a single prediction call
+ vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride,
+ dst, dst_stride, 0, 0, plane);
+
+ } else if (bwl < bhl) {  // tall block: predict at the top, then again pxbw rows further down
+ uint8_t *src_2 = ref + pxbw * ref_stride;  // start of the second part in ref / dst
+ uint8_t *dst_2 = dst + pxbw * dst_stride;
+ vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride,
+ dst, dst_stride, 0, 0, plane);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2);
+ uint16_t *dst_216 = CONVERT_TO_SHORTPTR(dst_2);
+ memcpy(src_216 - ref_stride, dst_216 - dst_stride,  // same row copy as below, on 16-bit samples
+ sizeof(*src_216) * pxbw);
+ } else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ {
+ memcpy(src_2 - ref_stride, dst_2 - dst_stride, sizeof(*src_2) * pxbw);  // copy the dst row just above dst_2 into the ref row just above src_2
+ }
+ vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, src_2, ref_stride,
+ dst_2, dst_stride, 0, 1 << bwl, plane);
+ } else { // bwl > bhl
+ int i;
+ uint8_t *src_2 = ref + pxbh;  // start of the second part, pxbh columns to the right
+ uint8_t *dst_2 = dst + pxbh;
+ vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, ref, ref_stride,
+ dst, dst_stride, 0, 0, plane);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2);
+ uint16_t *dst_216 = CONVERT_TO_SHORTPTR(dst_2);
+ for (i = 0; i < pxbh; ++i)
+ src_216[i * ref_stride - 1] = dst_216[i * dst_stride - 1];  // same column copy as below, on 16-bit samples
+ } else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ {
+ for (i = 0; i < pxbh; ++i)
+ src_2[i * ref_stride - 1] = dst_2[i * dst_stride - 1];  // copy the dst column just left of dst_2 into the ref column just left of src_2
+ }
+ vp10_predict_intra_block(xd, bwl, bhl, max_tx_size, mode, src_2, ref_stride,
+ dst_2, dst_stride, 1 << bhl, 0, plane);
+ }
+}
+
+// Lookup from interintra mode (the index) to the intra mode used to predict.
+static const int interintra_to_intra_mode[INTERINTRA_MODES] = {
+ DC_PRED, V_PRED, H_PRED, D45_PRED, D135_PRED,
+ D117_PRED, D153_PRED, D207_PRED, D63_PRED, TM_PRED
+};
+
+void vp10_build_intra_predictors_for_interintra(MACROBLOCKD *xd,
+                                                BLOCK_SIZE bsize, int plane,
+                                                uint8_t *dst, int dst_stride) {
+  struct macroblockd_plane *const pd = &xd->plane[plane];  // its dst is the ref
+  const int mode = interintra_to_intra_mode[xd->mi[0]->mbmi.interintra_mode];
+  build_intra_predictors_for_interintra(xd, pd->dst.buf, pd->dst.stride, dst,
+                                        dst_stride, mode, bsize, plane);
+}
+
+void vp10_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
+                             const uint8_t *inter_pred, int inter_stride,
+                             const uint8_t *intra_pred, int intra_stride) {
+  // Combine both predictors of one plane into that plane's dst buffer.
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    combine_interintra_highbd(
+        mbmi->interintra_mode, mbmi->use_wedge_interintra,
+        mbmi->interintra_wedge_index, mbmi->interintra_wedge_sign, bsize,
+        plane_bsize, pd->dst.buf, pd->dst.stride, inter_pred, inter_stride,
+        intra_pred, intra_stride, xd->bd);
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  combine_interintra(mbmi->interintra_mode, mbmi->use_wedge_interintra,
+                     mbmi->interintra_wedge_index, mbmi->interintra_wedge_sign,
+                     bsize, plane_bsize, pd->dst.buf, pd->dst.stride,
+                     inter_pred, inter_stride, intra_pred, intra_stride);
+}
+
+void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred,
+                                          int ystride, BLOCK_SIZE bsize) {
+  // Plane 0 only: build the intra predictor into a local scratch buffer with
+  // stride MAX_SB_SIZE, then pass it together with the inter predictor
+  // (ypred) to vp10_combine_interintra().
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    DECLARE_ALIGNED(16, uint16_t, buf16[MAX_SB_SQUARE]);
+    uint8_t *const buf = CONVERT_TO_BYTEPTR(buf16);
+    vp10_build_intra_predictors_for_interintra(xd, bsize, 0, buf, MAX_SB_SIZE);
+    vp10_combine_interintra(xd, bsize, 0, ypred, ystride, buf, MAX_SB_SIZE);
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  {
+    DECLARE_ALIGNED(16, uint8_t, buf[MAX_SB_SQUARE]);
+    vp10_build_intra_predictors_for_interintra(xd, bsize, 0, buf, MAX_SB_SIZE);
+    vp10_combine_interintra(xd, bsize, 0, ypred, ystride, buf, MAX_SB_SIZE);
+  }
+}
+
+void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred,
+                                          int ustride, int plane,
+                                          BLOCK_SIZE bsize) {
+  // Same flow as the luma variant, for the caller-selected plane.
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    DECLARE_ALIGNED(16, uint16_t, buf16[MAX_SB_SQUARE]);
+    uint8_t *const buf = CONVERT_TO_BYTEPTR(buf16);
+    vp10_build_intra_predictors_for_interintra(xd, bsize, plane, buf,
+                                               MAX_SB_SIZE);
+    vp10_combine_interintra(xd, bsize, plane, upred, ustride, buf, MAX_SB_SIZE);
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  {
+    DECLARE_ALIGNED(16, uint8_t, buf[MAX_SB_SQUARE]);
+    vp10_build_intra_predictors_for_interintra(xd, bsize, plane, buf,
+                                               MAX_SB_SIZE);
+    vp10_combine_interintra(xd, bsize, plane, upred, ustride, buf, MAX_SB_SIZE);
+  }
+}
+
+void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred,
+ uint8_t *vpred, int ustride,
+ int vstride, BLOCK_SIZE bsize) {
+ vp10_build_interintra_predictors_sbc(xd, upred, ustride, 1, bsize); // U
+ vp10_build_interintra_predictors_sbc(xd, vpred, vstride, 2, bsize); // V
+}
+
+void vp10_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred,
+ uint8_t *upred, uint8_t *vpred,
+ int ystride, int ustride, int vstride,
+ BLOCK_SIZE bsize) {
+ vp10_build_interintra_predictors_sby(xd, ypred, ystride, bsize); // plane 0
+ vp10_build_interintra_predictors_sbuv(xd, upred, vpred, ustride, vstride,
+ bsize); // planes 1 and 2
+}
+
+// Builds the inter predictor for one reference (ref) into ext_dst instead of
+// the plane's dst buffer; used by the encoder to search wedges efficiently.
+static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane,
+ int block, int bw, int bh, int x,
+ int y, int w, int h, int mi_x,
+ int mi_y, int ref,
+ uint8_t *const ext_dst,
+ int ext_dst_stride) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ const MODE_INFO *mi = xd->mi[0];
+
+ const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+ struct buf_2d *const pre_buf = &pd->pre[ref];
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *const dst =
+ (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? CONVERT_TO_BYTEPTR(ext_dst)
+ : ext_dst) +
+ ext_dst_stride * y + x;
+#else
+ uint8_t *const dst = ext_dst + ext_dst_stride * y + x;
+#endif
+ const MV mv = mi->mbmi.sb_type < BLOCK_8X8
+ ? average_split_mvs(pd, mi, ref, block)
+ : mi->mbmi.mv[ref].as_mv;
+
+ // TODO(jkoleszar): This clamping is done in the incorrect place for the
+ // scaling case. It needs to be done on the scaled MV, not the pre-scaling
+ // MV. Note however that it performs the subsampling aware scaling so
+ // that the result is always q4.
+ // mv_precision precision is MV_PRECISION_Q4.
+ const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, pd->subsampling_x,
+ pd->subsampling_y);
+
+ uint8_t *pre;
+ MV32 scaled_mv;
+ int xs, ys, subpel_x, subpel_y;
+ const int is_scaled = vp10_is_scaled(sf);
+
+ if (is_scaled) {
+ pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
+ scaled_mv = vp10_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+ xs = sf->x_step_q4;
+ ys = sf->y_step_q4;
+ } else {
+ pre = pre_buf->buf + (y * pre_buf->stride + x);
+ scaled_mv.row = mv_q4.row;
+ scaled_mv.col = mv_q4.col;
+ xs = ys = 16; // fixed step when the reference is not scaled
+ }
+
+ subpel_x = scaled_mv.col & SUBPEL_MASK; // fractional part of the MV
+ subpel_y = scaled_mv.row & SUBPEL_MASK;
+ pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
+ (scaled_mv.col >> SUBPEL_BITS); // whole-sample part moves the pointer
+
+ vp10_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, subpel_x,
+ subpel_y, sf, w, h, 0, mi->mbmi.interp_filter, xs,
+ ys, xd);
+}
+
+void vp10_build_inter_predictors_for_planes_single_buf(
+ MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row,
+ int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]) {
+ int plane;
+ const int mi_x = mi_col * MI_SIZE;
+ const int mi_y = mi_row * MI_SIZE;
+ for (plane = plane_from; plane <= plane_to; ++plane) { // inclusive range
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(bsize, &xd->plane[plane]);
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+ const int bw = 4 * num_4x4_w;
+ const int bh = 4 * num_4x4_h;
+
+ if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { // sub-8x8: one call per 4x4
+ int x, y;
+ assert(bsize == BLOCK_8X8);
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x)
+ build_inter_predictors_single_buf(
+ xd, plane, y * 2 + x, bw, bh, 4 * x, 4 * y, 4, 4, mi_x, mi_y, ref,
+ ext_dst[plane], ext_dst_stride[plane]);
+ } else { // the whole bw x bh plane block in a single call
+ build_inter_predictors_single_buf(xd, plane, 0, bw, bh, 0, 0, bw, bh,
+ mi_x, mi_y, ref, ext_dst[plane],
+ ext_dst_stride[plane]);
+ }
+ }
+}
+
+static void build_wedge_inter_predictor_from_buf(
+ MACROBLOCKD *xd, int plane, int x, int y, int w, int h, uint8_t *ext_dst0,
+ int ext_dst_stride0, uint8_t *ext_dst1, int ext_dst_stride1) {
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const int is_compound = has_second_ref(mbmi);
+ MACROBLOCKD_PLANE *const pd = &xd->plane[plane];
+ struct buf_2d *const dst_buf = &pd->dst;
+ uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
+
+ if (is_compound && is_interinter_wedge_used(mbmi->sb_type) &&
+ mbmi->use_wedge_interinter) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ build_masked_compound_wedge_highbd(
+ dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
+ CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1,
+ mbmi->interinter_wedge_index, mbmi->interinter_wedge_sign,
+ mbmi->sb_type, h, w, xd->bd);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ build_masked_compound_wedge(
+ dst, dst_buf->stride, ext_dst0, ext_dst_stride0, ext_dst1,
+ ext_dst_stride1, mbmi->interinter_wedge_index,
+ mbmi->interinter_wedge_sign, mbmi->sb_type, h, w);
+ } else { // no wedge compound: dst is produced from ext_dst0 alone
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ vpx_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
+ dst, dst_buf->stride, NULL, 0, NULL, 0, w, h,
+ xd->bd);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vpx_convolve_copy(ext_dst0, ext_dst_stride0, dst, dst_buf->stride, NULL,
+ 0, NULL, 0, w, h);
+ }
+}
+
+void vp10_build_wedge_inter_predictor_from_buf(
+ MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to,
+ uint8_t *ext_dst0[3], int ext_dst_stride0[3], uint8_t *ext_dst1[3],
+ int ext_dst_stride1[3]) {
+ int plane;
+ for (plane = plane_from; plane <= plane_to; ++plane) { // inclusive range
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(bsize, &xd->plane[plane]);
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+
+ if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) { // sub-8x8: one call per 4x4
+ int x, y;
+ assert(bsize == BLOCK_8X8);
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x)
+ build_wedge_inter_predictor_from_buf(
+ xd, plane, 4 * x, 4 * y, 4, 4, ext_dst0[plane],
+ ext_dst_stride0[plane], ext_dst1[plane], ext_dst_stride1[plane]);
+ } else { // the whole plane block in a single call
+ const int bw = 4 * num_4x4_w;
+ const int bh = 4 * num_4x4_h;
+ build_wedge_inter_predictor_from_buf(
+ xd, plane, 0, 0, bw, bh, ext_dst0[plane], ext_dst_stride0[plane],
+ ext_dst1[plane], ext_dst_stride1[plane]);
+ }
+ }
+}
+#endif // CONFIG_EXT_INTER
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
new file mode 100644
index 0000000..092926d
--- /dev/null
+++ b/av1/common/reconinter.h
@@ -0,0 +1,596 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_RECONINTER_H_
+#define VP10_COMMON_RECONINTER_H_
+
+#include "av1/common/filter.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/vp10_convolve.h"
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static INLINE void inter_predictor(const uint8_t *src, int src_stride,
+                                   uint8_t *dst, int dst_stride,
+                                   const int subpel_x, const int subpel_y,
+                                   const struct scale_factors *sf, int w, int h,
+                                   int ref_idx,
+#if CONFIG_DUAL_FILTER
+                                   const INTERP_FILTER *interp_filter,
+#else
+                                   const INTERP_FILTER interp_filter,
+#endif
+                                   int xs, int ys) {
+#if CONFIG_DUAL_FILTER
+  InterpFilterParams interp_filter_params_x =
+      vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
+  InterpFilterParams interp_filter_params_y =
+      vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
+#else
+  InterpFilterParams interp_filter_params =
+      vp10_get_interp_filter_params(interp_filter);
+#endif
+  // SUBPEL_TAPS-tap filters use the sf->predict table; others vp10_convolve().
+#if CONFIG_DUAL_FILTER
+  if (interp_filter_params_x.taps == SUBPEL_TAPS &&
+      interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) {
+    const int16_t *kernel_x =
+        vp10_get_interp_filter_subpel_kernel(interp_filter_params_x, subpel_x);
+    const int16_t *kernel_y =
+        vp10_get_interp_filter_subpel_kernel(interp_filter_params_y, subpel_y);
+#else
+  if (interp_filter_params.taps == SUBPEL_TAPS) {
+    const int16_t *kernel_x =
+        vp10_get_interp_filter_subpel_kernel(interp_filter_params, subpel_x);
+    const int16_t *kernel_y =
+        vp10_get_interp_filter_subpel_kernel(interp_filter_params, subpel_y);
+#endif
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+    if (IsInterpolatingFilter(interp_filter)) {
+      // Interpolating filter. Index with ref_idx: this function has no `ref`.
+      sf->predict[subpel_x != 0][subpel_y != 0][ref_idx](
+          src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h);
+    } else {
+      sf->predict_ni[subpel_x != 0][subpel_y != 0][ref_idx](
+          src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h);
+    }
+#else
+    sf->predict[subpel_x != 0][subpel_y != 0][ref_idx](
+        src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h);
+#endif  // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+  } else {
+    // ref_idx > 0 means this is the second reference frame: the first
+    // reference's prediction is already in dst, so ref_idx is passed on to
+    // vp10_convolve() to make it average the first and second results.
+    vp10_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
+                  subpel_x, xs, subpel_y, ys, ref_idx);
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ const int subpel_x,
+ const int subpel_y,
+ const struct scale_factors *sf, int w,
+ int h, int ref,
+#if CONFIG_DUAL_FILTER
+ const INTERP_FILTER *interp_filter,
+#else
+ const INTERP_FILTER interp_filter,
+#endif
+ int xs, int ys, int bd) {
+#if CONFIG_DUAL_FILTER
+ InterpFilterParams interp_filter_params_x =
+ vp10_get_interp_filter_params(interp_filter[1 + 2 * ref]);
+ InterpFilterParams interp_filter_params_y =
+ vp10_get_interp_filter_params(interp_filter[0 + 2 * ref]);
+#else
+ InterpFilterParams interp_filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+#endif
+ // SUBPEL_TAPS-tap filters use sf->highbd_predict; others vp10_highbd_convolve.
+#if CONFIG_DUAL_FILTER
+ if (interp_filter_params_x.taps == SUBPEL_TAPS &&
+ interp_filter_params_y.taps == SUBPEL_TAPS && w > 2 && h > 2) {
+ const int16_t *kernel_x =
+ vp10_get_interp_filter_subpel_kernel(interp_filter_params_x, subpel_x);
+ const int16_t *kernel_y =
+ vp10_get_interp_filter_subpel_kernel(interp_filter_params_y, subpel_y);
+#else
+ if (interp_filter_params.taps == SUBPEL_TAPS) {
+ const int16_t *kernel_x =
+ vp10_get_interp_filter_subpel_kernel(interp_filter_params, subpel_x);
+ const int16_t *kernel_y =
+ vp10_get_interp_filter_subpel_kernel(interp_filter_params, subpel_y);
+#endif // CONFIG_DUAL_FILTER
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ if (IsInterpolatingFilter(interp_filter)) {
+ // Interpolating filter
+ sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h,
+ bd);
+ } else {
+ sf->highbd_predict_ni[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h,
+ bd);
+ }
+#else
+ sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
+ src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h, bd);
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ } else {
+ // ref > 0 means this is the second reference frame. The first reference
+ // frame's prediction result is already in dst, so the first and second
+ // results need to be averaged; avg tells vp10_highbd_convolve() to do so.
+ int avg = ref > 0;
+ vp10_highbd_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
+ subpel_x, xs, subpel_y, ys, avg, bd);
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_EXT_INTER
+// Set to one to use larger codebooks
+#define USE_LARGE_WEDGE_CODEBOOK 0
+
+#if USE_LARGE_WEDGE_CODEBOOK
+#define MAX_WEDGE_TYPES (1 << 5)
+#else
+#define MAX_WEDGE_TYPES (1 << 4)
+#endif
+
+#define MAX_WEDGE_SIZE_LOG2 5 // 32x32
+#define MAX_WEDGE_SIZE (1 << MAX_WEDGE_SIZE_LOG2)
+#define MAX_WEDGE_SQUARE (MAX_WEDGE_SIZE * MAX_WEDGE_SIZE)
+
+#define WEDGE_WEIGHT_BITS 6
+
+#define WEDGE_NONE -1
+
+// Wedge directions. Angles are measured anti-clockwise from the horizontal.
+typedef enum {
+ WEDGE_HORIZONTAL = 0,
+ WEDGE_VERTICAL = 1,
+ WEDGE_OBLIQUE27 = 2,
+ WEDGE_OBLIQUE63 = 3,
+ WEDGE_OBLIQUE117 = 4,
+ WEDGE_OBLIQUE153 = 5,
+ WEDGE_DIRECTIONS // number of directions above, not a direction itself
+} WedgeDirectionType;
+
+// One codebook entry, a 3-tuple: {direction, x_offset, y_offset}
+typedef struct {
+ WedgeDirectionType direction;
+ int x_offset;
+ int y_offset;
+} wedge_code_type;
+
+typedef uint8_t *wedge_masks_type[MAX_WEDGE_TYPES];
+
+typedef struct { // one entry per block size in wedge_params_lookup[]
+ int bits; // the is_*_wedge_used() helpers report "used" only when bits > 0
+ const wedge_code_type *codebook;
+ uint8_t *signflip;
+ int smoother;
+ wedge_masks_type *masks; // read as masks[wedge_sign][wedge_index]
+} wedge_params_type;
+
+extern const wedge_params_type wedge_params_lookup[BLOCK_SIZES];
+
+static INLINE int get_wedge_bits_lookup(BLOCK_SIZE sb_type) {
+ return wedge_params_lookup[sb_type].bits; // raw table value for this size
+}
+
+static INLINE int is_interinter_wedge_used(BLOCK_SIZE sb_type) {
+  // Wedges are available only when the block size's table entry has bits > 0.
+  return wedge_params_lookup[sb_type].bits > 0;
+}
+
+static INLINE int get_interinter_wedge_bits(BLOCK_SIZE sb_type) {
+  const int table_bits = wedge_params_lookup[sb_type].bits;
+  return table_bits > 0 ? table_bits + 1 : 0;  // +1 only when wedges exist
+}
+
+static INLINE int is_interintra_wedge_used(BLOCK_SIZE sb_type) {
+  // Same test as the inter-inter variant: the table entry must have bits > 0.
+  return wedge_params_lookup[sb_type].bits > 0;
+}
+
+static INLINE int get_interintra_wedge_bits(BLOCK_SIZE sb_type) {
+ return wedge_params_lookup[sb_type].bits; // table value, no extra bit added
+}
+#endif // CONFIG_EXT_INTER
+
+void build_inter_predictors(MACROBLOCKD *xd, int plane,
+#if CONFIG_OBMC
+ int mi_col_offset, int mi_row_offset,
+#endif // CONFIG_OBMC
+ int block, int bw, int bh, int x, int y, int w,
+ int h,
+#if CONFIG_SUPERTX && CONFIG_EXT_INTER
+ int wedge_offset_x, int wedge_offset_y,
+#endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
+ int mi_x, int mi_y);
+
+static INLINE void vp10_make_inter_predictor(
+ const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
+ const int subpel_x, const int subpel_y, const struct scale_factors *sf,
+ int w, int h, int ref,
+#if CONFIG_DUAL_FILTER
+ const INTERP_FILTER *interp_filter,
+#else
+ const INTERP_FILTER interp_filter,
+#endif
+ int xs, int ys, const MACROBLOCKD *xd) {
+ (void)xd; // xd is only read when CONFIG_VP9_HIGHBITDEPTH is enabled
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
+ sf, w, h, ref, interp_filter, xs, ys, xd->bd);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
+ h, ref, interp_filter, xs, ys);
+}
+
+#if CONFIG_EXT_INTER
+void vp10_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
+ uint8_t *dst, int dst_stride,
+ const int subpel_x, const int subpel_y,
+ const struct scale_factors *sf, int w,
+ int h,
+#if CONFIG_DUAL_FILTER
+ const INTERP_FILTER *interp_filter,
+#else
+ const INTERP_FILTER interp_filter,
+#endif
+ int xs, int ys,
+#if CONFIG_SUPERTX
+ int wedge_offset_x, int wedge_offset_y,
+#endif // CONFIG_SUPERTX
+ const MACROBLOCKD *xd);
+#endif // CONFIG_EXT_INTER
+
+static INLINE int round_mv_comp_q4(int value) {
+  return (value + (value < 0 ? -2 : 2)) / 4;  // nearest, ties away from 0
+}
+
+static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) {
+  int row_sum = 0, col_sum = 0, b;
+  MV res;
+  for (b = 0; b < 4; ++b) {  // add up bmi[0..3] for reference idx
+    row_sum += mi->bmi[b].as_mv[idx].as_mv.row;
+    col_sum += mi->bmi[b].as_mv[idx].as_mv.col;
+  }
+  res.row = round_mv_comp_q4(row_sum);
+  res.col = round_mv_comp_q4(col_sum);
+  return res;
+}
+
+static INLINE int round_mv_comp_q2(int value) {
+  return (value + (value < 0 ? -1 : 1)) / 2;  // nearest, ties away from 0
+}
+
+static MV mi_mv_pred_q2(const MODE_INFO *mi, int idx, int block0, int block1) {
+  const MV *const a = &mi->bmi[block0].as_mv[idx].as_mv;
+  const MV *const b = &mi->bmi[block1].as_mv[idx].as_mv;
+  MV res = { round_mv_comp_q2(a->row + b->row),
+             round_mv_comp_q2(a->col + b->col) };  // rounded mean of the two
+  return res;
+}
+
+// TODO(jkoleszar): yet another mv clamping function :-(
+static INLINE MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd,
+ const MV *src_mv, int bw, int bh,
+ int ss_x, int ss_y) {
+ // If the MV points so far into the UMV border that no visible pixels
+ // are used for reconstruction, the subpel part of the MV can be
+ // discarded and the MV limited to 16 pixels with equivalent results.
+ const int spel_left = (VPX_INTERP_EXTEND + bw) << SUBPEL_BITS;
+ const int spel_right = spel_left - SUBPEL_SHIFTS;
+ const int spel_top = (VPX_INTERP_EXTEND + bh) << SUBPEL_BITS;
+ const int spel_bottom = spel_top - SUBPEL_SHIFTS;
+ MV clamped_mv = { src_mv->row * (1 << (1 - ss_y)),
+ src_mv->col * (1 << (1 - ss_x)) };
+ assert(ss_x <= 1); // keeps the (1 - ss_x) shift count non-negative
+ assert(ss_y <= 1); // keeps the (1 - ss_y) shift count non-negative
+
+ clamp_mv(&clamped_mv, xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left,
+ xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right,
+ xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top,
+ xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom);
+
+ return clamped_mv;
+}
+
+static INLINE MV average_split_mvs(const struct macroblockd_plane *pd,
+ const MODE_INFO *mi, int ref, int block) {
+ const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0);
+ MV res = { 0, 0 };
+ switch (ss_idx) {
+ case 0: res = mi->bmi[block].as_mv[ref].as_mv; break; // no subsampling
+ case 1: res = mi_mv_pred_q2(mi, ref, block, block + 2); break; // y only
+ case 2: res = mi_mv_pred_q2(mi, ref, block, block + 1); break; // x only
+ case 3: res = mi_mv_pred_q4(mi, ref); break; // x and y
+ default: assert(ss_idx <= 3 && ss_idx >= 0);
+ }
+ return res;
+}
+
+void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, int i,
+ int ir, int ic, int mi_row, int mi_col);
+
+void vp10_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize);
+
+void vp10_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int plane);
+
+void vp10_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize);
+
+void vp10_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize);
+
+#if CONFIG_SUPERTX
+void vp10_build_inter_predictors_sb_sub8x8_extend(MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+ int mi_row_ori,
+ int mi_col_ori,
+#endif // CONFIG_EXT_INTER
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int block);
+
+void vp10_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
+#if CONFIG_EXT_INTER
+ int mi_row_ori, int mi_col_ori,
+#endif // CONFIG_EXT_INTER
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize);
+struct macroblockd_plane;
+void vp10_build_masked_inter_predictor_complex(
+ MACROBLOCKD *xd, uint8_t *dst, int dst_stride, const uint8_t *pre,
+ int pre_stride, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori,
+ BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition,
+ int plane);
+#endif // CONFIG_SUPERTX
+
+void vp10_build_inter_predictor(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride, const MV *mv_q3,
+ const struct scale_factors *sf, int w, int h,
+ int do_avg,
+#if CONFIG_DUAL_FILTER
+ const INTERP_FILTER *interp_filter,
+#else
+ const INTERP_FILTER interp_filter,
+#endif
+ enum mv_precision precision, int x, int y);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_build_inter_predictor(
+ const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
+ const MV *mv_q3, const struct scale_factors *sf, int w, int h, int do_avg,
+#if CONFIG_DUAL_FILTER
+ const INTERP_FILTER *interp_filter,
+#else
+ const INTERP_FILTER interp_filter,
+#endif
+ enum mv_precision precision, int x, int y, int bd);
+#endif
+
+static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride,
+ const struct scale_factors *sf) {
+ const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset;
+ const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset;
+ return y * stride + x; // offsets are used as given when sf is NULL
+}
+
+static INLINE void setup_pred_plane(struct buf_2d *dst, uint8_t *src, int width,
+ int height, int stride, int mi_row,
+ int mi_col,
+ const struct scale_factors *scale,
+ int subsampling_x, int subsampling_y) {
+ const int x = (MI_SIZE * mi_col) >> subsampling_x;
+ const int y = (MI_SIZE * mi_row) >> subsampling_y;
+ dst->buf = src + scaled_buffer_offset(x, y, stride, scale); // block position
+ dst->buf0 = src; // src pointer as passed in, without the block offset
+ dst->width = width;
+ dst->height = height;
+ dst->stride = stride;
+}
+
+void vp10_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
+ const YV12_BUFFER_CONFIG *src, int mi_row,
+ int mi_col);
+
+void vp10_setup_pre_planes(MACROBLOCKD *xd, int idx,
+ const YV12_BUFFER_CONFIG *src, int mi_row,
+ int mi_col, const struct scale_factors *sf);
+
+#if CONFIG_DUAL_FILTER
+// Detects whether the block has a sub-pixel motion vector component.
+// dir bit 0 picks col (1) or row (0); dir >> 1 picks the reference.
+static INLINE int has_subpel_mv_component(const MODE_INFO *const mi,
+ const MACROBLOCKD *const xd,
+ int dir) {
+ const MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ int plane;
+ int ref = (dir >> 1);
+
+ if (bsize >= BLOCK_8X8) {
+ if (dir & 0x01) {
+ if (mbmi->mv[ref].as_mv.col & SUBPEL_MASK) return 1;
+ } else {
+ if (mbmi->mv[ref].as_mv.row & SUBPEL_MASK) return 1;
+ }
+ } else { // sub-8x8: test the per-plane MV of every 4x4 in every plane
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const PARTITION_TYPE bp = BLOCK_8X8 - bsize;
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int have_vsplit = bp != PARTITION_HORZ;
+ const int have_hsplit = bp != PARTITION_VERT;
+ const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+ const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+
+ int x, y;
+ for (y = 0; y < num_4x4_h; ++y) {
+ for (x = 0; x < num_4x4_w; ++x) {
+ const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
+ if (dir & 0x01) {
+ if (mv.col & SUBPEL_MASK) return 1;
+ } else {
+ if (mv.row & SUBPEL_MASK) return 1;
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+#endif
+
+#if CONFIG_EXT_INTERP
+static INLINE int vp10_is_interp_needed(const MACROBLOCKD *const xd) {
+ MODE_INFO *const mi = xd->mi[0];
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ const int is_compound = has_second_ref(mbmi);
+ int intpel_mv = 1;
+ int plane;
+
+#if SUPPORT_NONINTERPOLATING_FILTERS
+ // TODO(debargha): This is currently only for experimentation
+ // with non-interpolating filters. Remove later.
+ // If any of the filters are non-interpolating, then indicate the
+ // interpolation filter always.
+ int i;
+ for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
+ if (!IsInterpolatingFilter(i)) return 1;
+ }
+#endif
+
+ // For scaled references, interpolation filter is indicated all the time.
+ if (vp10_is_scaled(&xd->block_refs[0]->sf)) return 1;
+ if (is_compound && vp10_is_scaled(&xd->block_refs[1]->sf)) return 1;
+
+ if (bsize < BLOCK_8X8) {
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const PARTITION_TYPE bp = BLOCK_8X8 - bsize;
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int have_vsplit = bp != PARTITION_HORZ;
+ const int have_hsplit = bp != PARTITION_VERT;
+ const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
+ const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
+ int ref;
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ int x, y;
+ for (y = 0; y < num_4x4_h; ++y)
+ for (x = 0; x < num_4x4_w; ++x) {
+ const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
+ if (mv_has_subpel(&mv)) return 1;
+ }
+ }
+ }
+ return 0; // mv_has_subpel() was false for every split MV checked above
+ } else {
+ intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
+ if (is_compound && intpel_mv) {
+ intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
+ }
+ }
+ return !intpel_mv;
+}
+#endif // CONFIG_EXT_INTERP
+
+#if CONFIG_OBMC
+const uint8_t *vp10_get_obmc_mask(int length);
+void vp10_build_obmc_inter_prediction(VP10_COMMON *cm, MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *above[MAX_MB_PLANE],
+ int above_stride[MAX_MB_PLANE],
+ uint8_t *left[MAX_MB_PLANE],
+ int left_stride[MAX_MB_PLANE]);
+void vp10_build_prediction_by_above_preds(VP10_COMMON *cm, MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_width[MAX_MB_PLANE],
+ int tmp_height[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]);
+void vp10_build_prediction_by_left_preds(VP10_COMMON *cm, MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ uint8_t *tmp_buf[MAX_MB_PLANE],
+ int tmp_width[MAX_MB_PLANE],
+ int tmp_height[MAX_MB_PLANE],
+ int tmp_stride[MAX_MB_PLANE]);
+#endif // CONFIG_OBMC
+
+#if CONFIG_EXT_INTER
+#define MASK_MASTER_SIZE (2 * MAX_SB_SIZE)
+#define MASK_MASTER_STRIDE (2 * MAX_SB_SIZE)
+
+void vp10_init_wedge_masks(void);  // (void): a real prototype, no arguments
+
+static INLINE const uint8_t *vp10_get_contiguous_soft_mask(int wedge_index,
+ int wedge_sign,
+ BLOCK_SIZE sb_type) {
+ return wedge_params_lookup[sb_type].masks[wedge_sign][wedge_index];
+}
+
+const uint8_t *vp10_get_soft_mask(int wedge_index, int wedge_sign,
+ BLOCK_SIZE sb_type, int wedge_offset_x,
+ int wedge_offset_y);
+
+void vp10_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred,
+ uint8_t *upred, uint8_t *vpred,
+ int ystride, int ustride, int vstride,
+ BLOCK_SIZE bsize);
+void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred,
+ int ystride, BLOCK_SIZE bsize);
+void vp10_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred,
+ int ustride, int plane,
+ BLOCK_SIZE bsize);
+void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred,
+ uint8_t *vpred, int ustride,
+ int vstride, BLOCK_SIZE bsize);
+
+void vp10_build_intra_predictors_for_interintra(MACROBLOCKD *xd,
+ BLOCK_SIZE bsize, int plane,
+ uint8_t *intra_pred,
+ int intra_stride);
+void vp10_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
+ const uint8_t *inter_pred, int inter_stride,
+ const uint8_t *intra_pred, int intra_stride);
+void vp10_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred,
+ uint8_t *vpred, int ustride,
+ int vstride, BLOCK_SIZE bsize);
+void vp10_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred,
+ int ystride, BLOCK_SIZE bsize);
+
+// Encoder only
+void vp10_build_inter_predictors_for_planes_single_buf(
+ MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row,
+ int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]);
+void vp10_build_wedge_inter_predictor_from_buf(
+ MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to,
+ uint8_t *ext_dst0[3], int ext_dst_stride0[3], uint8_t *ext_dst1[3],
+ int ext_dst_stride1[3]);
+#endif // CONFIG_EXT_INTER
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_RECONINTER_H_
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
new file mode 100644
index 0000000..801f61e
--- /dev/null
+++ b/av1/common/reconintra.c
@@ -0,0 +1,1574 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "aom_ports/system_state.h"
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#include "aom_dsp/vpx_dsp_common.h"
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/vpx_once.h"
+#if CONFIG_EXT_INTRA
+#include "av1/common/intra_filters.h"
+#endif
+#include "av1/common/reconintra.h"
+#include "av1/common/onyxc_int.h"
+
+enum {  // Bit flags, OR-ed together per mode in the *extend_modes tables.
+ NEED_LEFT = 1 << 1,  // bit 0 is not used by this enum
+ NEED_ABOVE = 1 << 2,
+ NEED_ABOVERIGHT = 1 << 3,
+ NEED_ABOVELEFT = 1 << 4,
+ NEED_BOTTOMLEFT = 1 << 5,  // largest flag is 32, so a mask fits in uint8_t
+};
+
+static const uint8_t extend_modes[INTRA_MODES] = {  // NEED_* mask per mode
+ NEED_ABOVE | NEED_LEFT, // DC
+ NEED_ABOVE, // V
+ NEED_LEFT, // H
+ NEED_ABOVE | NEED_ABOVERIGHT, // D45
+ NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D135
+ NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D117
+ NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // D153
+ NEED_LEFT | NEED_BOTTOMLEFT, // D207
+ NEED_ABOVE | NEED_ABOVERIGHT, // D63
+ NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // TM
+};
+
+static const uint8_t orders_128x128[1] = { 0 };
+static const uint8_t orders_128x64[2] = { 0, 1 };
+static const uint8_t orders_64x128[2] = { 0, 1 };
+static const uint8_t orders_64x64[4] = {
+ 0, 1, 2, 3,
+};
+static const uint8_t orders_64x32[8] = {
+ 0, 2, 1, 3, 4, 6, 5, 7,
+};
+static const uint8_t orders_32x64[8] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+};
+static const uint8_t orders_32x32[16] = {
+ 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15,
+};
+static const uint8_t orders_32x16[32] = {
+ 0, 2, 8, 10, 1, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15,
+ 16, 18, 24, 26, 17, 19, 25, 27, 20, 22, 28, 30, 21, 23, 29, 31,
+};
+static const uint8_t orders_16x32[32] = {
+ 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15,
+ 16, 17, 18, 19, 24, 25, 26, 27, 20, 21, 22, 23, 28, 29, 30, 31,
+};
+static const uint8_t orders_16x16[64] = {
+ 0, 1, 4, 5, 16, 17, 20, 21, 2, 3, 6, 7, 18, 19, 22, 23,
+ 8, 9, 12, 13, 24, 25, 28, 29, 10, 11, 14, 15, 26, 27, 30, 31,
+ 32, 33, 36, 37, 48, 49, 52, 53, 34, 35, 38, 39, 50, 51, 54, 55,
+ 40, 41, 44, 45, 56, 57, 60, 61, 42, 43, 46, 47, 58, 59, 62, 63,
+};
+
+#if CONFIG_EXT_PARTITION
+static const uint8_t orders_16x8[128] = {
+ 0, 2, 8, 10, 32, 34, 40, 42, 1, 3, 9, 11, 33, 35, 41, 43,
+ 4, 6, 12, 14, 36, 38, 44, 46, 5, 7, 13, 15, 37, 39, 45, 47,
+ 16, 18, 24, 26, 48, 50, 56, 58, 17, 19, 25, 27, 49, 51, 57, 59,
+ 20, 22, 28, 30, 52, 54, 60, 62, 21, 23, 29, 31, 53, 55, 61, 63,
+ 64, 66, 72, 74, 96, 98, 104, 106, 65, 67, 73, 75, 97, 99, 105, 107,
+ 68, 70, 76, 78, 100, 102, 108, 110, 69, 71, 77, 79, 101, 103, 109, 111,
+ 80, 82, 88, 90, 112, 114, 120, 122, 81, 83, 89, 91, 113, 115, 121, 123,
+ 84, 86, 92, 94, 116, 118, 124, 126, 85, 87, 93, 95, 117, 119, 125, 127,
+};
+static const uint8_t orders_8x16[128] = {
+ 0, 1, 2, 3, 8, 9, 10, 11, 32, 33, 34, 35, 40, 41, 42, 43,
+ 4, 5, 6, 7, 12, 13, 14, 15, 36, 37, 38, 39, 44, 45, 46, 47,
+ 16, 17, 18, 19, 24, 25, 26, 27, 48, 49, 50, 51, 56, 57, 58, 59,
+ 20, 21, 22, 23, 28, 29, 30, 31, 52, 53, 54, 55, 60, 61, 62, 63,
+ 64, 65, 66, 67, 72, 73, 74, 75, 96, 97, 98, 99, 104, 105, 106, 107,
+ 68, 69, 70, 71, 76, 77, 78, 79, 100, 101, 102, 103, 108, 109, 110, 111,
+ 80, 81, 82, 83, 88, 89, 90, 91, 112, 113, 114, 115, 120, 121, 122, 123,
+ 84, 85, 86, 87, 92, 93, 94, 95, 116, 117, 118, 119, 124, 125, 126, 127,
+};
+static const uint8_t orders_8x8[256] = {
+ 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84,
+ 85, 2, 3, 6, 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83,
+ 86, 87, 8, 9, 12, 13, 24, 25, 28, 29, 72, 73, 76, 77, 88,
+ 89, 92, 93, 10, 11, 14, 15, 26, 27, 30, 31, 74, 75, 78, 79,
+ 90, 91, 94, 95, 32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100,
+ 101, 112, 113, 116, 117, 34, 35, 38, 39, 50, 51, 54, 55, 98, 99,
+ 102, 103, 114, 115, 118, 119, 40, 41, 44, 45, 56, 57, 60, 61, 104,
+ 105, 108, 109, 120, 121, 124, 125, 42, 43, 46, 47, 58, 59, 62, 63,
+ 106, 107, 110, 111, 122, 123, 126, 127, 128, 129, 132, 133, 144, 145, 148,
+ 149, 192, 193, 196, 197, 208, 209, 212, 213, 130, 131, 134, 135, 146, 147,
+ 150, 151, 194, 195, 198, 199, 210, 211, 214, 215, 136, 137, 140, 141, 152,
+ 153, 156, 157, 200, 201, 204, 205, 216, 217, 220, 221, 138, 139, 142, 143,
+ 154, 155, 158, 159, 202, 203, 206, 207, 218, 219, 222, 223, 160, 161, 164,
+ 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, 162, 163,
+ 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, 168,
+ 169, 172, 173, 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253,
+ 170, 171, 174, 175, 186, 187, 190, 191, 234, 235, 238, 239, 250, 251, 254,
+ 255,
+};
+
+/* clang-format off */
+static const uint8_t *const orders[BLOCK_SIZES] = {
+ // 4X4
+ orders_8x8,
+ // 4X8, 8X4, 8X8
+ orders_8x8, orders_8x8, orders_8x8,
+ // 8X16, 16X8, 16X16
+ orders_8x16, orders_16x8, orders_16x16,
+ // 16X32, 32X16, 32X32
+ orders_16x32, orders_32x16, orders_32x32,
+ // 32X64, 64X32, 64X64
+ orders_32x64, orders_64x32, orders_64x64,
+ // 64x128, 128x64, 128x128
+ orders_64x128, orders_128x64, orders_128x128
+};
+/* clang-format on */
+#else
+/* clang-format off */
+static const uint8_t *const orders[BLOCK_SIZES] = {
+ // 4X4
+ orders_16x16,
+ // 4X8, 8X4, 8X8
+ orders_16x16, orders_16x16, orders_16x16,
+ // 8X16, 16X8, 16X16
+ orders_16x32, orders_32x16, orders_32x32,
+ // 16X32, 32X16, 32X32
+ orders_32x64, orders_64x32, orders_64x64,
+ // 32X64, 64X32, 64X64
+ orders_64x128, orders_128x64, orders_128x128
+};
+/* clang-format on */
+#endif // CONFIG_EXT_PARTITION
+
+#if CONFIG_EXT_PARTITION_TYPES
+static const uint8_t orders_verta_64x64[4] = {
+ 0, 2, 1, 2,
+};
+static const uint8_t orders_verta_32x32[16] = {
+ 0, 2, 4, 6, 1, 2, 5, 6, 8, 10, 12, 14, 9, 10, 13, 14,
+};
+static const uint8_t orders_verta_16x16[64] = {
+ 0, 2, 4, 6, 16, 18, 20, 22, 1, 2, 5, 6, 17, 18, 21, 22,
+ 8, 10, 12, 14, 24, 26, 28, 30, 9, 10, 13, 14, 25, 26, 29, 30,
+ 32, 34, 36, 38, 48, 50, 52, 54, 33, 34, 37, 38, 49, 50, 53, 54,
+ 40, 42, 44, 46, 56, 58, 60, 62, 41, 42, 45, 46, 57, 58, 61, 62,
+};
+#if CONFIG_EXT_PARTITION
+static const uint8_t orders_verta_8x8[256] = {
+ 0, 2, 4, 6, 16, 18, 20, 22, 64, 66, 68, 70, 80, 82, 84,
+ 86, 1, 2, 5, 6, 17, 18, 21, 22, 65, 66, 69, 70, 81, 82,
+ 85, 86, 8, 10, 12, 14, 24, 26, 28, 30, 72, 74, 76, 78, 88,
+ 90, 92, 94, 9, 10, 13, 14, 25, 26, 29, 30, 73, 74, 77, 78,
+ 89, 90, 93, 94, 32, 34, 36, 38, 48, 50, 52, 54, 96, 98, 100,
+ 102, 112, 114, 116, 118, 33, 34, 37, 38, 49, 50, 53, 54, 97, 98,
+ 101, 102, 113, 114, 117, 118, 40, 42, 44, 46, 56, 58, 60, 62, 104,
+ 106, 108, 110, 120, 122, 124, 126, 41, 42, 45, 46, 57, 58, 61, 62,
+ 105, 106, 109, 110, 121, 122, 125, 126, 128, 130, 132, 134, 144, 146, 148,
+ 150, 192, 194, 196, 198, 208, 210, 212, 214, 129, 130, 133, 134, 145, 146,
+ 149, 150, 193, 194, 197, 198, 209, 210, 213, 214, 136, 138, 140, 142, 152,
+ 154, 156, 158, 200, 202, 204, 206, 216, 218, 220, 222, 137, 138, 141, 142,
+ 153, 154, 157, 158, 201, 202, 205, 206, 217, 218, 221, 222, 160, 162, 164,
+ 166, 176, 178, 180, 182, 224, 226, 228, 230, 240, 242, 244, 246, 161, 162,
+ 165, 166, 177, 178, 181, 182, 225, 226, 229, 230, 241, 242, 245, 246, 168,
+ 170, 172, 174, 184, 186, 188, 190, 232, 234, 236, 238, 248, 250, 252, 254,
+ 169, 170, 173, 174, 185, 186, 189, 190, 233, 234, 237, 238, 249, 250, 253,
+ 254,
+};
+
+/* clang-format off */
+static const uint8_t *const orders_verta[BLOCK_SIZES] = {
+ // 4X4
+ orders_verta_8x8,
+ // 4X8, 8X4, 8X8
+ orders_verta_8x8, orders_verta_8x8, orders_verta_8x8,
+ // 8X16, 16X8, 16X16
+ orders_8x16, orders_16x8, orders_verta_16x16,
+ // 16X32, 32X16, 32X32
+ orders_16x32, orders_32x16, orders_verta_32x32,
+ // 32X64, 64X32, 64X64
+ orders_32x64, orders_64x32, orders_verta_64x64,
+ // 64x128, 128x64, 128x128
+ orders_64x128, orders_128x64, orders_128x128
+};
+/* clang-format on */
+#else
+/* clang-format off */
+static const uint8_t *const orders_verta[BLOCK_SIZES] = {
+ // 4X4
+ orders_verta_16x16,
+ // 4X8, 8X4, 8X8
+ orders_verta_16x16, orders_verta_16x16, orders_verta_16x16,
+ // 8X16, 16X8, 16X16
+ orders_16x32, orders_32x16, orders_verta_32x32,
+ // 16X32, 32X16, 32X32
+ orders_32x64, orders_64x32, orders_verta_64x64,
+ // 32X64, 64X32, 64X64
+ orders_64x128, orders_128x64, orders_128x128
+};
+/* clang-format on */
+#endif // CONFIG_EXT_PARTITION
+#endif // CONFIG_EXT_PARTITION_TYPES
+
+static int vp10_has_right(BLOCK_SIZE bsize, int mi_row, int mi_col,
+                          int right_available,
+#if CONFIG_EXT_PARTITION_TYPES
+                          PARTITION_TYPE partition,
+#endif
+                          TX_SIZE txsz, int y, int x, int ss_x) {
+  // Returns 1 or 0; always 0 when right_available is 0.
+  // x and the transform step (1 << txsz) are compared against the block
+  // width counted in 4x4 units (after ss_x subsampling, at least 1).
+  const int width_log2 = mi_width_log2_lookup[bsize];
+  const int blocks_wide = VPXMAX(num_4x4_blocks_wide_lookup[bsize] >> ss_x, 1);
+  const int tx_step = 1 << txsz;
+  const int inside_block = x + tx_step < blocks_wide;
+  const uint8_t *order_tbl = orders[bsize];
+  int height_log2, row_in_sb, col_in_sb, row_shift;
+
+  // Nothing to the right at all.
+  if (!right_available) return 0;
+
+  // Block sizes 4x8 and 4x4: first column of an unsubsampled plane.
+  if (ss_x == 0 && num_4x4_blocks_wide_lookup[bsize] < 2 && x == 0) return 1;
+
+  // Below the first row, or with more of this block to the right, the answer
+  // only depends on whether the next transform block is still inside.
+  if (y != 0 || inside_block) return inside_block;
+
+#if CONFIG_EXT_PARTITION_TYPES
+  if (partition == PARTITION_VERT_A) order_tbl = orders_verta[bsize];
+#endif  // CONFIG_EXT_PARTITION_TYPES
+
+  // Position of this block inside its coding unit, in units of its own size.
+  height_log2 = mi_height_log2_lookup[bsize];
+  row_in_sb = (mi_row & MAX_MIB_MASK) >> height_log2;
+  col_in_sb = (mi_col & MAX_MIB_MASK) >> width_log2;
+
+  // If top row of coding unit
+  if (row_in_sb == 0) return 1;
+
+  // If rightmost column of coding unit
+  if (((col_in_sb + 1) << width_log2) >= MAX_MIB_SIZE) return 0;
+
+  // Compare this block's entry in the order table with the entry of the
+  // block one row up and one column right; return 1 when ours is larger.
+  row_shift = MAX_MIB_SIZE_LOG2 - width_log2;
+  return order_tbl[(row_in_sb << row_shift) + col_in_sb] >
+         order_tbl[((row_in_sb - 1) << row_shift) + col_in_sb + 1];
+}
+
+static int vp10_has_bottom(BLOCK_SIZE bsize, int mi_row, int mi_col,
+                           int bottom_available, TX_SIZE txsz, int y, int x,
+                           int ss_y) {
+  // Returns 1 or 0; always 0 when bottom_available is 0 or x is not 0.
+  int width_log2, height_log2, row_in_sb, col_in_sb, row_shift, next_y;
+  const uint8_t *order_tbl;
+
+  if (!bottom_available || x != 0) return 0;
+
+  // Handle block size 8x4 and 4x4
+  if (ss_y == 0 && num_4x4_blocks_high_lookup[bsize] < 2 && y == 0) return 1;
+
+  // The next transform row still starts inside this block.
+  width_log2 = mi_width_log2_lookup[bsize];
+  height_log2 = mi_height_log2_lookup[bsize];
+  next_y = y + (1 << txsz);
+  if (next_y < (1 << (height_log2 + 1 - ss_y))) return 1;
+
+  row_in_sb = (mi_row & MAX_MIB_MASK) >> height_log2;
+  col_in_sb = (mi_col & MAX_MIB_MASK) >> width_log2;
+  // Leftmost column of the coding unit: only test against its bottom edge.
+  if (col_in_sb == 0)
+    return (row_in_sb << (height_log2 + !ss_y)) + next_y <
+           (MAX_MIB_SIZE << !ss_y);
+  if (((row_in_sb + 1) << height_log2) >= MAX_MIB_SIZE) return 0;
+
+  // 1 when the below-left block's order-table entry is smaller than ours.
+  order_tbl = orders[bsize];
+  row_shift = MAX_MIB_SIZE_LOG2 - width_log2;
+  return order_tbl[((row_in_sb + 1) << row_shift) + col_in_sb - 1] <
+         order_tbl[(row_in_sb << row_shift) + col_in_sb];
+}
+
+typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left);
+// Tables below are filled in by vp10_init_intra_predictors_internal().
+static intra_pred_fn pred[INTRA_MODES][TX_SIZES];  // [mode][tx size]; that function assigns no pred[DC_PRED] entry
+static intra_pred_fn dc_pred[2][2][TX_SIZES];  // [0][0]=dc_128, [0][1]=dc_top, [1][0]=dc_left, [1][1]=dc
+
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above, const uint16_t *left,
+ int bd);
+static intra_high_pred_fn pred_high[INTRA_MODES][4];  // NOTE(review): 4 is hard-coded where the 8-bit table uses TX_SIZES
+static intra_high_pred_fn dc_pred_high[2][2][4];  // same index layout as dc_pred
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static void vp10_init_intra_predictors_internal(void) {
+#define INIT_NO_4X4(p, type)                  \
+  p[TX_8X8] = vpx_##type##_predictor_8x8;     \
+  p[TX_16X16] = vpx_##type##_predictor_16x16; \
+  p[TX_32X32] = vpx_##type##_predictor_32x32
+  // INIT_NO_4X4 assigns the 8x8..32x32 entries; INIT_ALL_SIZES adds 4x4.
+#define INIT_ALL_SIZES(p, type)           \
+  p[TX_4X4] = vpx_##type##_predictor_4x4; \
+  INIT_NO_4X4(p, type)
+  // Fill the file-scope predictor tables, one function per mode and size.
+  INIT_ALL_SIZES(pred[V_PRED], v);
+  INIT_ALL_SIZES(pred[H_PRED], h);
+  INIT_ALL_SIZES(pred[D207_PRED], d207e);
+  INIT_ALL_SIZES(pred[D45_PRED], d45e);
+  INIT_ALL_SIZES(pred[D63_PRED], d63e);
+  INIT_ALL_SIZES(pred[D117_PRED], d117);
+  INIT_ALL_SIZES(pred[D135_PRED], d135);
+  INIT_ALL_SIZES(pred[D153_PRED], d153);
+  INIT_ALL_SIZES(pred[TM_PRED], tm);
+  // DC variants: the two leading indices pick dc_128 / dc_top / dc_left / dc.
+  INIT_ALL_SIZES(dc_pred[0][0], dc_128);
+  INIT_ALL_SIZES(dc_pred[0][1], dc_top);
+  INIT_ALL_SIZES(dc_pred[1][0], dc_left);
+  INIT_ALL_SIZES(dc_pred[1][1], dc);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
+  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
+  INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207e);
+  INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45e);
+  INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63e);
+  INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117);
+  INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135);
+  INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153);
+  INIT_ALL_SIZES(pred_high[TM_PRED], highbd_tm);
+
+  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
+  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
+  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
+  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#undef INIT_ALL_SIZES
+#undef INIT_NO_4X4
+}
+
+#if CONFIG_EXT_INTRA
+
+static const uint8_t ext_intra_extend_modes[FILTER_INTRA_MODES] = {  // every entry is the same mask
+ NEED_LEFT | NEED_ABOVE, // FILTER_DC
+ NEED_LEFT | NEED_ABOVE, // FILTER_V
+ NEED_LEFT | NEED_ABOVE, // FILTER_H
+ NEED_LEFT | NEED_ABOVE, // FILTER_D45
+ NEED_LEFT | NEED_ABOVE, // FILTER_D135
+ NEED_LEFT | NEED_ABOVE, // FILTER_D117
+ NEED_LEFT | NEED_ABOVE, // FILTER_D153
+ NEED_LEFT | NEED_ABOVE, // FILTER_D207
+ NEED_LEFT | NEED_ABOVE, // FILTER_D63
+ NEED_LEFT | NEED_ABOVE, // FILTER_TM
+};
+
+static int intra_subpel_interp(int base, int shift, const uint8_t *ref,
+                               int ref_start_idx, int ref_end_idx,
+                               INTRA_FILTER filter_type) {
+  // Interpolates ref at position base + shift / 256. Tap indices of the
+  // kernel path are clamped to [ref_start_idx, ref_end_idx].
+  const int16_t *kernel;
+  int kernel_idx, first_tap, tap, sum = 0;
+
+  if (filter_type == INTRA_FILTER_LINEAR) {
+    // Two-tap blend of the neighbouring samples with 8-bit weights.
+    const int blend = ref[base] * (256 - shift) + ref[base + 1] * shift;
+    return ROUND_POWER_OF_TWO(blend, 8);
+  }
+
+  // Quantize the 1/256 offset to the kernel table's sub-pel resolution.
+  kernel_idx = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
+  // Rounded up to a whole sample: take the next sample unfiltered.
+  if (kernel_idx >= (1 << SUBPEL_BITS)) return ref[base + 1];
+
+  // Weighted sum of SUBPEL_TAPS reference samples around base.
+  kernel = vp10_intra_filter_kernels[filter_type][kernel_idx];
+  first_tap = base + 1 - (SUBPEL_TAPS / 2);
+  for (tap = 0; tap < SUBPEL_TAPS; ++tap) {
+    const int pos = VPXMAX(VPXMIN(first_tap + tap, ref_end_idx), ref_start_idx);
+    sum += ref[pos] * kernel[tap];
+  }
+  return ROUND_POWER_OF_TWO(sum, FILTER_BITS);
+}
+
+// Directional prediction, zone 1: 0 < angle < 90
+static void dr_prediction_z1(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left, int dx,
+ int dy, INTRA_FILTER filter_type) {
+ int r, c, x, base, shift, val;
+ // Writes a bs x bs block to dst row by row from above[]; left is unused.
+ (void)left;
+ (void)dy;
+ assert(dy == 1);
+ assert(dx < 0);
+ // Non-linear filters: filter a whole row once per sub-pel phase, then copy.
+ if (filter_type != INTRA_FILTER_LINEAR) {
+ const int pad_size = SUBPEL_TAPS >> 1;
+ int len;
+ DECLARE_ALIGNED(16, uint8_t, buf[SUBPEL_SHIFTS][MAX_SB_SIZE]);
+ DECLARE_ALIGNED(16, uint8_t, src[MAX_SB_SIZE + SUBPEL_TAPS]);
+ uint8_t flags[SUBPEL_SHIFTS];  // flags[s] != 0: buf[s] already filled
+ // src = above[0 .. 2 * bs - 1] with pad_size edge samples on each side.
+ memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0]));
+ memset(src, above[0], pad_size * sizeof(above[0]));
+ memcpy(src + pad_size, above, 2 * bs * sizeof(above[0]));
+ memset(src + pad_size + 2 * bs, above[2 * bs - 1],
+ pad_size * sizeof(above[0]));
+ flags[0] = 1;  // phase 0 is copied straight from src, no filtering needed
+ x = -dx;  // position along above[] in 1/256 sample units
+ for (r = 0; r < bs; ++r, dst += stride, x -= dx) {
+ base = x >> 8;
+ shift = x & 0xFF;
+ shift = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
+ if (shift == SUBPEL_SHIFTS) {  // rounded up to the next whole sample
+ base += 1;
+ shift = 0;
+ }
+ len = VPXMIN(bs, 2 * bs - 1 - base);  // pixels before index 2 * bs - 1
+ if (len <= 0) {  // this row and all later rows are flat
+ int i;
+ for (i = r; i < bs; ++i) {
+ memset(dst, above[2 * bs - 1], bs * sizeof(dst[0]));
+ dst += stride;
+ }
+ return;
+ }
+ // Short run with an uncached phase: interpolate pixel by pixel.
+ if (len <= (bs >> 1) && !flags[shift]) {
+ base = x >> 8;
+ shift = x & 0xFF;
+ for (c = 0; c < len; ++c) {
+ val = intra_subpel_interp(base, shift, above, 0, 2 * bs - 1,
+ filter_type);
+ dst[c] = clip_pixel(val);
+ ++base;
+ }
+ } else {
+ if (!flags[shift]) {
+ const int16_t *filter = vp10_intra_filter_kernels[filter_type][shift];
+ vpx_convolve8_horiz(src + pad_size, 2 * bs, buf[shift], 2 * bs,
+ filter, 16, NULL, 16, 2 * bs,
+ 2 * bs < 16 ? 2 : 1);
+ flags[shift] = 1;
+ }
+ memcpy(dst, shift == 0 ? src + pad_size + base : &buf[shift][base],
+ len * sizeof(dst[0]));
+ }
+ // Pad the rest of the row with the last above sample.
+ if (len < bs)
+ memset(dst + len, above[2 * bs - 1], (bs - len) * sizeof(dst[0]));
+ }
+ return;
+ }
+
+ // For linear filter, C code is faster.
+ x = -dx;
+ for (r = 0; r < bs; ++r, dst += stride, x -= dx) {
+ base = x >> 8;
+ shift = x & 0xFF;
+ // Row starts at or past the last sample: fill the remaining rows and stop.
+ if (base >= 2 * bs - 1) {
+ int i;
+ for (i = r; i < bs; ++i) {
+ memset(dst, above[2 * bs - 1], bs * sizeof(dst[0]));
+ dst += stride;
+ }
+ return;
+ }
+
+ for (c = 0; c < bs; ++c, ++base) {
+ if (base < 2 * bs - 1) {
+ val = above[base] * (256 - shift) + above[base + 1] * shift;
+ val = ROUND_POWER_OF_TWO(val, 8);
+ dst[c] = clip_pixel(val);
+ } else {
+ dst[c] = above[2 * bs - 1];
+ }
+ }
+ }
+}
+
+// Directional prediction, zone 2: 90 < angle < 180
+static void dr_prediction_z2(uint8_t *dst, ptrdiff_t stride, int bs,
+                             const uint8_t *above, const uint8_t *left, int dx,
+                             int dy, INTRA_FILTER filter_type) {
+  // Each pixel is projected onto the above row first; when that position is
+  // left of index -1 it is projected onto the left column, and left[0] is used
+  // if that position is negative as well.
+  int row, col, above_idx, x_pos, y_pos, pixel;
+
+  assert(dx > 0);
+  assert(dy > 0);
+
+  // x_pos / y_pos are positions in 1/256 sample units.
+  x_pos = -dx;
+  for (row = 0; row < bs; ++row, x_pos -= dx, dst += stride) {
+    above_idx = x_pos >> 8;
+    y_pos = (row << 8) - dy;
+    for (col = 0; col < bs; ++col, ++above_idx, y_pos -= dy) {
+      if (above_idx >= -1) {
+        pixel = intra_subpel_interp(above_idx, x_pos & 0xFF, above, -1, bs - 1,
+                                    filter_type);
+      } else if ((y_pos >> 8) >= 0) {
+        pixel = intra_subpel_interp(y_pos >> 8, y_pos & 0xFF, left, 0, bs - 1,
+                                    filter_type);
+      } else {
+        pixel = left[0];
+      }
+      dst[col] = clip_pixel(pixel);
+    }
+  }
+}
+
+// Directional prediction, zone 3: 180 < angle < 270
+static void dr_prediction_z3(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left, int dx,
+ int dy, INTRA_FILTER filter_type) {
+ int r, c, y, base, shift, val;
+ // Writes a bs x bs block to dst column by column from left[]; above unused.
+ (void)above;
+ (void)dx;
+
+ assert(dx == 1);
+ assert(dy < 0);
+ // Non-linear filters: filter the left column once per sub-pel phase.
+ if (filter_type != INTRA_FILTER_LINEAR) {
+ const int pad_size = SUBPEL_TAPS >> 1;
+ int len, i;
+ DECLARE_ALIGNED(16, uint8_t, buf[MAX_SB_SIZE][4 * SUBPEL_SHIFTS]);
+ DECLARE_ALIGNED(16, uint8_t, src[(MAX_SB_SIZE + SUBPEL_TAPS) * 4]);
+ uint8_t flags[SUBPEL_SHIFTS];  // flags[s] != 0: phase s already filtered
+ // src holds the edge-padded left column, one sample every 4 bytes.
+ memset(flags, 0, SUBPEL_SHIFTS * sizeof(flags[0]));
+ for (i = 0; i < pad_size; ++i) src[4 * i] = left[0];
+ for (i = 0; i < 2 * bs; ++i) src[4 * (i + pad_size)] = left[i];
+ for (i = 0; i < pad_size; ++i)
+ src[4 * (i + 2 * bs + pad_size)] = left[2 * bs - 1];
+ flags[0] = 1;  // phase 0 is read straight from left[]
+ y = -dy;  // position along left[] in 1/256 sample units
+ for (c = 0; c < bs; ++c, y -= dy) {
+ base = y >> 8;
+ shift = y & 0xFF;
+ shift = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
+ if (shift == SUBPEL_SHIFTS) {  // rounded up to the next whole sample
+ base += 1;
+ shift = 0;
+ }
+ len = VPXMIN(bs, 2 * bs - 1 - base);  // pixels before index 2 * bs - 1
+
+ if (len <= 0) {  // whole column is the last left sample
+ for (r = 0; r < bs; ++r) {
+ dst[r * stride + c] = left[2 * bs - 1];
+ }
+ continue;
+ }
+ // Short run with an uncached phase: interpolate pixel by pixel.
+ if (len <= (bs >> 1) && !flags[shift]) {
+ base = y >> 8;
+ shift = y & 0xFF;
+ for (r = 0; r < len; ++r) {
+ val = intra_subpel_interp(base, shift, left, 0, 2 * bs - 1,
+ filter_type);
+ dst[r * stride + c] = clip_pixel(val);
+ ++base;
+ }
+ } else {
+ if (!flags[shift]) {
+ const int16_t *filter = vp10_intra_filter_kernels[filter_type][shift];
+ vpx_convolve8_vert(src + 4 * pad_size, 4, buf[0] + 4 * shift,
+ 4 * SUBPEL_SHIFTS, NULL, 16, filter, 16,
+ 2 * bs < 16 ? 4 : 4, 2 * bs);  // NOTE(review): both arms are 4
+ flags[shift] = 1;
+ }
+
+ if (shift == 0) {
+ for (r = 0; r < len; ++r) {
+ dst[r * stride + c] = left[r + base];
+ }
+ } else {
+ for (r = 0; r < len; ++r) {
+ dst[r * stride + c] = buf[r + base][4 * shift];
+ }
+ }
+ }
+ // Rest of the column repeats the last left sample.
+ if (len < bs) {
+ for (r = len; r < bs; ++r) {
+ dst[r * stride + c] = left[2 * bs - 1];
+ }
+ }
+ }
+ return;
+ }
+
+ // For linear filter, C code is faster.
+ y = -dy;
+ for (c = 0; c < bs; ++c, y -= dy) {
+ base = y >> 8;
+ shift = y & 0xFF;
+
+ for (r = 0; r < bs; ++r, ++base) {
+ if (base < 2 * bs - 1) {
+ val = left[base] * (256 - shift) + left[base + 1] * shift;
+ val = ROUND_POWER_OF_TWO(val, 8);
+ dst[r * stride + c] = clip_pixel(val);
+ } else {
+ for (; r < bs; ++r) dst[r * stride + c] = left[2 * bs - 1];
+ break;
+ }
+ }
+ }
+}
+
+// Get the shift (up-scaled by 256) in X w.r.t a unit change in Y, looked up
+// in dr_intra_derivative[]:
+//   0 < angle < 90:    dx = -dr_intra_derivative[angle]
+//   90 < angle < 180:  dx = dr_intra_derivative[180 - angle]
+//   any other angle:   dx = 1 (placeholder, see below)
+static INLINE int get_dx(int angle) {
+  // Uses the INLINE macro like the other static helpers in this file, rather
+  // than the bare `inline` keyword.
+  if (angle > 0 && angle < 90) return -dr_intra_derivative[angle];
+  if (angle > 90 && angle < 180) return dr_intra_derivative[180 - angle];
+
+  // In this case, we are not really going to use dx. We may return any value.
+  return 1;
+}
+
+// Get the shift (up-scaled by 256) in Y w.r.t a unit change in X, looked up
+// in dr_intra_derivative[]:
+//   90 < angle < 180:   dy = dr_intra_derivative[angle - 90]
+//   180 < angle < 270:  dy = -dr_intra_derivative[270 - angle]
+//   any other angle:    dy = 1 (placeholder, see below)
+static INLINE int get_dy(int angle) {
+  // Uses the INLINE macro like the other static helpers in this file, rather
+  // than the bare `inline` keyword.
+  if (angle > 90 && angle < 180) return dr_intra_derivative[angle - 90];
+  if (angle > 180 && angle < 270) return -dr_intra_derivative[270 - angle];
+
+  // In this case, we are not really going to use dy. We may return any value.
+  return 1;
+}
+
+static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
+                         const uint8_t *above, const uint8_t *left, int angle,
+                         INTRA_FILTER filter_type) {
+  // Dispatches on angle: the three open ranges go to the zone predictors,
+  // exactly 90 and 180 reuse the V_PRED / H_PRED entries of pred[][].
+  const int sx = get_dx(angle);
+  const int sy = get_dy(angle);
+  const int size = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+  assert(angle > 0 && angle < 270);
+  if (angle == 90)
+    pred[V_PRED][tx_size](dst, stride, above, left);
+  else if (angle == 180)
+    pred[H_PRED][tx_size](dst, stride, above, left);
+  else if (angle > 0 && angle < 90)
+    dr_prediction_z1(dst, stride, size, above, left, sx, sy, filter_type);
+  else if (angle > 90 && angle < 180)
+    dr_prediction_z2(dst, stride, size, above, left, sx, sy, filter_type);
+  else if (angle > 180 && angle < 270)
+    dr_prediction_z3(dst, stride, size, above, left, sx, sy, filter_type);
+}
+
+static void filter_intra_predictors_4tap(uint8_t *dst, ptrdiff_t stride, int bs,
+                                         const uint8_t *above,
+                                         const uint8_t *left, int mode) {
+  // Works on mean-removed samples in grid[][]: row 0 holds above[-1..2*bs-1],
+  // column 0 holds left[], and every other cell is a rounded weighted sum of
+  // its above, left, above-left and above-right cells.
+  int grid[33][65];
+  int row, col, i, acc, w_above, w_left, w_aboveleft, w_aboveright;
+  int dc = 0;
+  TX_SIZE tx_size = TX_4X4;
+
+  if (bs == 32)
+    tx_size = TX_32X32;
+  else if (bs == 16)
+    tx_size = TX_16X16;
+  else if (bs == 8)
+    tx_size = TX_8X8;
+  w_above = filter_intra_taps_4[tx_size][mode][0];
+  w_left = filter_intra_taps_4[tx_size][mode][1];
+  w_aboveleft = filter_intra_taps_4[tx_size][mode][2];
+  w_aboveright = filter_intra_taps_4[tx_size][mode][3];
+
+  // Rounded average of the first bs samples of left[] and above[].
+  for (i = 0; i < bs; ++i) dc += (int)left[i] + (int)above[i];
+  dc = (dc + bs) / (2 * bs);
+
+  for (col = 0; col <= 2 * bs; ++col) grid[0][col] = (int)above[col - 1] - dc;
+  for (row = 1; row <= bs; ++row) grid[row][0] = (int)left[row - 1] - dc;
+
+  for (row = 1; row <= bs; ++row)
+    for (col = 1; col <= 2 * bs - row; ++col) {
+      acc = w_above * grid[row - 1][col] + w_left * grid[row][col - 1] +
+            w_aboveleft * grid[row - 1][col - 1] +
+            w_aboveright * grid[row - 1][col + 1];
+      grid[row][col] = ROUND_POWER_OF_TWO_SIGNED(acc, FILTER_INTRA_PREC_BITS);
+    }
+
+  // Add the mean back and clip into the destination block.
+  for (row = 0; row < bs; ++row, dst += stride)
+    for (col = 0; col < bs; ++col)
+      dst[col] = clip_pixel(grid[row + 1][col + 1] + dc);
+}
+
+void vp10_dc_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED);
+}  // 4-tap filter-intra prediction with the DC_PRED tap set
+
+void vp10_v_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED);
+}  // 4-tap filter-intra prediction with the V_PRED tap set
+
+void vp10_h_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED);
+}  // 4-tap filter-intra prediction with the H_PRED tap set
+
+void vp10_d45_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED);
+}  // 4-tap filter-intra prediction with the D45_PRED tap set
+
+void vp10_d135_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED);
+}  // 4-tap filter-intra prediction with the D135_PRED tap set
+
+void vp10_d117_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED);
+}  // 4-tap filter-intra prediction with the D117_PRED tap set
+
+void vp10_d153_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED);
+}  // 4-tap filter-intra prediction with the D153_PRED tap set
+
+void vp10_d207_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED);
+}  // 4-tap filter-intra prediction with the D207_PRED tap set
+
+void vp10_d63_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED);
+}  // 4-tap filter-intra prediction with the D63_PRED tap set
+
+void vp10_tm_filter_predictor_c(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED);
+}  // 4-tap filter-intra prediction with the TM_PRED tap set
+
+static void filter_intra_predictors(int mode, uint8_t *dst, ptrdiff_t stride,
+ int bs, const uint8_t *above,
+ const uint8_t *left) {
+ switch (mode) {  // one vp10_*_filter_predictor call per mode, same arguments
+ case DC_PRED: vp10_dc_filter_predictor(dst, stride, bs, above, left); break;
+ case V_PRED: vp10_v_filter_predictor(dst, stride, bs, above, left); break;
+ case H_PRED: vp10_h_filter_predictor(dst, stride, bs, above, left); break;
+ case D45_PRED:
+ vp10_d45_filter_predictor(dst, stride, bs, above, left);
+ break;
+ case D135_PRED:
+ vp10_d135_filter_predictor(dst, stride, bs, above, left);
+ break;
+ case D117_PRED:
+ vp10_d117_filter_predictor(dst, stride, bs, above, left);
+ break;
+ case D153_PRED:
+ vp10_d153_filter_predictor(dst, stride, bs, above, left);
+ break;
+ case D207_PRED:
+ vp10_d207_filter_predictor(dst, stride, bs, above, left);
+ break;
+ case D63_PRED:
+ vp10_d63_filter_predictor(dst, stride, bs, above, left);
+ break;
+ case TM_PRED: vp10_tm_filter_predictor(dst, stride, bs, above, left); break;
+ default: assert(0);  // any other mode: asserts and leaves dst unwritten
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static int highbd_intra_subpel_interp(int base, int shift, const uint16_t *ref,
+                                      int ref_start_idx, int ref_end_idx,
+                                      INTRA_FILTER filter_type) {
+  // Interpolates ref at position base + shift / 256. Tap indices of the
+  // kernel path are clamped to [ref_start_idx, ref_end_idx].
+  const int16_t *kernel;
+  int kernel_idx, first_tap, tap, sum = 0;
+
+  if (filter_type == INTRA_FILTER_LINEAR) {
+    // Two-tap blend of the neighbouring samples with 8-bit weights.
+    const int blend = ref[base] * (256 - shift) + ref[base + 1] * shift;
+    return ROUND_POWER_OF_TWO(blend, 8);
+  }
+
+  // Quantize the 1/256 offset to the kernel table's sub-pel resolution.
+  kernel_idx = ROUND_POWER_OF_TWO(shift, 8 - SUBPEL_BITS);
+  // Rounded up to a whole sample: take the next sample unfiltered.
+  if (kernel_idx >= (1 << SUBPEL_BITS)) return ref[base + 1];
+
+  // Weighted sum of SUBPEL_TAPS reference samples around base.
+  kernel = vp10_intra_filter_kernels[filter_type][kernel_idx];
+  first_tap = base + 1 - (SUBPEL_TAPS / 2);
+  for (tap = 0; tap < SUBPEL_TAPS; ++tap) {
+    const int pos = VPXMAX(VPXMIN(first_tap + tap, ref_end_idx), ref_start_idx);
+    sum += ref[pos] * kernel[tap];
+  }
+  return ROUND_POWER_OF_TWO(sum, FILTER_BITS);
+}
+
+// Directional prediction, zone 1: 0 < angle < 90
+static void highbd_dr_prediction_z1(uint16_t *dst, ptrdiff_t stride, int bs,
+                                    const uint16_t *above, const uint16_t *left,
+                                    int dx, int dy, int bd,
+                                    INTRA_FILTER filter_type) {
+  // Projects every pixel onto the above row in 1/256 sample units; once the
+  // integer position reaches 2 * bs - 1, above[2 * bs - 1] is copied instead.
+  const int last = 2 * bs - 1;
+  int row, col;
+
+  (void)left;
+  (void)dy;
+  assert(dy == 1);
+  assert(dx < 0);
+
+  for (row = 0; row < bs; ++row, dst += stride) {
+    for (col = 0; col < bs; ++col) {
+      const int pos = (col << 8) - (row + 1) * dx;
+      const int idx = pos >> 8;
+      const int frac = pos - (idx << 8);
+      if (idx < last)
+        dst[col] = clip_pixel_highbd(
+            highbd_intra_subpel_interp(idx, frac, above, 0, last, filter_type),
+            bd);
+      else
+        dst[col] = above[last];
+    }
+  }
+}
+
+// Directional prediction, zone 2: 90 < angle < 180
+static void highbd_dr_prediction_z2(uint16_t *dst, ptrdiff_t stride, int bs,
+                                    const uint16_t *above, const uint16_t *left,
+                                    int dx, int dy, int bd,
+                                    INTRA_FILTER filter_type) {
+  // Each pixel is first projected onto the above row; if that lands left of
+  // index -1, it is projected onto the left column instead, and left[0] is
+  // used when that projection is negative too.
+  int row, col, pos, idx, pixel;
+
+  assert(dx > 0);
+  assert(dy > 0);
+
+  for (row = 0; row < bs; ++row, dst += stride) {
+    for (col = 0; col < bs; ++col) {
+      // Position along the above row in 1/256 sample units.
+      pos = (col << 8) - (row + 1) * dx;
+      idx = pos >> 8;
+      if (idx >= -1) {
+        pixel = highbd_intra_subpel_interp(idx, pos - (idx << 8), above, -1,
+                                           bs - 1, filter_type);
+        dst[col] = clip_pixel_highbd(pixel, bd);
+        continue;
+      }
+
+      // Fall back to the left column.
+      pos = (row << 8) - (col + 1) * dy;
+      idx = pos >> 8;
+      pixel = left[0];
+      if (idx >= 0)
+        pixel = highbd_intra_subpel_interp(idx, pos - (idx << 8), left, 0,
+                                           bs - 1, filter_type);
+      dst[col] = clip_pixel_highbd(pixel, bd);
+    }
+  }
+}
+
+// Directional prediction, zone 3: 180 < angle < 270
+static void highbd_dr_prediction_z3(uint16_t *dst, ptrdiff_t stride, int bs,
+                                    const uint16_t *above, const uint16_t *left,
+                                    int dx, int dy, int bd,
+                                    INTRA_FILTER filter_type) {
+  // Projects every pixel onto the left column in 1/256 sample units; once the
+  // integer position reaches 2 * bs - 1, left[2 * bs - 1] is copied instead.
+  const int last = 2 * bs - 1;
+  int row, col;
+
+  (void)above;
+  (void)dx;
+  assert(dx == 1);
+  assert(dy < 0);
+
+  for (row = 0; row < bs; ++row, dst += stride) {
+    for (col = 0; col < bs; ++col) {
+      const int pos = (row << 8) - (col + 1) * dy;
+      const int idx = pos >> 8;
+      const int frac = pos - (idx << 8);
+      if (idx < last)
+        dst[col] = clip_pixel_highbd(
+            highbd_intra_subpel_interp(idx, frac, left, 0, last, filter_type),
+            bd);
+      else
+        dst[col] = left[last];
+    }
+  }
+}
+
+static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                      const uint16_t *above,
+                                      const uint16_t *left, int bd) {
+  // Copies the first bs samples of above[] into each of the bs output rows.
+  int rows_left;
+  (void)left;
+  (void)bd;
+
+  for (rows_left = bs; rows_left > 0; --rows_left, dst += stride)
+    memcpy(dst, above, bs * sizeof(*dst));
+}
+
+static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                      const uint16_t *above,
+                                      const uint16_t *left, int bd) {
+  // Fills output row i (bs samples wide) with left[i], for i in [0, bs).
+  int row;
+  (void)above;
+  (void)bd;
+
+  for (row = 0; row < bs; ++row, dst += stride)
+    vpx_memset16(dst, left[row], bs);
+}
+
+static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
+                                const uint16_t *above, const uint16_t *left,
+                                int angle, int bd, INTRA_FILTER filter) {
+  // Dispatches on angle: the three open ranges go to the zone predictors,
+  // exactly 90 and 180 use the plain vertical / horizontal copies.
+  const int sx = get_dx(angle);
+  const int sy = get_dy(angle);
+  assert(angle > 0 && angle < 270);
+  if (angle == 90)
+    highbd_v_predictor(dst, stride, bs, above, left, bd);
+  else if (angle == 180)
+    highbd_h_predictor(dst, stride, bs, above, left, bd);
+  else if (angle > 0 && angle < 90)
+    highbd_dr_prediction_z1(dst, stride, bs, above, left, sx, sy, bd, filter);
+  else if (angle > 90 && angle < 180)
+    highbd_dr_prediction_z2(dst, stride, bs, above, left, sx, sy, bd, filter);
+  else if (angle > 180 && angle < 270)
+    highbd_dr_prediction_z3(dst, stride, bs, above, left, sx, sy, bd, filter);
+}
+
+static void highbd_filter_intra_predictors_4tap(uint16_t *dst, ptrdiff_t stride,
+                                                int bs, const uint16_t *above,
+                                                const uint16_t *left, int mode,
+                                                int bd) {
+  // Works on mean-removed samples in grid[][]: row 0 holds above[-1..2*bs-1],
+  // column 0 holds left[], and every other cell is a rounded weighted sum of
+  // its above, left, above-left and above-right cells.
+  int grid[33][65];
+  int row, col, i, acc, w_above, w_left, w_aboveleft, w_aboveright;
+  int dc = 0;
+  TX_SIZE tx_size = TX_4X4;
+
+  if (bs == 32)
+    tx_size = TX_32X32;
+  else if (bs == 16)
+    tx_size = TX_16X16;
+  else if (bs == 8)
+    tx_size = TX_8X8;
+  w_above = filter_intra_taps_4[tx_size][mode][0];
+  w_left = filter_intra_taps_4[tx_size][mode][1];
+  w_aboveleft = filter_intra_taps_4[tx_size][mode][2];
+  w_aboveright = filter_intra_taps_4[tx_size][mode][3];
+
+  // Rounded average of the first bs samples of left[] and above[].
+  for (i = 0; i < bs; ++i) dc += (int)left[i] + (int)above[i];
+  dc = (dc + bs) / (2 * bs);
+
+  for (col = 0; col <= 2 * bs; ++col) grid[0][col] = (int)above[col - 1] - dc;
+  for (row = 1; row <= bs; ++row) grid[row][0] = (int)left[row - 1] - dc;
+
+  for (row = 1; row <= bs; ++row)
+    for (col = 1; col <= 2 * bs - row; ++col) {
+      acc = w_above * grid[row - 1][col] + w_left * grid[row][col - 1] +
+            w_aboveleft * grid[row - 1][col - 1] +
+            w_aboveright * grid[row - 1][col + 1];
+      grid[row][col] = ROUND_POWER_OF_TWO_SIGNED(acc, FILTER_INTRA_PREC_BITS);
+    }
+
+  // Add the mean back and clip to the bit depth into the destination block.
+  for (row = 0; row < bs; ++row, dst += stride)
+    for (col = 0; col < bs; ++col)
+      dst[col] = clip_pixel_highbd(grid[row + 1][col + 1] + dc, bd);
+}
+
+void vp10_highbd_dc_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, DC_PRED,
+ bd);
+}
+
+void vp10_highbd_v_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, V_PRED, bd);
+}
+
+void vp10_highbd_h_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, H_PRED, bd);
+}
+
+void vp10_highbd_d45_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D45_PRED,
+ bd);
+}
+
+void vp10_highbd_d135_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D135_PRED,
+ bd);
+}
+
+void vp10_highbd_d117_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D117_PRED,
+ bd);
+}
+
+void vp10_highbd_d153_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D153_PRED,
+ bd);
+}
+
+void vp10_highbd_d207_filter_predictor_c(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D207_PRED,
+ bd);
+}
+
+void vp10_highbd_d63_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, D63_PRED,
+ bd);
+}
+
+void vp10_highbd_tm_filter_predictor_c(uint16_t *dst, ptrdiff_t stride, int bs,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_filter_intra_predictors_4tap(dst, stride, bs, above, left, TM_PRED,
+ bd);
+}
+
+// Runs the high bit-depth filter-intra predictor that corresponds to |mode|
+// on a bs x bs block. An unknown |mode| trips an assert and, when asserts are
+// compiled out, leaves |dst| untouched.
+static void highbd_filter_intra_predictors(int mode, uint16_t *dst,
+                                           ptrdiff_t stride, int bs,
+                                           const uint16_t *above,
+                                           const uint16_t *left, int bd) {
+  void (*predict)(uint16_t *, ptrdiff_t, int, const uint16_t *,
+                  const uint16_t *, int) = NULL;
+
+  // Pick the predictor first so that there is a single call site below.
+  switch (mode) {
+    case DC_PRED: predict = vp10_highbd_dc_filter_predictor; break;
+    case V_PRED: predict = vp10_highbd_v_filter_predictor; break;
+    case H_PRED: predict = vp10_highbd_h_filter_predictor; break;
+    case D45_PRED: predict = vp10_highbd_d45_filter_predictor; break;
+    case D135_PRED: predict = vp10_highbd_d135_filter_predictor; break;
+    case D117_PRED: predict = vp10_highbd_d117_filter_predictor; break;
+    case D153_PRED: predict = vp10_highbd_d153_filter_predictor; break;
+    case D207_PRED: predict = vp10_highbd_d207_filter_predictor; break;
+    case D63_PRED: predict = vp10_highbd_d63_filter_predictor; break;
+    case TM_PRED: predict = vp10_highbd_tm_filter_predictor; break;
+    default: assert(0); return;
+  }
+
+  predict(dst, stride, bs, above, left, bd);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Builds the intra prediction for one transform block of a high bit-depth
+// frame and writes it to |dst8|.
+// |ref8| and |dst8| are converted with CONVERT_TO_SHORTPTR and accessed as
+// 16-bit samples. The left column and the above row are gathered from |ref|
+// into local buffers; where neighbours are missing they are padded with
+// base + 1 / base - 1 or by replicating the last copied sample. The buffers
+// are then handed to the predictor selected by |mode| (or, with
+// CONFIG_EXT_INTRA, by the block's ext-intra / angle information).
+// n_top_px, n_topright_px, n_left_px and n_bottomleft_px give the number of
+// neighbouring pixels that may be read on each side; all are asserted >= 0.
+static void build_intra_predictors_high(
+ const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8,
+ int dst_stride, PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px,
+ int n_topright_px, int n_left_px, int n_bottomleft_px, int plane) {
+ int i;
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ DECLARE_ALIGNED(16, uint16_t, left_col[MAX_SB_SIZE]);
+ DECLARE_ALIGNED(16, uint16_t, above_data[MAX_SB_SIZE + 16]);
+ // The 16-entry offset leaves room in front of the row so that above_row[-1]
+ // (the above-left sample) is addressable.
+ uint16_t *above_row = above_data + 16;
+ const uint16_t *const_above_row = above_row;
+ // Transform block width in pixels.
+ const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+ int need_left = extend_modes[mode] & NEED_LEFT;
+ int need_above = extend_modes[mode] & NEED_ABOVE;
+ const uint16_t *above_ref = ref - ref_stride;
+ // Mid-grey for the current bit depth (128 at 8 bits).
+ int base = 128 << (xd->bd - 8);
+// Layout of the values used when neighbours are unavailable, shown for 8 bits
+// (127 = base - 1 for a missing above row, 129 = base + 1 for a missing left
+// column; the last available above sample is replicated to the right):
+// 127 127 127 .. 127 127 127 127 127 127
+// 129 A B .. Y Z
+// 129 C D .. W X
+// 129 E F .. U V
+// 129 G H .. S T T T T T
+
+#if CONFIG_EXT_INTRA
+ const EXT_INTRA_MODE_INFO *ext_intra_mode_info =
+ &xd->mi[0]->mbmi.ext_intra_mode_info;
+ const EXT_INTRA_MODE ext_intra_mode =
+ ext_intra_mode_info->ext_intra_mode[plane != 0];
+ int p_angle = 0;
+
+ // Directional modes on blocks of at least 8x8: derive the prediction angle
+ // and decide from it which edges are needed.
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ p_angle = mode_to_angle_map[mode] +
+ xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+ if (p_angle <= 90)
+ need_above = 1, need_left = 0;
+ else if (p_angle < 180)
+ need_above = 1, need_left = 1;
+ else
+ need_above = 0, need_left = 1;
+ }
+
+ // Ext-intra (filter intra) overrides the edge requirements.
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ // NOTE: shadows the function-scope ext_intra_mode; both are read from the
+ // same field, so they hold the same value.
+ EXT_INTRA_MODE ext_intra_mode =
+ ext_intra_mode_info->ext_intra_mode[plane != 0];
+ need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
+ need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
+ }
+#endif // CONFIG_EXT_INTRA
+
+ (void)plane;
+ assert(n_top_px >= 0);
+ assert(n_topright_px >= 0);
+ assert(n_left_px >= 0);
+ assert(n_bottomleft_px >= 0);
+
+ // Only one edge is needed and that edge is unavailable: fill the whole block
+ // with the corresponding constant and stop.
+ if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
+ // NOTE: shadows the function-scope i.
+ int i;
+ const int val = (n_left_px == 0) ? base + 1 : base - 1;
+ for (i = 0; i < bs; ++i) {
+ vpx_memset16(dst, val, bs);
+ dst += dst_stride;
+ }
+ return;
+ }
+
+ // NEED_LEFT
+ if (need_left) {
+#if CONFIG_EXT_INTRA
+ int need_bottom;
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_bottom = 0;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_bottom = p_angle > 180;
+ } else {
+ need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+ }
+#else
+ const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+#endif // CONFIG_EXT_INTRA
+ i = 0;
+ if (n_left_px > 0) {
+ // Copy the column immediately to the left of the block.
+ for (; i < n_left_px; i++) left_col[i] = ref[i * ref_stride - 1];
+ if (need_bottom && n_bottomleft_px > 0) {
+ assert(i == bs);
+ for (; i < bs + n_bottomleft_px; i++)
+ left_col[i] = ref[i * ref_stride - 1];
+ }
+ // Pad up to bs (or 2 * bs when the bottom-left is needed) entries by
+ // replicating the last copied sample.
+ if (i < (bs << need_bottom))
+ vpx_memset16(&left_col[i], left_col[i - 1], (bs << need_bottom) - i);
+ } else {
+ vpx_memset16(left_col, base + 1, bs << need_bottom);
+ }
+ }
+
+ // NEED_ABOVE
+ if (need_above) {
+#if CONFIG_EXT_INTRA
+ int need_right;
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_right = 1;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_right = p_angle < 90;
+ } else {
+ need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+ }
+#else
+ const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+#endif // CONFIG_EXT_INTRA
+ if (n_top_px > 0) {
+ // memcpy sizes are in bytes: two bytes per 16-bit sample.
+ memcpy(above_row, above_ref, n_top_px * 2);
+ i = n_top_px;
+ if (need_right && n_topright_px > 0) {
+ assert(n_top_px == bs);
+ memcpy(above_row + bs, above_ref + bs, n_topright_px * 2);
+ i += n_topright_px;
+ }
+ // Pad up to bs (or 2 * bs when the above-right is needed) entries by
+ // replicating the last copied sample.
+ if (i < (bs << need_right))
+ vpx_memset16(&above_row[i], above_row[i - 1], (bs << need_right) - i);
+ } else {
+ vpx_memset16(above_row, base - 1, bs << need_right);
+ }
+ }
+
+ // Above-left sample: the real pixel when both edges exist, base + 1 when
+ // only the above row exists, base - 1 when there is no above row.
+#if CONFIG_EXT_INTRA
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] ||
+ (extend_modes[mode] & NEED_ABOVELEFT) ||
+ (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) {
+ above_row[-1] =
+ n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
+ }
+#else
+ if ((extend_modes[mode] & NEED_ABOVELEFT)) {
+ above_row[-1] =
+ n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : base + 1) : base - 1;
+ }
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTRA
+ // Filter-intra prediction takes precedence over everything else.
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ highbd_filter_intra_predictors(ext_intra_mode, dst, dst_stride, bs,
+ const_above_row, left_col, xd->bd);
+ return;
+ }
+
+ // Directional prediction; only luma (plane 0) may use the filter stored in
+ // the mode info, and only when the angle allows switching.
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ INTRA_FILTER filter = INTRA_FILTER_LINEAR;
+ if (plane == 0 && vp10_is_intra_filter_switchable(p_angle))
+ filter = xd->mi[0]->mbmi.intra_filter;
+ highbd_dr_predictor(dst, dst_stride, bs, const_above_row, left_col, p_angle,
+ xd->bd, filter);
+ return;
+ }
+#endif // CONFIG_EXT_INTRA
+
+ // predict
+ if (mode == DC_PRED) {
+ // The DC variant is chosen by which edges are available.
+ dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
+ dst, dst_stride, const_above_row, left_col, xd->bd);
+ } else {
+ pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col,
+ xd->bd);
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// Builds the 8-bit intra prediction for one transform block and writes it to
+// |dst|.
+// The left column and the above row are gathered from |ref| into local
+// buffers; where neighbours are missing they are padded with 129 / 127 or by
+// replicating the last copied sample. The buffers are then handed to the
+// predictor selected by |mode| (or, with CONFIG_EXT_INTRA, by the block's
+// ext-intra / angle information).
+// n_top_px, n_topright_px, n_left_px and n_bottomleft_px give the number of
+// neighbouring pixels that may be read on each side; all are asserted >= 0.
+static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
+ int ref_stride, uint8_t *dst, int dst_stride,
+ PREDICTION_MODE mode, TX_SIZE tx_size,
+ int n_top_px, int n_topright_px,
+ int n_left_px, int n_bottomleft_px,
+ int plane) {
+ int i;
+ DECLARE_ALIGNED(16, uint8_t, left_col[MAX_SB_SIZE]);
+ const uint8_t *above_ref = ref - ref_stride;
+ DECLARE_ALIGNED(16, uint8_t, above_data[MAX_SB_SIZE + 16]);
+ // The 16-entry offset leaves room in front of the row so that above_row[-1]
+ // (the above-left sample) is addressable.
+ uint8_t *above_row = above_data + 16;
+ const uint8_t *const_above_row = above_row;
+ // Transform block width in pixels.
+ const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+ int need_left = extend_modes[mode] & NEED_LEFT;
+ int need_above = extend_modes[mode] & NEED_ABOVE;
+#if CONFIG_EXT_INTRA
+ const EXT_INTRA_MODE_INFO *ext_intra_mode_info =
+ &xd->mi[0]->mbmi.ext_intra_mode_info;
+ const EXT_INTRA_MODE ext_intra_mode =
+ ext_intra_mode_info->ext_intra_mode[plane != 0];
+ int p_angle = 0;
+
+ // Directional modes on blocks of at least 8x8: derive the prediction angle
+ // and decide from it which edges are needed.
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ p_angle = mode_to_angle_map[mode] +
+ xd->mi[0]->mbmi.angle_delta[plane != 0] * ANGLE_STEP;
+ if (p_angle <= 90)
+ need_above = 1, need_left = 0;
+ else if (p_angle < 180)
+ need_above = 1, need_left = 1;
+ else
+ need_above = 0, need_left = 1;
+ }
+
+ // Ext-intra (filter intra) overrides the edge requirements.
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ // NOTE: shadows the function-scope ext_intra_mode; both are read from the
+ // same field, so they hold the same value.
+ EXT_INTRA_MODE ext_intra_mode =
+ ext_intra_mode_info->ext_intra_mode[plane != 0];
+ need_left = ext_intra_extend_modes[ext_intra_mode] & NEED_LEFT;
+ need_above = ext_intra_extend_modes[ext_intra_mode] & NEED_ABOVE;
+ }
+#endif // CONFIG_EXT_INTRA
+
+ // Values used when neighbours are unavailable (127 for a missing above row,
+ // 129 for a missing left column; the last available above sample is
+ // replicated to the right):
+ // 127 127 127 .. 127 127 127 127 127 127
+ // 129 A B .. Y Z
+ // 129 C D .. W X
+ // 129 E F .. U V
+ // 129 G H .. S T T T T T
+ // ..
+
+ (void)xd;
+ (void)plane;
+ assert(n_top_px >= 0);
+ assert(n_topright_px >= 0);
+ assert(n_left_px >= 0);
+ assert(n_bottomleft_px >= 0);
+
+ // Only one edge is needed and that edge is unavailable: fill the whole block
+ // with the corresponding constant and stop.
+ if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
+ // NOTE: shadows the function-scope i.
+ int i;
+ const int val = (n_left_px == 0) ? 129 : 127;
+ for (i = 0; i < bs; ++i) {
+ memset(dst, val, bs);
+ dst += dst_stride;
+ }
+ return;
+ }
+
+ // NEED_LEFT
+ if (need_left) {
+#if CONFIG_EXT_INTRA
+ int need_bottom;
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_bottom = 0;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_bottom = p_angle > 180;
+ } else {
+ need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+ }
+#else
+ const int need_bottom = !!(extend_modes[mode] & NEED_BOTTOMLEFT);
+#endif // CONFIG_EXT_INTRA
+ i = 0;
+ if (n_left_px > 0) {
+ // Copy the column immediately to the left of the block.
+ for (; i < n_left_px; i++) left_col[i] = ref[i * ref_stride - 1];
+ if (need_bottom && n_bottomleft_px > 0) {
+ assert(i == bs);
+ for (; i < bs + n_bottomleft_px; i++)
+ left_col[i] = ref[i * ref_stride - 1];
+ }
+ // Pad up to bs (or 2 * bs when the bottom-left is needed) entries by
+ // replicating the last copied sample.
+ if (i < (bs << need_bottom))
+ memset(&left_col[i], left_col[i - 1], (bs << need_bottom) - i);
+ } else {
+ memset(left_col, 129, bs << need_bottom);
+ }
+ }
+
+ // NEED_ABOVE
+ if (need_above) {
+#if CONFIG_EXT_INTRA
+ int need_right;
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ need_right = 1;
+ } else if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ need_right = p_angle < 90;
+ } else {
+ need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+ }
+#else
+ const int need_right = !!(extend_modes[mode] & NEED_ABOVERIGHT);
+#endif // CONFIG_EXT_INTRA
+ if (n_top_px > 0) {
+ memcpy(above_row, above_ref, n_top_px);
+ i = n_top_px;
+ if (need_right && n_topright_px > 0) {
+ assert(n_top_px == bs);
+ memcpy(above_row + bs, above_ref + bs, n_topright_px);
+ i += n_topright_px;
+ }
+ // Pad up to bs (or 2 * bs when the above-right is needed) entries by
+ // replicating the last copied sample.
+ if (i < (bs << need_right))
+ memset(&above_row[i], above_row[i - 1], (bs << need_right) - i);
+ } else {
+ memset(above_row, 127, bs << need_right);
+ }
+ }
+
+ // Above-left sample: the real pixel when both edges exist, 129 when only
+ // the above row exists, 127 when there is no above row.
+#if CONFIG_EXT_INTRA
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0] ||
+ (extend_modes[mode] & NEED_ABOVELEFT) ||
+ (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8)) {
+ above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
+ }
+#else
+ if ((extend_modes[mode] & NEED_ABOVELEFT)) {
+ above_row[-1] = n_top_px > 0 ? (n_left_px > 0 ? above_ref[-1] : 129) : 127;
+ }
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_EXT_INTRA
+ // Filter-intra prediction takes precedence over everything else.
+ if (ext_intra_mode_info->use_ext_intra_mode[plane != 0]) {
+ filter_intra_predictors(ext_intra_mode, dst, dst_stride, bs,
+ const_above_row, left_col);
+ return;
+ }
+
+ // Directional prediction; only luma (plane 0) may use the filter stored in
+ // the mode info, and only when the angle allows switching.
+ if (mode != DC_PRED && mode != TM_PRED &&
+ xd->mi[0]->mbmi.sb_type >= BLOCK_8X8) {
+ INTRA_FILTER filter = INTRA_FILTER_LINEAR;
+ if (plane == 0 && vp10_is_intra_filter_switchable(p_angle))
+ filter = xd->mi[0]->mbmi.intra_filter;
+ dr_predictor(dst, dst_stride, tx_size, const_above_row, left_col, p_angle,
+ filter);
+ return;
+ }
+#endif // CONFIG_EXT_INTRA
+
+ // predict
+ if (mode == DC_PRED) {
+ // The DC variant is chosen by which edges are available.
+ dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride,
+ const_above_row, left_col);
+ } else {
+ pred[mode][tx_size](dst, dst_stride, const_above_row, left_col);
+ }
+}
+
+// Predicts one transform block of the current intra block into |dst|.
+// |bwl_in| / |bhl_in| are the log2 of the prediction block width / height in
+// units of 4 pixels; |col_off| / |row_off| locate the transform block inside
+// it, also in units of 4 pixels. Palette-coded planes are reconstructed
+// directly from the colour index map; everything else is forwarded to
+// build_intra_predictors() or, for high bit-depth buffers, to
+// build_intra_predictors_high() together with the number of usable
+// neighbouring pixels on each side.
+void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
+                              TX_SIZE tx_size, PREDICTION_MODE mode,
+                              const uint8_t *ref, int ref_stride, uint8_t *dst,
+                              int dst_stride, int col_off, int row_off,
+                              int plane) {
+  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const int ss_x = pd->subsampling_x;
+  const int ss_y = pd->subsampling_y;
+  // Transform block size in 4x4 units and in pixels.
+  const int txw = num_4x4_blocks_wide_txsize_lookup[tx_size];
+  const int txh = num_4x4_blocks_high_txsize_lookup[tx_size];
+  const int txwpx = 4 * txw;
+  const int txhpx = 4 * txh;
+  // Pixel offset of the transform block inside the prediction block.
+  const int x = col_off * 4;
+  const int y = row_off * 4;
+  const int have_top = row_off || xd->up_available;
+  const int have_left = col_off || xd->left_available;
+  // Prediction block size in 4x4 units (at least 2 when not subsampled) and
+  // in pixels.
+  const int bw = ss_x ? 1 << bwl_in : VPXMAX(2, 1 << bwl_in);
+  const int bh = ss_y ? 1 << bhl_in : VPXMAX(2, 1 << bhl_in);
+  const int wpx = 4 * bw;
+  const int hpx = 4 * bh;
+  const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
+  const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
+  // Distance from the right edge of this transform block to the frame's
+  // right edge.
+  const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + (wpx - x - txwpx);
+  // Distance from the bottom edge of this transform block to the frame's
+  // bottom edge.
+  const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + (hpx - y - txhpx);
+  const int right_available =
+      (mi_col + ((col_off + txw) >> (1 - ss_x))) < xd->tile.mi_col_end;
+#if CONFIG_EXT_PARTITION_TYPES
+  const PARTITION_TYPE partition = xd->mi[0]->mbmi.partition;
+#endif
+  const int have_right =
+      vp10_has_right(bsize, mi_row, mi_col, right_available,
+#if CONFIG_EXT_PARTITION_TYPES
+                     partition,
+#endif
+                     tx_size, row_off, col_off, ss_x);
+  const int have_bottom = vp10_has_bottom(bsize, mi_row, mi_col, yd > 0,
+                                          tx_size, row_off, col_off, ss_y);
+
+  // Palette mode: every output pixel is a palette lookup through the colour
+  // index map; no neighbouring pixels are involved.
+  if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) {
+    const int map_stride = 4 * (1 << bwl_in);
+    const uint8_t *const map = xd->plane[plane != 0].color_index_map;
+#if CONFIG_VP9_HIGHBITDEPTH
+    const uint16_t *const palette =
+        xd->mi[0]->mbmi.palette_mode_info.palette_colors +
+        plane * PALETTE_MAX_SIZE;
+#else
+    const uint8_t *const palette =
+        xd->mi[0]->mbmi.palette_mode_info.palette_colors +
+        plane * PALETTE_MAX_SIZE;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    int r, c;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
+      for (r = 0; r < txwpx; ++r) {
+        const uint8_t *const map_row = map + (r + y) * map_stride + x;
+        uint16_t *const dst_row = dst16 + r * dst_stride;
+        for (c = 0; c < txwpx; ++c) dst_row[c] = palette[map_row[c]];
+      }
+    } else {
+      for (r = 0; r < txwpx; ++r) {
+        const uint8_t *const map_row = map + (r + y) * map_stride + x;
+        uint8_t *const dst_row = dst + r * dst_stride;
+        for (c = 0; c < txwpx; ++c) dst_row[c] = (uint8_t)(palette[map_row[c]]);
+      }
+    }
+#else
+    for (r = 0; r < txwpx; ++r) {
+      const uint8_t *const map_row = map + (r + y) * map_stride + x;
+      uint8_t *const dst_row = dst + r * dst_stride;
+      for (c = 0; c < txwpx; ++c) dst_row[c] = palette[map_row[c]];
+    }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+    return;
+  }
+
+  {
+    // Number of usable neighbouring pixels on each side, limited by the
+    // distance to the frame edge.
+    const int n_top_px = have_top ? VPXMIN(txwpx, xr + txwpx) : 0;
+    const int n_topright_px = have_top && have_right ? VPXMIN(txwpx, xr) : 0;
+    const int n_left_px = have_left ? VPXMIN(txhpx, yd + txhpx) : 0;
+    const int n_bottomleft_px =
+        have_bottom && have_left ? VPXMIN(txhpx, yd) : 0;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode,
+                                  tx_size, n_top_px, n_topright_px, n_left_px,
+                                  n_bottomleft_px, plane);
+      return;
+    }
+#endif
+    build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size,
+                           n_top_px, n_topright_px, n_left_px, n_bottomleft_px,
+                           plane);
+  }
+}
+
+// Public initialisation entry point for the intra predictors: hands
+// vp10_init_intra_predictors_internal to once().
+// NOTE(review): presumably once() guarantees the initialiser runs a single
+// time even with repeated or concurrent calls - confirm against its
+// definition.
+void vp10_init_intra_predictors(void) {
+ once(vp10_init_intra_predictors_internal);
+}
diff --git a/av1/common/reconintra.h b/av1/common/reconintra.h
new file mode 100644
index 0000000..d20b5a4
--- /dev/null
+++ b/av1/common/reconintra.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_RECONINTRA_H_
+#define VP10_COMMON_RECONINTRA_H_
+
+#include "aom/vpx_integer.h"
+#include "av1/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Initialises the intra predictor tables. Call before the first use of
+// vp10_predict_intra_block().
+void vp10_init_intra_predictors(void);
+
+// Predicts one transform block of the current intra block into |dst|.
+//   xd              - block context (mode info, plane data, edge distances).
+//   bwl_in, bhl_in  - log2 of the prediction block width / height, in units
+//                     of 4 pixels.
+//   tx_size, mode   - transform size and intra prediction mode to use.
+//   ref, ref_stride - reconstructed frame data at the transform block
+//                     position; the neighbouring pixels are read from here.
+//   dst, dst_stride - destination of the prediction.
+//   col_off, row_off - column / row offset of the transform block inside the
+//                     prediction block, in units of 4 pixels (names match the
+//                     definition in reconintra.c).
+//   plane           - plane index (0 is luma).
+void vp10_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
+                              TX_SIZE tx_size, PREDICTION_MODE mode,
+                              const uint8_t *ref, int ref_stride, uint8_t *dst,
+                              int dst_stride, int col_off, int row_off,
+                              int plane);
+#if CONFIG_EXT_INTRA
+// Returns non-zero when the intra interpolation filter may be chosen per
+// block for prediction angle |angle|. The intra predictors use the block's
+// signalled filter only when this returns non-zero (and only for luma);
+// otherwise they use INTRA_FILTER_LINEAR.
+int vp10_is_intra_filter_switchable(int angle);
+#endif // CONFIG_EXT_INTRA
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_RECONINTRA_H_
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
new file mode 100644
index 0000000..4d4c9fc
--- /dev/null
+++ b/av1/common/restoration.c
@@ -0,0 +1,642 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/restoration.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+
+// The sigma values in the level tables are stored multiplied by this factor;
+// vp10_loop_restoration_precal() divides by it to recover the real value.
+#define BILATERAL_PARAM_PRECISION 16
+// Largest absolute pixel difference covered by the range-weight tables.
+#define BILATERAL_AMP_RANGE 256
+// Number of entries per range-weight table: differences from
+// -BILATERAL_AMP_RANGE to +BILATERAL_AMP_RANGE inclusive.
+#define BILATERAL_AMP_RANGE_SYM (2 * BILATERAL_AMP_RANGE + 1)
+
+// Range (pixel-difference) weights per level, for key frames (_kf) and for
+// other frames. Entry BILATERAL_AMP_RANGE corresponds to a difference of 0.
+// Filled by vp10_loop_restoration_precal().
+static uint8_t
+ bilateral_filter_coeffs_r_kf[BILATERAL_LEVELS_KF][BILATERAL_AMP_RANGE_SYM];
+static uint8_t
+ bilateral_filter_coeffs_r[BILATERAL_LEVELS][BILATERAL_AMP_RANGE_SYM];
+// Spatial weights per level over the RESTORATION_WIN x RESTORATION_WIN
+// window, indexed [level][y][x]. Filled by vp10_loop_restoration_precal().
+static uint8_t bilateral_filter_coeffs_s_kf[BILATERAL_LEVELS_KF]
+ [RESTORATION_WIN][RESTORATION_WIN];
+static uint8_t bilateral_filter_coeffs_s[BILATERAL_LEVELS][RESTORATION_WIN]
+ [RESTORATION_WIN];
+
+// Gaussian parameters of one bilateral filter level. Each value is a sigma
+// (it enters the weights as exp(-d * d / (2 * sigma * sigma))) stored in
+// units of 1 / BILATERAL_PARAM_PRECISION.
+typedef struct bilateral_params {
+ int sigma_x; // spatial sigma, horizontal
+ int sigma_y; // spatial sigma, vertical
+ int sigma_r; // range (pixel difference) sigma
+} BilateralParamsType;
+
+// Per-level parameters for non-key frames, one { sigma_x, sigma_y, sigma_r }
+// entry per level.
+static BilateralParamsType bilateral_level_to_params_arr[BILATERAL_LEVELS] = {
+ // Values are in units of 1/16 (see BILATERAL_PARAM_PRECISION)
+ { 8, 9, 30 }, { 9, 8, 30 }, { 9, 11, 32 }, { 11, 9, 32 },
+ { 14, 14, 32 }, { 18, 18, 36 }, { 24, 24, 40 }, { 32, 32, 40 },
+};
+
+// Per-level parameters for key frames, one { sigma_x, sigma_y, sigma_r }
+// entry per level.
+static BilateralParamsType
+ bilateral_level_to_params_arr_kf[BILATERAL_LEVELS_KF] = {
+ // Values are in units of 1/16 (see BILATERAL_PARAM_PRECISION)
+ { 8, 8, 30 }, { 9, 9, 32 }, { 10, 10, 32 }, { 12, 12, 32 },
+ { 14, 14, 32 }, { 18, 18, 36 }, { 24, 24, 40 }, { 30, 30, 44 },
+ { 36, 36, 48 }, { 42, 42, 48 }, { 48, 48, 48 }, { 48, 48, 56 },
+ { 56, 56, 48 }, { 56, 56, 56 }, { 56, 56, 64 }, { 64, 64, 48 },
+ };
+
+// Signature shared by the 8-bit restoration filters in this file: filters the
+// width x height plane at |data8| (row pitch |stride|) using |tmpdata8| (row
+// pitch |tmpstride|) as scratch space.
+typedef void (*restore_func_type)(uint8_t *data8, int width, int height,
+ int stride, RestorationInternal *rst,
+ uint8_t *tmpdata8, int tmpstride);
+#if CONFIG_VP9_HIGHBITDEPTH
+// High bit-depth counterpart of restore_func_type; additionally receives the
+// sample bit depth.
+typedef void (*restore_func_highbd_type)(uint8_t *data8, int width, int height,
+ int stride, RestorationInternal *rst,
+ uint8_t *tmpdata8, int tmpstride,
+ int bit_depth);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// Returns a copy of the bilateral parameters for level |index|: from the
+// key-frame (_kf) table when |kf| is non-zero, from the regular table
+// otherwise. |index| is not range-checked.
+static INLINE BilateralParamsType vp10_bilateral_level_to_params(int index,
+                                                                 int kf) {
+  if (kf) return bilateral_level_to_params_arr_kf[index];
+  return bilateral_level_to_params_arr[index];
+}
+
+// Nominal dimensions of one restoration tile.
+typedef struct TileParams {
+ int width;
+ int height;
+} TileParams;
+
+// Tile dimensions selectable through the tilesize index passed to
+// vp10_get_restoration_tile_size().
+static TileParams restoration_tile_sizes[RESTORATION_TILESIZES] = {
+ { 64, 64 }, { 128, 128 }, { 256, 256 }
+};
+
+// Computes the restoration tile geometry for a width x height plane.
+// A negative |tilesize| selects a single tile covering the whole plane;
+// otherwise |tilesize| indexes restoration_tile_sizes and the tile
+// dimensions are capped at the plane dimensions. The tile counts are
+// plane size / tile size rounded to nearest, so a trailing remainder of less
+// than half a tile does not get a tile of its own.
+// |tilesize| is not checked against the table size, and |width| / |height|
+// must be non-zero (they end up as divisors).
+void vp10_get_restoration_tile_size(int tilesize, int width, int height,
+                                    int *tile_width, int *tile_height,
+                                    int *nhtiles, int *nvtiles) {
+  if (tilesize < 0) {
+    *tile_width = width;
+    *tile_height = height;
+  } else {
+    *tile_width = VPXMIN(restoration_tile_sizes[tilesize].width, width);
+    *tile_height = VPXMIN(restoration_tile_sizes[tilesize].height, height);
+  }
+
+  // Adding half a tile before dividing rounds to the nearest tile count.
+  *nhtiles = (width + (*tile_width >> 1)) / *tile_width;
+  *nvtiles = (height + (*tile_height >> 1)) / *tile_height;
+}
+
+// Returns the total number of restoration tiles (columns times rows) for a
+// width x height plane and the given tilesize index.
+int vp10_get_restoration_ntiles(int tilesize, int width, int height) {
+  int tile_w, tile_h;
+  int tiles_across, tiles_down;
+
+  vp10_get_restoration_tile_size(tilesize, width, height, &tile_w, &tile_h,
+                                 &tiles_across, &tiles_down);
+  return tiles_across * tiles_down;
+}
+
+// Fills one range-weight table. |fr| points at the centre entry (difference
+// 0); entries fr[-BILATERAL_AMP_RANGE] .. fr[BILATERAL_AMP_RANGE] are written
+// with a Gaussian of sigma |sigma_r_d| scaled by RESTORATION_FILT_STEP and
+// rounded to nearest. The table is symmetric around the centre.
+static void precal_bilateral_range_lut(uint8_t *fr, double sigma_r_d) {
+  int j;
+  for (j = 0; j <= BILATERAL_AMP_RANGE; j++) {
+    fr[j] = (uint8_t)(0.5 +
+                      RESTORATION_FILT_STEP *
+                          exp(-(j * j) / (2 * sigma_r_d * sigma_r_d)));
+    fr[-j] = fr[j];
+  }
+}
+
+// Fills one RESTORATION_WIN x RESTORATION_WIN spatial-weight table with a
+// separable Gaussian (sigmas |sigma_x_d| and |sigma_y_d|) scaled by
+// RESTORATION_FILT_STEP and rounded to nearest. The window centre is at
+// [RESTORATION_HALFWIN][RESTORATION_HALFWIN].
+static void precal_bilateral_spatial_lut(uint8_t (*fs)[RESTORATION_WIN],
+                                         double sigma_x_d, double sigma_y_d) {
+  int x, y;
+  for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; y++) {
+    for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; x++) {
+      fs[y + RESTORATION_HALFWIN][x + RESTORATION_HALFWIN] =
+          (uint8_t)(0.5 +
+                    RESTORATION_FILT_STEP *
+                        exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) -
+                            (y * y) / (2 * sigma_y_d * sigma_y_d)));
+    }
+  }
+}
+
+// Converts the fixed-point sigmas of |param| to real values and fills the
+// range table |range_lut| (BILATERAL_AMP_RANGE_SYM entries) and the spatial
+// table |spatial_lut| for one level.
+static void precal_bilateral_level(const BilateralParamsType *param,
+                                   uint8_t *range_lut,
+                                   uint8_t (*spatial_lut)[RESTORATION_WIN]) {
+  const double sigma_r_d = (double)param->sigma_r / BILATERAL_PARAM_PRECISION;
+  const double sigma_x_d = (double)param->sigma_x / BILATERAL_PARAM_PRECISION;
+  const double sigma_y_d = (double)param->sigma_y / BILATERAL_PARAM_PRECISION;
+
+  precal_bilateral_range_lut(range_lut + BILATERAL_AMP_RANGE, sigma_r_d);
+  precal_bilateral_spatial_lut(spatial_lut, sigma_x_d, sigma_y_d);
+}
+
+// Precomputes the bilateral filter weight tables for every key-frame level
+// and every regular level. Writes only the file-scope
+// bilateral_filter_coeffs_* tables.
+// The definition now uses an explicit (void) parameter list; it stays
+// compatible with a declaration written with empty parentheses.
+void vp10_loop_restoration_precal(void) {
+  int i;
+  for (i = 0; i < BILATERAL_LEVELS_KF; i++) {
+    const BilateralParamsType param = vp10_bilateral_level_to_params(i, 1);
+    precal_bilateral_level(&param, bilateral_filter_coeffs_r_kf[i],
+                           bilateral_filter_coeffs_s_kf[i]);
+  }
+  for (i = 0; i < BILATERAL_LEVELS; i++) {
+    const BilateralParamsType param = vp10_bilateral_level_to_params(i, 0);
+    precal_bilateral_level(&param, bilateral_filter_coeffs_r[i],
+                           bilateral_filter_coeffs_s[i]);
+  }
+}
+
+// Returns the bit-count constant for the bilateral level that applies to the
+// current frame type: BILATERAL_LEVEL_BITS_KF for key frames,
+// BILATERAL_LEVEL_BITS otherwise.
+int vp10_bilateral_level_bits(const VP10_COMMON *const cm) {
+  if (cm->frame_type == KEY_FRAME) return BILATERAL_LEVEL_BITS_KF;
+  return BILATERAL_LEVEL_BITS;
+}
+
+// Prepares |rst| for filtering a width x height frame according to |rsi|.
+// For RESTORE_BILATERAL it computes the tile geometry and allocates per-tile
+// pointers into the precomputed weight tables (key-frame tables when |kf| is
+// non-zero). For RESTORE_WIENER it computes the tile geometry and allocates
+// and expands the per-tile symmetric vertical / horizontal filters: the
+// RESTORATION_HALFWIN signalled taps are mirrored and the centre tap is
+// chosen so that all taps sum to RESTORATION_FILT_STEP.
+// The arrays are allocated with malloc() and not released here; the caller
+// owns them.
+// NOTE(review): allocation failure is only caught by assert(), so builds with
+// asserts disabled would dereference NULL. The function has no way to report
+// an error through its current interface.
+void vp10_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi,
+                                int kf, int width, int height) {
+  int i, tile_idx;
+  rst->restoration_type = rsi->restoration_type;
+  rst->subsampling_x = 0;
+  rst->subsampling_y = 0;
+  if (rsi->restoration_type == RESTORE_BILATERAL) {
+    rst->tilesize_index = BILATERAL_TILESIZE;
+    rst->ntiles =
+        vp10_get_restoration_ntiles(rst->tilesize_index, width, height);
+    vp10_get_restoration_tile_size(rst->tilesize_index, width, height,
+                                   &rst->tile_width, &rst->tile_height,
+                                   &rst->nhtiles, &rst->nvtiles);
+    rst->bilateral_level = rsi->bilateral_level;
+    rst->wr_lut = (uint8_t **)malloc(sizeof(*rst->wr_lut) * rst->ntiles);
+    assert(rst->wr_lut != NULL);
+    rst->wx_lut = (uint8_t(**)[RESTORATION_WIN])malloc(sizeof(*rst->wx_lut) *
+                                                       rst->ntiles);
+    assert(rst->wx_lut != NULL);
+    for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+      const int level = rsi->bilateral_level[tile_idx];
+      if (level >= 0) {
+        rst->wr_lut[tile_idx] = kf ? bilateral_filter_coeffs_r_kf[level]
+                                   : bilateral_filter_coeffs_r[level];
+        rst->wx_lut[tile_idx] = kf ? bilateral_filter_coeffs_s_kf[level]
+                                   : bilateral_filter_coeffs_s[level];
+      } else {
+        // A negative level means the tile is not filtered. malloc() does not
+        // initialise memory, so give the unused slots a defined value rather
+        // than leaving indeterminate pointers behind.
+        rst->wr_lut[tile_idx] = NULL;
+        rst->wx_lut[tile_idx] = NULL;
+      }
+    }
+  } else if (rsi->restoration_type == RESTORE_WIENER) {
+    rst->tilesize_index = WIENER_TILESIZE;
+    rst->ntiles =
+        vp10_get_restoration_ntiles(rst->tilesize_index, width, height);
+    vp10_get_restoration_tile_size(rst->tilesize_index, width, height,
+                                   &rst->tile_width, &rst->tile_height,
+                                   &rst->nhtiles, &rst->nvtiles);
+    rst->wiener_level = rsi->wiener_level;
+    rst->vfilter =
+        (int(*)[RESTORATION_WIN])malloc(sizeof(*rst->vfilter) * rst->ntiles);
+    assert(rst->vfilter != NULL);
+    rst->hfilter =
+        (int(*)[RESTORATION_WIN])malloc(sizeof(*rst->hfilter) * rst->ntiles);
+    assert(rst->hfilter != NULL);
+    for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+      // Start the centre tap at the full gain and subtract twice every outer
+      // tap, so that the complete symmetric filter sums to
+      // RESTORATION_FILT_STEP.
+      rst->vfilter[tile_idx][RESTORATION_HALFWIN] =
+          rst->hfilter[tile_idx][RESTORATION_HALFWIN] = RESTORATION_FILT_STEP;
+      for (i = 0; i < RESTORATION_HALFWIN; ++i) {
+        rst->vfilter[tile_idx][i] =
+            rst->vfilter[tile_idx][RESTORATION_WIN - 1 - i] =
+                rsi->vfilter[tile_idx][i];
+        rst->hfilter[tile_idx][i] =
+            rst->hfilter[tile_idx][RESTORATION_WIN - 1 - i] =
+                rsi->hfilter[tile_idx][i];
+        rst->vfilter[tile_idx][RESTORATION_HALFWIN] -=
+            2 * rsi->vfilter[tile_idx][i];
+        rst->hfilter[tile_idx][RESTORATION_HALFWIN] -=
+            2 * rsi->hfilter[tile_idx][i];
+      }
+    }
+  }
+}
+
+// Applies the bilateral restoration filter in place to the 8-bit
+// width x height plane at |data| (row pitch |stride|), tile by tile.
+// Tiles whose bilateral level is negative are skipped. For every other tile
+// each pixel is replaced by the weighted average of its
+// RESTORATION_WIN x RESTORATION_WIN neighbourhood, where a neighbour's weight
+// is the product of the tile's spatial weight and the range weight of its
+// difference to the centre pixel. A border of RESTORATION_HALFWIN pixels
+// around the plane is left unfiltered so the window never leaves the plane.
+// Results are first written to |tmpdata| (row pitch |tmpstride|) and copied
+// back once the tile is complete, so every pixel of a tile is computed from
+// unfiltered pixels of that tile.
+static void loop_bilateral_filter(uint8_t *data, int width, int height,
+                                  int stride, RestorationInternal *rst,
+                                  uint8_t *tmpdata, int tmpstride) {
+  int i, j, tile_idx, htile_idx, vtile_idx;
+  int h_start, h_end, v_start, v_end;
+  int tile_width, tile_height;
+
+  tile_width = rst->tile_width >> rst->subsampling_x;
+  tile_height = rst->tile_height >> rst->subsampling_y;
+
+  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+    uint8_t *data_p, *tmpdata_p;
+    const uint8_t *wr_lut_;
+
+    if (rst->bilateral_level[tile_idx] < 0) continue;
+
+    // Look the table up only after the skip check: the per-tile entry is
+    // only set up for tiles with a non-negative level.
+    // The pointer is centred so that it can be indexed with a signed
+    // difference.
+    wr_lut_ = rst->wr_lut[tile_idx] + BILATERAL_AMP_RANGE;
+
+    htile_idx = tile_idx % rst->nhtiles;
+    vtile_idx = tile_idx / rst->nhtiles;
+    // The first / last tile in each direction stops RESTORATION_HALFWIN
+    // pixels short of the plane edge; the last tile also absorbs any
+    // remainder.
+    h_start =
+        htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+    h_end = (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width)
+                                           : (width - RESTORATION_HALFWIN);
+    v_start =
+        vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+                                           : (height - RESTORATION_HALFWIN);
+
+    data_p = data + h_start + v_start * stride;
+    tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+
+    for (i = 0; i < (v_end - v_start); ++i) {
+      for (j = 0; j < (h_end - h_start); ++j) {
+        int x, y;
+        int flsum = 0, wtsum = 0, wt;
+        // Centre column of the top row of the window.
+        uint8_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride;
+        for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
+          for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
+            wt = (int)rst->wx_lut[tile_idx][y + RESTORATION_HALFWIN]
+                                 [x + RESTORATION_HALFWIN] *
+                 (int)wr_lut_[data_p2[x] - data_p[j]];
+            wtsum += wt;
+            flsum += wt * data_p2[x];
+          }
+          data_p2 += stride;
+        }
+        // Rounded weighted average; keep the pixel if all weights are zero.
+        if (wtsum > 0)
+          tmpdata_p[j] = clip_pixel((int)((flsum + wtsum / 2) / wtsum));
+        else
+          tmpdata_p[j] = data_p[j];
+      }
+      tmpdata_p += tmpstride;
+      data_p += stride;
+    }
+    // Copy the filtered tile back into the frame.
+    for (i = v_start; i < v_end; ++i) {
+      memcpy(data + i * stride + h_start, tmpdata + i * tmpstride + h_start,
+             (h_end - h_start) * sizeof(*data));
+    }
+  }
+}
+
+// Applies a symmetric horizontal filter centred on d[0] and returns the
+// rounded, clipped result. Only the centre tap and the taps above it in
+// |hfilter| are read; each outer tap weights the sum of the two pixels at the
+// same distance on either side. Reads d[-RESTORATION_HALFWIN] ..
+// d[RESTORATION_HALFWIN].
+uint8_t hor_sym_filter(uint8_t *d, int *hfilter) {
+  const int *const taps = hfilter + RESTORATION_HALFWIN;
+  // Start from the rounding offset.
+  int32_t acc = (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * taps[0];
+  int k;
+
+  for (k = 1; k <= RESTORATION_HALFWIN; ++k) {
+    const int pair = d[k] + d[-k];
+    acc += pair * taps[k];
+  }
+  return clip_pixel(acc >> RESTORATION_FILT_BITS);
+}
+
+// Applies a symmetric vertical filter centred on d[0] and returns the
+// rounded, clipped result. Rows are |stride| apart. Only the centre tap and
+// the taps above it in |vfilter| are read; each outer tap weights the sum of
+// the two pixels at the same distance above and below. Reads
+// d[-RESTORATION_HALFWIN * stride] .. d[RESTORATION_HALFWIN * stride].
+uint8_t ver_sym_filter(uint8_t *d, int stride, int *vfilter) {
+  const int *const taps = vfilter + RESTORATION_HALFWIN;
+  const uint8_t *below = d;
+  const uint8_t *above = d;
+  // Start from the rounding offset.
+  int32_t acc = (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * taps[0];
+  int k;
+
+  for (k = 1; k <= RESTORATION_HALFWIN; ++k) {
+    // Step one row further away from the centre in both directions.
+    below += stride;
+    above -= stride;
+    acc += (*below + *above) * taps[k];
+  }
+  return clip_pixel(acc >> RESTORATION_FILT_BITS);
+}
+
+// Applies the separable Wiener restoration filter in place to the 8-bit
+// width x height plane at |data| (row pitch |stride|), using |tmpdata| (row
+// pitch |tmpstride|) as the intermediate buffer.
+// Pass 1 filters horizontally from |data| into |tmpdata|; pass 2 filters
+// vertically from |tmpdata| back into |data|. Tiles whose wiener level is 0
+// are skipped in both passes. The outermost RESTORATION_HALFWIN columns are
+// not filtered horizontally and the outermost RESTORATION_HALFWIN rows are
+// not filtered vertically, so no filter tap reaches outside the plane.
+static void loop_wiener_filter(uint8_t *data, int width, int height, int stride,
+                               RestorationInternal *rst, uint8_t *tmpdata,
+                               int tmpstride) {
+  const int tile_width = rst->tile_width >> rst->subsampling_x;
+  const int tile_height = rst->tile_height >> rst->subsampling_y;
+  int row, col, tile_idx;
+
+  // Initialize tmp buffer, so that positions skipped by the horizontal pass
+  // still hold the source pixels.
+  for (row = 0; row < height; ++row) {
+    memcpy(tmpdata + row * tmpstride, data + row * stride,
+           sizeof(*data) * width);
+  }
+
+  // Filter row-wise tile-by-tile
+  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+    int htile_idx, vtile_idx;
+    int h_start, h_end, v_start, v_end;
+
+    if (rst->wiener_level[tile_idx] == 0) continue;
+
+    htile_idx = tile_idx % rst->nhtiles;
+    vtile_idx = tile_idx / rst->nhtiles;
+    h_start =
+        htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+    h_end = (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width)
+                                           : (width - RESTORATION_HALFWIN);
+    v_start = vtile_idx * tile_height;
+    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+                                           : height;
+
+    for (row = v_start; row < v_end; ++row) {
+      uint8_t *const src_row = data + row * stride;
+      uint8_t *const out_row = tmpdata + row * tmpstride;
+      for (col = h_start; col < h_end; ++col) {
+        out_row[col] = hor_sym_filter(src_row + col, rst->hfilter[tile_idx]);
+      }
+    }
+  }
+
+  // Filter column-wise tile-by-tile (bands of thickness RESTORATION_HALFWIN
+  // at top and bottom of tiles allow filtering overlap, and are not optimally
+  // filtered)
+  for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+    int htile_idx, vtile_idx;
+    int h_start, h_end, v_start, v_end;
+
+    if (rst->wiener_level[tile_idx] == 0) continue;
+
+    htile_idx = tile_idx % rst->nhtiles;
+    vtile_idx = tile_idx / rst->nhtiles;
+    h_start = htile_idx * tile_width;
+    h_end =
+        (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width) : width;
+    v_start =
+        vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+    v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+                                           : (height - RESTORATION_HALFWIN);
+
+    for (row = v_start; row < v_end; ++row) {
+      uint8_t *const src_row = tmpdata + row * tmpstride;
+      uint8_t *const out_row = data + row * stride;
+      for (col = h_start; col < h_end; ++col) {
+        out_row[col] =
+            ver_sym_filter(src_row + col, tmpstride, rst->vfilter[tile_idx]);
+      }
+    }
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Bilateral filter of one 16-bit plane, tile by tile, staged through tmpdata8.
+static void loop_bilateral_filter_highbd(uint8_t *data8, int width, int height,
+ int stride, RestorationInternal *rst,
+ uint8_t *tmpdata8, int tmpstride,
+ int bit_depth) {
+ int i, j, tile_idx, htile_idx, vtile_idx;
+ int h_start, h_end, v_start, v_end;
+ int tile_width, tile_height;
+ uint16_t *data = CONVERT_TO_SHORTPTR(data8); // samples are accessed as uint16_t
+ uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
+
+ tile_width = rst->tile_width >> rst->subsampling_x;
+ tile_height = rst->tile_height >> rst->subsampling_y;
+
+ for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+ uint16_t *data_p, *tmpdata_p;
+ const uint8_t *wr_lut_ = rst->wr_lut[tile_idx] + BILATERAL_AMP_RANGE;
+ // NOTE(review): wr_lut_ is formed before the level check below; confirm
+ if (rst->bilateral_level[tile_idx] < 0) continue; // negative level: skip tile
+ // wr_lut[tile_idx] is always a valid pointer, even for skipped tiles.
+ htile_idx = tile_idx % rst->nhtiles; // tiles are numbered row by row
+ vtile_idx = tile_idx / rst->nhtiles;
+ h_start =
+ htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+ h_end = (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width)
+ : (width - RESTORATION_HALFWIN); // HALFWIN columns at plane edges untouched
+ v_start =
+ vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+ v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+ : (height - RESTORATION_HALFWIN); // HALFWIN rows at plane edges untouched
+
+ data_p = data + h_start + v_start * stride;
+ tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+
+ for (i = 0; i < (v_end - v_start); ++i) {
+ for (j = 0; j < (h_end - h_start); ++j) {
+ int x, y;
+ int flsum = 0, wtsum = 0, wt; // weighted sample sum, weight sum, one weight
+ uint16_t *data_p2 = data_p + j - RESTORATION_HALFWIN * stride; // top window row
+ for (y = -RESTORATION_HALFWIN; y <= RESTORATION_HALFWIN; ++y) {
+ for (x = -RESTORATION_HALFWIN; x <= RESTORATION_HALFWIN; ++x) {
+ wt = (int)rst->wx_lut[tile_idx][y + RESTORATION_HALFWIN]
+ [x + RESTORATION_HALFWIN] *
+ (int)wr_lut_[data_p2[x] - data_p[j]]; // NOTE(review): raw 16-bit sample difference as index; confirm LUT span
+ wtsum += wt;
+ flsum += wt * data_p2[x];
+ }
+ data_p2 += stride;
+ }
+ if (wtsum > 0)
+ tmpdata_p[j] =
+ clip_pixel_highbd((int)((flsum + wtsum / 2) / wtsum), bit_depth); // rounded weighted mean
+ else
+ tmpdata_p[j] = data_p[j]; // no weight collected: keep the input sample
+ }
+ tmpdata_p += tmpstride;
+ data_p += stride;
+ }
+ for (i = v_start; i < v_end; ++i) { // copy this tile's filtered rows back into data
+ memcpy(data + i * stride + h_start, tmpdata + i * tmpstride + h_start,
+ (h_end - h_start) * sizeof(*data));
+ }
+ }
+}
+
+// Horizontal symmetric FIR centred on d[0]; rounds, shifts and clips to bd bits.
+uint16_t hor_sym_filter_highbd(uint16_t *d, int *hfilter, int bd) {
+  const int *const taps = hfilter + RESTORATION_HALFWIN;  // taps[0] = centre tap
+  int32_t acc = (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * taps[0];
+  int k = 0;
+  while (++k <= RESTORATION_HALFWIN) acc += (d[k] + d[-k]) * taps[k];
+  return clip_pixel_highbd(acc >> RESTORATION_FILT_BITS, bd);
+}
+
+// Vertical symmetric FIR centred on d[0], stepping by stride; clips to bd bits.
+uint16_t ver_sym_filter_highbd(uint16_t *d, int stride, int *vfilter, int bd) {
+  const int *const taps = vfilter + RESTORATION_HALFWIN;  // taps[0] = centre tap
+  int32_t acc = (1 << (RESTORATION_FILT_BITS - 1)) + d[0] * taps[0];
+  int k = 0;
+  while (++k <= RESTORATION_HALFWIN) acc += (d[k * stride] + d[-k * stride]) * taps[k];
+  return clip_pixel_highbd(acc >> RESTORATION_FILT_BITS, bd);
+}
+
+// 16-bit separable Wiener filter of one plane: rows -> tmp, then columns -> data.
+static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
+ int stride, RestorationInternal *rst,
+ uint8_t *tmpdata8, int tmpstride,
+ int bit_depth) {
+ uint16_t *data = CONVERT_TO_SHORTPTR(data8); // samples are accessed as uint16_t
+ uint16_t *tmpdata = CONVERT_TO_SHORTPTR(tmpdata8);
+ int i, j, tile_idx, htile_idx, vtile_idx;
+ int h_start, h_end, v_start, v_end;
+ int tile_width, tile_height;
+ uint16_t *data_p, *tmpdata_p;
+ tile_width = rst->tile_width >> rst->subsampling_x; // tile size after the
+ tile_height = rst->tile_height >> rst->subsampling_y; // subsampling shifts
+
+ // Copy the plane into tmpdata so samples the row pass skips keep their value
+ data_p = data;
+ tmpdata_p = tmpdata;
+ for (i = 0; i < height; ++i) {
+ memcpy(tmpdata_p, data_p, sizeof(*data_p) * width);
+ data_p += stride;
+ tmpdata_p += tmpstride;
+ }
+
+ // Filter row-wise tile-by-tile (reads data, writes tmpdata)
+ for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+ if (rst->wiener_level[tile_idx] == 0) continue; // level 0: tile is skipped
+ htile_idx = tile_idx % rst->nhtiles; // tiles are numbered row by row
+ vtile_idx = tile_idx / rst->nhtiles;
+ h_start =
+ htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+ h_end = (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width)
+ : (width - RESTORATION_HALFWIN); // skip HALFWIN columns at plane edges
+ v_start = vtile_idx * tile_height;
+ v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+ : height; // last tile row runs to the bottom of the plane
+ data_p = data + h_start + v_start * stride;
+ tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+ for (i = 0; i < (v_end - v_start); ++i) {
+ for (j = 0; j < (h_end - h_start); ++j) {
+ *tmpdata_p++ =
+ hor_sym_filter_highbd(data_p++, rst->hfilter[tile_idx], bit_depth);
+ }
+ data_p += stride - (h_end - h_start); // back to h_start on the next row
+ tmpdata_p += tmpstride - (h_end - h_start);
+ }
+ }
+
+ // Filter column-wise tile-by-tile (bands of thickness RESTORATION_HALFWIN
+ // at top and bottom of tiles allow filtering overlap, and are not optimally
+ // filtered). This pass reads tmpdata and writes the result back into data.
+ for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
+ if (rst->wiener_level[tile_idx] == 0) continue;
+ htile_idx = tile_idx % rst->nhtiles;
+ vtile_idx = tile_idx / rst->nhtiles;
+ h_start = htile_idx * tile_width; // full tile width in this pass
+ h_end =
+ (htile_idx < rst->nhtiles - 1) ? ((htile_idx + 1) * tile_width) : width;
+ v_start =
+ vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+ v_end = (vtile_idx < rst->nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+ : (height - RESTORATION_HALFWIN); // skip HALFWIN rows at plane edges
+ data_p = data + h_start + v_start * stride;
+ tmpdata_p = tmpdata + h_start + v_start * tmpstride;
+ for (i = 0; i < (v_end - v_start); ++i) {
+ for (j = 0; j < (h_end - h_start); ++j) {
+ *data_p++ = ver_sym_filter_highbd(tmpdata_p++, tmpstride,
+ rst->vfilter[tile_idx], bit_depth);
+ }
+ data_p += stride - (h_end - h_start);
+ tmpdata_p += tmpstride - (h_end - h_start);
+ }
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// Restores mi rows [start_mi_row, end_mi_row) of frame in place (Y, then U/V).
+void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+ int start_mi_row, int end_mi_row, int y_only) {
+ const int ywidth = frame->y_crop_width;
+ const int ystride = frame->y_stride;
+ const int uvwidth = frame->uv_crop_width;
+ const int uvstride = frame->uv_stride;
+ const int ystart = start_mi_row << MI_SIZE_LOG2; // mi rows -> luma rows
+ const int uvstart = ystart >> cm->subsampling_y; // luma rows -> chroma rows
+ int yend = end_mi_row << MI_SIZE_LOG2;
+ int uvend = yend >> cm->subsampling_y;
+ restore_func_type restore_func = // anything but RESTORE_BILATERAL uses Wiener
+ cm->rst_internal.restoration_type == RESTORE_BILATERAL
+ ? loop_bilateral_filter
+ : loop_wiener_filter;
+#if CONFIG_VP9_HIGHBITDEPTH
+ restore_func_highbd_type restore_func_highbd =
+ cm->rst_internal.restoration_type == RESTORE_BILATERAL
+ ? loop_bilateral_filter_highbd
+ : loop_wiener_filter_highbd;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ YV12_BUFFER_CONFIG tmp_buf; // scratch frame, allocated and freed on every call
+ memset(&tmp_buf, 0, sizeof(YV12_BUFFER_CONFIG));
+ yend = VPXMIN(yend, cm->height); // clamp the row window to the frame
+ uvend = VPXMIN(uvend, cm->subsampling_y ? (cm->height + 1) >> 1 : cm->height);
+
+ if (vpx_realloc_frame_buffer(
+ &tmp_buf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VPX_DEC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL) < 0)
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate tmp restoration buffer");
+ // NOTE(review): tmp_buf is used below even if the call above returns; confirm.
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ restore_func_highbd(frame->y_buffer + ystart * ystride, ywidth,
+ yend - ystart, ystride, &cm->rst_internal,
+ tmp_buf.y_buffer + ystart * tmp_buf.y_stride,
+ tmp_buf.y_stride, cm->bit_depth);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ restore_func(frame->y_buffer + ystart * ystride, ywidth, yend - ystart,
+ ystride, &cm->rst_internal,
+ tmp_buf.y_buffer + ystart * tmp_buf.y_stride,
+ tmp_buf.y_stride);
+ if (!y_only) {
+ cm->rst_internal.subsampling_x = cm->subsampling_x; // chroma tiles use the
+ cm->rst_internal.subsampling_y = cm->subsampling_y; // frame's subsampling
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ restore_func_highbd(frame->u_buffer + uvstart * uvstride, uvwidth,
+ uvend - uvstart, uvstride, &cm->rst_internal,
+ tmp_buf.u_buffer + uvstart * tmp_buf.uv_stride,
+ tmp_buf.uv_stride, cm->bit_depth);
+ restore_func_highbd(frame->v_buffer + uvstart * uvstride, uvwidth,
+ uvend - uvstart, uvstride, &cm->rst_internal,
+ tmp_buf.v_buffer + uvstart * tmp_buf.uv_stride,
+ tmp_buf.uv_stride, cm->bit_depth);
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ restore_func(frame->u_buffer + uvstart * uvstride, uvwidth,
+ uvend - uvstart, uvstride, &cm->rst_internal,
+ tmp_buf.u_buffer + uvstart * tmp_buf.uv_stride,
+ tmp_buf.uv_stride);
+ restore_func(frame->v_buffer + uvstart * uvstride, uvwidth,
+ uvend - uvstart, uvstride, &cm->rst_internal,
+ tmp_buf.v_buffer + uvstart * tmp_buf.uv_stride,
+ tmp_buf.uv_stride);
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ vpx_free_frame_buffer(&tmp_buf);
+ if (cm->rst_internal.restoration_type == RESTORE_BILATERAL) {
+ free(cm->rst_internal.wr_lut); // NOTE(review): plain free(); confirm it matches the allocator used in init
+ cm->rst_internal.wr_lut = NULL;
+ free(cm->rst_internal.wx_lut);
+ cm->rst_internal.wx_lut = NULL;
+ }
+ if (cm->rst_internal.restoration_type == RESTORE_WIENER) {
+ free(cm->rst_internal.vfilter); // per-call filter tables are released here
+ cm->rst_internal.vfilter = NULL;
+ free(cm->rst_internal.hfilter);
+ cm->rst_internal.hfilter = NULL;
+ }
+}
+
+// Runs loop restoration on frame: the whole frame, or a mid-frame band of mi
+// rows when partial_frame is set. Does nothing for RESTORE_NONE.
+void vp10_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                                 RestorationInfo *rsi, int y_only,
+                                 int partial_frame) {
+  int first_row = 0, row_count = cm->mi_rows;
+
+  if (rsi->restoration_type == RESTORE_NONE) return;
+
+  if (partial_frame && cm->mi_rows > 8) {
+    // Band starts at mid-frame, rounded down to a multiple of 8 mi rows.
+    first_row = (cm->mi_rows >> 1) & 0xfffffff8;
+    row_count = VPXMAX(cm->mi_rows / 8, 8);
+  }
+  vp10_loop_restoration_init(&cm->rst_internal, rsi,
+                             cm->frame_type == KEY_FRAME, cm->width, cm->height);
+  vp10_loop_restoration_rows(frame, cm, first_row, first_row + row_count,
+                             y_only);
+}
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
new file mode 100644
index 0000000..c1e937a
--- /dev/null
+++ b/av1/common/restoration.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_RESTORATION_H_
+#define VP10_COMMON_RESTORATION_H_
+
+#include "aom_ports/mem.h"
+#include "./vpx_config.h"
+
+#include "av1/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BILATERAL_LEVEL_BITS_KF 4
+#define BILATERAL_LEVELS_KF (1 << BILATERAL_LEVEL_BITS_KF) // = 16
+#define BILATERAL_LEVEL_BITS 3
+#define BILATERAL_LEVELS (1 << BILATERAL_LEVEL_BITS) // = 8
+// #define DEF_BILATERAL_LEVEL 2
+
+#define RESTORATION_TILESIZES 3 // NOTE(review): looks like the count of tile-size
+#define BILATERAL_TILESIZE 0 // choices and two indices into it (cf. tilesize_index
+#define WIENER_TILESIZE 2 // in RestorationInternal); confirm in restoration.c
+
+#define RESTORATION_HALFWIN 3 // filter taps reach this many samples each side
+#define RESTORATION_HALFWIN1 (RESTORATION_HALFWIN + 1) // = 4
+#define RESTORATION_WIN (2 * RESTORATION_HALFWIN + 1) // = 7 taps per direction
+#define RESTORATION_WIN2 ((RESTORATION_WIN) * (RESTORATION_WIN)) // = 49
+
+#define RESTORATION_FILT_BITS 7 // right shift applied to the filter accumulators
+#define RESTORATION_FILT_STEP (1 << RESTORATION_FILT_BITS) // = 128
+
+#define WIENER_FILT_TAP0_MINV (-5) // smallest value of each Wiener tap range
+#define WIENER_FILT_TAP1_MINV (-23)
+#define WIENER_FILT_TAP2_MINV (-16)
+
+#define WIENER_FILT_TAP0_BITS 4 // bit width of each tap range (16/32/64 values)
+#define WIENER_FILT_TAP1_BITS 5
+#define WIENER_FILT_TAP2_BITS 6
+
+#define WIENER_FILT_BITS \
+ ((WIENER_FILT_TAP0_BITS + WIENER_FILT_TAP1_BITS + WIENER_FILT_TAP2_BITS) * 2) // = 30
+
+#define WIENER_FILT_TAP0_MAXV \
+ (WIENER_FILT_TAP0_MINV - 1 + (1 << WIENER_FILT_TAP0_BITS)) // = 10
+#define WIENER_FILT_TAP1_MAXV \
+ (WIENER_FILT_TAP1_MINV - 1 + (1 << WIENER_FILT_TAP1_BITS)) // = 8
+#define WIENER_FILT_TAP2_MAXV \
+ (WIENER_FILT_TAP2_MINV - 1 + (1 << WIENER_FILT_TAP2_BITS)) // = 47
+
+typedef enum { // which loop-restoration filter is applied to a frame
+ RESTORE_NONE, // vp10_loop_restoration_frame() does nothing for this value
+ RESTORE_BILATERAL, // selects loop_bilateral_filter*()
+ RESTORE_WIENER, // selects loop_wiener_filter*()
+} RestorationType;
+
+typedef struct { // restoration parameters handed to vp10_loop_restoration_init()
+ RestorationType restoration_type;
+ // Bilateral filter
+ int *bilateral_level; // NOTE(review): presumably one level per tile; confirm
+ // Wiener filter
+ int *wiener_level; // NOTE(review): presumably one level per tile; confirm
+ int (*vfilter)[RESTORATION_HALFWIN], (*hfilter)[RESTORATION_HALFWIN]; // rows of RESTORATION_HALFWIN taps
+} RestorationInfo;
+
+typedef struct { // working state read by the loop_*_filter*() routines
+ RestorationType restoration_type;
+ int subsampling_x; // right shift applied to tile_width for the current plane
+ int subsampling_y; // right shift applied to tile_height for the current plane
+ int tilesize_index;
+ int ntiles; // number of tiles the filters iterate over
+ int tile_width, tile_height; // tile size before the subsampling shift
+ int nhtiles, nvtiles; // tiles per row / per column (tile_idx is row by row)
+ // Bilateral filter
+ int *bilateral_level; // indexed by tile; a negative level skips the tile
+ uint8_t (**wx_lut)[RESTORATION_WIN]; // [tile][y + HALFWIN][x + HALFWIN] weight
+ uint8_t **wr_lut; // [tile] weight table, read at BILATERAL_AMP_RANGE + difference
+ // Wiener filter
+ int *wiener_level; // indexed by tile; level 0 skips the tile
+ int (*vfilter)[RESTORATION_WIN], (*hfilter)[RESTORATION_WIN]; // per-tile taps; filters read [HALFWIN .. 2 * HALFWIN]
+} RestorationInternal;
+
+int vp10_bilateral_level_bits(const struct VP10Common *const cm);
+int vp10_get_restoration_ntiles(int tilesize, int width, int height);
+void vp10_get_restoration_tile_size(int tilesize, int width, int height,
+                                    int *tile_width, int *tile_height,
+                                    int *nhtiles, int *nvtiles);
+void vp10_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi,
+                                int kf, int width, int height);
+void vp10_loop_restoration_frame(YV12_BUFFER_CONFIG *frame,
+                                 struct VP10Common *cm, RestorationInfo *rsi,
+                                 int y_only, int partial_frame);  // no-op for RESTORE_NONE
+void vp10_loop_restoration_rows(YV12_BUFFER_CONFIG *frame,
+                                struct VP10Common *cm, int start_mi_row,
+                                int end_mi_row, int y_only);  // mi rows [start, end)
+void vp10_loop_restoration_precal(void);  // (void): a real prototype, not "()"
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_RESTORATION_H_
diff --git a/av1/common/scale.c b/av1/common/scale.c
new file mode 100644
index 0000000..6bd3b74
--- /dev/null
+++ b/av1/common/scale.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_dsp_rtcd.h"
+#include "av1/common/filter.h"
+#include "av1/common/scale.h"
+#include "aom_dsp/vpx_filter.h"
+
+static INLINE int scaled_x(int val, const struct scale_factors *sf) { // val * x scale
+ return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT); // 64-bit product, then drop the fraction bits
+}
+
+static INLINE int scaled_y(int val, const struct scale_factors *sf) { // val * y scale
+ return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT); // 64-bit product, then drop the fraction bits
+}
+
+static int unscaled_value(int val, const struct scale_factors *sf) { // identity, used when no scaling applies
+ (void)sf; // unused; kept so the signature matches scale_value_x/scale_value_y
+ return val;
+}
+
+// Returns other_size / this_size in fixed point with REF_SCALE_SHIFT fraction
+// bits; this_size must be non-zero. Computed once per reference frame so that
+// decode/encode paths (and hardware) can multiply and shift instead of divide.
+static int get_fixed_point_scale_factor(int other_size, int this_size) {
+  // Widen first: shifting a large other_size in plain int overflows (UB).
+  return (int)(((int64_t)other_size << REF_SCALE_SHIFT) / this_size);
+}
+
+// Scales *mv by sf and adds the sub-pel fraction (fx, fy) of the scaled (x, y).
+MV32 vp10_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) {
+  const int fx = scaled_x(x << SUBPEL_BITS, sf) & SUBPEL_MASK;
+  const int fy = scaled_y(y << SUBPEL_BITS, sf) & SUBPEL_MASK;
+  const MV32 out = { scaled_y(mv->row, sf) + fy, scaled_x(mv->col, sf) + fx };
+  return out;
+}
+
+// Fills *sf (scale factors, steps, convolve tables) for other_* -> this_* sizes.
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
+ int other_h, int this_w, int this_h,
+ int use_highbd) {
+#else
+void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
+ int other_h, int this_w, int this_h) {
+#endif
+ if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
+ sf->x_scale_fp = REF_INVALID_SCALE; // only the two scale fields are written
+ sf->y_scale_fp = REF_INVALID_SCALE; // on this path; the rest of *sf is untouched
+ return;
+ }
+
+ sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w);
+ sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h);
+ sf->x_step_q4 = scaled_x(16, sf); // equals 16 when x is not scaled
+ sf->y_step_q4 = scaled_y(16, sf); // equals 16 when y is not scaled
+ if (vp10_is_scaled(sf)) {
+ sf->scale_value_x = scaled_x;
+ sf->scale_value_y = scaled_y;
+ } else {
+ sf->scale_value_x = unscaled_value; // identity when neither axis is scaled
+ sf->scale_value_y = unscaled_value;
+ }
+
+// TODO(agrange): Investigate the best choice of functions to use here
+// for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what
+// to do at full-pel offsets. The current selection, where the filter is
+// applied in one direction only, and not at all for 0,0, seems to give the
+// best quality, but it may be worth trying an additional mode that does
+// do the filtering on full-pel.
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ sf->predict_ni[0][0][0] = vpx_convolve8_c; // table index order: [horiz][vert][avg]
+ sf->predict_ni[0][0][1] = vpx_convolve8_avg_c;
+ sf->predict_ni[0][1][0] = vpx_convolve8_c;
+ sf->predict_ni[0][1][1] = vpx_convolve8_avg_c;
+ sf->predict_ni[1][0][0] = vpx_convolve8_c;
+ sf->predict_ni[1][0][1] = vpx_convolve8_avg_c;
+ sf->predict_ni[1][1][0] = vpx_convolve8;
+ sf->predict_ni[1][1][1] = vpx_convolve8_avg;
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ if (sf->x_step_q4 == 16) { // predict[][][] index order: [horiz][vert][avg]
+ if (sf->y_step_q4 == 16) {
+ // No scaling in either direction.
+ sf->predict[0][0][0] = vpx_convolve_copy;
+ sf->predict[0][0][1] = vpx_convolve_avg;
+ sf->predict[0][1][0] = vpx_convolve8_vert;
+ sf->predict[0][1][1] = vpx_convolve8_avg_vert;
+ sf->predict[1][0][0] = vpx_convolve8_horiz;
+ sf->predict[1][0][1] = vpx_convolve8_avg_horiz;
+ } else {
+ // No scaling in x direction. Must always scale in the y direction.
+ sf->predict[0][0][0] = vpx_convolve8_vert;
+ sf->predict[0][0][1] = vpx_convolve8_avg_vert;
+ sf->predict[0][1][0] = vpx_convolve8_vert;
+ sf->predict[0][1][1] = vpx_convolve8_avg_vert;
+ sf->predict[1][0][0] = vpx_convolve8;
+ sf->predict[1][0][1] = vpx_convolve8_avg;
+ }
+ } else {
+ if (sf->y_step_q4 == 16) {
+ // No scaling in the y direction. Must always scale in the x direction.
+ sf->predict[0][0][0] = vpx_convolve8_horiz;
+ sf->predict[0][0][1] = vpx_convolve8_avg_horiz;
+ sf->predict[0][1][0] = vpx_convolve8;
+ sf->predict[0][1][1] = vpx_convolve8_avg;
+ sf->predict[1][0][0] = vpx_convolve8_horiz;
+ sf->predict[1][0][1] = vpx_convolve8_avg_horiz;
+ } else {
+ // Must always scale in both directions.
+ sf->predict[0][0][0] = vpx_convolve8;
+ sf->predict[0][0][1] = vpx_convolve8_avg;
+ sf->predict[0][1][0] = vpx_convolve8;
+ sf->predict[0][1][1] = vpx_convolve8_avg;
+ sf->predict[1][0][0] = vpx_convolve8;
+ sf->predict[1][0][1] = vpx_convolve8_avg;
+ }
+ }
+ // 2D subpel motion always gets filtered in both directions
+ sf->predict[1][1][0] = vpx_convolve8;
+ sf->predict[1][1][1] = vpx_convolve8_avg;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (use_highbd) { // the highbd tables below mirror the 8-bit selection above
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ sf->highbd_predict_ni[0][0][0] = vpx_highbd_convolve8_c;
+ sf->highbd_predict_ni[0][0][1] = vpx_highbd_convolve8_avg_c;
+ sf->highbd_predict_ni[0][1][0] = vpx_highbd_convolve8_c;
+ sf->highbd_predict_ni[0][1][1] = vpx_highbd_convolve8_avg_c;
+ sf->highbd_predict_ni[1][0][0] = vpx_highbd_convolve8_c;
+ sf->highbd_predict_ni[1][0][1] = vpx_highbd_convolve8_avg_c;
+ sf->highbd_predict_ni[1][1][0] = vpx_highbd_convolve8;
+ sf->highbd_predict_ni[1][1][1] = vpx_highbd_convolve8_avg;
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ if (sf->x_step_q4 == 16) {
+ if (sf->y_step_q4 == 16) {
+ // No scaling in either direction.
+ sf->highbd_predict[0][0][0] = vpx_highbd_convolve_copy;
+ sf->highbd_predict[0][0][1] = vpx_highbd_convolve_avg;
+ sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert;
+ sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert;
+ sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz;
+ sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz;
+ } else {
+ // No scaling in x direction. Must always scale in the y direction.
+ sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_vert;
+ sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_vert;
+ sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert;
+ sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert;
+ sf->highbd_predict[1][0][0] = vpx_highbd_convolve8;
+ sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg;
+ }
+ } else {
+ if (sf->y_step_q4 == 16) {
+ // No scaling in the y direction. Must always scale in the x direction.
+ sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_horiz;
+ sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_horiz;
+ sf->highbd_predict[0][1][0] = vpx_highbd_convolve8;
+ sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg;
+ sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz;
+ sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz;
+ } else {
+ // Must always scale in both directions.
+ sf->highbd_predict[0][0][0] = vpx_highbd_convolve8;
+ sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg;
+ sf->highbd_predict[0][1][0] = vpx_highbd_convolve8;
+ sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg;
+ sf->highbd_predict[1][0][0] = vpx_highbd_convolve8;
+ sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg;
+ }
+ }
+ // 2D subpel motion always gets filtered in both directions.
+ sf->highbd_predict[1][1][0] = vpx_highbd_convolve8;
+ sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+}
diff --git a/av1/common/scale.h b/av1/common/scale.h
new file mode 100644
index 0000000..bb02601
--- /dev/null
+++ b/av1/common/scale.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_SCALE_H_
+#define VP10_COMMON_SCALE_H_
+
+#include "av1/common/mv.h"
+#include "aom_dsp/vpx_convolve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define REF_SCALE_SHIFT 14                   // fraction bits of x/y_scale_fp
+#define REF_NO_SCALE (1 << REF_SCALE_SHIFT)  // fixed-point 1.0: equal sizes
+#define REF_INVALID_SCALE (-1)               // marks an unusable reference size
+
+struct scale_factors { // filled in by vp10_setup_scale_factors_for_frame()
+ int x_scale_fp; // horizontal fixed point scale factor
+ int y_scale_fp; // vertical fixed point scale factor
+ int x_step_q4; // scaled_x(16, sf); 16 means no horizontal scaling
+ int y_step_q4; // scaled_y(16, sf); 16 means no vertical scaling
+
+ int (*scale_value_x)(int val, const struct scale_factors *sf); // scaled_x or unscaled_value
+ int (*scale_value_y)(int val, const struct scale_factors *sf); // scaled_y or unscaled_value
+
+ convolve_fn_t predict[2][2][2]; // horiz, vert, avg
+#if CONFIG_VP9_HIGHBITDEPTH
+ highbd_convolve_fn_t highbd_predict[2][2][2]; // horiz, vert, avg
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// Functions for non-interpolating filters (those that filter zero offsets)
+#if CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+ convolve_fn_t predict_ni[2][2][2]; // horiz, vert, avg
+#if CONFIG_VP9_HIGHBITDEPTH
+ highbd_convolve_fn_t highbd_predict_ni[2][2][2]; // horiz, vert, avg
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_INTERP && SUPPORT_NONINTERPOLATING_FILTERS
+};
+
+MV32 vp10_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf);
+// Fills *sf for a this_w x this_h frame predicted from an other_w x other_h one.
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
+                                        int other_h, int this_w, int this_h,
+                                        int use_highbd);
+#else
+void vp10_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
+                                        int other_h, int this_w, int this_h);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+static INLINE int vp10_is_valid_scale(const struct scale_factors *sf) { // non-zero if neither
+ return sf->x_scale_fp != REF_INVALID_SCALE && // axis carries the invalid marker
+ sf->y_scale_fp != REF_INVALID_SCALE;
+}
+
+static INLINE int vp10_is_scaled(const struct scale_factors *sf) { // valid, and at least
+ return vp10_is_valid_scale(sf) && // one axis differs from REF_NO_SCALE
+ (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE);
+}
+
+static INLINE int valid_ref_frame_size(int ref_width, int ref_height,
+ int this_width, int this_height) { // per axis: ref / 2 <= this <= 16 * ref
+ return 2 * this_width >= ref_width && 2 * this_height >= ref_height && // this is at least half the reference
+ this_width <= 16 * ref_width && this_height <= 16 * ref_height; // and at most 16 times the reference
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_SCALE_H_
diff --git a/av1/common/scan.c b/av1/common/scan.c
new file mode 100644
index 0000000..dbc36eb
--- /dev/null
+++ b/av1/common/scan.c
@@ -0,0 +1,4169 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/scan.h"
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = {
+ 0, 4, 1, 5, 8, 2, 12, 9, 3, 6, 13, 10, 7, 14, 11, 15,
+};  // permutation of 0..15: first entry 0, last entry 15
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x4[16]) = {
+ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
+};  // steps by 4 from 0, then restarts at 1, 2 and 3
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x4[16]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};  // identity order: entry i is i, for i = 0..15
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4[16]) = {
+ 0, 4, 8, 1, 12, 5, 9, 2, 13, 6, 10, 3, 7, 14, 11, 15,
+};  // permutation of 0..15 that opens with steps of 4 (0, 4, 8)
+
+DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4[16]) = {
+ 0, 1, 4, 2, 5, 3, 6, 8, 9, 7, 12, 10, 13, 11, 14, 15,
+};  // permutation of 0..15 that opens with a unit step (0, 1)
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, default_scan_4x8[32]) = {
+ 0, 1, 4, 5, 2, 8, 6, 9, 10, 3, 12, 7, 13, 11, 14, 16,
+ 17, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+};  // permutation of 0..31; the tail 18..31 is in ascending order
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x8[32]) = {
+ 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29,
+ 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31,
+};  // steps by 4: 0, 4, ..., 28, then restarts at 1, 2 and 3
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_4x8[32]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+};  // identity order: entry i is i, for i = 0..31
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_8x4[32]) = {
+ 0, 1, 8, 9, 2, 16, 10, 17, 18, 3, 24, 11, 25, 19, 26, 4,
+ 12, 27, 20, 5, 28, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
+};  // permutation of 0..31: first entry 0, last entry 31
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x4[32]) = {
+ 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27,
+ 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
+};  // steps by 8: 0, 8, 16, 24, then restarts at 1, 2, ..., 7
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x4[32]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+};  // identity order: entry i is i, for i = 0..31
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = {
+ 0, 8, 1, 16, 9, 2, 17, 24, 10, 3, 18, 25, 32, 11, 4, 26,
+ 33, 19, 40, 12, 34, 27, 5, 41, 20, 48, 13, 35, 42, 28, 21, 6,
+ 49, 56, 36, 43, 29, 7, 14, 50, 57, 44, 22, 37, 15, 51, 58, 30,
+ 45, 23, 52, 59, 38, 31, 60, 53, 46, 39, 61, 54, 47, 62, 55, 63,
+};  // permutation of 0..63: first entry 0, last entry 63
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x8[64]) = {
+ 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57,
+ 2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59,
+ 4, 12, 20, 28, 36, 44, 52, 60, 5, 13, 21, 29, 37, 45, 53, 61,
+ 6, 14, 22, 30, 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63,
+};  // steps by 8: 0, 8, ..., 56, then restarts at 1, 2, ..., 7
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x8[64]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+};  // identity order: entry i is i, for i = 0..63
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8[64]) = {
+ 0, 8, 16, 1, 24, 9, 32, 17, 2, 40, 25, 10, 33, 18, 48, 3,
+ 26, 41, 11, 56, 19, 34, 4, 49, 27, 42, 12, 35, 20, 57, 50, 28,
+ 5, 43, 13, 36, 58, 51, 21, 44, 6, 29, 59, 37, 14, 52, 22, 7,
+ 45, 60, 30, 15, 38, 53, 23, 46, 31, 61, 39, 54, 47, 62, 55, 63,
+};  // permutation of 0..63 that opens with steps of 8 (0, 8, 16)
+
+DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8[64]) = {
+ 0, 1, 2, 8, 9, 3, 16, 10, 4, 17, 11, 24, 5, 18, 25, 12,
+ 19, 26, 32, 6, 13, 20, 33, 27, 7, 34, 40, 21, 28, 41, 14, 35,
+ 48, 42, 29, 36, 49, 22, 43, 15, 56, 37, 50, 44, 30, 57, 23, 51,
+ 58, 45, 38, 52, 31, 59, 53, 46, 60, 39, 61, 47, 54, 55, 62, 63,
+};  // permutation of 0..63 that opens with unit steps (0, 1, 2)
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, default_scan_8x16[128]) = {
+ 0, 1, 8, 2, 9, 16, 3, 10, 17, 24, 4, 11, 18, 25, 32,
+ 5, 12, 19, 26, 33, 40, 6, 13, 20, 27, 34, 41, 48, 7, 14,
+ 21, 28, 35, 42, 49, 56, 15, 22, 29, 36, 43, 50, 57, 64, 23,
+ 30, 37, 44, 51, 58, 65, 72, 31, 38, 45, 52, 59, 66, 73, 80,
+ 39, 46, 53, 60, 67, 74, 81, 88, 47, 54, 61, 68, 75, 82, 89,
+ 96, 55, 62, 69, 76, 83, 90, 97, 104, 63, 70, 77, 84, 91, 98,
+ 105, 112, 71, 78, 85, 92, 99, 106, 113, 120, 79, 86, 93, 100, 107,
+ 114, 121, 87, 94, 101, 108, 115, 122, 95, 102, 109, 116, 123, 103, 110,
+ 117, 124, 111, 118, 125, 119, 126, 127,
+};  // runs step by 7 (0 | 1, 8 | 2, 9, 16 | ...), ending at 127
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_16x8[128]) = {
+ 0, 1, 16, 2, 17, 32, 3, 18, 33, 48, 4, 19, 34, 49, 64, 5,
+ 20, 35, 50, 65, 80, 6, 21, 36, 51, 66, 81, 96, 7, 22, 37, 52,
+ 67, 82, 97, 112, 8, 23, 38, 53, 68, 83, 98, 113, 9, 24, 39, 54,
+ 69, 84, 99, 114, 10, 25, 40, 55, 70, 85, 100, 115, 11, 26, 41, 56,
+ 71, 86, 101, 116, 12, 27, 42, 57, 72, 87, 102, 117, 13, 28, 43, 58,
+ 73, 88, 103, 118, 14, 29, 44, 59, 74, 89, 104, 119, 15, 30, 45, 60,
+ 75, 90, 105, 120, 31, 46, 61, 76, 91, 106, 121, 47, 62, 77, 92, 107,
+ 122, 63, 78, 93, 108, 123, 79, 94, 109, 124, 95, 110, 125, 111, 126, 127,
+};  // runs step by 15 (0 | 1, 16 | 2, 17, 32 | ...), ending at 127
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x16[128]) = {
+ 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120,
+ 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 113, 121,
+ 2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122,
+ 3, 11, 19, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99, 107, 115, 123,
+ 4, 12, 20, 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124,
+ 5, 13, 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125,
+ 6, 14, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126,
+ 7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127,
+};  // steps by 8: 0, 8, ..., 120, then restarts at 1, 2, ..., 7
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x8[128]) = {
+ 0, 16, 32, 48, 64, 80, 96, 112, 1, 17, 33, 49, 65, 81, 97, 113,
+ 2, 18, 34, 50, 66, 82, 98, 114, 3, 19, 35, 51, 67, 83, 99, 115,
+ 4, 20, 36, 52, 68, 84, 100, 116, 5, 21, 37, 53, 69, 85, 101, 117,
+ 6, 22, 38, 54, 70, 86, 102, 118, 7, 23, 39, 55, 71, 87, 103, 119,
+ 8, 24, 40, 56, 72, 88, 104, 120, 9, 25, 41, 57, 73, 89, 105, 121,
+ 10, 26, 42, 58, 74, 90, 106, 122, 11, 27, 43, 59, 75, 91, 107, 123,
+ 12, 28, 44, 60, 76, 92, 108, 124, 13, 29, 45, 61, 77, 93, 109, 125,
+ 14, 30, 46, 62, 78, 94, 110, 126, 15, 31, 47, 63, 79, 95, 111, 127,
+};  // steps by 16: 0, 16, ..., 112, then restarts at 1, 2, ..., 15
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_8x16[128]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127,
+};  // identity order: entry i is i, for i = 0..127
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x8[128]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127,
+};  // identity order: entry i is i, for i = 0..127
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_16x32[512]) = {
+ 0, 1, 16, 2, 17, 32, 3, 18, 33, 48, 4, 19, 34, 49, 64,
+ 5, 20, 35, 50, 65, 80, 6, 21, 36, 51, 66, 81, 96, 7, 22,
+ 37, 52, 67, 82, 97, 112, 8, 23, 38, 53, 68, 83, 98, 113, 128,
+ 9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 10, 25, 40, 55, 70,
+ 85, 100, 115, 130, 145, 160, 11, 26, 41, 56, 71, 86, 101, 116, 131,
+ 146, 161, 176, 12, 27, 42, 57, 72, 87, 102, 117, 132, 147, 162, 177,
+ 192, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208,
+ 14, 29, 44, 59, 74, 89, 104, 119, 134, 149, 164, 179, 194, 209, 224,
+ 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225,
+ 240, 31, 46, 61, 76, 91, 106, 121, 136, 151, 166, 181, 196, 211, 226,
+ 241, 256, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227,
+ 242, 257, 272, 63, 78, 93, 108, 123, 138, 153, 168, 183, 198, 213, 228,
+ 243, 258, 273, 288, 79, 94, 109, 124, 139, 154, 169, 184, 199, 214, 229,
+ 244, 259, 274, 289, 304, 95, 110, 125, 140, 155, 170, 185, 200, 215, 230,
+ 245, 260, 275, 290, 305, 320, 111, 126, 141, 156, 171, 186, 201, 216, 231,
+ 246, 261, 276, 291, 306, 321, 336, 127, 142, 157, 172, 187, 202, 217, 232,
+ 247, 262, 277, 292, 307, 322, 337, 352, 143, 158, 173, 188, 203, 218, 233,
+ 248, 263, 278, 293, 308, 323, 338, 353, 368, 159, 174, 189, 204, 219, 234,
+ 249, 264, 279, 294, 309, 324, 339, 354, 369, 384, 175, 190, 205, 220, 235,
+ 250, 265, 280, 295, 310, 325, 340, 355, 370, 385, 400, 191, 206, 221, 236,
+ 251, 266, 281, 296, 311, 326, 341, 356, 371, 386, 401, 416, 207, 222, 237,
+ 252, 267, 282, 297, 312, 327, 342, 357, 372, 387, 402, 417, 432, 223, 238,
+ 253, 268, 283, 298, 313, 328, 343, 358, 373, 388, 403, 418, 433, 448, 239,
+ 254, 269, 284, 299, 314, 329, 344, 359, 374, 389, 404, 419, 434, 449, 464,
+ 255, 270, 285, 300, 315, 330, 345, 360, 375, 390, 405, 420, 435, 450, 465,
+ 480, 271, 286, 301, 316, 331, 346, 361, 376, 391, 406, 421, 436, 451, 466,
+ 481, 496, 287, 302, 317, 332, 347, 362, 377, 392, 407, 422, 437, 452, 467,
+ 482, 497, 303, 318, 333, 348, 363, 378, 393, 408, 423, 438, 453, 468, 483,
+ 498, 319, 334, 349, 364, 379, 394, 409, 424, 439, 454, 469, 484, 499, 335,
+ 350, 365, 380, 395, 410, 425, 440, 455, 470, 485, 500, 351, 366, 381, 396,
+ 411, 426, 441, 456, 471, 486, 501, 367, 382, 397, 412, 427, 442, 457, 472,
+ 487, 502, 383, 398, 413, 428, 443, 458, 473, 488, 503, 399, 414, 429, 444,
+ 459, 474, 489, 504, 415, 430, 445, 460, 475, 490, 505, 431, 446, 461, 476,
+ 491, 506, 447, 462, 477, 492, 507, 463, 478, 493, 508, 479, 494, 509, 495,
+ 510, 511,
+};  // runs step by 15 (0 | 1, 16 | 2, 17, 32 | ...), ending at 511
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_32x16[512]) = {
+ 0, 1, 32, 2, 33, 64, 3, 34, 65, 96, 4, 35, 66, 97, 128,
+ 5, 36, 67, 98, 129, 160, 6, 37, 68, 99, 130, 161, 192, 7, 38,
+ 69, 100, 131, 162, 193, 224, 8, 39, 70, 101, 132, 163, 194, 225, 256,
+ 9, 40, 71, 102, 133, 164, 195, 226, 257, 288, 10, 41, 72, 103, 134,
+ 165, 196, 227, 258, 289, 320, 11, 42, 73, 104, 135, 166, 197, 228, 259,
+ 290, 321, 352, 12, 43, 74, 105, 136, 167, 198, 229, 260, 291, 322, 353,
+ 384, 13, 44, 75, 106, 137, 168, 199, 230, 261, 292, 323, 354, 385, 416,
+ 14, 45, 76, 107, 138, 169, 200, 231, 262, 293, 324, 355, 386, 417, 448,
+ 15, 46, 77, 108, 139, 170, 201, 232, 263, 294, 325, 356, 387, 418, 449,
+ 480, 16, 47, 78, 109, 140, 171, 202, 233, 264, 295, 326, 357, 388, 419,
+ 450, 481, 17, 48, 79, 110, 141, 172, 203, 234, 265, 296, 327, 358, 389,
+ 420, 451, 482, 18, 49, 80, 111, 142, 173, 204, 235, 266, 297, 328, 359,
+ 390, 421, 452, 483, 19, 50, 81, 112, 143, 174, 205, 236, 267, 298, 329,
+ 360, 391, 422, 453, 484, 20, 51, 82, 113, 144, 175, 206, 237, 268, 299,
+ 330, 361, 392, 423, 454, 485, 21, 52, 83, 114, 145, 176, 207, 238, 269,
+ 300, 331, 362, 393, 424, 455, 486, 22, 53, 84, 115, 146, 177, 208, 239,
+ 270, 301, 332, 363, 394, 425, 456, 487, 23, 54, 85, 116, 147, 178, 209,
+ 240, 271, 302, 333, 364, 395, 426, 457, 488, 24, 55, 86, 117, 148, 179,
+ 210, 241, 272, 303, 334, 365, 396, 427, 458, 489, 25, 56, 87, 118, 149,
+ 180, 211, 242, 273, 304, 335, 366, 397, 428, 459, 490, 26, 57, 88, 119,
+ 150, 181, 212, 243, 274, 305, 336, 367, 398, 429, 460, 491, 27, 58, 89,
+ 120, 151, 182, 213, 244, 275, 306, 337, 368, 399, 430, 461, 492, 28, 59,
+ 90, 121, 152, 183, 214, 245, 276, 307, 338, 369, 400, 431, 462, 493, 29,
+ 60, 91, 122, 153, 184, 215, 246, 277, 308, 339, 370, 401, 432, 463, 494,
+ 30, 61, 92, 123, 154, 185, 216, 247, 278, 309, 340, 371, 402, 433, 464,
+ 495, 31, 62, 93, 124, 155, 186, 217, 248, 279, 310, 341, 372, 403, 434,
+ 465, 496, 63, 94, 125, 156, 187, 218, 249, 280, 311, 342, 373, 404, 435,
+ 466, 497, 95, 126, 157, 188, 219, 250, 281, 312, 343, 374, 405, 436, 467,
+ 498, 127, 158, 189, 220, 251, 282, 313, 344, 375, 406, 437, 468, 499, 159,
+ 190, 221, 252, 283, 314, 345, 376, 407, 438, 469, 500, 191, 222, 253, 284,
+ 315, 346, 377, 408, 439, 470, 501, 223, 254, 285, 316, 347, 378, 409, 440,
+ 471, 502, 255, 286, 317, 348, 379, 410, 441, 472, 503, 287, 318, 349, 380,
+ 411, 442, 473, 504, 319, 350, 381, 412, 443, 474, 505, 351, 382, 413, 444,
+ 475, 506, 383, 414, 445, 476, 507, 415, 446, 477, 508, 447, 478, 509, 479,
+ 510, 511,
+};  // runs step by 31 (0 | 1, 32 | 2, 33, 64 | ...), ending at 511
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x32[512]) = {
+ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224,
+ 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464,
+ 480, 496, 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193,
+ 209, 225, 241, 257, 273, 289, 305, 321, 337, 353, 369, 385, 401, 417, 433,
+ 449, 465, 481, 497, 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162,
+ 178, 194, 210, 226, 242, 258, 274, 290, 306, 322, 338, 354, 370, 386, 402,
+ 418, 434, 450, 466, 482, 498, 3, 19, 35, 51, 67, 83, 99, 115, 131,
+ 147, 163, 179, 195, 211, 227, 243, 259, 275, 291, 307, 323, 339, 355, 371,
+ 387, 403, 419, 435, 451, 467, 483, 499, 4, 20, 36, 52, 68, 84, 100,
+ 116, 132, 148, 164, 180, 196, 212, 228, 244, 260, 276, 292, 308, 324, 340,
+ 356, 372, 388, 404, 420, 436, 452, 468, 484, 500, 5, 21, 37, 53, 69,
+ 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, 261, 277, 293, 309,
+ 325, 341, 357, 373, 389, 405, 421, 437, 453, 469, 485, 501, 6, 22, 38,
+ 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, 262, 278,
+ 294, 310, 326, 342, 358, 374, 390, 406, 422, 438, 454, 470, 486, 502, 7,
+ 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
+ 263, 279, 295, 311, 327, 343, 359, 375, 391, 407, 423, 439, 455, 471, 487,
+ 503, 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216,
+ 232, 248, 264, 280, 296, 312, 328, 344, 360, 376, 392, 408, 424, 440, 456,
+ 472, 488, 504, 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185,
+ 201, 217, 233, 249, 265, 281, 297, 313, 329, 345, 361, 377, 393, 409, 425,
+ 441, 457, 473, 489, 505, 10, 26, 42, 58, 74, 90, 106, 122, 138, 154,
+ 170, 186, 202, 218, 234, 250, 266, 282, 298, 314, 330, 346, 362, 378, 394,
+ 410, 426, 442, 458, 474, 490, 506, 11, 27, 43, 59, 75, 91, 107, 123,
+ 139, 155, 171, 187, 203, 219, 235, 251, 267, 283, 299, 315, 331, 347, 363,
+ 379, 395, 411, 427, 443, 459, 475, 491, 507, 12, 28, 44, 60, 76, 92,
+ 108, 124, 140, 156, 172, 188, 204, 220, 236, 252, 268, 284, 300, 316, 332,
+ 348, 364, 380, 396, 412, 428, 444, 460, 476, 492, 508, 13, 29, 45, 61,
+ 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253, 269, 285, 301,
+ 317, 333, 349, 365, 381, 397, 413, 429, 445, 461, 477, 493, 509, 14, 30,
+ 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254, 270,
+ 286, 302, 318, 334, 350, 366, 382, 398, 414, 430, 446, 462, 478, 494, 510,
+ 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239,
+ 255, 271, 287, 303, 319, 335, 351, 367, 383, 399, 415, 431, 447, 463, 479,
+ 495, 511,
+};  // steps by 16: 0, 16, ..., 496, then restarts at 1, 2, ..., 15
+
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x16[512]) = {
+ 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480,
+ 1, 33, 65, 97, 129, 161, 193, 225, 257, 289, 321, 353, 385, 417, 449, 481,
+ 2, 34, 66, 98, 130, 162, 194, 226, 258, 290, 322, 354, 386, 418, 450, 482,
+ 3, 35, 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483,
+ 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, 388, 420, 452, 484,
+ 5, 37, 69, 101, 133, 165, 197, 229, 261, 293, 325, 357, 389, 421, 453, 485,
+ 6, 38, 70, 102, 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486,
+ 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, 455, 487,
+ 8, 40, 72, 104, 136, 168, 200, 232, 264, 296, 328, 360, 392, 424, 456, 488,
+ 9, 41, 73, 105, 137, 169, 201, 233, 265, 297, 329, 361, 393, 425, 457, 489,
+ 10, 42, 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490,
+ 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, 395, 427, 459, 491,
+ 12, 44, 76, 108, 140, 172, 204, 236, 268, 300, 332, 364, 396, 428, 460, 492,
+ 13, 45, 77, 109, 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493,
+ 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430, 462, 494,
+ 15, 47, 79, 111, 143, 175, 207, 239, 271, 303, 335, 367, 399, 431, 463, 495,
+ 16, 48, 80, 112, 144, 176, 208, 240, 272, 304, 336, 368, 400, 432, 464, 496,
+ 17, 49, 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497,
+ 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, 402, 434, 466, 498,
+ 19, 51, 83, 115, 147, 179, 211, 243, 275, 307, 339, 371, 403, 435, 467, 499,
+ 20, 52, 84, 116, 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500,
+ 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, 469, 501,
+ 22, 54, 86, 118, 150, 182, 214, 246, 278, 310, 342, 374, 406, 438, 470, 502,
+ 23, 55, 87, 119, 151, 183, 215, 247, 279, 311, 343, 375, 407, 439, 471, 503,
+ 24, 56, 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504,
+ 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, 409, 441, 473, 505,
+ 26, 58, 90, 122, 154, 186, 218, 250, 282, 314, 346, 378, 410, 442, 474, 506,
+ 27, 59, 91, 123, 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507,
+ 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, 476, 508,
+ 29, 61, 93, 125, 157, 189, 221, 253, 285, 317, 349, 381, 413, 445, 477, 509,
+ 30, 62, 94, 126, 158, 190, 222, 254, 286, 318, 350, 382, 414, 446, 478, 510,
+ 31, 63, 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511,
+};  // steps by 32: 0, 32, ..., 480, then restarts at 1, 2, ..., 31
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x32[512]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+ 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+ 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+ 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
+ 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
+ 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
+ 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
+ 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
+ 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
+ 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
+ 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,
+ 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
+ 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
+ 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
+ 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
+ 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
+ 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464,
+ 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
+ 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
+ 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
+ 510, 511,
+};  // identity order: entry i is i, for i = 0..511
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x16[512]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+ 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+ 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+ 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
+ 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
+ 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
+ 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
+ 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
+ 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
+ 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
+ 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,
+ 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
+ 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
+ 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
+ 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
+ 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
+ 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464,
+ 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
+ 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
+ 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
+ 510, 511,
+};  // identity order: entry i is i, for i = 0..511
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = {
+ 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65,
+ 80, 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112,
+ 21, 52, 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7,
+ 129, 38, 69, 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116,
+ 101, 131, 160, 146, 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147,
+ 176, 162, 87, 56, 25, 133, 118, 177, 148, 72, 103, 41, 163, 10, 192,
+ 178, 88, 57, 134, 149, 119, 26, 164, 73, 104, 193, 42, 179, 208, 11,
+ 135, 89, 165, 120, 150, 58, 194, 180, 27, 74, 209, 105, 151, 136, 43,
+ 90, 224, 166, 195, 181, 121, 210, 59, 12, 152, 106, 167, 196, 75, 137,
+ 225, 211, 240, 182, 122, 91, 28, 197, 13, 226, 168, 183, 153, 44, 212,
+ 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, 242, 76, 213, 154, 45,
+ 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, 77, 155, 30, 15,
+ 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, 230, 62, 216,
+ 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, 63, 232,
+ 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, 219,
+ 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251,
+ 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239,
+ 255,
+};  // first entry 0, last 255; presumably a permutation (unchecked)
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x16[256]) = {
+ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
+ 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241,
+ 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242,
+ 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243,
+ 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244,
+ 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245,
+ 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246,
+ 7, 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
+ 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248,
+ 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249,
+ 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250,
+ 11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251,
+ 12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252,
+ 13, 29, 45, 61, 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253,
+ 14, 30, 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254,
+ 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, 255,
+};  // steps by 16: 0, 16, ..., 240, then restarts at 1, 2, ..., 15
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_16x16[256]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+ 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+ 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+ 255,
+};  // identity order: entry i is i, for i = 0..255
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16[256]) = {
+ 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18,
+ 81, 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51,
+ 129, 4, 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161,
+ 68, 115, 21, 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6,
+ 116, 193, 147, 85, 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70,
+ 7, 148, 194, 86, 179, 225, 23, 133, 39, 164, 8, 102, 210, 241, 55,
+ 195, 118, 149, 71, 180, 24, 87, 226, 134, 165, 211, 40, 103, 56, 72,
+ 150, 196, 242, 119, 9, 181, 227, 88, 166, 25, 135, 41, 104, 212, 57,
+ 151, 197, 120, 73, 243, 182, 136, 167, 213, 89, 10, 228, 105, 152, 198,
+ 26, 42, 121, 183, 244, 168, 58, 137, 229, 74, 214, 90, 153, 199, 184,
+ 11, 106, 245, 27, 122, 230, 169, 43, 215, 59, 200, 138, 185, 246, 75,
+ 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, 60, 247, 232, 76,
+ 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, 233, 171, 61,
+ 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, 62, 172,
+ 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, 126,
+ 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236,
+ 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239,
+ 255,
+};  // opens 0, 16, 32, 48 (steps of 16), ends at 255; permutation unchecked
+
+DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16[256]) = {
+ 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48,
+ 20, 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9,
+ 66, 52, 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54,
+ 83, 97, 69, 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26,
+ 41, 56, 114, 100, 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72,
+ 116, 14, 87, 130, 102, 144, 73, 131, 117, 28, 58, 15, 88, 43, 145,
+ 103, 132, 146, 118, 74, 160, 89, 133, 104, 29, 59, 147, 119, 44, 161,
+ 148, 90, 105, 134, 162, 120, 176, 75, 135, 149, 30, 60, 163, 177, 45,
+ 121, 91, 106, 164, 178, 150, 192, 136, 165, 179, 31, 151, 193, 76, 122,
+ 61, 137, 194, 107, 152, 180, 208, 46, 166, 167, 195, 92, 181, 138, 209,
+ 123, 153, 224, 196, 77, 168, 210, 182, 240, 108, 197, 62, 154, 225, 183,
+ 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, 124, 155, 199, 78,
+ 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, 156, 229, 243,
+ 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, 157, 245,
+ 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158, 188,
+ 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175,
+ 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254,
+ 255,
+};  // opens 0, 1, 2 (unit steps), ends at 255; permutation unchecked
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, mcol_scan_32x32[1024]) = {
+ 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416,
+ 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864,
+ 896, 928, 960, 992, 1, 33, 65, 97, 129, 161, 193, 225, 257, 289,
+ 321, 353, 385, 417, 449, 481, 513, 545, 577, 609, 641, 673, 705, 737,
+ 769, 801, 833, 865, 897, 929, 961, 993, 2, 34, 66, 98, 130, 162,
+ 194, 226, 258, 290, 322, 354, 386, 418, 450, 482, 514, 546, 578, 610,
+ 642, 674, 706, 738, 770, 802, 834, 866, 898, 930, 962, 994, 3, 35,
+ 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483,
+ 515, 547, 579, 611, 643, 675, 707, 739, 771, 803, 835, 867, 899, 931,
+ 963, 995, 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356,
+ 388, 420, 452, 484, 516, 548, 580, 612, 644, 676, 708, 740, 772, 804,
+ 836, 868, 900, 932, 964, 996, 5, 37, 69, 101, 133, 165, 197, 229,
+ 261, 293, 325, 357, 389, 421, 453, 485, 517, 549, 581, 613, 645, 677,
+ 709, 741, 773, 805, 837, 869, 901, 933, 965, 997, 6, 38, 70, 102,
+ 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486, 518, 550,
+ 582, 614, 646, 678, 710, 742, 774, 806, 838, 870, 902, 934, 966, 998,
+ 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423,
+ 455, 487, 519, 551, 583, 615, 647, 679, 711, 743, 775, 807, 839, 871,
+ 903, 935, 967, 999, 8, 40, 72, 104, 136, 168, 200, 232, 264, 296,
+ 328, 360, 392, 424, 456, 488, 520, 552, 584, 616, 648, 680, 712, 744,
+ 776, 808, 840, 872, 904, 936, 968, 1000, 9, 41, 73, 105, 137, 169,
+ 201, 233, 265, 297, 329, 361, 393, 425, 457, 489, 521, 553, 585, 617,
+ 649, 681, 713, 745, 777, 809, 841, 873, 905, 937, 969, 1001, 10, 42,
+ 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490,
+ 522, 554, 586, 618, 650, 682, 714, 746, 778, 810, 842, 874, 906, 938,
+ 970, 1002, 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363,
+ 395, 427, 459, 491, 523, 555, 587, 619, 651, 683, 715, 747, 779, 811,
+ 843, 875, 907, 939, 971, 1003, 12, 44, 76, 108, 140, 172, 204, 236,
+ 268, 300, 332, 364, 396, 428, 460, 492, 524, 556, 588, 620, 652, 684,
+ 716, 748, 780, 812, 844, 876, 908, 940, 972, 1004, 13, 45, 77, 109,
+ 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493, 525, 557,
+ 589, 621, 653, 685, 717, 749, 781, 813, 845, 877, 909, 941, 973, 1005,
+ 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430,
+ 462, 494, 526, 558, 590, 622, 654, 686, 718, 750, 782, 814, 846, 878,
+ 910, 942, 974, 1006, 15, 47, 79, 111, 143, 175, 207, 239, 271, 303,
+ 335, 367, 399, 431, 463, 495, 527, 559, 591, 623, 655, 687, 719, 751,
+ 783, 815, 847, 879, 911, 943, 975, 1007, 16, 48, 80, 112, 144, 176,
+ 208, 240, 272, 304, 336, 368, 400, 432, 464, 496, 528, 560, 592, 624,
+ 656, 688, 720, 752, 784, 816, 848, 880, 912, 944, 976, 1008, 17, 49,
+ 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497,
+ 529, 561, 593, 625, 657, 689, 721, 753, 785, 817, 849, 881, 913, 945,
+ 977, 1009, 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370,
+ 402, 434, 466, 498, 530, 562, 594, 626, 658, 690, 722, 754, 786, 818,
+ 850, 882, 914, 946, 978, 1010, 19, 51, 83, 115, 147, 179, 211, 243,
+ 275, 307, 339, 371, 403, 435, 467, 499, 531, 563, 595, 627, 659, 691,
+ 723, 755, 787, 819, 851, 883, 915, 947, 979, 1011, 20, 52, 84, 116,
+ 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500, 532, 564,
+ 596, 628, 660, 692, 724, 756, 788, 820, 852, 884, 916, 948, 980, 1012,
+ 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437,
+ 469, 501, 533, 565, 597, 629, 661, 693, 725, 757, 789, 821, 853, 885,
+ 917, 949, 981, 1013, 22, 54, 86, 118, 150, 182, 214, 246, 278, 310,
+ 342, 374, 406, 438, 470, 502, 534, 566, 598, 630, 662, 694, 726, 758,
+ 790, 822, 854, 886, 918, 950, 982, 1014, 23, 55, 87, 119, 151, 183,
+ 215, 247, 279, 311, 343, 375, 407, 439, 471, 503, 535, 567, 599, 631,
+ 663, 695, 727, 759, 791, 823, 855, 887, 919, 951, 983, 1015, 24, 56,
+ 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504,
+ 536, 568, 600, 632, 664, 696, 728, 760, 792, 824, 856, 888, 920, 952,
+ 984, 1016, 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377,
+ 409, 441, 473, 505, 537, 569, 601, 633, 665, 697, 729, 761, 793, 825,
+ 857, 889, 921, 953, 985, 1017, 26, 58, 90, 122, 154, 186, 218, 250,
+ 282, 314, 346, 378, 410, 442, 474, 506, 538, 570, 602, 634, 666, 698,
+ 730, 762, 794, 826, 858, 890, 922, 954, 986, 1018, 27, 59, 91, 123,
+ 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507, 539, 571,
+ 603, 635, 667, 699, 731, 763, 795, 827, 859, 891, 923, 955, 987, 1019,
+ 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444,
+ 476, 508, 540, 572, 604, 636, 668, 700, 732, 764, 796, 828, 860, 892,
+ 924, 956, 988, 1020, 29, 61, 93, 125, 157, 189, 221, 253, 285, 317,
+ 349, 381, 413, 445, 477, 509, 541, 573, 605, 637, 669, 701, 733, 765,
+ 797, 829, 861, 893, 925, 957, 989, 1021, 30, 62, 94, 126, 158, 190,
+ 222, 254, 286, 318, 350, 382, 414, 446, 478, 510, 542, 574, 606, 638,
+ 670, 702, 734, 766, 798, 830, 862, 894, 926, 958, 990, 1022, 31, 63,
+ 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511,
+ 543, 575, 607, 639, 671, 703, 735, 767, 799, 831, 863, 895, 927, 959,
+ 991, 1023,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, mrow_scan_32x32[1024]) = {  // Entries are 0..1023 in ascending order (entry i holds the value i).
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
+ 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
+ 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+ 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
+ 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
+ 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
+ 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
+ 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
+ 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
+ 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
+ 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233,
+ 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246,
+ 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259,
+ 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272,
+ 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285,
+ 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298,
+ 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311,
+ 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
+ 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337,
+ 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350,
+ 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363,
+ 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376,
+ 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
+ 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402,
+ 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415,
+ 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428,
+ 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441,
+ 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454,
+ 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467,
+ 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480,
+ 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493,
+ 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506,
+ 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519,
+ 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532,
+ 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545,
+ 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558,
+ 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571,
+ 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584,
+ 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597,
+ 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610,
+ 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623,
+ 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636,
+ 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649,
+ 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662,
+ 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675,
+ 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688,
+ 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701,
+ 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714,
+ 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727,
+ 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740,
+ 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753,
+ 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766,
+ 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779,
+ 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792,
+ 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805,
+ 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818,
+ 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831,
+ 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844,
+ 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857,
+ 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870,
+ 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883,
+ 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896,
+ 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909,
+ 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922,
+ 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935,
+ 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948,
+ 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961,
+ 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974,
+ 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987,
+ 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000,
+ 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013,
+ 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
+};
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = {  // Starts at 0, ends at 1023; opening run 0, 32, 1, 64, 33, 2 looks like anti-diagonals of a stride-32 grid -- TODO confirm.
+ 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66,
+ 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5,
+ 162, 193, 68, 131, 37, 100, 225, 194, 256, 163, 69, 132, 6,
+ 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320,
+ 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197, 71,
+ 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292,
+ 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104,
+ 293, 41, 417, 199, 136, 262, 387, 448, 325, 356, 10, 73, 418,
+ 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137,
+ 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451, 481, 358,
+ 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, 75,
+ 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391,
+ 453, 139, 44, 234, 484, 297, 360, 171, 76, 515, 545, 266, 329,
+ 454, 13, 423, 203, 108, 546, 485, 576, 298, 235, 140, 361, 330,
+ 172, 547, 45, 455, 267, 577, 486, 77, 204, 362, 608, 14, 299,
+ 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, 610, 363,
+ 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, 111,
+ 238, 48, 143, 80, 175, 112, 207, 49, 18, 239, 81, 113, 19,
+ 50, 82, 114, 51, 83, 115, 640, 516, 392, 268, 144, 20, 672,
+ 641, 548, 517, 424, 393, 300, 269, 176, 145, 52, 21, 704, 673,
+ 642, 580, 549, 518, 456, 425, 394, 332, 301, 270, 208, 177, 146,
+ 84, 53, 22, 736, 705, 674, 643, 612, 581, 550, 519, 488, 457,
+ 426, 395, 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54,
+ 23, 737, 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303,
+ 241, 210, 179, 117, 86, 55, 738, 707, 614, 583, 490, 459, 366,
+ 335, 242, 211, 118, 87, 739, 615, 491, 367, 243, 119, 768, 644,
+ 520, 396, 272, 148, 24, 800, 769, 676, 645, 552, 521, 428, 397,
+ 304, 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584,
+ 553, 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57,
+ 26, 864, 833, 802, 771, 740, 709, 678, 647, 616, 585, 554, 523,
+ 492, 461, 430, 399, 368, 337, 306, 275, 244, 213, 182, 151, 120,
+ 89, 58, 27, 865, 834, 803, 741, 710, 679, 617, 586, 555, 493,
+ 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835,
+ 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867,
+ 743, 619, 495, 371, 247, 123, 896, 772, 648, 524, 400, 276, 152,
+ 28, 928, 897, 804, 773, 680, 649, 556, 525, 432, 401, 308, 277,
+ 184, 153, 60, 29, 960, 929, 898, 836, 805, 774, 712, 681, 650,
+ 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, 92,
+ 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682,
+ 651, 620, 589, 558, 527, 496, 465, 434, 403, 372, 341, 310, 279,
+ 248, 217, 186, 155, 124, 93, 62, 31, 993, 962, 931, 869, 838,
+ 807, 745, 714, 683, 621, 590, 559, 497, 466, 435, 373, 342, 311,
+ 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, 622,
+ 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623,
+ 499, 375, 251, 127, 900, 776, 652, 528, 404, 280, 156, 932, 901,
+ 808, 777, 684, 653, 560, 529, 436, 405, 312, 281, 188, 157, 964,
+ 933, 902, 840, 809, 778, 716, 685, 654, 592, 561, 530, 468, 437,
+ 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, 872, 841,
+ 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, 438,
+ 407, 376, 345, 314, 283, 252, 221, 190, 159, 997, 966, 935, 873,
+ 842, 811, 749, 718, 687, 625, 594, 563, 501, 470, 439, 377, 346,
+ 315, 253, 222, 191, 998, 967, 874, 843, 750, 719, 626, 595, 502,
+ 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, 379, 255, 904,
+ 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, 564, 533,
+ 440, 409, 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658,
+ 596, 565, 534, 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907,
+ 876, 845, 814, 783, 752, 721, 690, 659, 628, 597, 566, 535, 504,
+ 473, 442, 411, 380, 349, 318, 287, 1001, 970, 939, 877, 846, 815,
+ 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, 350, 319, 1002,
+ 971, 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879,
+ 755, 631, 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785,
+ 692, 661, 568, 537, 444, 413, 972, 941, 910, 848, 817, 786, 724,
+ 693, 662, 600, 569, 538, 476, 445, 414, 1004, 973, 942, 911, 880,
+ 849, 818, 787, 756, 725, 694, 663, 632, 601, 570, 539, 508, 477,
+ 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602,
+ 571, 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510,
+ 479, 1007, 883, 759, 635, 511, 912, 788, 664, 540, 944, 913, 820,
+ 789, 696, 665, 572, 541, 976, 945, 914, 852, 821, 790, 728, 697,
+ 666, 604, 573, 542, 1008, 977, 946, 915, 884, 853, 822, 791, 760,
+ 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823,
+ 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638,
+ 607, 1011, 887, 763, 639, 916, 792, 668, 948, 917, 824, 793, 700,
+ 669, 980, 949, 918, 856, 825, 794, 732, 701, 670, 1012, 981, 950,
+ 919, 888, 857, 826, 795, 764, 733, 702, 671, 1013, 982, 951, 889,
+ 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, 891,
+ 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798,
+ 1016, 985, 954, 923, 892, 861, 830, 799, 1017, 986, 955, 893, 862,
+ 831, 1018, 987, 894, 863, 1019, 895, 924, 956, 925, 988, 957, 926,
+ 1020, 989, 958, 927, 1021, 990, 959, 1022, 991, 1023,
+};
+
+#if CONFIG_EXT_TX
+// Scan over two rectangular vertical partitions one after the other
+DECLARE_ALIGNED(16, static const int16_t, v2_scan_32x32[1024]) = {  // After the entry 1007 the list restarts at 16 -- presumably where the second partition begins; confirm.
+ 0, 1, 32, 33, 2, 64, 34, 65, 66, 3, 96, 35, 97,
+ 67, 98, 4, 128, 36, 129, 99, 68, 130, 5, 100, 131, 160,
+ 37, 161, 69, 162, 132, 101, 163, 6, 192, 38, 193, 70, 194,
+ 133, 164, 102, 195, 7, 224, 39, 165, 225, 134, 196, 71, 226,
+ 103, 227, 166, 197, 8, 256, 40, 135, 228, 257, 72, 258, 198,
+ 104, 259, 167, 229, 136, 260, 9, 288, 41, 289, 73, 199, 230,
+ 290, 168, 261, 105, 291, 137, 292, 231, 10, 200, 262, 320, 42,
+ 321, 74, 322, 169, 293, 106, 323, 232, 263, 138, 324, 201, 294,
+ 11, 352, 43, 353, 75, 170, 325, 354, 264, 107, 233, 295, 355,
+ 202, 326, 139, 356, 12, 384, 44, 265, 296, 385, 171, 357, 76,
+ 386, 234, 327, 108, 387, 203, 358, 140, 388, 297, 266, 328, 13,
+ 172, 389, 416, 45, 235, 359, 417, 77, 418, 109, 419, 204, 390,
+ 298, 329, 141, 267, 360, 420, 236, 391, 173, 421, 14, 448, 46,
+ 449, 78, 330, 450, 299, 361, 110, 205, 422, 451, 268, 392, 142,
+ 452, 237, 423, 174, 331, 362, 453, 15, 300, 393, 480, 47, 481,
+ 79, 482, 206, 454, 269, 424, 111, 483, 143, 484, 363, 332, 394,
+ 238, 455, 175, 301, 425, 485, 512, 513, 270, 456, 514, 207, 486,
+ 364, 395, 515, 333, 426, 516, 239, 487, 302, 457, 517, 396, 271,
+ 488, 544, 365, 427, 545, 518, 546, 334, 458, 547, 519, 548, 303,
+ 489, 397, 428, 549, 366, 459, 520, 576, 335, 490, 550, 577, 578,
+ 579, 521, 429, 551, 398, 460, 580, 367, 491, 581, 552, 522, 582,
+ 608, 609, 430, 461, 610, 399, 492, 553, 611, 583, 523, 612, 613,
+ 584, 554, 462, 431, 493, 614, 524, 640, 641, 642, 585, 643, 555,
+ 615, 644, 463, 494, 586, 525, 616, 645, 556, 646, 672, 617, 673,
+ 587, 674, 647, 495, 675, 526, 676, 557, 618, 648, 677, 588, 678,
+ 527, 649, 619, 704, 558, 705, 706, 679, 589, 707, 650, 708, 620,
+ 680, 709, 559, 590, 710, 651, 681, 736, 621, 737, 711, 738, 739,
+ 682, 652, 740, 712, 591, 741, 622, 683, 713, 742, 653, 768, 769,
+ 743, 770, 714, 684, 771, 623, 772, 744, 654, 773, 715, 685, 745,
+ 774, 655, 775, 800, 801, 716, 746, 802, 803, 686, 776, 804, 747,
+ 805, 717, 777, 806, 687, 748, 807, 778, 832, 833, 718, 834, 835,
+ 808, 836, 779, 749, 837, 809, 719, 838, 780, 750, 810, 839, 864,
+ 865, 866, 867, 840, 781, 868, 811, 751, 869, 841, 870, 812, 782,
+ 842, 871, 896, 897, 898, 872, 899, 813, 843, 900, 783, 901, 873,
+ 844, 902, 814, 874, 903, 928, 929, 845, 930, 904, 815, 875, 931,
+ 932, 905, 933, 846, 876, 934, 906, 935, 877, 960, 847, 961, 962,
+ 907, 936, 963, 964, 937, 878, 965, 908, 966, 938, 967, 909, 879,
+ 992, 939, 993, 968, 994, 995, 996, 910, 969, 940, 997, 998, 970,
+ 911, 941, 999, 971, 1000, 942, 1001, 972, 1002, 943, 973, 1003, 974,
+ 1004, 975, 1005, 1006, 1007, 16, 48, 80, 112, 144, 176, 17, 49,
+ 208, 81, 113, 145, 240, 177, 272, 18, 50, 209, 82, 114, 304,
+ 241, 146, 178, 273, 336, 210, 19, 51, 83, 115, 305, 242, 147,
+ 368, 179, 274, 337, 211, 20, 400, 52, 84, 306, 116, 243, 369,
+ 148, 338, 180, 275, 432, 401, 212, 21, 53, 307, 85, 370, 244,
+ 117, 464, 149, 433, 339, 276, 181, 402, 213, 308, 496, 371, 22,
+ 54, 465, 86, 245, 118, 434, 150, 340, 277, 403, 182, 528, 497,
+ 214, 466, 372, 309, 23, 55, 435, 87, 246, 119, 341, 404, 151,
+ 529, 560, 278, 498, 183, 467, 373, 215, 310, 436, 24, 56, 247,
+ 561, 88, 530, 592, 342, 120, 405, 499, 152, 279, 468, 184, 374,
+ 311, 437, 216, 562, 593, 531, 624, 25, 248, 500, 57, 406, 89,
+ 343, 121, 469, 280, 153, 594, 185, 375, 563, 625, 438, 532, 656,
+ 312, 217, 501, 407, 249, 26, 344, 58, 90, 470, 122, 595, 626,
+ 281, 564, 657, 154, 376, 533, 688, 439, 186, 313, 502, 218, 408,
+ 627, 596, 658, 250, 345, 471, 27, 59, 565, 689, 91, 123, 282,
+ 534, 720, 155, 440, 377, 187, 503, 314, 628, 659, 219, 597, 690,
+ 409, 472, 566, 721, 346, 251, 28, 60, 535, 752, 92, 124, 283,
+ 441, 378, 156, 660, 504, 629, 691, 598, 722, 188, 315, 567, 753,
+ 220, 410, 473, 347, 536, 784, 252, 29, 661, 692, 61, 93, 442,
+ 630, 723, 284, 125, 379, 505, 599, 754, 157, 316, 568, 785, 189,
+ 474, 411, 221, 537, 816, 693, 348, 662, 724, 253, 631, 755, 443,
+ 30, 600, 786, 62, 506, 94, 285, 380, 126, 569, 817, 158, 317,
+ 190, 475, 694, 725, 412, 663, 756, 538, 848, 222, 632, 787, 349,
+ 254, 601, 818, 444, 507, 31, 63, 381, 286, 95, 570, 849, 726,
+ 127, 695, 757, 664, 788, 159, 476, 318, 413, 539, 880, 191, 633,
+ 819, 223, 350, 602, 850, 508, 255, 445, 727, 758, 696, 789, 571,
+ 881, 382, 287, 665, 820, 477, 634, 851, 540, 912, 319, 414, 603,
+ 882, 759, 728, 790, 351, 509, 697, 821, 446, 572, 913, 666, 852,
+ 383, 635, 883, 478, 541, 944, 415, 760, 791, 604, 914, 729, 822,
+ 698, 853, 510, 667, 884, 447, 573, 945, 636, 915, 792, 761, 823,
+ 542, 976, 479, 730, 854, 605, 946, 699, 885, 668, 916, 511, 574,
+ 977, 793, 824, 637, 947, 762, 855, 731, 886, 543, 1008, 606, 978,
+ 700, 917, 669, 948, 575, 825, 1009, 794, 856, 763, 887, 638, 979,
+ 732, 918, 701, 949, 607, 1010, 670, 980, 826, 857, 795, 888, 764,
+ 919, 639, 1011, 733, 950, 702, 981, 858, 827, 889, 796, 920, 671,
+ 1012, 765, 951, 734, 982, 703, 1013, 859, 890, 828, 921, 797, 952,
+ 766, 983, 735, 1014, 891, 860, 922, 829, 953, 798, 984, 767, 1015,
+ 892, 923, 861, 954, 830, 985, 799, 1016, 924, 893, 955, 862, 986,
+ 831, 1017, 925, 956, 894, 987, 863, 1018, 957, 926, 988, 895, 1019,
+ 958, 989, 927, 1020, 990, 959, 1021, 991, 1022, 1023,
+};
+
+// Scan over two rectangular horizontal partitions one after the other
+DECLARE_ALIGNED(16, static const int16_t, h2_scan_32x32[1024]) = {  // After the entry 511 the list continues with 512 -- presumably where the second partition begins; confirm.
+ 0, 1, 32, 33, 2, 64, 34, 65, 66, 3, 96, 35, 97,
+ 67, 98, 4, 128, 36, 129, 99, 68, 130, 5, 100, 131, 160,
+ 37, 161, 69, 162, 132, 101, 163, 6, 192, 38, 193, 70, 194,
+ 133, 164, 102, 195, 7, 224, 39, 165, 225, 134, 196, 71, 226,
+ 103, 227, 166, 197, 8, 256, 40, 135, 228, 257, 72, 258, 198,
+ 104, 259, 167, 229, 136, 260, 9, 288, 41, 289, 73, 199, 230,
+ 290, 168, 261, 105, 291, 137, 292, 231, 10, 200, 262, 320, 42,
+ 321, 74, 322, 169, 293, 106, 323, 232, 263, 138, 324, 201, 294,
+ 11, 352, 43, 353, 75, 170, 325, 354, 264, 107, 233, 295, 355,
+ 202, 326, 139, 356, 12, 384, 44, 265, 296, 385, 171, 357, 76,
+ 386, 234, 327, 108, 387, 203, 358, 140, 388, 297, 266, 328, 13,
+ 172, 389, 416, 45, 235, 359, 417, 77, 418, 109, 419, 204, 390,
+ 298, 329, 141, 267, 360, 420, 236, 391, 173, 421, 14, 448, 46,
+ 449, 78, 330, 450, 299, 361, 110, 205, 422, 451, 268, 392, 142,
+ 452, 237, 423, 174, 331, 362, 453, 15, 300, 393, 480, 47, 481,
+ 79, 482, 206, 454, 269, 424, 111, 483, 143, 484, 363, 332, 394,
+ 238, 455, 175, 301, 425, 485, 16, 48, 80, 270, 456, 207, 486,
+ 112, 364, 395, 333, 426, 144, 239, 487, 302, 457, 176, 396, 17,
+ 271, 488, 49, 365, 427, 208, 81, 334, 458, 113, 145, 240, 303,
+ 489, 397, 428, 177, 366, 459, 272, 18, 50, 209, 335, 490, 82,
+ 114, 304, 241, 429, 146, 398, 460, 367, 491, 178, 273, 336, 210,
+ 19, 51, 83, 430, 461, 399, 492, 115, 305, 242, 147, 368, 179,
+ 274, 337, 462, 431, 493, 211, 20, 400, 52, 84, 306, 116, 243,
+ 369, 148, 463, 494, 338, 180, 275, 432, 401, 212, 21, 53, 307,
+ 85, 370, 244, 117, 495, 464, 149, 433, 339, 276, 181, 402, 213,
+ 308, 496, 371, 22, 54, 465, 86, 245, 118, 434, 150, 340, 277,
+ 403, 182, 497, 214, 466, 372, 309, 23, 55, 435, 87, 246, 119,
+ 341, 404, 151, 278, 498, 183, 467, 373, 215, 310, 436, 24, 56,
+ 247, 88, 342, 120, 405, 499, 152, 279, 468, 184, 374, 311, 437,
+ 216, 25, 248, 500, 57, 406, 89, 343, 121, 469, 280, 153, 185,
+ 375, 438, 312, 217, 501, 407, 249, 26, 344, 58, 90, 470, 122,
+ 281, 154, 376, 439, 186, 313, 502, 218, 408, 250, 345, 471, 27,
+ 59, 91, 123, 282, 155, 440, 377, 187, 503, 314, 219, 409, 472,
+ 346, 251, 28, 60, 92, 124, 283, 441, 378, 156, 504, 188, 315,
+ 220, 410, 473, 347, 252, 29, 61, 93, 442, 284, 125, 379, 505,
+ 157, 316, 189, 474, 411, 221, 348, 253, 443, 30, 62, 506, 94,
+ 285, 380, 126, 158, 317, 190, 475, 412, 222, 349, 254, 444, 507,
+ 31, 63, 381, 286, 95, 127, 159, 476, 318, 413, 191, 223, 350,
+ 508, 255, 445, 382, 287, 477, 319, 414, 351, 509, 446, 383, 478,
+ 415, 510, 447, 479, 511, 512, 513, 514, 515, 516, 517, 544, 545,
+ 518, 546, 547, 519, 548, 549, 520, 576, 550, 577, 578, 579, 521,
+ 551, 580, 581, 552, 522, 582, 608, 609, 610, 553, 611, 583, 523,
+ 612, 613, 584, 554, 614, 524, 640, 641, 642, 585, 643, 555, 615,
+ 644, 586, 525, 616, 645, 556, 646, 672, 617, 673, 587, 674, 647,
+ 675, 526, 676, 557, 618, 648, 677, 588, 678, 527, 649, 619, 704,
+ 558, 705, 706, 679, 589, 707, 650, 708, 620, 680, 709, 528, 559,
+ 590, 710, 651, 681, 736, 621, 737, 711, 738, 739, 682, 652, 529,
+ 560, 740, 712, 591, 741, 622, 683, 713, 742, 653, 768, 769, 561,
+ 743, 530, 592, 770, 714, 684, 771, 623, 772, 744, 654, 773, 715,
+ 685, 745, 774, 562, 593, 531, 624, 655, 775, 800, 801, 716, 746,
+ 802, 803, 686, 776, 804, 594, 563, 625, 747, 805, 717, 532, 656,
+ 777, 806, 687, 748, 807, 778, 832, 833, 718, 834, 595, 626, 835,
+ 564, 657, 808, 836, 533, 688, 779, 749, 837, 809, 719, 838, 780,
+ 627, 596, 658, 750, 810, 839, 864, 565, 689, 865, 866, 867, 534,
+ 720, 840, 781, 868, 811, 751, 869, 841, 628, 659, 597, 690, 870,
+ 812, 782, 566, 721, 842, 871, 896, 535, 752, 897, 898, 872, 899,
+ 813, 843, 660, 900, 783, 629, 691, 598, 722, 901, 873, 567, 753,
+ 844, 902, 814, 874, 536, 784, 903, 661, 692, 928, 929, 630, 723,
+ 845, 930, 904, 815, 875, 931, 599, 754, 932, 568, 785, 905, 933,
+ 846, 876, 934, 537, 816, 693, 662, 724, 906, 631, 755, 935, 877,
+ 600, 786, 960, 847, 961, 962, 907, 936, 963, 569, 817, 964, 937,
+ 694, 725, 878, 965, 908, 663, 756, 538, 848, 966, 632, 787, 938,
+ 601, 818, 967, 909, 879, 992, 939, 993, 968, 570, 849, 994, 726,
+ 695, 757, 995, 664, 788, 996, 910, 969, 539, 880, 940, 633, 819,
+ 997, 998, 602, 850, 970, 911, 941, 999, 727, 758, 696, 789, 571,
+ 881, 971, 665, 820, 1000, 634, 851, 942, 540, 912, 1001, 972, 603,
+ 882, 759, 728, 790, 1002, 697, 821, 943, 973, 572, 913, 666, 852,
+ 1003, 635, 883, 974, 541, 944, 760, 791, 1004, 604, 914, 729, 822,
+ 698, 853, 975, 667, 884, 573, 945, 1005, 636, 915, 792, 761, 823,
+ 542, 976, 1006, 730, 854, 605, 946, 699, 885, 668, 916, 1007, 574,
+ 977, 793, 824, 637, 947, 762, 855, 731, 886, 543, 1008, 606, 978,
+ 700, 917, 669, 948, 575, 825, 1009, 794, 856, 763, 887, 638, 979,
+ 732, 918, 701, 949, 607, 1010, 670, 980, 826, 857, 795, 888, 764,
+ 919, 639, 1011, 733, 950, 702, 981, 858, 827, 889, 796, 920, 671,
+ 1012, 765, 951, 734, 982, 703, 1013, 859, 890, 828, 921, 797, 952,
+ 766, 983, 735, 1014, 891, 860, 922, 829, 953, 798, 984, 767, 1015,
+ 892, 923, 861, 954, 830, 985, 799, 1016, 924, 893, 955, 862, 986,
+ 831, 1017, 925, 956, 894, 987, 863, 1018, 957, 926, 988, 895, 1019,
+ 958, 989, 927, 1020, 990, 959, 1021, 991, 1022, 1023,
+};
+
+// Scan where the top left quarter is scanned first
+DECLARE_ALIGNED(16, static const int16_t, qtr_scan_32x32[1024]) = {  // After the entry 495 the list continues with 16, 512, ... -- presumably the end of the top-left quarter; confirm.
+ 0, 1, 32, 33, 2, 64, 34, 65, 66, 3, 96, 35, 97,
+ 67, 98, 4, 128, 36, 129, 99, 68, 130, 5, 100, 131, 160,
+ 37, 161, 69, 162, 132, 101, 163, 6, 192, 38, 193, 70, 194,
+ 133, 164, 102, 195, 7, 224, 39, 165, 225, 134, 196, 71, 226,
+ 103, 227, 166, 197, 8, 256, 40, 135, 228, 257, 72, 258, 198,
+ 104, 259, 167, 229, 136, 260, 9, 288, 41, 289, 73, 199, 230,
+ 290, 168, 261, 105, 291, 137, 292, 231, 10, 200, 262, 320, 42,
+ 321, 74, 322, 169, 293, 106, 323, 232, 263, 138, 324, 201, 294,
+ 11, 352, 43, 353, 75, 170, 325, 354, 264, 107, 233, 295, 355,
+ 202, 326, 139, 356, 12, 384, 44, 265, 296, 385, 171, 357, 76,
+ 386, 234, 327, 108, 387, 203, 358, 140, 388, 297, 266, 328, 13,
+ 172, 389, 416, 45, 235, 359, 417, 77, 418, 109, 419, 204, 390,
+ 298, 329, 141, 267, 360, 420, 236, 391, 173, 421, 14, 448, 46,
+ 449, 78, 330, 450, 299, 361, 110, 205, 422, 451, 268, 392, 142,
+ 452, 237, 423, 174, 331, 362, 453, 15, 300, 393, 480, 47, 481,
+ 79, 482, 206, 454, 269, 424, 111, 483, 143, 484, 363, 332, 394,
+ 238, 455, 175, 301, 425, 485, 270, 456, 207, 486, 364, 395, 333,
+ 426, 239, 487, 302, 457, 396, 271, 488, 365, 427, 334, 458, 303,
+ 489, 397, 428, 366, 459, 335, 490, 429, 398, 460, 367, 491, 430,
+ 461, 399, 492, 462, 431, 493, 463, 494, 495, 16, 512, 48, 513,
+ 80, 514, 112, 515, 144, 516, 176, 517, 17, 544, 49, 545, 208,
+ 518, 81, 546, 113, 547, 145, 240, 519, 548, 177, 549, 272, 520,
+ 18, 576, 50, 209, 550, 577, 82, 578, 114, 579, 304, 521, 241,
+ 551, 146, 580, 178, 581, 273, 552, 336, 522, 210, 582, 19, 608,
+ 51, 609, 83, 610, 115, 305, 553, 611, 242, 583, 147, 368, 523,
+ 612, 179, 613, 274, 584, 337, 554, 211, 614, 20, 400, 524, 640,
+ 52, 641, 84, 642, 306, 585, 116, 643, 243, 369, 555, 615, 148,
+ 644, 338, 586, 180, 275, 432, 525, 616, 645, 401, 556, 212, 646,
+ 21, 672, 53, 307, 617, 673, 85, 370, 587, 674, 244, 647, 117,
+ 675, 464, 526, 149, 676, 433, 557, 339, 618, 276, 648, 181, 677,
+ 402, 588, 213, 678, 308, 496, 527, 649, 371, 619, 22, 704, 54,
+ 465, 558, 705, 86, 706, 245, 679, 118, 434, 589, 707, 150, 340,
+ 650, 708, 277, 403, 620, 680, 182, 709, 528, 497, 559, 214, 466,
+ 590, 710, 372, 651, 309, 681, 23, 736, 55, 435, 621, 737, 87,
+ 246, 711, 738, 119, 739, 341, 682, 404, 652, 151, 529, 560, 740,
+ 278, 712, 498, 591, 183, 741, 467, 622, 373, 683, 215, 310, 713,
+ 742, 436, 653, 24, 768, 56, 769, 247, 561, 743, 88, 530, 592,
+ 770, 342, 714, 120, 405, 684, 771, 499, 623, 152, 772, 279, 744,
+ 468, 654, 184, 773, 374, 715, 311, 437, 685, 745, 216, 774, 562,
+ 593, 531, 624, 25, 248, 500, 655, 775, 800, 57, 801, 406, 716,
+ 89, 343, 746, 802, 121, 803, 469, 686, 280, 776, 153, 804, 594,
+ 185, 375, 563, 625, 747, 805, 438, 717, 532, 656, 312, 777, 217,
+ 806, 501, 687, 407, 748, 249, 807, 26, 344, 778, 832, 58, 833,
+ 90, 470, 718, 834, 122, 595, 626, 835, 281, 564, 657, 808, 154,
+ 836, 376, 533, 688, 779, 439, 749, 186, 837, 313, 809, 502, 719,
+ 218, 838, 408, 780, 627, 596, 658, 250, 345, 471, 750, 810, 839,
+ 27, 864, 59, 565, 689, 865, 91, 866, 123, 867, 282, 534, 720,
+ 840, 155, 440, 781, 868, 377, 811, 187, 503, 751, 869, 314, 841,
+ 628, 659, 219, 597, 690, 870, 409, 812, 472, 782, 566, 721, 346,
+ 842, 251, 871, 28, 896, 60, 535, 752, 897, 92, 898, 124, 283,
+ 872, 899, 441, 813, 378, 843, 156, 660, 900, 504, 783, 629, 691,
+ 598, 722, 188, 901, 315, 873, 567, 753, 220, 410, 844, 902, 473,
+ 814, 347, 874, 536, 784, 252, 903, 29, 661, 692, 928, 61, 929,
+ 93, 442, 630, 723, 845, 930, 284, 904, 125, 379, 505, 815, 875,
+ 931, 599, 754, 157, 932, 316, 568, 785, 905, 189, 933, 474, 846,
+ 411, 876, 221, 934, 537, 816, 693, 348, 662, 724, 906, 253, 631,
+ 755, 935, 443, 877, 30, 600, 786, 960, 62, 506, 847, 961, 94,
+ 962, 285, 380, 907, 936, 126, 963, 569, 817, 158, 964, 317, 937,
+ 190, 475, 694, 725, 878, 965, 412, 908, 663, 756, 538, 848, 222,
+ 966, 632, 787, 349, 938, 254, 601, 818, 967, 444, 909, 507, 879,
+ 31, 992, 63, 381, 939, 993, 286, 968, 95, 570, 849, 994, 726,
+ 127, 695, 757, 995, 664, 788, 159, 996, 476, 910, 318, 969, 413,
+ 539, 880, 940, 191, 633, 819, 997, 223, 998, 350, 602, 850, 970,
+ 508, 911, 255, 445, 941, 999, 727, 758, 696, 789, 571, 881, 382,
+ 971, 287, 665, 820, 1000, 477, 634, 851, 942, 540, 912, 319, 1001,
+ 414, 972, 603, 882, 759, 728, 790, 351, 1002, 509, 697, 821, 943,
+ 446, 973, 572, 913, 666, 852, 383, 1003, 635, 883, 478, 974, 541,
+ 944, 415, 760, 791, 1004, 604, 914, 729, 822, 698, 853, 510, 975,
+ 667, 884, 447, 573, 945, 1005, 636, 915, 792, 761, 823, 542, 976,
+ 479, 1006, 730, 854, 605, 946, 699, 885, 668, 916, 511, 1007, 574,
+ 977, 793, 824, 637, 947, 762, 855, 731, 886, 543, 1008, 606, 978,
+ 700, 917, 669, 948, 575, 825, 1009, 794, 856, 763, 887, 638, 979,
+ 732, 918, 701, 949, 607, 1010, 670, 980, 826, 857, 795, 888, 764,
+ 919, 639, 1011, 733, 950, 702, 981, 858, 827, 889, 796, 920, 671,
+ 1012, 765, 951, 734, 982, 703, 1013, 859, 890, 828, 921, 797, 952,
+ 766, 983, 735, 1014, 891, 860, 922, 829, 953, 798, 984, 767, 1015,
+ 892, 923, 861, 954, 830, 985, 799, 1016, 924, 893, 955, 862, 986,
+ 831, 1017, 925, 956, 894, 987, 863, 1018, 957, 926, 988, 895, 1019,
+ 958, 989, 927, 1020, 990, 959, 1021, 991, 1022, 1023,
+};
+#endif // CONFIG_EXT_TX
+
+// Neighborhood 2-tuples for the various scans and block sizes, stored
+// in {top, left} order for each position of the corresponding scan order.
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {  // 34 values listed, ending in 0, 0; presumably 16 positions + 1 extra pair with MAX_NEIGHBORS == 2 -- confirm.
+ 0, 0, 0, 0, 4, 0, 1, 4, 4, 5, 5, 1, 8, 8, 5, 8, 2,
+ 2, 2, 5, 9, 12, 6, 9, 3, 6, 10, 13, 7, 10, 11, 14, 0, 0,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+ mcol_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {  // 34 values listed, ending in 0, 0; presumably 16 positions + 1 extra pair with MAX_NEIGHBORS == 2 -- confirm.
+ 0, 0, 0, 0, 4, 4, 8, 8, 0, 0, 1, 4, 5, 8, 9, 12, 1,
+ 1, 2, 5, 6, 9, 10, 13, 2, 2, 3, 6, 7, 10, 11, 14, 0, 0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mrow_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {  // 34 values listed, ending in 0, 0; presumably 16 positions + 1 extra pair with MAX_NEIGHBORS == 2 -- confirm.
+ 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4,
+ 4, 5, 8, 6, 9, 7, 10, 8, 8, 9, 12, 10, 13, 11, 14, 0, 0,
+};
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t,
+ col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {  // 34 values listed, ending in 0, 0; presumably 16 positions + 1 extra pair with MAX_NEIGHBORS == 2 -- confirm.
+ 0, 0, 0, 0, 4, 4, 4, 0, 8, 8, 1, 4, 5, 8, 5, 1, 9,
+ 12, 2, 5, 6, 9, 6, 2, 3, 6, 10, 13, 7, 10, 11, 14, 0, 0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = {  // 34 values listed, ending in 0, 0; presumably 16 positions + 1 extra pair with MAX_NEIGHBORS == 2 -- confirm.
+ 0, 0, 0, 0, 0, 1, 1, 1, 1, 4, 2, 2, 2, 5, 4, 5, 5,
+ 8, 3, 6, 8, 9, 6, 9, 9, 12, 7, 10, 10, 13, 11, 14, 0, 0,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {  // 66 values listed, ending in 0, 0; presumably 32 positions + 1 extra pair with MAX_NEIGHBORS == 2 -- confirm.
+ 0, 0, 0, 0, 0, 0, 1, 4, 1, 1, 4, 4, 2, 5, 5, 8, 6,
+ 9, 2, 2, 8, 8, 3, 6, 9, 12, 7, 10, 10, 13, 12, 12, 13, 16,
+ 11, 14, 14, 17, 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21,
+ 24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mcol_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {  // 66 values listed, ending in 0, 0; presumably 32 positions + 1 extra pair with MAX_NEIGHBORS == 2 -- confirm.
+ 0, 0, 0, 0, 4, 4, 8, 8, 12, 12, 16, 16, 20, 20, 24, 24, 0,
+ 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 1, 1,
+ 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 2, 2, 3,
+ 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mrow_scan_4x8_neighbors[33 * MAX_NEIGHBORS]) = {  // 66 values listed, ending in 0, 0; presumably 32 positions + 1 extra pair with MAX_NEIGHBORS == 2 -- confirm.
+ 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 1, 4, 2, 5, 3, 6, 4,
+ 4, 5, 8, 6, 9, 7, 10, 8, 8, 9, 12, 10, 13, 11, 14, 12, 12,
+ 13, 16, 14, 17, 15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21,
+ 24, 22, 25, 23, 26, 24, 24, 25, 28, 26, 29, 27, 30, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {  // 66 values listed, ending in 0, 0; presumably 32 positions + 1 extra pair with MAX_NEIGHBORS == 2 -- confirm.
+ 0, 0, 0, 0, 0, 0, 1, 8, 1, 1, 8, 8, 2, 9, 9, 16, 10,
+ 17, 2, 2, 16, 16, 3, 10, 17, 24, 11, 18, 18, 25, 3, 3, 4, 11,
+ 19, 26, 12, 19, 4, 4, 20, 27, 5, 12, 13, 20, 21, 28, 5, 5, 6,
+ 13, 14, 21, 22, 29, 6, 6, 7, 14, 15, 22, 23, 30, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mcol_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 8, 8, 16, 16, 0, 0, 1, 8, 9, 16, 17, 24, 1,
+ 1, 2, 9, 10, 17, 18, 25, 2, 2, 3, 10, 11, 18, 19, 26, 3, 3,
+ 4, 11, 12, 19, 20, 27, 4, 4, 5, 12, 13, 20, 21, 28, 5, 5, 6,
+ 13, 14, 21, 22, 29, 6, 6, 7, 14, 15, 22, 23, 30, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mrow_scan_8x4_neighbors[33 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 0,
+ 0, 1, 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13, 7, 14, 8, 8,
+ 9, 16, 10, 17, 11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 16, 17,
+ 24, 18, 25, 19, 26, 20, 27, 21, 28, 22, 29, 23, 30, 0, 0
+};
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t,
+ col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 8, 8, 8, 0, 16, 16, 1, 8, 24, 24, 9, 16, 9, 1, 32,
+ 32, 17, 24, 2, 9, 25, 32, 10, 17, 40, 40, 10, 2, 18, 25, 33, 40, 3, 10,
+ 48, 48, 11, 18, 26, 33, 11, 3, 41, 48, 19, 26, 34, 41, 4, 11, 27, 34, 12,
+ 19, 49, 56, 42, 49, 20, 27, 12, 4, 35, 42, 5, 12, 28, 35, 50, 57, 43, 50,
+ 13, 20, 36, 43, 13, 5, 21, 28, 51, 58, 29, 36, 6, 13, 44, 51, 14, 21, 14,
+ 6, 37, 44, 52, 59, 22, 29, 7, 14, 30, 37, 45, 52, 15, 22, 38, 45, 23, 30,
+ 53, 60, 31, 38, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0, 0,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+ mcol_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48, 48, 0, 0, 1,
+ 8, 9, 16, 17, 24, 25, 32, 33, 40, 41, 48, 49, 56, 1, 1, 2, 9, 10, 17,
+ 18, 25, 26, 33, 34, 41, 42, 49, 50, 57, 2, 2, 3, 10, 11, 18, 19, 26, 27,
+ 34, 35, 42, 43, 50, 51, 58, 3, 3, 4, 11, 12, 19, 20, 27, 28, 35, 36, 43,
+ 44, 51, 52, 59, 4, 4, 5, 12, 13, 20, 21, 28, 29, 36, 37, 44, 45, 52, 53,
+ 60, 5, 5, 6, 13, 14, 21, 22, 29, 30, 37, 38, 45, 46, 53, 54, 61, 6, 6,
+ 7, 14, 15, 22, 23, 30, 31, 38, 39, 46, 47, 54, 55, 62, 0, 0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mrow_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 0, 0, 1,
+ 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13, 7, 14, 8, 8, 9, 16, 10, 17,
+ 11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 16, 17, 24, 18, 25, 19, 26, 20,
+ 27, 21, 28, 22, 29, 23, 30, 24, 24, 25, 32, 26, 33, 27, 34, 28, 35, 29, 36,
+ 30, 37, 31, 38, 32, 32, 33, 40, 34, 41, 35, 42, 36, 43, 37, 44, 38, 45, 39,
+ 46, 40, 40, 41, 48, 42, 49, 43, 50, 44, 51, 45, 52, 46, 53, 47, 54, 48, 48,
+ 49, 56, 50, 57, 51, 58, 52, 59, 53, 60, 54, 61, 55, 62, 0, 0,
+};
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t,
+ row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 1, 1, 0, 1, 1, 8, 2, 2, 8, 9, 2, 9, 3, 3, 9,
+ 16, 3, 10, 16, 17, 4, 4, 10, 17, 17, 24, 4, 11, 11, 18, 18, 25, 24, 25,
+ 5, 5, 5, 12, 12, 19, 25, 32, 19, 26, 6, 6, 26, 33, 32, 33, 13, 20, 20,
+ 27, 33, 40, 6, 13, 27, 34, 40, 41, 34, 41, 21, 28, 28, 35, 41, 48, 14, 21,
+ 35, 42, 7, 14, 48, 49, 29, 36, 42, 49, 36, 43, 22, 29, 49, 56, 15, 22, 43,
+ 50, 50, 57, 37, 44, 30, 37, 44, 51, 23, 30, 51, 58, 45, 52, 38, 45, 52, 59,
+ 31, 38, 53, 60, 39, 46, 46, 53, 47, 54, 54, 61, 55, 62, 0, 0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 8, 0, 8, 8, 1, 8, 9, 1, 9, 16, 16, 17, 2, 9, 10,
+ 2, 10, 17, 17, 24, 24, 25, 3, 10, 11, 3, 18, 25, 25, 32, 11, 18, 32, 33,
+ 4, 11, 26, 33, 19, 26, 12, 4, 33, 40, 12, 19, 40, 41, 5, 12, 27, 34, 34,
+ 41, 20, 27, 13, 20, 13, 5, 41, 48, 48, 49, 28, 35, 35, 42, 21, 28, 6, 6,
+ 6, 13, 42, 49, 49, 56, 36, 43, 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22,
+ 29, 37, 44, 15, 22, 44, 51, 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45,
+ 31, 38, 53, 60, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0, 0,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 8, 8, 8, 2, 2, 2,
+ 9, 9, 16, 16, 16, 3, 3, 3, 10, 10, 17, 17, 24, 24, 24,
+ 4, 4, 4, 11, 11, 18, 18, 25, 25, 32, 32, 32, 5, 5, 5,
+ 12, 12, 19, 19, 26, 26, 33, 33, 40, 40, 40, 6, 6, 6, 13,
+ 13, 20, 20, 27, 27, 34, 34, 41, 41, 48, 48, 48, 7, 14, 14,
+ 21, 21, 28, 28, 35, 35, 42, 42, 49, 49, 56, 56, 56, 15, 22,
+ 22, 29, 29, 36, 36, 43, 43, 50, 50, 57, 57, 64, 64, 64, 23,
+ 30, 30, 37, 37, 44, 44, 51, 51, 58, 58, 65, 65, 72, 72, 72,
+ 31, 38, 38, 45, 45, 52, 52, 59, 59, 66, 66, 73, 73, 80, 80,
+ 80, 39, 46, 46, 53, 53, 60, 60, 67, 67, 74, 74, 81, 81, 88,
+ 88, 88, 47, 54, 54, 61, 61, 68, 68, 75, 75, 82, 82, 89, 89,
+ 96, 96, 96, 55, 62, 62, 69, 69, 76, 76, 83, 83, 90, 90, 97,
+ 97, 104, 104, 104, 63, 70, 70, 77, 77, 84, 84, 91, 91, 98, 98,
+ 105, 105, 112, 112, 112, 71, 78, 78, 85, 85, 92, 92, 99, 99, 106,
+ 106, 113, 113, 120, 79, 86, 86, 93, 93, 100, 100, 107, 107, 114, 114,
+ 121, 87, 94, 94, 101, 101, 108, 108, 115, 115, 122, 95, 102, 102, 109,
+ 109, 116, 116, 123, 103, 110, 110, 117, 117, 124, 111, 118, 118, 125, 119,
+ 126, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_16x8_neighbors[129 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 16, 16, 16, 2, 2, 2,
+ 17, 17, 32, 32, 32, 3, 3, 3, 18, 18, 33, 33, 48, 48, 48,
+ 4, 4, 4, 19, 19, 34, 34, 49, 49, 64, 64, 64, 5, 5, 5,
+ 20, 20, 35, 35, 50, 50, 65, 65, 80, 80, 80, 6, 6, 6, 21,
+ 21, 36, 36, 51, 51, 66, 66, 81, 81, 96, 96, 96, 7, 7, 7,
+ 22, 22, 37, 37, 52, 52, 67, 67, 82, 82, 97, 97, 112, 8, 8,
+ 8, 23, 23, 38, 38, 53, 53, 68, 68, 83, 83, 98, 98, 113, 9,
+ 9, 9, 24, 24, 39, 39, 54, 54, 69, 69, 84, 84, 99, 99, 114,
+ 10, 10, 10, 25, 25, 40, 40, 55, 55, 70, 70, 85, 85, 100, 100,
+ 115, 11, 11, 11, 26, 26, 41, 41, 56, 56, 71, 71, 86, 86, 101,
+ 101, 116, 12, 12, 12, 27, 27, 42, 42, 57, 57, 72, 72, 87, 87,
+ 102, 102, 117, 13, 13, 13, 28, 28, 43, 43, 58, 58, 73, 73, 88,
+ 88, 103, 103, 118, 14, 14, 14, 29, 29, 44, 44, 59, 59, 74, 74,
+ 89, 89, 104, 104, 119, 15, 30, 30, 45, 45, 60, 60, 75, 75, 90,
+ 90, 105, 105, 120, 31, 46, 46, 61, 61, 76, 76, 91, 91, 106, 106,
+ 121, 47, 62, 62, 77, 77, 92, 92, 107, 107, 122, 63, 78, 78, 93,
+ 93, 108, 108, 123, 79, 94, 94, 109, 109, 124, 95, 110, 110, 125, 111,
+ 126, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mcol_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 8, 8, 16, 16, 24, 24, 32, 32, 40, 40, 48, 48,
+ 56, 56, 64, 64, 72, 72, 80, 80, 88, 88, 96, 96, 104, 104, 112, 112,
+ 0, 0, 1, 8, 9, 16, 17, 24, 25, 32, 33, 40, 41, 48, 49, 56,
+ 57, 64, 65, 72, 73, 80, 81, 88, 89, 96, 97, 104, 105, 112, 113, 120,
+ 1, 1, 2, 9, 10, 17, 18, 25, 26, 33, 34, 41, 42, 49, 50, 57,
+ 58, 65, 66, 73, 74, 81, 82, 89, 90, 97, 98, 105, 106, 113, 114, 121,
+ 2, 2, 3, 10, 11, 18, 19, 26, 27, 34, 35, 42, 43, 50, 51, 58,
+ 59, 66, 67, 74, 75, 82, 83, 90, 91, 98, 99, 106, 107, 114, 115, 122,
+ 3, 3, 4, 11, 12, 19, 20, 27, 28, 35, 36, 43, 44, 51, 52, 59,
+ 60, 67, 68, 75, 76, 83, 84, 91, 92, 99, 100, 107, 108, 115, 116, 123,
+ 4, 4, 5, 12, 13, 20, 21, 28, 29, 36, 37, 44, 45, 52, 53, 60,
+ 61, 68, 69, 76, 77, 84, 85, 92, 93, 100, 101, 108, 109, 116, 117, 124,
+ 5, 5, 6, 13, 14, 21, 22, 29, 30, 37, 38, 45, 46, 53, 54, 61,
+ 62, 69, 70, 77, 78, 85, 86, 93, 94, 101, 102, 109, 110, 117, 118, 125,
+ 6, 6, 7, 14, 15, 22, 23, 30, 31, 38, 39, 46, 47, 54, 55, 62,
+ 63, 70, 71, 78, 79, 86, 87, 94, 95, 102, 103, 110, 111, 118, 119, 126,
+ 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mcol_scan_16x8_neighbors[129 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96, 96,
+ 0, 0, 1, 16, 17, 32, 33, 48, 49, 64, 65, 80, 81, 96, 97, 112,
+ 1, 1, 2, 17, 18, 33, 34, 49, 50, 65, 66, 81, 82, 97, 98, 113,
+ 2, 2, 3, 18, 19, 34, 35, 50, 51, 66, 67, 82, 83, 98, 99, 114,
+ 3, 3, 4, 19, 20, 35, 36, 51, 52, 67, 68, 83, 84, 99, 100, 115,
+ 4, 4, 5, 20, 21, 36, 37, 52, 53, 68, 69, 84, 85, 100, 101, 116,
+ 5, 5, 6, 21, 22, 37, 38, 53, 54, 69, 70, 85, 86, 101, 102, 117,
+ 6, 6, 7, 22, 23, 38, 39, 54, 55, 70, 71, 86, 87, 102, 103, 118,
+ 7, 7, 8, 23, 24, 39, 40, 55, 56, 71, 72, 87, 88, 103, 104, 119,
+ 8, 8, 9, 24, 25, 40, 41, 56, 57, 72, 73, 88, 89, 104, 105, 120,
+ 9, 9, 10, 25, 26, 41, 42, 57, 58, 73, 74, 89, 90, 105, 106, 121,
+ 10, 10, 11, 26, 27, 42, 43, 58, 59, 74, 75, 90, 91, 106, 107, 122,
+ 11, 11, 12, 27, 28, 43, 44, 59, 60, 75, 76, 91, 92, 107, 108, 123,
+ 12, 12, 13, 28, 29, 44, 45, 60, 61, 76, 77, 92, 93, 108, 109, 124,
+ 13, 13, 14, 29, 30, 45, 46, 61, 62, 77, 78, 93, 94, 109, 110, 125,
+ 14, 14, 15, 30, 31, 46, 47, 62, 63, 78, 79, 94, 95, 110, 111, 126,
+ 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mrow_scan_8x16_neighbors[129 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
+ 6, 0, 0, 1, 8, 2, 9, 3, 10, 4, 11, 5, 12, 6, 13,
+ 7, 14, 8, 8, 9, 16, 10, 17, 11, 18, 12, 19, 13, 20, 14,
+ 21, 15, 22, 16, 16, 17, 24, 18, 25, 19, 26, 20, 27, 21, 28,
+ 22, 29, 23, 30, 24, 24, 25, 32, 26, 33, 27, 34, 28, 35, 29,
+ 36, 30, 37, 31, 38, 32, 32, 33, 40, 34, 41, 35, 42, 36, 43,
+ 37, 44, 38, 45, 39, 46, 40, 40, 41, 48, 42, 49, 43, 50, 44,
+ 51, 45, 52, 46, 53, 47, 54, 48, 48, 49, 56, 50, 57, 51, 58,
+ 52, 59, 53, 60, 54, 61, 55, 62, 56, 56, 57, 64, 58, 65, 59,
+ 66, 60, 67, 61, 68, 62, 69, 63, 70, 64, 64, 65, 72, 66, 73,
+ 67, 74, 68, 75, 69, 76, 70, 77, 71, 78, 72, 72, 73, 80, 74,
+ 81, 75, 82, 76, 83, 77, 84, 78, 85, 79, 86, 80, 80, 81, 88,
+ 82, 89, 83, 90, 84, 91, 85, 92, 86, 93, 87, 94, 88, 88, 89,
+ 96, 90, 97, 91, 98, 92, 99, 93, 100, 94, 101, 95, 102, 96, 96,
+ 97, 104, 98, 105, 99, 106, 100, 107, 101, 108, 102, 109, 103, 110, 104,
+ 104, 105, 112, 106, 113, 107, 114, 108, 115, 109, 116, 110, 117, 111, 118,
+ 112, 112, 113, 120, 114, 121, 115, 122, 116, 123, 117, 124, 118, 125, 119,
+ 126, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mrow_scan_16x8_neighbors[129 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
+ 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
+ 14, 14, 0, 0, 1, 16, 2, 17, 3, 18, 4, 19, 5, 20, 6,
+ 21, 7, 22, 8, 23, 9, 24, 10, 25, 11, 26, 12, 27, 13, 28,
+ 14, 29, 15, 30, 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21,
+ 36, 22, 37, 23, 38, 24, 39, 25, 40, 26, 41, 27, 42, 28, 43,
+ 29, 44, 30, 45, 31, 46, 32, 32, 33, 48, 34, 49, 35, 50, 36,
+ 51, 37, 52, 38, 53, 39, 54, 40, 55, 41, 56, 42, 57, 43, 58,
+ 44, 59, 45, 60, 46, 61, 47, 62, 48, 48, 49, 64, 50, 65, 51,
+ 66, 52, 67, 53, 68, 54, 69, 55, 70, 56, 71, 57, 72, 58, 73,
+ 59, 74, 60, 75, 61, 76, 62, 77, 63, 78, 64, 64, 65, 80, 66,
+ 81, 67, 82, 68, 83, 69, 84, 70, 85, 71, 86, 72, 87, 73, 88,
+ 74, 89, 75, 90, 76, 91, 77, 92, 78, 93, 79, 94, 80, 80, 81,
+ 96, 82, 97, 83, 98, 84, 99, 85, 100, 86, 101, 87, 102, 88, 103,
+ 89, 104, 90, 105, 91, 106, 92, 107, 93, 108, 94, 109, 95, 110, 96,
+ 96, 97, 112, 98, 113, 99, 114, 100, 115, 101, 116, 102, 117, 103, 118,
+ 104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111,
+ 126, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 16, 16, 16, 2, 2, 2,
+ 17, 17, 32, 32, 32, 3, 3, 3, 18, 18, 33, 33, 48, 48, 48,
+ 4, 4, 4, 19, 19, 34, 34, 49, 49, 64, 64, 64, 5, 5, 5,
+ 20, 20, 35, 35, 50, 50, 65, 65, 80, 80, 80, 6, 6, 6, 21,
+ 21, 36, 36, 51, 51, 66, 66, 81, 81, 96, 96, 96, 7, 7, 7,
+ 22, 22, 37, 37, 52, 52, 67, 67, 82, 82, 97, 97, 112, 112, 112,
+ 8, 8, 8, 23, 23, 38, 38, 53, 53, 68, 68, 83, 83, 98, 98,
+ 113, 113, 128, 128, 128, 9, 9, 9, 24, 24, 39, 39, 54, 54, 69,
+ 69, 84, 84, 99, 99, 114, 114, 129, 129, 144, 144, 144, 10, 10, 10,
+ 25, 25, 40, 40, 55, 55, 70, 70, 85, 85, 100, 100, 115, 115, 130,
+ 130, 145, 145, 160, 160, 160, 11, 11, 11, 26, 26, 41, 41, 56, 56,
+ 71, 71, 86, 86, 101, 101, 116, 116, 131, 131, 146, 146, 161, 161, 176,
+ 176, 176, 12, 12, 12, 27, 27, 42, 42, 57, 57, 72, 72, 87, 87,
+ 102, 102, 117, 117, 132, 132, 147, 147, 162, 162, 177, 177, 192, 192, 192,
+ 13, 13, 13, 28, 28, 43, 43, 58, 58, 73, 73, 88, 88, 103, 103,
+ 118, 118, 133, 133, 148, 148, 163, 163, 178, 178, 193, 193, 208, 208, 208,
+ 14, 14, 14, 29, 29, 44, 44, 59, 59, 74, 74, 89, 89, 104, 104,
+ 119, 119, 134, 134, 149, 149, 164, 164, 179, 179, 194, 194, 209, 209, 224,
+ 224, 224, 15, 30, 30, 45, 45, 60, 60, 75, 75, 90, 90, 105, 105,
+ 120, 120, 135, 135, 150, 150, 165, 165, 180, 180, 195, 195, 210, 210, 225,
+ 225, 240, 240, 240, 31, 46, 46, 61, 61, 76, 76, 91, 91, 106, 106,
+ 121, 121, 136, 136, 151, 151, 166, 166, 181, 181, 196, 196, 211, 211, 226,
+ 226, 241, 241, 256, 256, 256, 47, 62, 62, 77, 77, 92, 92, 107, 107,
+ 122, 122, 137, 137, 152, 152, 167, 167, 182, 182, 197, 197, 212, 212, 227,
+ 227, 242, 242, 257, 257, 272, 272, 272, 63, 78, 78, 93, 93, 108, 108,
+ 123, 123, 138, 138, 153, 153, 168, 168, 183, 183, 198, 198, 213, 213, 228,
+ 228, 243, 243, 258, 258, 273, 273, 288, 288, 288, 79, 94, 94, 109, 109,
+ 124, 124, 139, 139, 154, 154, 169, 169, 184, 184, 199, 199, 214, 214, 229,
+ 229, 244, 244, 259, 259, 274, 274, 289, 289, 304, 304, 304, 95, 110, 110,
+ 125, 125, 140, 140, 155, 155, 170, 170, 185, 185, 200, 200, 215, 215, 230,
+ 230, 245, 245, 260, 260, 275, 275, 290, 290, 305, 305, 320, 320, 320, 111,
+ 126, 126, 141, 141, 156, 156, 171, 171, 186, 186, 201, 201, 216, 216, 231,
+ 231, 246, 246, 261, 261, 276, 276, 291, 291, 306, 306, 321, 321, 336, 336,
+ 336, 127, 142, 142, 157, 157, 172, 172, 187, 187, 202, 202, 217, 217, 232,
+ 232, 247, 247, 262, 262, 277, 277, 292, 292, 307, 307, 322, 322, 337, 337,
+ 352, 352, 352, 143, 158, 158, 173, 173, 188, 188, 203, 203, 218, 218, 233,
+ 233, 248, 248, 263, 263, 278, 278, 293, 293, 308, 308, 323, 323, 338, 338,
+ 353, 353, 368, 368, 368, 159, 174, 174, 189, 189, 204, 204, 219, 219, 234,
+ 234, 249, 249, 264, 264, 279, 279, 294, 294, 309, 309, 324, 324, 339, 339,
+ 354, 354, 369, 369, 384, 384, 384, 175, 190, 190, 205, 205, 220, 220, 235,
+ 235, 250, 250, 265, 265, 280, 280, 295, 295, 310, 310, 325, 325, 340, 340,
+ 355, 355, 370, 370, 385, 385, 400, 400, 400, 191, 206, 206, 221, 221, 236,
+ 236, 251, 251, 266, 266, 281, 281, 296, 296, 311, 311, 326, 326, 341, 341,
+ 356, 356, 371, 371, 386, 386, 401, 401, 416, 416, 416, 207, 222, 222, 237,
+ 237, 252, 252, 267, 267, 282, 282, 297, 297, 312, 312, 327, 327, 342, 342,
+ 357, 357, 372, 372, 387, 387, 402, 402, 417, 417, 432, 432, 432, 223, 238,
+ 238, 253, 253, 268, 268, 283, 283, 298, 298, 313, 313, 328, 328, 343, 343,
+ 358, 358, 373, 373, 388, 388, 403, 403, 418, 418, 433, 433, 448, 448, 448,
+ 239, 254, 254, 269, 269, 284, 284, 299, 299, 314, 314, 329, 329, 344, 344,
+ 359, 359, 374, 374, 389, 389, 404, 404, 419, 419, 434, 434, 449, 449, 464,
+ 464, 464, 255, 270, 270, 285, 285, 300, 300, 315, 315, 330, 330, 345, 345,
+ 360, 360, 375, 375, 390, 390, 405, 405, 420, 420, 435, 435, 450, 450, 465,
+ 465, 480, 480, 480, 271, 286, 286, 301, 301, 316, 316, 331, 331, 346, 346,
+ 361, 361, 376, 376, 391, 391, 406, 406, 421, 421, 436, 436, 451, 451, 466,
+ 466, 481, 481, 496, 287, 302, 302, 317, 317, 332, 332, 347, 347, 362, 362,
+ 377, 377, 392, 392, 407, 407, 422, 422, 437, 437, 452, 452, 467, 467, 482,
+ 482, 497, 303, 318, 318, 333, 333, 348, 348, 363, 363, 378, 378, 393, 393,
+ 408, 408, 423, 423, 438, 438, 453, 453, 468, 468, 483, 483, 498, 319, 334,
+ 334, 349, 349, 364, 364, 379, 379, 394, 394, 409, 409, 424, 424, 439, 439,
+ 454, 454, 469, 469, 484, 484, 499, 335, 350, 350, 365, 365, 380, 380, 395,
+ 395, 410, 410, 425, 425, 440, 440, 455, 455, 470, 470, 485, 485, 500, 351,
+ 366, 366, 381, 381, 396, 396, 411, 411, 426, 426, 441, 441, 456, 456, 471,
+ 471, 486, 486, 501, 367, 382, 382, 397, 397, 412, 412, 427, 427, 442, 442,
+ 457, 457, 472, 472, 487, 487, 502, 383, 398, 398, 413, 413, 428, 428, 443,
+ 443, 458, 458, 473, 473, 488, 488, 503, 399, 414, 414, 429, 429, 444, 444,
+ 459, 459, 474, 474, 489, 489, 504, 415, 430, 430, 445, 445, 460, 460, 475,
+ 475, 490, 490, 505, 431, 446, 446, 461, 461, 476, 476, 491, 491, 506, 447,
+ 462, 462, 477, 477, 492, 492, 507, 463, 478, 478, 493, 493, 508, 479, 494,
+ 494, 509, 495, 510, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_32x16_neighbors[513 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 32, 32, 32, 2, 2, 2,
+ 33, 33, 64, 64, 64, 3, 3, 3, 34, 34, 65, 65, 96, 96, 96,
+ 4, 4, 4, 35, 35, 66, 66, 97, 97, 128, 128, 128, 5, 5, 5,
+ 36, 36, 67, 67, 98, 98, 129, 129, 160, 160, 160, 6, 6, 6, 37,
+ 37, 68, 68, 99, 99, 130, 130, 161, 161, 192, 192, 192, 7, 7, 7,
+ 38, 38, 69, 69, 100, 100, 131, 131, 162, 162, 193, 193, 224, 224, 224,
+ 8, 8, 8, 39, 39, 70, 70, 101, 101, 132, 132, 163, 163, 194, 194,
+ 225, 225, 256, 256, 256, 9, 9, 9, 40, 40, 71, 71, 102, 102, 133,
+ 133, 164, 164, 195, 195, 226, 226, 257, 257, 288, 288, 288, 10, 10, 10,
+ 41, 41, 72, 72, 103, 103, 134, 134, 165, 165, 196, 196, 227, 227, 258,
+ 258, 289, 289, 320, 320, 320, 11, 11, 11, 42, 42, 73, 73, 104, 104,
+ 135, 135, 166, 166, 197, 197, 228, 228, 259, 259, 290, 290, 321, 321, 352,
+ 352, 352, 12, 12, 12, 43, 43, 74, 74, 105, 105, 136, 136, 167, 167,
+ 198, 198, 229, 229, 260, 260, 291, 291, 322, 322, 353, 353, 384, 384, 384,
+ 13, 13, 13, 44, 44, 75, 75, 106, 106, 137, 137, 168, 168, 199, 199,
+ 230, 230, 261, 261, 292, 292, 323, 323, 354, 354, 385, 385, 416, 416, 416,
+ 14, 14, 14, 45, 45, 76, 76, 107, 107, 138, 138, 169, 169, 200, 200,
+ 231, 231, 262, 262, 293, 293, 324, 324, 355, 355, 386, 386, 417, 417, 448,
+ 448, 448, 15, 15, 15, 46, 46, 77, 77, 108, 108, 139, 139, 170, 170,
+ 201, 201, 232, 232, 263, 263, 294, 294, 325, 325, 356, 356, 387, 387, 418,
+ 418, 449, 449, 480, 16, 16, 16, 47, 47, 78, 78, 109, 109, 140, 140,
+ 171, 171, 202, 202, 233, 233, 264, 264, 295, 295, 326, 326, 357, 357, 388,
+ 388, 419, 419, 450, 450, 481, 17, 17, 17, 48, 48, 79, 79, 110, 110,
+ 141, 141, 172, 172, 203, 203, 234, 234, 265, 265, 296, 296, 327, 327, 358,
+ 358, 389, 389, 420, 420, 451, 451, 482, 18, 18, 18, 49, 49, 80, 80,
+ 111, 111, 142, 142, 173, 173, 204, 204, 235, 235, 266, 266, 297, 297, 328,
+ 328, 359, 359, 390, 390, 421, 421, 452, 452, 483, 19, 19, 19, 50, 50,
+ 81, 81, 112, 112, 143, 143, 174, 174, 205, 205, 236, 236, 267, 267, 298,
+ 298, 329, 329, 360, 360, 391, 391, 422, 422, 453, 453, 484, 20, 20, 20,
+ 51, 51, 82, 82, 113, 113, 144, 144, 175, 175, 206, 206, 237, 237, 268,
+ 268, 299, 299, 330, 330, 361, 361, 392, 392, 423, 423, 454, 454, 485, 21,
+ 21, 21, 52, 52, 83, 83, 114, 114, 145, 145, 176, 176, 207, 207, 238,
+ 238, 269, 269, 300, 300, 331, 331, 362, 362, 393, 393, 424, 424, 455, 455,
+ 486, 22, 22, 22, 53, 53, 84, 84, 115, 115, 146, 146, 177, 177, 208,
+ 208, 239, 239, 270, 270, 301, 301, 332, 332, 363, 363, 394, 394, 425, 425,
+ 456, 456, 487, 23, 23, 23, 54, 54, 85, 85, 116, 116, 147, 147, 178,
+ 178, 209, 209, 240, 240, 271, 271, 302, 302, 333, 333, 364, 364, 395, 395,
+ 426, 426, 457, 457, 488, 24, 24, 24, 55, 55, 86, 86, 117, 117, 148,
+ 148, 179, 179, 210, 210, 241, 241, 272, 272, 303, 303, 334, 334, 365, 365,
+ 396, 396, 427, 427, 458, 458, 489, 25, 25, 25, 56, 56, 87, 87, 118,
+ 118, 149, 149, 180, 180, 211, 211, 242, 242, 273, 273, 304, 304, 335, 335,
+ 366, 366, 397, 397, 428, 428, 459, 459, 490, 26, 26, 26, 57, 57, 88,
+ 88, 119, 119, 150, 150, 181, 181, 212, 212, 243, 243, 274, 274, 305, 305,
+ 336, 336, 367, 367, 398, 398, 429, 429, 460, 460, 491, 27, 27, 27, 58,
+ 58, 89, 89, 120, 120, 151, 151, 182, 182, 213, 213, 244, 244, 275, 275,
+ 306, 306, 337, 337, 368, 368, 399, 399, 430, 430, 461, 461, 492, 28, 28,
+ 28, 59, 59, 90, 90, 121, 121, 152, 152, 183, 183, 214, 214, 245, 245,
+ 276, 276, 307, 307, 338, 338, 369, 369, 400, 400, 431, 431, 462, 462, 493,
+ 29, 29, 29, 60, 60, 91, 91, 122, 122, 153, 153, 184, 184, 215, 215,
+ 246, 246, 277, 277, 308, 308, 339, 339, 370, 370, 401, 401, 432, 432, 463,
+ 463, 494, 30, 30, 30, 61, 61, 92, 92, 123, 123, 154, 154, 185, 185,
+ 216, 216, 247, 247, 278, 278, 309, 309, 340, 340, 371, 371, 402, 402, 433,
+ 433, 464, 464, 495, 31, 62, 62, 93, 93, 124, 124, 155, 155, 186, 186,
+ 217, 217, 248, 248, 279, 279, 310, 310, 341, 341, 372, 372, 403, 403, 434,
+ 434, 465, 465, 496, 63, 94, 94, 125, 125, 156, 156, 187, 187, 218, 218,
+ 249, 249, 280, 280, 311, 311, 342, 342, 373, 373, 404, 404, 435, 435, 466,
+ 466, 497, 95, 126, 126, 157, 157, 188, 188, 219, 219, 250, 250, 281, 281,
+ 312, 312, 343, 343, 374, 374, 405, 405, 436, 436, 467, 467, 498, 127, 158,
+ 158, 189, 189, 220, 220, 251, 251, 282, 282, 313, 313, 344, 344, 375, 375,
+ 406, 406, 437, 437, 468, 468, 499, 159, 190, 190, 221, 221, 252, 252, 283,
+ 283, 314, 314, 345, 345, 376, 376, 407, 407, 438, 438, 469, 469, 500, 191,
+ 222, 222, 253, 253, 284, 284, 315, 315, 346, 346, 377, 377, 408, 408, 439,
+ 439, 470, 470, 501, 223, 254, 254, 285, 285, 316, 316, 347, 347, 378, 378,
+ 409, 409, 440, 440, 471, 471, 502, 255, 286, 286, 317, 317, 348, 348, 379,
+ 379, 410, 410, 441, 441, 472, 472, 503, 287, 318, 318, 349, 349, 380, 380,
+ 411, 411, 442, 442, 473, 473, 504, 319, 350, 350, 381, 381, 412, 412, 443,
+ 443, 474, 474, 505, 351, 382, 382, 413, 413, 444, 444, 475, 475, 506, 383,
+ 414, 414, 445, 445, 476, 476, 507, 415, 446, 446, 477, 477, 508, 447, 478,
+ 478, 509, 479, 510, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mcol_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96,
+ 96, 112, 112, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 208, 208,
+ 224, 224, 240, 240, 256, 256, 272, 272, 288, 288, 304, 304, 320, 320, 336,
+ 336, 352, 352, 368, 368, 384, 384, 400, 400, 416, 416, 432, 432, 448, 448,
+ 464, 464, 480, 480, 0, 0, 1, 16, 17, 32, 33, 48, 49, 64, 65,
+ 80, 81, 96, 97, 112, 113, 128, 129, 144, 145, 160, 161, 176, 177, 192,
+ 193, 208, 209, 224, 225, 240, 241, 256, 257, 272, 273, 288, 289, 304, 305,
+ 320, 321, 336, 337, 352, 353, 368, 369, 384, 385, 400, 401, 416, 417, 432,
+ 433, 448, 449, 464, 465, 480, 481, 496, 1, 1, 2, 17, 18, 33, 34,
+ 49, 50, 65, 66, 81, 82, 97, 98, 113, 114, 129, 130, 145, 146, 161,
+ 162, 177, 178, 193, 194, 209, 210, 225, 226, 241, 242, 257, 258, 273, 274,
+ 289, 290, 305, 306, 321, 322, 337, 338, 353, 354, 369, 370, 385, 386, 401,
+ 402, 417, 418, 433, 434, 449, 450, 465, 466, 481, 482, 497, 2, 2, 3,
+ 18, 19, 34, 35, 50, 51, 66, 67, 82, 83, 98, 99, 114, 115, 130,
+ 131, 146, 147, 162, 163, 178, 179, 194, 195, 210, 211, 226, 227, 242, 243,
+ 258, 259, 274, 275, 290, 291, 306, 307, 322, 323, 338, 339, 354, 355, 370,
+ 371, 386, 387, 402, 403, 418, 419, 434, 435, 450, 451, 466, 467, 482, 483,
+ 498, 3, 3, 4, 19, 20, 35, 36, 51, 52, 67, 68, 83, 84, 99,
+ 100, 115, 116, 131, 132, 147, 148, 163, 164, 179, 180, 195, 196, 211, 212,
+ 227, 228, 243, 244, 259, 260, 275, 276, 291, 292, 307, 308, 323, 324, 339,
+ 340, 355, 356, 371, 372, 387, 388, 403, 404, 419, 420, 435, 436, 451, 452,
+ 467, 468, 483, 484, 499, 4, 4, 5, 20, 21, 36, 37, 52, 53, 68,
+ 69, 84, 85, 100, 101, 116, 117, 132, 133, 148, 149, 164, 165, 180, 181,
+ 196, 197, 212, 213, 228, 229, 244, 245, 260, 261, 276, 277, 292, 293, 308,
+ 309, 324, 325, 340, 341, 356, 357, 372, 373, 388, 389, 404, 405, 420, 421,
+ 436, 437, 452, 453, 468, 469, 484, 485, 500, 5, 5, 6, 21, 22, 37,
+ 38, 53, 54, 69, 70, 85, 86, 101, 102, 117, 118, 133, 134, 149, 150,
+ 165, 166, 181, 182, 197, 198, 213, 214, 229, 230, 245, 246, 261, 262, 277,
+ 278, 293, 294, 309, 310, 325, 326, 341, 342, 357, 358, 373, 374, 389, 390,
+ 405, 406, 421, 422, 437, 438, 453, 454, 469, 470, 485, 486, 501, 6, 6,
+ 7, 22, 23, 38, 39, 54, 55, 70, 71, 86, 87, 102, 103, 118, 119,
+ 134, 135, 150, 151, 166, 167, 182, 183, 198, 199, 214, 215, 230, 231, 246,
+ 247, 262, 263, 278, 279, 294, 295, 310, 311, 326, 327, 342, 343, 358, 359,
+ 374, 375, 390, 391, 406, 407, 422, 423, 438, 439, 454, 455, 470, 471, 486,
+ 487, 502, 7, 7, 8, 23, 24, 39, 40, 55, 56, 71, 72, 87, 88,
+ 103, 104, 119, 120, 135, 136, 151, 152, 167, 168, 183, 184, 199, 200, 215,
+ 216, 231, 232, 247, 248, 263, 264, 279, 280, 295, 296, 311, 312, 327, 328,
+ 343, 344, 359, 360, 375, 376, 391, 392, 407, 408, 423, 424, 439, 440, 455,
+ 456, 471, 472, 487, 488, 503, 8, 8, 9, 24, 25, 40, 41, 56, 57,
+ 72, 73, 88, 89, 104, 105, 120, 121, 136, 137, 152, 153, 168, 169, 184,
+ 185, 200, 201, 216, 217, 232, 233, 248, 249, 264, 265, 280, 281, 296, 297,
+ 312, 313, 328, 329, 344, 345, 360, 361, 376, 377, 392, 393, 408, 409, 424,
+ 425, 440, 441, 456, 457, 472, 473, 488, 489, 504, 9, 9, 10, 25, 26,
+ 41, 42, 57, 58, 73, 74, 89, 90, 105, 106, 121, 122, 137, 138, 153,
+ 154, 169, 170, 185, 186, 201, 202, 217, 218, 233, 234, 249, 250, 265, 266,
+ 281, 282, 297, 298, 313, 314, 329, 330, 345, 346, 361, 362, 377, 378, 393,
+ 394, 409, 410, 425, 426, 441, 442, 457, 458, 473, 474, 489, 490, 505, 10,
+ 10, 11, 26, 27, 42, 43, 58, 59, 74, 75, 90, 91, 106, 107, 122,
+ 123, 138, 139, 154, 155, 170, 171, 186, 187, 202, 203, 218, 219, 234, 235,
+ 250, 251, 266, 267, 282, 283, 298, 299, 314, 315, 330, 331, 346, 347, 362,
+ 363, 378, 379, 394, 395, 410, 411, 426, 427, 442, 443, 458, 459, 474, 475,
+ 490, 491, 506, 11, 11, 12, 27, 28, 43, 44, 59, 60, 75, 76, 91,
+ 92, 107, 108, 123, 124, 139, 140, 155, 156, 171, 172, 187, 188, 203, 204,
+ 219, 220, 235, 236, 251, 252, 267, 268, 283, 284, 299, 300, 315, 316, 331,
+ 332, 347, 348, 363, 364, 379, 380, 395, 396, 411, 412, 427, 428, 443, 444,
+ 459, 460, 475, 476, 491, 492, 507, 12, 12, 13, 28, 29, 44, 45, 60,
+ 61, 76, 77, 92, 93, 108, 109, 124, 125, 140, 141, 156, 157, 172, 173,
+ 188, 189, 204, 205, 220, 221, 236, 237, 252, 253, 268, 269, 284, 285, 300,
+ 301, 316, 317, 332, 333, 348, 349, 364, 365, 380, 381, 396, 397, 412, 413,
+ 428, 429, 444, 445, 460, 461, 476, 477, 492, 493, 508, 13, 13, 14, 29,
+ 30, 45, 46, 61, 62, 77, 78, 93, 94, 109, 110, 125, 126, 141, 142,
+ 157, 158, 173, 174, 189, 190, 205, 206, 221, 222, 237, 238, 253, 254, 269,
+ 270, 285, 286, 301, 302, 317, 318, 333, 334, 349, 350, 365, 366, 381, 382,
+ 397, 398, 413, 414, 429, 430, 445, 446, 461, 462, 477, 478, 493, 494, 509,
+ 14, 14, 15, 30, 31, 46, 47, 62, 63, 78, 79, 94, 95, 110, 111,
+ 126, 127, 142, 143, 158, 159, 174, 175, 190, 191, 206, 207, 222, 223, 238,
+ 239, 254, 255, 270, 271, 286, 287, 302, 303, 318, 319, 334, 335, 350, 351,
+ 366, 367, 382, 383, 398, 399, 414, 415, 430, 431, 446, 447, 462, 463, 478,
+ 479, 494, 495, 510, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mcol_scan_32x16_neighbors[513 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 32, 32, 64, 64, 96, 96, 128, 128, 160, 160, 192,
+ 192, 224, 224, 256, 256, 288, 288, 320, 320, 352, 352, 384, 384, 416, 416,
+ 448, 448, 0, 0, 1, 32, 33, 64, 65, 96, 97, 128, 129, 160, 161,
+ 192, 193, 224, 225, 256, 257, 288, 289, 320, 321, 352, 353, 384, 385, 416,
+ 417, 448, 449, 480, 1, 1, 2, 33, 34, 65, 66, 97, 98, 129, 130,
+ 161, 162, 193, 194, 225, 226, 257, 258, 289, 290, 321, 322, 353, 354, 385,
+ 386, 417, 418, 449, 450, 481, 2, 2, 3, 34, 35, 66, 67, 98, 99,
+ 130, 131, 162, 163, 194, 195, 226, 227, 258, 259, 290, 291, 322, 323, 354,
+ 355, 386, 387, 418, 419, 450, 451, 482, 3, 3, 4, 35, 36, 67, 68,
+ 99, 100, 131, 132, 163, 164, 195, 196, 227, 228, 259, 260, 291, 292, 323,
+ 324, 355, 356, 387, 388, 419, 420, 451, 452, 483, 4, 4, 5, 36, 37,
+ 68, 69, 100, 101, 132, 133, 164, 165, 196, 197, 228, 229, 260, 261, 292,
+ 293, 324, 325, 356, 357, 388, 389, 420, 421, 452, 453, 484, 5, 5, 6,
+ 37, 38, 69, 70, 101, 102, 133, 134, 165, 166, 197, 198, 229, 230, 261,
+ 262, 293, 294, 325, 326, 357, 358, 389, 390, 421, 422, 453, 454, 485, 6,
+ 6, 7, 38, 39, 70, 71, 102, 103, 134, 135, 166, 167, 198, 199, 230,
+ 231, 262, 263, 294, 295, 326, 327, 358, 359, 390, 391, 422, 423, 454, 455,
+ 486, 7, 7, 8, 39, 40, 71, 72, 103, 104, 135, 136, 167, 168, 199,
+ 200, 231, 232, 263, 264, 295, 296, 327, 328, 359, 360, 391, 392, 423, 424,
+ 455, 456, 487, 8, 8, 9, 40, 41, 72, 73, 104, 105, 136, 137, 168,
+ 169, 200, 201, 232, 233, 264, 265, 296, 297, 328, 329, 360, 361, 392, 393,
+ 424, 425, 456, 457, 488, 9, 9, 10, 41, 42, 73, 74, 105, 106, 137,
+ 138, 169, 170, 201, 202, 233, 234, 265, 266, 297, 298, 329, 330, 361, 362,
+ 393, 394, 425, 426, 457, 458, 489, 10, 10, 11, 42, 43, 74, 75, 106,
+ 107, 138, 139, 170, 171, 202, 203, 234, 235, 266, 267, 298, 299, 330, 331,
+ 362, 363, 394, 395, 426, 427, 458, 459, 490, 11, 11, 12, 43, 44, 75,
+ 76, 107, 108, 139, 140, 171, 172, 203, 204, 235, 236, 267, 268, 299, 300,
+ 331, 332, 363, 364, 395, 396, 427, 428, 459, 460, 491, 12, 12, 13, 44,
+ 45, 76, 77, 108, 109, 140, 141, 172, 173, 204, 205, 236, 237, 268, 269,
+ 300, 301, 332, 333, 364, 365, 396, 397, 428, 429, 460, 461, 492, 13, 13,
+ 14, 45, 46, 77, 78, 109, 110, 141, 142, 173, 174, 205, 206, 237, 238,
+ 269, 270, 301, 302, 333, 334, 365, 366, 397, 398, 429, 430, 461, 462, 493,
+ 14, 14, 15, 46, 47, 78, 79, 110, 111, 142, 143, 174, 175, 206, 207,
+ 238, 239, 270, 271, 302, 303, 334, 335, 366, 367, 398, 399, 430, 431, 462,
+ 463, 494, 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175, 176,
+ 207, 208, 239, 240, 271, 272, 303, 304, 335, 336, 367, 368, 399, 400, 431,
+ 432, 463, 464, 495, 16, 16, 17, 48, 49, 80, 81, 112, 113, 144, 145,
+ 176, 177, 208, 209, 240, 241, 272, 273, 304, 305, 336, 337, 368, 369, 400,
+ 401, 432, 433, 464, 465, 496, 17, 17, 18, 49, 50, 81, 82, 113, 114,
+ 145, 146, 177, 178, 209, 210, 241, 242, 273, 274, 305, 306, 337, 338, 369,
+ 370, 401, 402, 433, 434, 465, 466, 497, 18, 18, 19, 50, 51, 82, 83,
+ 114, 115, 146, 147, 178, 179, 210, 211, 242, 243, 274, 275, 306, 307, 338,
+ 339, 370, 371, 402, 403, 434, 435, 466, 467, 498, 19, 19, 20, 51, 52,
+ 83, 84, 115, 116, 147, 148, 179, 180, 211, 212, 243, 244, 275, 276, 307,
+ 308, 339, 340, 371, 372, 403, 404, 435, 436, 467, 468, 499, 20, 20, 21,
+ 52, 53, 84, 85, 116, 117, 148, 149, 180, 181, 212, 213, 244, 245, 276,
+ 277, 308, 309, 340, 341, 372, 373, 404, 405, 436, 437, 468, 469, 500, 21,
+ 21, 22, 53, 54, 85, 86, 117, 118, 149, 150, 181, 182, 213, 214, 245,
+ 246, 277, 278, 309, 310, 341, 342, 373, 374, 405, 406, 437, 438, 469, 470,
+ 501, 22, 22, 23, 54, 55, 86, 87, 118, 119, 150, 151, 182, 183, 214,
+ 215, 246, 247, 278, 279, 310, 311, 342, 343, 374, 375, 406, 407, 438, 439,
+ 470, 471, 502, 23, 23, 24, 55, 56, 87, 88, 119, 120, 151, 152, 183,
+ 184, 215, 216, 247, 248, 279, 280, 311, 312, 343, 344, 375, 376, 407, 408,
+ 439, 440, 471, 472, 503, 24, 24, 25, 56, 57, 88, 89, 120, 121, 152,
+ 153, 184, 185, 216, 217, 248, 249, 280, 281, 312, 313, 344, 345, 376, 377,
+ 408, 409, 440, 441, 472, 473, 504, 25, 25, 26, 57, 58, 89, 90, 121,
+ 122, 153, 154, 185, 186, 217, 218, 249, 250, 281, 282, 313, 314, 345, 346,
+ 377, 378, 409, 410, 441, 442, 473, 474, 505, 26, 26, 27, 58, 59, 90,
+ 91, 122, 123, 154, 155, 186, 187, 218, 219, 250, 251, 282, 283, 314, 315,
+ 346, 347, 378, 379, 410, 411, 442, 443, 474, 475, 506, 27, 27, 28, 59,
+ 60, 91, 92, 123, 124, 155, 156, 187, 188, 219, 220, 251, 252, 283, 284,
+ 315, 316, 347, 348, 379, 380, 411, 412, 443, 444, 475, 476, 507, 28, 28,
+ 29, 60, 61, 92, 93, 124, 125, 156, 157, 188, 189, 220, 221, 252, 253,
+ 284, 285, 316, 317, 348, 349, 380, 381, 412, 413, 444, 445, 476, 477, 508,
+ 29, 29, 30, 61, 62, 93, 94, 125, 126, 157, 158, 189, 190, 221, 222,
+ 253, 254, 285, 286, 317, 318, 349, 350, 381, 382, 413, 414, 445, 446, 477,
+ 478, 509, 30, 30, 31, 62, 63, 94, 95, 126, 127, 158, 159, 190, 191,
+ 222, 223, 254, 255, 286, 287, 318, 319, 350, 351, 382, 383, 414, 415, 446,
+ 447, 478, 479, 510, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mrow_scan_16x32_neighbors[513 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
+ 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
+ 14, 14, 0, 0, 1, 16, 2, 17, 3, 18, 4, 19, 5, 20, 6,
+ 21, 7, 22, 8, 23, 9, 24, 10, 25, 11, 26, 12, 27, 13, 28,
+ 14, 29, 15, 30, 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21,
+ 36, 22, 37, 23, 38, 24, 39, 25, 40, 26, 41, 27, 42, 28, 43,
+ 29, 44, 30, 45, 31, 46, 32, 32, 33, 48, 34, 49, 35, 50, 36,
+ 51, 37, 52, 38, 53, 39, 54, 40, 55, 41, 56, 42, 57, 43, 58,
+ 44, 59, 45, 60, 46, 61, 47, 62, 48, 48, 49, 64, 50, 65, 51,
+ 66, 52, 67, 53, 68, 54, 69, 55, 70, 56, 71, 57, 72, 58, 73,
+ 59, 74, 60, 75, 61, 76, 62, 77, 63, 78, 64, 64, 65, 80, 66,
+ 81, 67, 82, 68, 83, 69, 84, 70, 85, 71, 86, 72, 87, 73, 88,
+ 74, 89, 75, 90, 76, 91, 77, 92, 78, 93, 79, 94, 80, 80, 81,
+ 96, 82, 97, 83, 98, 84, 99, 85, 100, 86, 101, 87, 102, 88, 103,
+ 89, 104, 90, 105, 91, 106, 92, 107, 93, 108, 94, 109, 95, 110, 96,
+ 96, 97, 112, 98, 113, 99, 114, 100, 115, 101, 116, 102, 117, 103, 118,
+ 104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111,
+ 126, 112, 112, 113, 128, 114, 129, 115, 130, 116, 131, 117, 132, 118, 133,
+ 119, 134, 120, 135, 121, 136, 122, 137, 123, 138, 124, 139, 125, 140, 126,
+ 141, 127, 142, 128, 128, 129, 144, 130, 145, 131, 146, 132, 147, 133, 148,
+ 134, 149, 135, 150, 136, 151, 137, 152, 138, 153, 139, 154, 140, 155, 141,
+ 156, 142, 157, 143, 158, 144, 144, 145, 160, 146, 161, 147, 162, 148, 163,
+ 149, 164, 150, 165, 151, 166, 152, 167, 153, 168, 154, 169, 155, 170, 156,
+ 171, 157, 172, 158, 173, 159, 174, 160, 160, 161, 176, 162, 177, 163, 178,
+ 164, 179, 165, 180, 166, 181, 167, 182, 168, 183, 169, 184, 170, 185, 171,
+ 186, 172, 187, 173, 188, 174, 189, 175, 190, 176, 176, 177, 192, 178, 193,
+ 179, 194, 180, 195, 181, 196, 182, 197, 183, 198, 184, 199, 185, 200, 186,
+ 201, 187, 202, 188, 203, 189, 204, 190, 205, 191, 206, 192, 192, 193, 208,
+ 194, 209, 195, 210, 196, 211, 197, 212, 198, 213, 199, 214, 200, 215, 201,
+ 216, 202, 217, 203, 218, 204, 219, 205, 220, 206, 221, 207, 222, 208, 208,
+ 209, 224, 210, 225, 211, 226, 212, 227, 213, 228, 214, 229, 215, 230, 216,
+ 231, 217, 232, 218, 233, 219, 234, 220, 235, 221, 236, 222, 237, 223, 238,
+ 224, 224, 225, 240, 226, 241, 227, 242, 228, 243, 229, 244, 230, 245, 231,
+ 246, 232, 247, 233, 248, 234, 249, 235, 250, 236, 251, 237, 252, 238, 253,
+ 239, 254, 240, 240, 241, 256, 242, 257, 243, 258, 244, 259, 245, 260, 246,
+ 261, 247, 262, 248, 263, 249, 264, 250, 265, 251, 266, 252, 267, 253, 268,
+ 254, 269, 255, 270, 256, 256, 257, 272, 258, 273, 259, 274, 260, 275, 261,
+ 276, 262, 277, 263, 278, 264, 279, 265, 280, 266, 281, 267, 282, 268, 283,
+ 269, 284, 270, 285, 271, 286, 272, 272, 273, 288, 274, 289, 275, 290, 276,
+ 291, 277, 292, 278, 293, 279, 294, 280, 295, 281, 296, 282, 297, 283, 298,
+ 284, 299, 285, 300, 286, 301, 287, 302, 288, 288, 289, 304, 290, 305, 291,
+ 306, 292, 307, 293, 308, 294, 309, 295, 310, 296, 311, 297, 312, 298, 313,
+ 299, 314, 300, 315, 301, 316, 302, 317, 303, 318, 304, 304, 305, 320, 306,
+ 321, 307, 322, 308, 323, 309, 324, 310, 325, 311, 326, 312, 327, 313, 328,
+ 314, 329, 315, 330, 316, 331, 317, 332, 318, 333, 319, 334, 320, 320, 321,
+ 336, 322, 337, 323, 338, 324, 339, 325, 340, 326, 341, 327, 342, 328, 343,
+ 329, 344, 330, 345, 331, 346, 332, 347, 333, 348, 334, 349, 335, 350, 336,
+ 336, 337, 352, 338, 353, 339, 354, 340, 355, 341, 356, 342, 357, 343, 358,
+ 344, 359, 345, 360, 346, 361, 347, 362, 348, 363, 349, 364, 350, 365, 351,
+ 366, 352, 352, 353, 368, 354, 369, 355, 370, 356, 371, 357, 372, 358, 373,
+ 359, 374, 360, 375, 361, 376, 362, 377, 363, 378, 364, 379, 365, 380, 366,
+ 381, 367, 382, 368, 368, 369, 384, 370, 385, 371, 386, 372, 387, 373, 388,
+ 374, 389, 375, 390, 376, 391, 377, 392, 378, 393, 379, 394, 380, 395, 381,
+ 396, 382, 397, 383, 398, 384, 384, 385, 400, 386, 401, 387, 402, 388, 403,
+ 389, 404, 390, 405, 391, 406, 392, 407, 393, 408, 394, 409, 395, 410, 396,
+ 411, 397, 412, 398, 413, 399, 414, 400, 400, 401, 416, 402, 417, 403, 418,
+ 404, 419, 405, 420, 406, 421, 407, 422, 408, 423, 409, 424, 410, 425, 411,
+ 426, 412, 427, 413, 428, 414, 429, 415, 430, 416, 416, 417, 432, 418, 433,
+ 419, 434, 420, 435, 421, 436, 422, 437, 423, 438, 424, 439, 425, 440, 426,
+ 441, 427, 442, 428, 443, 429, 444, 430, 445, 431, 446, 432, 432, 433, 448,
+ 434, 449, 435, 450, 436, 451, 437, 452, 438, 453, 439, 454, 440, 455, 441,
+ 456, 442, 457, 443, 458, 444, 459, 445, 460, 446, 461, 447, 462, 448, 448,
+ 449, 464, 450, 465, 451, 466, 452, 467, 453, 468, 454, 469, 455, 470, 456,
+ 471, 457, 472, 458, 473, 459, 474, 460, 475, 461, 476, 462, 477, 463, 478,
+ 464, 464, 465, 480, 466, 481, 467, 482, 468, 483, 469, 484, 470, 485, 471,
+ 486, 472, 487, 473, 488, 474, 489, 475, 490, 476, 491, 477, 492, 478, 493,
+ 479, 494, 480, 480, 481, 496, 482, 497, 483, 498, 484, 499, 485, 500, 486,
+ 501, 487, 502, 488, 503, 489, 504, 490, 505, 491, 506, 492, 507, 493, 508,
+ 494, 509, 495, 510, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mrow_scan_32x16_neighbors[513 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
+ 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
+ 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21,
+ 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28,
+ 29, 29, 30, 30, 0, 0, 1, 32, 2, 33, 3, 34, 4, 35, 5,
+ 36, 6, 37, 7, 38, 8, 39, 9, 40, 10, 41, 11, 42, 12, 43,
+ 13, 44, 14, 45, 15, 46, 16, 47, 17, 48, 18, 49, 19, 50, 20,
+ 51, 21, 52, 22, 53, 23, 54, 24, 55, 25, 56, 26, 57, 27, 58,
+ 28, 59, 29, 60, 30, 61, 31, 62, 32, 32, 33, 64, 34, 65, 35,
+ 66, 36, 67, 37, 68, 38, 69, 39, 70, 40, 71, 41, 72, 42, 73,
+ 43, 74, 44, 75, 45, 76, 46, 77, 47, 78, 48, 79, 49, 80, 50,
+ 81, 51, 82, 52, 83, 53, 84, 54, 85, 55, 86, 56, 87, 57, 88,
+ 58, 89, 59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 64, 64, 65,
+ 96, 66, 97, 67, 98, 68, 99, 69, 100, 70, 101, 71, 102, 72, 103,
+ 73, 104, 74, 105, 75, 106, 76, 107, 77, 108, 78, 109, 79, 110, 80,
+ 111, 81, 112, 82, 113, 83, 114, 84, 115, 85, 116, 86, 117, 87, 118,
+ 88, 119, 89, 120, 90, 121, 91, 122, 92, 123, 93, 124, 94, 125, 95,
+ 126, 96, 96, 97, 128, 98, 129, 99, 130, 100, 131, 101, 132, 102, 133,
+ 103, 134, 104, 135, 105, 136, 106, 137, 107, 138, 108, 139, 109, 140, 110,
+ 141, 111, 142, 112, 143, 113, 144, 114, 145, 115, 146, 116, 147, 117, 148,
+ 118, 149, 119, 150, 120, 151, 121, 152, 122, 153, 123, 154, 124, 155, 125,
+ 156, 126, 157, 127, 158, 128, 128, 129, 160, 130, 161, 131, 162, 132, 163,
+ 133, 164, 134, 165, 135, 166, 136, 167, 137, 168, 138, 169, 139, 170, 140,
+ 171, 141, 172, 142, 173, 143, 174, 144, 175, 145, 176, 146, 177, 147, 178,
+ 148, 179, 149, 180, 150, 181, 151, 182, 152, 183, 153, 184, 154, 185, 155,
+ 186, 156, 187, 157, 188, 158, 189, 159, 190, 160, 160, 161, 192, 162, 193,
+ 163, 194, 164, 195, 165, 196, 166, 197, 167, 198, 168, 199, 169, 200, 170,
+ 201, 171, 202, 172, 203, 173, 204, 174, 205, 175, 206, 176, 207, 177, 208,
+ 178, 209, 179, 210, 180, 211, 181, 212, 182, 213, 183, 214, 184, 215, 185,
+ 216, 186, 217, 187, 218, 188, 219, 189, 220, 190, 221, 191, 222, 192, 192,
+ 193, 224, 194, 225, 195, 226, 196, 227, 197, 228, 198, 229, 199, 230, 200,
+ 231, 201, 232, 202, 233, 203, 234, 204, 235, 205, 236, 206, 237, 207, 238,
+ 208, 239, 209, 240, 210, 241, 211, 242, 212, 243, 213, 244, 214, 245, 215,
+ 246, 216, 247, 217, 248, 218, 249, 219, 250, 220, 251, 221, 252, 222, 253,
+ 223, 254, 224, 224, 225, 256, 226, 257, 227, 258, 228, 259, 229, 260, 230,
+ 261, 231, 262, 232, 263, 233, 264, 234, 265, 235, 266, 236, 267, 237, 268,
+ 238, 269, 239, 270, 240, 271, 241, 272, 242, 273, 243, 274, 244, 275, 245,
+ 276, 246, 277, 247, 278, 248, 279, 249, 280, 250, 281, 251, 282, 252, 283,
+ 253, 284, 254, 285, 255, 286, 256, 256, 257, 288, 258, 289, 259, 290, 260,
+ 291, 261, 292, 262, 293, 263, 294, 264, 295, 265, 296, 266, 297, 267, 298,
+ 268, 299, 269, 300, 270, 301, 271, 302, 272, 303, 273, 304, 274, 305, 275,
+ 306, 276, 307, 277, 308, 278, 309, 279, 310, 280, 311, 281, 312, 282, 313,
+ 283, 314, 284, 315, 285, 316, 286, 317, 287, 318, 288, 288, 289, 320, 290,
+ 321, 291, 322, 292, 323, 293, 324, 294, 325, 295, 326, 296, 327, 297, 328,
+ 298, 329, 299, 330, 300, 331, 301, 332, 302, 333, 303, 334, 304, 335, 305,
+ 336, 306, 337, 307, 338, 308, 339, 309, 340, 310, 341, 311, 342, 312, 343,
+ 313, 344, 314, 345, 315, 346, 316, 347, 317, 348, 318, 349, 319, 350, 320,
+ 320, 321, 352, 322, 353, 323, 354, 324, 355, 325, 356, 326, 357, 327, 358,
+ 328, 359, 329, 360, 330, 361, 331, 362, 332, 363, 333, 364, 334, 365, 335,
+ 366, 336, 367, 337, 368, 338, 369, 339, 370, 340, 371, 341, 372, 342, 373,
+ 343, 374, 344, 375, 345, 376, 346, 377, 347, 378, 348, 379, 349, 380, 350,
+ 381, 351, 382, 352, 352, 353, 384, 354, 385, 355, 386, 356, 387, 357, 388,
+ 358, 389, 359, 390, 360, 391, 361, 392, 362, 393, 363, 394, 364, 395, 365,
+ 396, 366, 397, 367, 398, 368, 399, 369, 400, 370, 401, 371, 402, 372, 403,
+ 373, 404, 374, 405, 375, 406, 376, 407, 377, 408, 378, 409, 379, 410, 380,
+ 411, 381, 412, 382, 413, 383, 414, 384, 384, 385, 416, 386, 417, 387, 418,
+ 388, 419, 389, 420, 390, 421, 391, 422, 392, 423, 393, 424, 394, 425, 395,
+ 426, 396, 427, 397, 428, 398, 429, 399, 430, 400, 431, 401, 432, 402, 433,
+ 403, 434, 404, 435, 405, 436, 406, 437, 407, 438, 408, 439, 409, 440, 410,
+ 441, 411, 442, 412, 443, 413, 444, 414, 445, 415, 446, 416, 416, 417, 448,
+ 418, 449, 419, 450, 420, 451, 421, 452, 422, 453, 423, 454, 424, 455, 425,
+ 456, 426, 457, 427, 458, 428, 459, 429, 460, 430, 461, 431, 462, 432, 463,
+ 433, 464, 434, 465, 435, 466, 436, 467, 437, 468, 438, 469, 439, 470, 440,
+ 471, 441, 472, 442, 473, 443, 474, 444, 475, 445, 476, 446, 477, 447, 478,
+ 448, 448, 449, 480, 450, 481, 451, 482, 452, 483, 453, 484, 454, 485, 455,
+ 486, 456, 487, 457, 488, 458, 489, 459, 490, 460, 491, 461, 492, 462, 493,
+ 463, 494, 464, 495, 465, 496, 466, 497, 467, 498, 468, 499, 469, 500, 470,
+ 501, 471, 502, 472, 503, 473, 504, 474, 505, 475, 506, 476, 507, 477, 508,
+ 478, 509, 479, 510, 0, 0
+};
+
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+ mcol_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 16, 16, 32, 32, 48, 48, 64, 64, 80, 80, 96,
+ 96, 112, 112, 128, 128, 144, 144, 160, 160, 176, 176, 192, 192, 208, 208,
+ 224, 224, 0, 0, 1, 16, 17, 32, 33, 48, 49, 64, 65, 80, 81,
+ 96, 97, 112, 113, 128, 129, 144, 145, 160, 161, 176, 177, 192, 193, 208,
+ 209, 224, 225, 240, 1, 1, 2, 17, 18, 33, 34, 49, 50, 65, 66,
+ 81, 82, 97, 98, 113, 114, 129, 130, 145, 146, 161, 162, 177, 178, 193,
+ 194, 209, 210, 225, 226, 241, 2, 2, 3, 18, 19, 34, 35, 50, 51,
+ 66, 67, 82, 83, 98, 99, 114, 115, 130, 131, 146, 147, 162, 163, 178,
+ 179, 194, 195, 210, 211, 226, 227, 242, 3, 3, 4, 19, 20, 35, 36,
+ 51, 52, 67, 68, 83, 84, 99, 100, 115, 116, 131, 132, 147, 148, 163,
+ 164, 179, 180, 195, 196, 211, 212, 227, 228, 243, 4, 4, 5, 20, 21,
+ 36, 37, 52, 53, 68, 69, 84, 85, 100, 101, 116, 117, 132, 133, 148,
+ 149, 164, 165, 180, 181, 196, 197, 212, 213, 228, 229, 244, 5, 5, 6,
+ 21, 22, 37, 38, 53, 54, 69, 70, 85, 86, 101, 102, 117, 118, 133,
+ 134, 149, 150, 165, 166, 181, 182, 197, 198, 213, 214, 229, 230, 245, 6,
+ 6, 7, 22, 23, 38, 39, 54, 55, 70, 71, 86, 87, 102, 103, 118,
+ 119, 134, 135, 150, 151, 166, 167, 182, 183, 198, 199, 214, 215, 230, 231,
+ 246, 7, 7, 8, 23, 24, 39, 40, 55, 56, 71, 72, 87, 88, 103,
+ 104, 119, 120, 135, 136, 151, 152, 167, 168, 183, 184, 199, 200, 215, 216,
+ 231, 232, 247, 8, 8, 9, 24, 25, 40, 41, 56, 57, 72, 73, 88,
+ 89, 104, 105, 120, 121, 136, 137, 152, 153, 168, 169, 184, 185, 200, 201,
+ 216, 217, 232, 233, 248, 9, 9, 10, 25, 26, 41, 42, 57, 58, 73,
+ 74, 89, 90, 105, 106, 121, 122, 137, 138, 153, 154, 169, 170, 185, 186,
+ 201, 202, 217, 218, 233, 234, 249, 10, 10, 11, 26, 27, 42, 43, 58,
+ 59, 74, 75, 90, 91, 106, 107, 122, 123, 138, 139, 154, 155, 170, 171,
+ 186, 187, 202, 203, 218, 219, 234, 235, 250, 11, 11, 12, 27, 28, 43,
+ 44, 59, 60, 75, 76, 91, 92, 107, 108, 123, 124, 139, 140, 155, 156,
+ 171, 172, 187, 188, 203, 204, 219, 220, 235, 236, 251, 12, 12, 13, 28,
+ 29, 44, 45, 60, 61, 76, 77, 92, 93, 108, 109, 124, 125, 140, 141,
+ 156, 157, 172, 173, 188, 189, 204, 205, 220, 221, 236, 237, 252, 13, 13,
+ 14, 29, 30, 45, 46, 61, 62, 77, 78, 93, 94, 109, 110, 125, 126,
+ 141, 142, 157, 158, 173, 174, 189, 190, 205, 206, 221, 222, 237, 238, 253,
+ 14, 14, 15, 30, 31, 46, 47, 62, 63, 78, 79, 94, 95, 110, 111,
+ 126, 127, 142, 143, 158, 159, 174, 175, 190, 191, 206, 207, 222, 223, 238,
+ 239, 254, 0, 0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mrow_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
+ 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
+ 14, 14, 0, 0, 1, 16, 2, 17, 3, 18, 4, 19, 5, 20, 6,
+ 21, 7, 22, 8, 23, 9, 24, 10, 25, 11, 26, 12, 27, 13, 28,
+ 14, 29, 15, 30, 16, 16, 17, 32, 18, 33, 19, 34, 20, 35, 21,
+ 36, 22, 37, 23, 38, 24, 39, 25, 40, 26, 41, 27, 42, 28, 43,
+ 29, 44, 30, 45, 31, 46, 32, 32, 33, 48, 34, 49, 35, 50, 36,
+ 51, 37, 52, 38, 53, 39, 54, 40, 55, 41, 56, 42, 57, 43, 58,
+ 44, 59, 45, 60, 46, 61, 47, 62, 48, 48, 49, 64, 50, 65, 51,
+ 66, 52, 67, 53, 68, 54, 69, 55, 70, 56, 71, 57, 72, 58, 73,
+ 59, 74, 60, 75, 61, 76, 62, 77, 63, 78, 64, 64, 65, 80, 66,
+ 81, 67, 82, 68, 83, 69, 84, 70, 85, 71, 86, 72, 87, 73, 88,
+ 74, 89, 75, 90, 76, 91, 77, 92, 78, 93, 79, 94, 80, 80, 81,
+ 96, 82, 97, 83, 98, 84, 99, 85, 100, 86, 101, 87, 102, 88, 103,
+ 89, 104, 90, 105, 91, 106, 92, 107, 93, 108, 94, 109, 95, 110, 96,
+ 96, 97, 112, 98, 113, 99, 114, 100, 115, 101, 116, 102, 117, 103, 118,
+ 104, 119, 105, 120, 106, 121, 107, 122, 108, 123, 109, 124, 110, 125, 111,
+ 126, 112, 112, 113, 128, 114, 129, 115, 130, 116, 131, 117, 132, 118, 133,
+ 119, 134, 120, 135, 121, 136, 122, 137, 123, 138, 124, 139, 125, 140, 126,
+ 141, 127, 142, 128, 128, 129, 144, 130, 145, 131, 146, 132, 147, 133, 148,
+ 134, 149, 135, 150, 136, 151, 137, 152, 138, 153, 139, 154, 140, 155, 141,
+ 156, 142, 157, 143, 158, 144, 144, 145, 160, 146, 161, 147, 162, 148, 163,
+ 149, 164, 150, 165, 151, 166, 152, 167, 153, 168, 154, 169, 155, 170, 156,
+ 171, 157, 172, 158, 173, 159, 174, 160, 160, 161, 176, 162, 177, 163, 178,
+ 164, 179, 165, 180, 166, 181, 167, 182, 168, 183, 169, 184, 170, 185, 171,
+ 186, 172, 187, 173, 188, 174, 189, 175, 190, 176, 176, 177, 192, 178, 193,
+ 179, 194, 180, 195, 181, 196, 182, 197, 183, 198, 184, 199, 185, 200, 186,
+ 201, 187, 202, 188, 203, 189, 204, 190, 205, 191, 206, 192, 192, 193, 208,
+ 194, 209, 195, 210, 196, 211, 197, 212, 198, 213, 199, 214, 200, 215, 201,
+ 216, 202, 217, 203, 218, 204, 219, 205, 220, 206, 221, 207, 222, 208, 208,
+ 209, 224, 210, 225, 211, 226, 212, 227, 213, 228, 214, 229, 215, 230, 216,
+ 231, 217, 232, 218, 233, 219, 234, 220, 235, 221, 236, 222, 237, 223, 238,
+ 224, 224, 225, 240, 226, 241, 227, 242, 228, 243, 229, 244, 230, 245, 231,
+ 246, 232, 247, 233, 248, 234, 249, 235, 250, 236, 251, 237, 252, 238, 253,
+ 239, 254, 0, 0,
+};
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t,
+ col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 16, 16, 32, 32, 16, 0, 48, 48, 1, 16, 64,
+ 64, 17, 32, 80, 80, 33, 48, 17, 1, 49, 64, 96, 96, 2, 17,
+ 65, 80, 18, 33, 112, 112, 34, 49, 81, 96, 18, 2, 50, 65, 128,
+ 128, 3, 18, 97, 112, 19, 34, 66, 81, 144, 144, 82, 97, 35, 50,
+ 113, 128, 19, 3, 51, 66, 160, 160, 4, 19, 98, 113, 129, 144, 67,
+ 82, 20, 35, 83, 98, 114, 129, 36, 51, 176, 176, 20, 4, 145, 160,
+ 52, 67, 99, 114, 5, 20, 130, 145, 68, 83, 192, 192, 161, 176, 21,
+ 36, 115, 130, 84, 99, 37, 52, 146, 161, 208, 208, 53, 68, 21, 5,
+ 100, 115, 177, 192, 131, 146, 69, 84, 6, 21, 224, 224, 116, 131, 22,
+ 37, 162, 177, 85, 100, 147, 162, 38, 53, 193, 208, 101, 116, 54, 69,
+ 22, 6, 132, 147, 178, 193, 70, 85, 163, 178, 209, 224, 7, 22, 117,
+ 132, 23, 38, 148, 163, 23, 7, 86, 101, 194, 209, 225, 240, 39, 54,
+ 179, 194, 102, 117, 133, 148, 55, 70, 164, 179, 8, 23, 71, 86, 210,
+ 225, 118, 133, 149, 164, 195, 210, 24, 39, 87, 102, 40, 55, 56, 71,
+ 134, 149, 180, 195, 226, 241, 103, 118, 24, 8, 165, 180, 211, 226, 72,
+ 87, 150, 165, 9, 24, 119, 134, 25, 40, 88, 103, 196, 211, 41, 56,
+ 135, 150, 181, 196, 104, 119, 57, 72, 227, 242, 166, 181, 120, 135, 151,
+ 166, 197, 212, 73, 88, 25, 9, 212, 227, 89, 104, 136, 151, 182, 197,
+ 10, 25, 26, 41, 105, 120, 167, 182, 228, 243, 152, 167, 42, 57, 121,
+ 136, 213, 228, 58, 73, 198, 213, 74, 89, 137, 152, 183, 198, 168, 183,
+ 26, 10, 90, 105, 229, 244, 11, 26, 106, 121, 214, 229, 153, 168, 27,
+ 42, 199, 214, 43, 58, 184, 199, 122, 137, 169, 184, 230, 245, 59, 74,
+ 27, 11, 75, 90, 138, 153, 200, 215, 215, 230, 91, 106, 12, 27, 28,
+ 43, 185, 200, 107, 122, 154, 169, 44, 59, 231, 246, 216, 231, 60, 75,
+ 123, 138, 28, 12, 76, 91, 201, 216, 170, 185, 232, 247, 139, 154, 92,
+ 107, 13, 28, 108, 123, 29, 44, 186, 201, 217, 232, 155, 170, 45, 60,
+ 29, 13, 61, 76, 124, 139, 14, 14, 233, 248, 77, 92, 14, 29, 171,
+ 186, 140, 155, 202, 217, 30, 45, 93, 108, 109, 124, 46, 61, 156, 171,
+ 62, 77, 187, 202, 15, 30, 125, 140, 218, 233, 78, 93, 31, 46, 172,
+ 187, 47, 62, 141, 156, 94, 109, 234, 249, 203, 218, 63, 78, 110, 125,
+ 188, 203, 157, 172, 126, 141, 79, 94, 173, 188, 95, 110, 219, 234, 142,
+ 157, 204, 219, 235, 250, 111, 126, 158, 173, 127, 142, 189, 204, 220, 235,
+ 143, 158, 174, 189, 205, 220, 236, 251, 159, 174, 190, 205, 221, 236, 175,
+ 190, 237, 252, 206, 221, 222, 237, 191, 206, 238, 253, 207, 222, 223, 238,
+ 239, 254, 0, 0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 16, 3, 3, 2,
+ 17, 16, 17, 4, 4, 17, 32, 3, 18, 5, 5, 18, 33, 32, 33,
+ 4, 19, 33, 48, 6, 6, 19, 34, 5, 20, 34, 49, 48, 49, 7,
+ 7, 20, 35, 49, 64, 6, 21, 35, 50, 21, 36, 64, 65, 8, 8,
+ 50, 65, 36, 51, 7, 22, 22, 37, 65, 80, 51, 66, 9, 9, 37,
+ 52, 8, 23, 66, 81, 52, 67, 80, 81, 23, 38, 10, 10, 38, 53,
+ 67, 82, 81, 96, 53, 68, 9, 24, 82, 97, 68, 83, 24, 39, 96,
+ 97, 39, 54, 11, 11, 54, 69, 83, 98, 97, 112, 69, 84, 10, 25,
+ 25, 40, 40, 55, 98, 113, 84, 99, 12, 12, 55, 70, 112, 113, 70,
+ 85, 11, 26, 99, 114, 85, 100, 113, 128, 26, 41, 41, 56, 56, 71,
+ 100, 115, 13, 13, 71, 86, 114, 129, 86, 101, 128, 129, 57, 72, 115,
+ 130, 101, 116, 12, 27, 42, 57, 14, 14, 72, 87, 27, 42, 129, 144,
+ 87, 102, 116, 131, 130, 145, 102, 117, 58, 73, 144, 145, 73, 88, 117,
+ 132, 88, 103, 13, 28, 43, 58, 131, 146, 103, 118, 28, 43, 145, 160,
+ 132, 147, 74, 89, 89, 104, 118, 133, 146, 161, 104, 119, 160, 161, 59,
+ 74, 119, 134, 133, 148, 14, 29, 44, 59, 147, 162, 161, 176, 29, 44,
+ 105, 120, 75, 90, 90, 105, 148, 163, 162, 177, 134, 149, 176, 177, 120,
+ 135, 149, 164, 163, 178, 15, 30, 135, 150, 177, 192, 60, 75, 106, 121,
+ 45, 60, 121, 136, 178, 193, 91, 106, 136, 151, 164, 179, 192, 193, 30,
+ 45, 150, 165, 151, 166, 179, 194, 76, 91, 165, 180, 122, 137, 193, 208,
+ 107, 122, 137, 152, 208, 209, 180, 195, 61, 76, 152, 167, 194, 209, 166,
+ 181, 224, 224, 92, 107, 181, 196, 46, 61, 138, 153, 209, 224, 167, 182,
+ 153, 168, 195, 210, 31, 46, 123, 138, 77, 92, 168, 183, 210, 225, 196,
+ 211, 225, 240, 182, 197, 154, 169, 108, 123, 139, 154, 183, 198, 62, 77,
+ 197, 212, 169, 184, 93, 108, 211, 226, 184, 199, 47, 62, 212, 227, 226,
+ 241, 124, 139, 198, 213, 155, 170, 170, 185, 140, 155, 213, 228, 227, 242,
+ 109, 124, 78, 93, 185, 200, 228, 243, 199, 214, 200, 215, 214, 229, 125,
+ 140, 171, 186, 186, 201, 63, 78, 156, 171, 94, 109, 141, 156, 229, 244,
+ 201, 216, 215, 230, 79, 94, 230, 245, 216, 231, 110, 125, 187, 202, 231,
+ 246, 217, 232, 157, 172, 202, 217, 126, 141, 95, 110, 142, 157, 172, 187,
+ 232, 247, 111, 126, 218, 233, 203, 218, 233, 248, 173, 188, 188, 203, 127,
+ 142, 158, 173, 143, 158, 234, 249, 219, 234, 189, 204, 204, 219, 159, 174,
+ 174, 189, 235, 250, 205, 220, 175, 190, 190, 205, 220, 235, 191, 206, 221,
+ 236, 236, 251, 206, 221, 237, 252, 207, 222, 222, 237, 223, 238, 238, 253,
+ 239, 254, 0, 0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 16, 0, 16, 16, 1, 16, 17, 1, 32, 32, 17,
+ 32, 2, 17, 18, 2, 48, 48, 18, 33, 33, 48, 3, 18, 49, 64,
+ 64, 65, 34, 49, 19, 3, 19, 34, 50, 65, 4, 19, 65, 80, 80,
+ 81, 35, 50, 20, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 97,
+ 5, 20, 36, 51, 82, 97, 21, 36, 67, 82, 97, 112, 21, 5, 52,
+ 67, 112, 113, 37, 52, 6, 21, 83, 98, 98, 113, 68, 83, 22, 6,
+ 113, 128, 22, 37, 53, 68, 84, 99, 99, 114, 128, 129, 114, 129, 69,
+ 84, 38, 53, 7, 22, 23, 7, 129, 144, 23, 38, 54, 69, 100, 115,
+ 85, 100, 115, 130, 144, 145, 130, 145, 39, 54, 70, 85, 8, 23, 55,
+ 70, 116, 131, 101, 116, 145, 160, 24, 39, 24, 8, 86, 101, 131, 146,
+ 160, 161, 146, 161, 71, 86, 40, 55, 9, 24, 117, 132, 102, 117, 161,
+ 176, 132, 147, 56, 71, 87, 102, 25, 40, 147, 162, 25, 9, 176, 177,
+ 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, 10, 25, 148,
+ 163, 57, 72, 88, 103, 177, 192, 26, 41, 163, 178, 192, 193, 26, 10,
+ 119, 134, 73, 88, 149, 164, 104, 119, 134, 149, 42, 57, 178, 193, 164,
+ 179, 11, 26, 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42,
+ 74, 89, 208, 209, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43,
+ 58, 27, 11, 136, 151, 90, 105, 151, 166, 180, 195, 59, 74, 121, 136,
+ 209, 224, 195, 210, 224, 225, 166, 181, 106, 121, 75, 90, 12, 27, 181,
+ 196, 28, 12, 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211,
+ 122, 137, 91, 106, 225, 240, 44, 59, 13, 28, 107, 122, 182, 197, 168,
+ 183, 211, 226, 153, 168, 226, 241, 60, 75, 197, 212, 138, 153, 29, 44,
+ 76, 91, 29, 13, 183, 198, 123, 138, 45, 60, 212, 227, 198, 213, 154,
+ 169, 169, 184, 227, 242, 92, 107, 61, 76, 139, 154, 14, 29, 30, 14,
+ 184, 199, 213, 228, 108, 123, 199, 214, 228, 243, 77, 92, 30, 45, 170,
+ 185, 155, 170, 185, 200, 93, 108, 124, 139, 214, 229, 46, 61, 200, 215,
+ 229, 244, 15, 30, 109, 124, 62, 77, 140, 155, 215, 230, 31, 46, 171,
+ 186, 186, 201, 201, 216, 78, 93, 230, 245, 125, 140, 47, 62, 216, 231,
+ 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, 187, 202, 110,
+ 125, 217, 232, 172, 187, 232, 247, 79, 94, 157, 172, 126, 141, 203, 218,
+ 95, 110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, 234,
+ 249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, 250,
+ 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, 236,
+ 251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, 238,
+ 239, 254, 0, 0,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+ mcol_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 32, 32, 64, 64, 96, 96, 128, 128, 160, 160,
+ 192, 192, 224, 224, 256, 256, 288, 288, 320, 320, 352, 352, 384, 384,
+ 416, 416, 448, 448, 480, 480, 512, 512, 544, 544, 576, 576, 608, 608,
+ 640, 640, 672, 672, 704, 704, 736, 736, 768, 768, 800, 800, 832, 832,
+ 864, 864, 896, 896, 928, 928, 960, 960, 0, 0, 1, 32, 33, 64,
+ 65, 96, 97, 128, 129, 160, 161, 192, 193, 224, 225, 256, 257, 288,
+ 289, 320, 321, 352, 353, 384, 385, 416, 417, 448, 449, 480, 481, 512,
+ 513, 544, 545, 576, 577, 608, 609, 640, 641, 672, 673, 704, 705, 736,
+ 737, 768, 769, 800, 801, 832, 833, 864, 865, 896, 897, 928, 929, 960,
+ 961, 992, 1, 1, 2, 33, 34, 65, 66, 97, 98, 129, 130, 161,
+ 162, 193, 194, 225, 226, 257, 258, 289, 290, 321, 322, 353, 354, 385,
+ 386, 417, 418, 449, 450, 481, 482, 513, 514, 545, 546, 577, 578, 609,
+ 610, 641, 642, 673, 674, 705, 706, 737, 738, 769, 770, 801, 802, 833,
+ 834, 865, 866, 897, 898, 929, 930, 961, 962, 993, 2, 2, 3, 34,
+ 35, 66, 67, 98, 99, 130, 131, 162, 163, 194, 195, 226, 227, 258,
+ 259, 290, 291, 322, 323, 354, 355, 386, 387, 418, 419, 450, 451, 482,
+ 483, 514, 515, 546, 547, 578, 579, 610, 611, 642, 643, 674, 675, 706,
+ 707, 738, 739, 770, 771, 802, 803, 834, 835, 866, 867, 898, 899, 930,
+ 931, 962, 963, 994, 3, 3, 4, 35, 36, 67, 68, 99, 100, 131,
+ 132, 163, 164, 195, 196, 227, 228, 259, 260, 291, 292, 323, 324, 355,
+ 356, 387, 388, 419, 420, 451, 452, 483, 484, 515, 516, 547, 548, 579,
+ 580, 611, 612, 643, 644, 675, 676, 707, 708, 739, 740, 771, 772, 803,
+ 804, 835, 836, 867, 868, 899, 900, 931, 932, 963, 964, 995, 4, 4,
+ 5, 36, 37, 68, 69, 100, 101, 132, 133, 164, 165, 196, 197, 228,
+ 229, 260, 261, 292, 293, 324, 325, 356, 357, 388, 389, 420, 421, 452,
+ 453, 484, 485, 516, 517, 548, 549, 580, 581, 612, 613, 644, 645, 676,
+ 677, 708, 709, 740, 741, 772, 773, 804, 805, 836, 837, 868, 869, 900,
+ 901, 932, 933, 964, 965, 996, 5, 5, 6, 37, 38, 69, 70, 101,
+ 102, 133, 134, 165, 166, 197, 198, 229, 230, 261, 262, 293, 294, 325,
+ 326, 357, 358, 389, 390, 421, 422, 453, 454, 485, 486, 517, 518, 549,
+ 550, 581, 582, 613, 614, 645, 646, 677, 678, 709, 710, 741, 742, 773,
+ 774, 805, 806, 837, 838, 869, 870, 901, 902, 933, 934, 965, 966, 997,
+ 6, 6, 7, 38, 39, 70, 71, 102, 103, 134, 135, 166, 167, 198,
+ 199, 230, 231, 262, 263, 294, 295, 326, 327, 358, 359, 390, 391, 422,
+ 423, 454, 455, 486, 487, 518, 519, 550, 551, 582, 583, 614, 615, 646,
+ 647, 678, 679, 710, 711, 742, 743, 774, 775, 806, 807, 838, 839, 870,
+ 871, 902, 903, 934, 935, 966, 967, 998, 7, 7, 8, 39, 40, 71,
+ 72, 103, 104, 135, 136, 167, 168, 199, 200, 231, 232, 263, 264, 295,
+ 296, 327, 328, 359, 360, 391, 392, 423, 424, 455, 456, 487, 488, 519,
+ 520, 551, 552, 583, 584, 615, 616, 647, 648, 679, 680, 711, 712, 743,
+ 744, 775, 776, 807, 808, 839, 840, 871, 872, 903, 904, 935, 936, 967,
+ 968, 999, 8, 8, 9, 40, 41, 72, 73, 104, 105, 136, 137, 168,
+ 169, 200, 201, 232, 233, 264, 265, 296, 297, 328, 329, 360, 361, 392,
+ 393, 424, 425, 456, 457, 488, 489, 520, 521, 552, 553, 584, 585, 616,
+ 617, 648, 649, 680, 681, 712, 713, 744, 745, 776, 777, 808, 809, 840,
+ 841, 872, 873, 904, 905, 936, 937, 968, 969, 1000, 9, 9, 10, 41,
+ 42, 73, 74, 105, 106, 137, 138, 169, 170, 201, 202, 233, 234, 265,
+ 266, 297, 298, 329, 330, 361, 362, 393, 394, 425, 426, 457, 458, 489,
+ 490, 521, 522, 553, 554, 585, 586, 617, 618, 649, 650, 681, 682, 713,
+ 714, 745, 746, 777, 778, 809, 810, 841, 842, 873, 874, 905, 906, 937,
+ 938, 969, 970, 1001, 10, 10, 11, 42, 43, 74, 75, 106, 107, 138,
+ 139, 170, 171, 202, 203, 234, 235, 266, 267, 298, 299, 330, 331, 362,
+ 363, 394, 395, 426, 427, 458, 459, 490, 491, 522, 523, 554, 555, 586,
+ 587, 618, 619, 650, 651, 682, 683, 714, 715, 746, 747, 778, 779, 810,
+ 811, 842, 843, 874, 875, 906, 907, 938, 939, 970, 971, 1002, 11, 11,
+ 12, 43, 44, 75, 76, 107, 108, 139, 140, 171, 172, 203, 204, 235,
+ 236, 267, 268, 299, 300, 331, 332, 363, 364, 395, 396, 427, 428, 459,
+ 460, 491, 492, 523, 524, 555, 556, 587, 588, 619, 620, 651, 652, 683,
+ 684, 715, 716, 747, 748, 779, 780, 811, 812, 843, 844, 875, 876, 907,
+ 908, 939, 940, 971, 972, 1003, 12, 12, 13, 44, 45, 76, 77, 108,
+ 109, 140, 141, 172, 173, 204, 205, 236, 237, 268, 269, 300, 301, 332,
+ 333, 364, 365, 396, 397, 428, 429, 460, 461, 492, 493, 524, 525, 556,
+ 557, 588, 589, 620, 621, 652, 653, 684, 685, 716, 717, 748, 749, 780,
+ 781, 812, 813, 844, 845, 876, 877, 908, 909, 940, 941, 972, 973, 1004,
+ 13, 13, 14, 45, 46, 77, 78, 109, 110, 141, 142, 173, 174, 205,
+ 206, 237, 238, 269, 270, 301, 302, 333, 334, 365, 366, 397, 398, 429,
+ 430, 461, 462, 493, 494, 525, 526, 557, 558, 589, 590, 621, 622, 653,
+ 654, 685, 686, 717, 718, 749, 750, 781, 782, 813, 814, 845, 846, 877,
+ 878, 909, 910, 941, 942, 973, 974, 1005, 14, 14, 15, 46, 47, 78,
+ 79, 110, 111, 142, 143, 174, 175, 206, 207, 238, 239, 270, 271, 302,
+ 303, 334, 335, 366, 367, 398, 399, 430, 431, 462, 463, 494, 495, 526,
+ 527, 558, 559, 590, 591, 622, 623, 654, 655, 686, 687, 718, 719, 750,
+ 751, 782, 783, 814, 815, 846, 847, 878, 879, 910, 911, 942, 943, 974,
+ 975, 1006, 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175,
+ 176, 207, 208, 239, 240, 271, 272, 303, 304, 335, 336, 367, 368, 399,
+ 400, 431, 432, 463, 464, 495, 496, 527, 528, 559, 560, 591, 592, 623,
+ 624, 655, 656, 687, 688, 719, 720, 751, 752, 783, 784, 815, 816, 847,
+ 848, 879, 880, 911, 912, 943, 944, 975, 976, 1007, 16, 16, 17, 48,
+ 49, 80, 81, 112, 113, 144, 145, 176, 177, 208, 209, 240, 241, 272,
+ 273, 304, 305, 336, 337, 368, 369, 400, 401, 432, 433, 464, 465, 496,
+ 497, 528, 529, 560, 561, 592, 593, 624, 625, 656, 657, 688, 689, 720,
+ 721, 752, 753, 784, 785, 816, 817, 848, 849, 880, 881, 912, 913, 944,
+ 945, 976, 977, 1008, 17, 17, 18, 49, 50, 81, 82, 113, 114, 145,
+ 146, 177, 178, 209, 210, 241, 242, 273, 274, 305, 306, 337, 338, 369,
+ 370, 401, 402, 433, 434, 465, 466, 497, 498, 529, 530, 561, 562, 593,
+ 594, 625, 626, 657, 658, 689, 690, 721, 722, 753, 754, 785, 786, 817,
+ 818, 849, 850, 881, 882, 913, 914, 945, 946, 977, 978, 1009, 18, 18,
+ 19, 50, 51, 82, 83, 114, 115, 146, 147, 178, 179, 210, 211, 242,
+ 243, 274, 275, 306, 307, 338, 339, 370, 371, 402, 403, 434, 435, 466,
+ 467, 498, 499, 530, 531, 562, 563, 594, 595, 626, 627, 658, 659, 690,
+ 691, 722, 723, 754, 755, 786, 787, 818, 819, 850, 851, 882, 883, 914,
+ 915, 946, 947, 978, 979, 1010, 19, 19, 20, 51, 52, 83, 84, 115,
+ 116, 147, 148, 179, 180, 211, 212, 243, 244, 275, 276, 307, 308, 339,
+ 340, 371, 372, 403, 404, 435, 436, 467, 468, 499, 500, 531, 532, 563,
+ 564, 595, 596, 627, 628, 659, 660, 691, 692, 723, 724, 755, 756, 787,
+ 788, 819, 820, 851, 852, 883, 884, 915, 916, 947, 948, 979, 980, 1011,
+ 20, 20, 21, 52, 53, 84, 85, 116, 117, 148, 149, 180, 181, 212,
+ 213, 244, 245, 276, 277, 308, 309, 340, 341, 372, 373, 404, 405, 436,
+ 437, 468, 469, 500, 501, 532, 533, 564, 565, 596, 597, 628, 629, 660,
+ 661, 692, 693, 724, 725, 756, 757, 788, 789, 820, 821, 852, 853, 884,
+ 885, 916, 917, 948, 949, 980, 981, 1012, 21, 21, 22, 53, 54, 85,
+ 86, 117, 118, 149, 150, 181, 182, 213, 214, 245, 246, 277, 278, 309,
+ 310, 341, 342, 373, 374, 405, 406, 437, 438, 469, 470, 501, 502, 533,
+ 534, 565, 566, 597, 598, 629, 630, 661, 662, 693, 694, 725, 726, 757,
+ 758, 789, 790, 821, 822, 853, 854, 885, 886, 917, 918, 949, 950, 981,
+ 982, 1013, 22, 22, 23, 54, 55, 86, 87, 118, 119, 150, 151, 182,
+ 183, 214, 215, 246, 247, 278, 279, 310, 311, 342, 343, 374, 375, 406,
+ 407, 438, 439, 470, 471, 502, 503, 534, 535, 566, 567, 598, 599, 630,
+ 631, 662, 663, 694, 695, 726, 727, 758, 759, 790, 791, 822, 823, 854,
+ 855, 886, 887, 918, 919, 950, 951, 982, 983, 1014, 23, 23, 24, 55,
+ 56, 87, 88, 119, 120, 151, 152, 183, 184, 215, 216, 247, 248, 279,
+ 280, 311, 312, 343, 344, 375, 376, 407, 408, 439, 440, 471, 472, 503,
+ 504, 535, 536, 567, 568, 599, 600, 631, 632, 663, 664, 695, 696, 727,
+ 728, 759, 760, 791, 792, 823, 824, 855, 856, 887, 888, 919, 920, 951,
+ 952, 983, 984, 1015, 24, 24, 25, 56, 57, 88, 89, 120, 121, 152,
+ 153, 184, 185, 216, 217, 248, 249, 280, 281, 312, 313, 344, 345, 376,
+ 377, 408, 409, 440, 441, 472, 473, 504, 505, 536, 537, 568, 569, 600,
+ 601, 632, 633, 664, 665, 696, 697, 728, 729, 760, 761, 792, 793, 824,
+ 825, 856, 857, 888, 889, 920, 921, 952, 953, 984, 985, 1016, 25, 25,
+ 26, 57, 58, 89, 90, 121, 122, 153, 154, 185, 186, 217, 218, 249,
+ 250, 281, 282, 313, 314, 345, 346, 377, 378, 409, 410, 441, 442, 473,
+ 474, 505, 506, 537, 538, 569, 570, 601, 602, 633, 634, 665, 666, 697,
+ 698, 729, 730, 761, 762, 793, 794, 825, 826, 857, 858, 889, 890, 921,
+ 922, 953, 954, 985, 986, 1017, 26, 26, 27, 58, 59, 90, 91, 122,
+ 123, 154, 155, 186, 187, 218, 219, 250, 251, 282, 283, 314, 315, 346,
+ 347, 378, 379, 410, 411, 442, 443, 474, 475, 506, 507, 538, 539, 570,
+ 571, 602, 603, 634, 635, 666, 667, 698, 699, 730, 731, 762, 763, 794,
+ 795, 826, 827, 858, 859, 890, 891, 922, 923, 954, 955, 986, 987, 1018,
+ 27, 27, 28, 59, 60, 91, 92, 123, 124, 155, 156, 187, 188, 219,
+ 220, 251, 252, 283, 284, 315, 316, 347, 348, 379, 380, 411, 412, 443,
+ 444, 475, 476, 507, 508, 539, 540, 571, 572, 603, 604, 635, 636, 667,
+ 668, 699, 700, 731, 732, 763, 764, 795, 796, 827, 828, 859, 860, 891,
+ 892, 923, 924, 955, 956, 987, 988, 1019, 28, 28, 29, 60, 61, 92,
+ 93, 124, 125, 156, 157, 188, 189, 220, 221, 252, 253, 284, 285, 316,
+ 317, 348, 349, 380, 381, 412, 413, 444, 445, 476, 477, 508, 509, 540,
+ 541, 572, 573, 604, 605, 636, 637, 668, 669, 700, 701, 732, 733, 764,
+ 765, 796, 797, 828, 829, 860, 861, 892, 893, 924, 925, 956, 957, 988,
+ 989, 1020, 29, 29, 30, 61, 62, 93, 94, 125, 126, 157, 158, 189,
+ 190, 221, 222, 253, 254, 285, 286, 317, 318, 349, 350, 381, 382, 413,
+ 414, 445, 446, 477, 478, 509, 510, 541, 542, 573, 574, 605, 606, 637,
+ 638, 669, 670, 701, 702, 733, 734, 765, 766, 797, 798, 829, 830, 861,
+ 862, 893, 894, 925, 926, 957, 958, 989, 990, 1021, 30, 30, 31, 62,
+ 63, 94, 95, 126, 127, 158, 159, 190, 191, 222, 223, 254, 255, 286,
+ 287, 318, 319, 350, 351, 382, 383, 414, 415, 446, 447, 478, 479, 510,
+ 511, 542, 543, 574, 575, 606, 607, 638, 639, 670, 671, 702, 703, 734,
+ 735, 766, 767, 798, 799, 830, 831, 862, 863, 894, 895, 926, 927, 958,
+ 959, 990, 991, 1022, 0, 0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ mrow_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5,
+ 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
+ 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19,
+ 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26,
+ 27, 27, 28, 28, 29, 29, 30, 30, 0, 0, 1, 32, 2, 33,
+ 3, 34, 4, 35, 5, 36, 6, 37, 7, 38, 8, 39, 9, 40,
+ 10, 41, 11, 42, 12, 43, 13, 44, 14, 45, 15, 46, 16, 47,
+ 17, 48, 18, 49, 19, 50, 20, 51, 21, 52, 22, 53, 23, 54,
+ 24, 55, 25, 56, 26, 57, 27, 58, 28, 59, 29, 60, 30, 61,
+ 31, 62, 32, 32, 33, 64, 34, 65, 35, 66, 36, 67, 37, 68,
+ 38, 69, 39, 70, 40, 71, 41, 72, 42, 73, 43, 74, 44, 75,
+ 45, 76, 46, 77, 47, 78, 48, 79, 49, 80, 50, 81, 51, 82,
+ 52, 83, 53, 84, 54, 85, 55, 86, 56, 87, 57, 88, 58, 89,
+ 59, 90, 60, 91, 61, 92, 62, 93, 63, 94, 64, 64, 65, 96,
+ 66, 97, 67, 98, 68, 99, 69, 100, 70, 101, 71, 102, 72, 103,
+ 73, 104, 74, 105, 75, 106, 76, 107, 77, 108, 78, 109, 79, 110,
+ 80, 111, 81, 112, 82, 113, 83, 114, 84, 115, 85, 116, 86, 117,
+ 87, 118, 88, 119, 89, 120, 90, 121, 91, 122, 92, 123, 93, 124,
+ 94, 125, 95, 126, 96, 96, 97, 128, 98, 129, 99, 130, 100, 131,
+ 101, 132, 102, 133, 103, 134, 104, 135, 105, 136, 106, 137, 107, 138,
+ 108, 139, 109, 140, 110, 141, 111, 142, 112, 143, 113, 144, 114, 145,
+ 115, 146, 116, 147, 117, 148, 118, 149, 119, 150, 120, 151, 121, 152,
+ 122, 153, 123, 154, 124, 155, 125, 156, 126, 157, 127, 158, 128, 128,
+ 129, 160, 130, 161, 131, 162, 132, 163, 133, 164, 134, 165, 135, 166,
+ 136, 167, 137, 168, 138, 169, 139, 170, 140, 171, 141, 172, 142, 173,
+ 143, 174, 144, 175, 145, 176, 146, 177, 147, 178, 148, 179, 149, 180,
+ 150, 181, 151, 182, 152, 183, 153, 184, 154, 185, 155, 186, 156, 187,
+ 157, 188, 158, 189, 159, 190, 160, 160, 161, 192, 162, 193, 163, 194,
+ 164, 195, 165, 196, 166, 197, 167, 198, 168, 199, 169, 200, 170, 201,
+ 171, 202, 172, 203, 173, 204, 174, 205, 175, 206, 176, 207, 177, 208,
+ 178, 209, 179, 210, 180, 211, 181, 212, 182, 213, 183, 214, 184, 215,
+ 185, 216, 186, 217, 187, 218, 188, 219, 189, 220, 190, 221, 191, 222,
+ 192, 192, 193, 224, 194, 225, 195, 226, 196, 227, 197, 228, 198, 229,
+ 199, 230, 200, 231, 201, 232, 202, 233, 203, 234, 204, 235, 205, 236,
+ 206, 237, 207, 238, 208, 239, 209, 240, 210, 241, 211, 242, 212, 243,
+ 213, 244, 214, 245, 215, 246, 216, 247, 217, 248, 218, 249, 219, 250,
+ 220, 251, 221, 252, 222, 253, 223, 254, 224, 224, 225, 256, 226, 257,
+ 227, 258, 228, 259, 229, 260, 230, 261, 231, 262, 232, 263, 233, 264,
+ 234, 265, 235, 266, 236, 267, 237, 268, 238, 269, 239, 270, 240, 271,
+ 241, 272, 242, 273, 243, 274, 244, 275, 245, 276, 246, 277, 247, 278,
+ 248, 279, 249, 280, 250, 281, 251, 282, 252, 283, 253, 284, 254, 285,
+ 255, 286, 256, 256, 257, 288, 258, 289, 259, 290, 260, 291, 261, 292,
+ 262, 293, 263, 294, 264, 295, 265, 296, 266, 297, 267, 298, 268, 299,
+ 269, 300, 270, 301, 271, 302, 272, 303, 273, 304, 274, 305, 275, 306,
+ 276, 307, 277, 308, 278, 309, 279, 310, 280, 311, 281, 312, 282, 313,
+ 283, 314, 284, 315, 285, 316, 286, 317, 287, 318, 288, 288, 289, 320,
+ 290, 321, 291, 322, 292, 323, 293, 324, 294, 325, 295, 326, 296, 327,
+ 297, 328, 298, 329, 299, 330, 300, 331, 301, 332, 302, 333, 303, 334,
+ 304, 335, 305, 336, 306, 337, 307, 338, 308, 339, 309, 340, 310, 341,
+ 311, 342, 312, 343, 313, 344, 314, 345, 315, 346, 316, 347, 317, 348,
+ 318, 349, 319, 350, 320, 320, 321, 352, 322, 353, 323, 354, 324, 355,
+ 325, 356, 326, 357, 327, 358, 328, 359, 329, 360, 330, 361, 331, 362,
+ 332, 363, 333, 364, 334, 365, 335, 366, 336, 367, 337, 368, 338, 369,
+ 339, 370, 340, 371, 341, 372, 342, 373, 343, 374, 344, 375, 345, 376,
+ 346, 377, 347, 378, 348, 379, 349, 380, 350, 381, 351, 382, 352, 352,
+ 353, 384, 354, 385, 355, 386, 356, 387, 357, 388, 358, 389, 359, 390,
+ 360, 391, 361, 392, 362, 393, 363, 394, 364, 395, 365, 396, 366, 397,
+ 367, 398, 368, 399, 369, 400, 370, 401, 371, 402, 372, 403, 373, 404,
+ 374, 405, 375, 406, 376, 407, 377, 408, 378, 409, 379, 410, 380, 411,
+ 381, 412, 382, 413, 383, 414, 384, 384, 385, 416, 386, 417, 387, 418,
+ 388, 419, 389, 420, 390, 421, 391, 422, 392, 423, 393, 424, 394, 425,
+ 395, 426, 396, 427, 397, 428, 398, 429, 399, 430, 400, 431, 401, 432,
+ 402, 433, 403, 434, 404, 435, 405, 436, 406, 437, 407, 438, 408, 439,
+ 409, 440, 410, 441, 411, 442, 412, 443, 413, 444, 414, 445, 415, 446,
+ 416, 416, 417, 448, 418, 449, 419, 450, 420, 451, 421, 452, 422, 453,
+ 423, 454, 424, 455, 425, 456, 426, 457, 427, 458, 428, 459, 429, 460,
+ 430, 461, 431, 462, 432, 463, 433, 464, 434, 465, 435, 466, 436, 467,
+ 437, 468, 438, 469, 439, 470, 440, 471, 441, 472, 442, 473, 443, 474,
+ 444, 475, 445, 476, 446, 477, 447, 478, 448, 448, 449, 480, 450, 481,
+ 451, 482, 452, 483, 453, 484, 454, 485, 455, 486, 456, 487, 457, 488,
+ 458, 489, 459, 490, 460, 491, 461, 492, 462, 493, 463, 494, 464, 495,
+ 465, 496, 466, 497, 467, 498, 468, 499, 469, 500, 470, 501, 471, 502,
+ 472, 503, 473, 504, 474, 505, 475, 506, 476, 507, 477, 508, 478, 509,
+ 479, 510, 480, 480, 481, 512, 482, 513, 483, 514, 484, 515, 485, 516,
+ 486, 517, 487, 518, 488, 519, 489, 520, 490, 521, 491, 522, 492, 523,
+ 493, 524, 494, 525, 495, 526, 496, 527, 497, 528, 498, 529, 499, 530,
+ 500, 531, 501, 532, 502, 533, 503, 534, 504, 535, 505, 536, 506, 537,
+ 507, 538, 508, 539, 509, 540, 510, 541, 511, 542, 512, 512, 513, 544,
+ 514, 545, 515, 546, 516, 547, 517, 548, 518, 549, 519, 550, 520, 551,
+ 521, 552, 522, 553, 523, 554, 524, 555, 525, 556, 526, 557, 527, 558,
+ 528, 559, 529, 560, 530, 561, 531, 562, 532, 563, 533, 564, 534, 565,
+ 535, 566, 536, 567, 537, 568, 538, 569, 539, 570, 540, 571, 541, 572,
+ 542, 573, 543, 574, 544, 544, 545, 576, 546, 577, 547, 578, 548, 579,
+ 549, 580, 550, 581, 551, 582, 552, 583, 553, 584, 554, 585, 555, 586,
+ 556, 587, 557, 588, 558, 589, 559, 590, 560, 591, 561, 592, 562, 593,
+ 563, 594, 564, 595, 565, 596, 566, 597, 567, 598, 568, 599, 569, 600,
+ 570, 601, 571, 602, 572, 603, 573, 604, 574, 605, 575, 606, 576, 576,
+ 577, 608, 578, 609, 579, 610, 580, 611, 581, 612, 582, 613, 583, 614,
+ 584, 615, 585, 616, 586, 617, 587, 618, 588, 619, 589, 620, 590, 621,
+ 591, 622, 592, 623, 593, 624, 594, 625, 595, 626, 596, 627, 597, 628,
+ 598, 629, 599, 630, 600, 631, 601, 632, 602, 633, 603, 634, 604, 635,
+ 605, 636, 606, 637, 607, 638, 608, 608, 609, 640, 610, 641, 611, 642,
+ 612, 643, 613, 644, 614, 645, 615, 646, 616, 647, 617, 648, 618, 649,
+ 619, 650, 620, 651, 621, 652, 622, 653, 623, 654, 624, 655, 625, 656,
+ 626, 657, 627, 658, 628, 659, 629, 660, 630, 661, 631, 662, 632, 663,
+ 633, 664, 634, 665, 635, 666, 636, 667, 637, 668, 638, 669, 639, 670,
+ 640, 640, 641, 672, 642, 673, 643, 674, 644, 675, 645, 676, 646, 677,
+ 647, 678, 648, 679, 649, 680, 650, 681, 651, 682, 652, 683, 653, 684,
+ 654, 685, 655, 686, 656, 687, 657, 688, 658, 689, 659, 690, 660, 691,
+ 661, 692, 662, 693, 663, 694, 664, 695, 665, 696, 666, 697, 667, 698,
+ 668, 699, 669, 700, 670, 701, 671, 702, 672, 672, 673, 704, 674, 705,
+ 675, 706, 676, 707, 677, 708, 678, 709, 679, 710, 680, 711, 681, 712,
+ 682, 713, 683, 714, 684, 715, 685, 716, 686, 717, 687, 718, 688, 719,
+ 689, 720, 690, 721, 691, 722, 692, 723, 693, 724, 694, 725, 695, 726,
+ 696, 727, 697, 728, 698, 729, 699, 730, 700, 731, 701, 732, 702, 733,
+ 703, 734, 704, 704, 705, 736, 706, 737, 707, 738, 708, 739, 709, 740,
+ 710, 741, 711, 742, 712, 743, 713, 744, 714, 745, 715, 746, 716, 747,
+ 717, 748, 718, 749, 719, 750, 720, 751, 721, 752, 722, 753, 723, 754,
+ 724, 755, 725, 756, 726, 757, 727, 758, 728, 759, 729, 760, 730, 761,
+ 731, 762, 732, 763, 733, 764, 734, 765, 735, 766, 736, 736, 737, 768,
+ 738, 769, 739, 770, 740, 771, 741, 772, 742, 773, 743, 774, 744, 775,
+ 745, 776, 746, 777, 747, 778, 748, 779, 749, 780, 750, 781, 751, 782,
+ 752, 783, 753, 784, 754, 785, 755, 786, 756, 787, 757, 788, 758, 789,
+ 759, 790, 760, 791, 761, 792, 762, 793, 763, 794, 764, 795, 765, 796,
+ 766, 797, 767, 798, 768, 768, 769, 800, 770, 801, 771, 802, 772, 803,
+ 773, 804, 774, 805, 775, 806, 776, 807, 777, 808, 778, 809, 779, 810,
+ 780, 811, 781, 812, 782, 813, 783, 814, 784, 815, 785, 816, 786, 817,
+ 787, 818, 788, 819, 789, 820, 790, 821, 791, 822, 792, 823, 793, 824,
+ 794, 825, 795, 826, 796, 827, 797, 828, 798, 829, 799, 830, 800, 800,
+ 801, 832, 802, 833, 803, 834, 804, 835, 805, 836, 806, 837, 807, 838,
+ 808, 839, 809, 840, 810, 841, 811, 842, 812, 843, 813, 844, 814, 845,
+ 815, 846, 816, 847, 817, 848, 818, 849, 819, 850, 820, 851, 821, 852,
+ 822, 853, 823, 854, 824, 855, 825, 856, 826, 857, 827, 858, 828, 859,
+ 829, 860, 830, 861, 831, 862, 832, 832, 833, 864, 834, 865, 835, 866,
+ 836, 867, 837, 868, 838, 869, 839, 870, 840, 871, 841, 872, 842, 873,
+ 843, 874, 844, 875, 845, 876, 846, 877, 847, 878, 848, 879, 849, 880,
+ 850, 881, 851, 882, 852, 883, 853, 884, 854, 885, 855, 886, 856, 887,
+ 857, 888, 858, 889, 859, 890, 860, 891, 861, 892, 862, 893, 863, 894,
+ 864, 864, 865, 896, 866, 897, 867, 898, 868, 899, 869, 900, 870, 901,
+ 871, 902, 872, 903, 873, 904, 874, 905, 875, 906, 876, 907, 877, 908,
+ 878, 909, 879, 910, 880, 911, 881, 912, 882, 913, 883, 914, 884, 915,
+ 885, 916, 886, 917, 887, 918, 888, 919, 889, 920, 890, 921, 891, 922,
+ 892, 923, 893, 924, 894, 925, 895, 926, 896, 896, 897, 928, 898, 929,
+ 899, 930, 900, 931, 901, 932, 902, 933, 903, 934, 904, 935, 905, 936,
+ 906, 937, 907, 938, 908, 939, 909, 940, 910, 941, 911, 942, 912, 943,
+ 913, 944, 914, 945, 915, 946, 916, 947, 917, 948, 918, 949, 919, 950,
+ 920, 951, 921, 952, 922, 953, 923, 954, 924, 955, 925, 956, 926, 957,
+ 927, 958, 928, 928, 929, 960, 930, 961, 931, 962, 932, 963, 933, 964,
+ 934, 965, 935, 966, 936, 967, 937, 968, 938, 969, 939, 970, 940, 971,
+ 941, 972, 942, 973, 943, 974, 944, 975, 945, 976, 946, 977, 947, 978,
+ 948, 979, 949, 980, 950, 981, 951, 982, 952, 983, 953, 984, 954, 985,
+ 955, 986, 956, 987, 957, 988, 958, 989, 959, 990, 960, 960, 961, 992,
+ 962, 993, 963, 994, 964, 995, 965, 996, 966, 997, 967, 998, 968, 999,
+ 969, 1000, 970, 1001, 971, 1002, 972, 1003, 973, 1004, 974, 1005, 975, 1006,
+ 976, 1007, 977, 1008, 978, 1009, 979, 1010, 980, 1011, 981, 1012, 982, 1013,
+ 983, 1014, 984, 1015, 985, 1016, 986, 1017, 987, 1018, 988, 1019, 989, 1020,
+ 990, 1021, 991, 1022, 0, 0,
+};
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t,
+ default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 32, 0, 32, 32, 1, 32, 33, 1, 64, 64,
+ 33, 64, 2, 33, 96, 96, 34, 2, 65, 96, 34, 65, 128, 128,
+ 97, 128, 3, 34, 66, 97, 35, 3, 35, 66, 98, 129, 129, 160,
+ 160, 161, 4, 35, 67, 98, 192, 192, 36, 4, 130, 161, 161, 192,
+ 36, 67, 99, 130, 5, 36, 68, 99, 193, 224, 162, 193, 224, 225,
+ 131, 162, 37, 68, 100, 131, 37, 5, 194, 225, 225, 256, 256, 257,
+ 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 38, 6, 195, 226,
+ 257, 288, 101, 132, 288, 289, 38, 69, 164, 195, 133, 164, 258, 289,
+ 227, 258, 196, 227, 7, 38, 289, 320, 70, 101, 320, 321, 39, 7,
+ 165, 196, 39, 70, 102, 133, 290, 321, 259, 290, 228, 259, 321, 352,
+ 352, 353, 197, 228, 134, 165, 71, 102, 8, 39, 322, 353, 291, 322,
+ 260, 291, 103, 134, 353, 384, 166, 197, 229, 260, 40, 71, 40, 8,
+ 384, 385, 135, 166, 354, 385, 323, 354, 198, 229, 292, 323, 72, 103,
+ 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, 230, 261, 355, 386,
+ 416, 417, 293, 324, 324, 355, 41, 9, 41, 72, 386, 417, 199, 230,
+ 136, 167, 417, 448, 262, 293, 356, 387, 73, 104, 387, 418, 231, 262,
+ 10, 41, 168, 199, 325, 356, 418, 449, 105, 136, 448, 449, 42, 73,
+ 294, 325, 200, 231, 42, 10, 357, 388, 137, 168, 263, 294, 388, 419,
+ 74, 105, 419, 450, 449, 480, 326, 357, 232, 263, 295, 326, 169, 200,
+ 11, 42, 106, 137, 480, 481, 450, 481, 358, 389, 264, 295, 201, 232,
+ 138, 169, 389, 420, 43, 74, 420, 451, 327, 358, 43, 11, 481, 512,
+ 233, 264, 451, 482, 296, 327, 75, 106, 170, 201, 482, 513, 512, 513,
+ 390, 421, 359, 390, 421, 452, 107, 138, 12, 43, 202, 233, 452, 483,
+ 265, 296, 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265,
+ 297, 328, 422, 453, 44, 12, 391, 422, 171, 202, 76, 107, 514, 545,
+ 453, 484, 544, 545, 266, 297, 203, 234, 108, 139, 329, 360, 298, 329,
+ 140, 171, 515, 546, 13, 44, 423, 454, 235, 266, 545, 576, 454, 485,
+ 45, 76, 172, 203, 330, 361, 576, 577, 45, 13, 267, 298, 546, 577,
+ 77, 108, 204, 235, 455, 486, 577, 608, 299, 330, 109, 140, 547, 578,
+ 14, 45, 46, 14, 141, 172, 578, 609, 331, 362, 46, 77, 173, 204,
+ 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, 142, 173,
+ 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, 16, 47, 111, 142,
+ 48, 79, 143, 174, 80, 111, 175, 206, 17, 48, 49, 17, 207, 238,
+ 49, 80, 81, 112, 18, 18, 18, 49, 50, 81, 82, 113, 19, 50,
+ 51, 82, 83, 114, 608, 609, 484, 515, 360, 391, 236, 267, 112, 143,
+ 51, 19, 640, 640, 609, 640, 516, 547, 485, 516, 392, 423, 361, 392,
+ 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, 52, 20, 672, 672,
+ 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, 424, 455, 393, 424,
+ 362, 393, 300, 331, 269, 300, 238, 269, 176, 207, 145, 176, 114, 145,
+ 52, 83, 21, 52, 53, 21, 704, 704, 673, 704, 642, 673, 611, 642,
+ 580, 611, 549, 580, 518, 549, 487, 518, 456, 487, 425, 456, 394, 425,
+ 363, 394, 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208,
+ 146, 177, 115, 146, 84, 115, 53, 84, 22, 53, 54, 22, 705, 736,
+ 674, 705, 643, 674, 581, 612, 550, 581, 519, 550, 457, 488, 426, 457,
+ 395, 426, 333, 364, 302, 333, 271, 302, 209, 240, 178, 209, 147, 178,
+ 85, 116, 54, 85, 23, 54, 706, 737, 675, 706, 582, 613, 551, 582,
+ 458, 489, 427, 458, 334, 365, 303, 334, 210, 241, 179, 210, 86, 117,
+ 55, 86, 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, 87, 118,
+ 736, 737, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, 55, 23,
+ 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, 489, 520, 396, 427,
+ 365, 396, 272, 303, 241, 272, 148, 179, 117, 148, 24, 55, 56, 24,
+ 800, 800, 769, 800, 738, 769, 676, 707, 645, 676, 614, 645, 552, 583,
+ 521, 552, 490, 521, 428, 459, 397, 428, 366, 397, 304, 335, 273, 304,
+ 242, 273, 180, 211, 149, 180, 118, 149, 56, 87, 25, 56, 57, 25,
+ 832, 832, 801, 832, 770, 801, 739, 770, 708, 739, 677, 708, 646, 677,
+ 615, 646, 584, 615, 553, 584, 522, 553, 491, 522, 460, 491, 429, 460,
+ 398, 429, 367, 398, 336, 367, 305, 336, 274, 305, 243, 274, 212, 243,
+ 181, 212, 150, 181, 119, 150, 88, 119, 57, 88, 26, 57, 58, 26,
+ 833, 864, 802, 833, 771, 802, 709, 740, 678, 709, 647, 678, 585, 616,
+ 554, 585, 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337,
+ 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, 27, 58,
+ 834, 865, 803, 834, 710, 741, 679, 710, 586, 617, 555, 586, 462, 493,
+ 431, 462, 338, 369, 307, 338, 214, 245, 183, 214, 90, 121, 59, 90,
+ 835, 866, 711, 742, 587, 618, 463, 494, 339, 370, 215, 246, 91, 122,
+ 864, 865, 740, 771, 616, 647, 492, 523, 368, 399, 244, 275, 120, 151,
+ 59, 27, 896, 896, 865, 896, 772, 803, 741, 772, 648, 679, 617, 648,
+ 524, 555, 493, 524, 400, 431, 369, 400, 276, 307, 245, 276, 152, 183,
+ 121, 152, 28, 59, 60, 28, 928, 928, 897, 928, 866, 897, 804, 835,
+ 773, 804, 742, 773, 680, 711, 649, 680, 618, 649, 556, 587, 525, 556,
+ 494, 525, 432, 463, 401, 432, 370, 401, 308, 339, 277, 308, 246, 277,
+ 184, 215, 153, 184, 122, 153, 60, 91, 29, 60, 61, 29, 960, 960,
+ 929, 960, 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774,
+ 712, 743, 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, 526, 557,
+ 495, 526, 464, 495, 433, 464, 402, 433, 371, 402, 340, 371, 309, 340,
+ 278, 309, 247, 278, 216, 247, 185, 216, 154, 185, 123, 154, 92, 123,
+ 61, 92, 30, 61, 62, 30, 961, 992, 930, 961, 899, 930, 837, 868,
+ 806, 837, 775, 806, 713, 744, 682, 713, 651, 682, 589, 620, 558, 589,
+ 527, 558, 465, 496, 434, 465, 403, 434, 341, 372, 310, 341, 279, 310,
+ 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, 31, 62, 962, 993,
+ 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, 590, 621, 559, 590,
+ 466, 497, 435, 466, 342, 373, 311, 342, 218, 249, 187, 218, 94, 125,
+ 63, 94, 963, 994, 839, 870, 715, 746, 591, 622, 467, 498, 343, 374,
+ 219, 250, 95, 126, 868, 899, 744, 775, 620, 651, 496, 527, 372, 403,
+ 248, 279, 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683,
+ 621, 652, 528, 559, 497, 528, 404, 435, 373, 404, 280, 311, 249, 280,
+ 156, 187, 125, 156, 932, 963, 901, 932, 870, 901, 808, 839, 777, 808,
+ 746, 777, 684, 715, 653, 684, 622, 653, 560, 591, 529, 560, 498, 529,
+ 436, 467, 405, 436, 374, 405, 312, 343, 281, 312, 250, 281, 188, 219,
+ 157, 188, 126, 157, 964, 995, 933, 964, 902, 933, 871, 902, 840, 871,
+ 809, 840, 778, 809, 747, 778, 716, 747, 685, 716, 654, 685, 623, 654,
+ 592, 623, 561, 592, 530, 561, 499, 530, 468, 499, 437, 468, 406, 437,
+ 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, 220, 251, 189, 220,
+ 158, 189, 127, 158, 965, 996, 934, 965, 903, 934, 841, 872, 810, 841,
+ 779, 810, 717, 748, 686, 717, 655, 686, 593, 624, 562, 593, 531, 562,
+ 469, 500, 438, 469, 407, 438, 345, 376, 314, 345, 283, 314, 221, 252,
+ 190, 221, 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749,
+ 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, 315, 346,
+ 222, 253, 191, 222, 967, 998, 843, 874, 719, 750, 595, 626, 471, 502,
+ 347, 378, 223, 254, 872, 903, 748, 779, 624, 655, 500, 531, 376, 407,
+ 252, 283, 904, 935, 873, 904, 780, 811, 749, 780, 656, 687, 625, 656,
+ 532, 563, 501, 532, 408, 439, 377, 408, 284, 315, 253, 284, 936, 967,
+ 905, 936, 874, 905, 812, 843, 781, 812, 750, 781, 688, 719, 657, 688,
+ 626, 657, 564, 595, 533, 564, 502, 533, 440, 471, 409, 440, 378, 409,
+ 316, 347, 285, 316, 254, 285, 968, 999, 937, 968, 906, 937, 875, 906,
+ 844, 875, 813, 844, 782, 813, 751, 782, 720, 751, 689, 720, 658, 689,
+ 627, 658, 596, 627, 565, 596, 534, 565, 503, 534, 472, 503, 441, 472,
+ 410, 441, 379, 410, 348, 379, 317, 348, 286, 317, 255, 286, 969, 1000,
+ 938, 969, 907, 938, 845, 876, 814, 845, 783, 814, 721, 752, 690, 721,
+ 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, 411, 442,
+ 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, 846, 877, 815, 846,
+ 722, 753, 691, 722, 598, 629, 567, 598, 474, 505, 443, 474, 350, 381,
+ 319, 350, 971, 1002, 847, 878, 723, 754, 599, 630, 475, 506, 351, 382,
+ 876, 907, 752, 783, 628, 659, 504, 535, 380, 411, 908, 939, 877, 908,
+ 784, 815, 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443,
+ 381, 412, 940, 971, 909, 940, 878, 909, 816, 847, 785, 816, 754, 785,
+ 692, 723, 661, 692, 630, 661, 568, 599, 537, 568, 506, 537, 444, 475,
+ 413, 444, 382, 413, 972, 1003, 941, 972, 910, 941, 879, 910, 848, 879,
+ 817, 848, 786, 817, 755, 786, 724, 755, 693, 724, 662, 693, 631, 662,
+ 600, 631, 569, 600, 538, 569, 507, 538, 476, 507, 445, 476, 414, 445,
+ 383, 414, 973, 1004, 942, 973, 911, 942, 849, 880, 818, 849, 787, 818,
+ 725, 756, 694, 725, 663, 694, 601, 632, 570, 601, 539, 570, 477, 508,
+ 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, 819, 850, 726, 757,
+ 695, 726, 602, 633, 571, 602, 478, 509, 447, 478, 975, 1006, 851, 882,
+ 727, 758, 603, 634, 479, 510, 880, 911, 756, 787, 632, 663, 508, 539,
+ 912, 943, 881, 912, 788, 819, 757, 788, 664, 695, 633, 664, 540, 571,
+ 509, 540, 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789,
+ 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, 976, 1007,
+ 945, 976, 914, 945, 883, 914, 852, 883, 821, 852, 790, 821, 759, 790,
+ 728, 759, 697, 728, 666, 697, 635, 666, 604, 635, 573, 604, 542, 573,
+ 511, 542, 977, 1008, 946, 977, 915, 946, 853, 884, 822, 853, 791, 822,
+ 729, 760, 698, 729, 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009,
+ 947, 978, 854, 885, 823, 854, 730, 761, 699, 730, 606, 637, 575, 606,
+ 979, 1010, 855, 886, 731, 762, 607, 638, 884, 915, 760, 791, 636, 667,
+ 916, 947, 885, 916, 792, 823, 761, 792, 668, 699, 637, 668, 948, 979,
+ 917, 948, 886, 917, 824, 855, 793, 824, 762, 793, 700, 731, 669, 700,
+ 638, 669, 980, 1011, 949, 980, 918, 949, 887, 918, 856, 887, 825, 856,
+ 794, 825, 763, 794, 732, 763, 701, 732, 670, 701, 639, 670, 981, 1012,
+ 950, 981, 919, 950, 857, 888, 826, 857, 795, 826, 733, 764, 702, 733,
+ 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, 703, 734,
+ 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, 920, 951, 889, 920,
+ 796, 827, 765, 796, 952, 983, 921, 952, 890, 921, 828, 859, 797, 828,
+ 766, 797, 984, 1015, 953, 984, 922, 953, 891, 922, 860, 891, 829, 860,
+ 798, 829, 767, 798, 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861,
+ 799, 830, 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894,
+ 892, 923, 924, 955, 893, 924, 956, 987, 925, 956, 894, 925, 988, 1019,
+ 957, 988, 926, 957, 895, 926, 989, 1020, 958, 989, 927, 958, 990, 1021,
+ 959, 990, 991, 1022, 0, 0,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t,
+ v2_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 0, 0, 1, 32, 1, 1, 32, 32, 2, 33,
+ 33, 64, 34, 65, 2, 2, 64, 64, 3, 34, 65, 96, 35, 66,
+ 66, 97, 3, 3, 96, 96, 4, 35, 97, 128, 67, 98, 36, 67,
+ 98, 129, 4, 4, 68, 99, 99, 130, 128, 128, 5, 36, 129, 160,
+ 37, 68, 130, 161, 100, 131, 69, 100, 131, 162, 5, 5, 160, 160,
+ 6, 37, 161, 192, 38, 69, 162, 193, 101, 132, 132, 163, 70, 101,
+ 163, 194, 6, 6, 192, 192, 7, 38, 133, 164, 193, 224, 102, 133,
+ 164, 195, 39, 70, 194, 225, 71, 102, 195, 226, 134, 165, 165, 196,
+ 7, 7, 224, 224, 8, 39, 103, 134, 196, 227, 225, 256, 40, 71,
+ 226, 257, 166, 197, 72, 103, 227, 258, 135, 166, 197, 228, 104, 135,
+ 228, 259, 8, 8, 256, 256, 9, 40, 257, 288, 41, 72, 167, 198,
+ 198, 229, 258, 289, 136, 167, 229, 260, 73, 104, 259, 290, 105, 136,
+ 260, 291, 199, 230, 9, 9, 168, 199, 230, 261, 288, 288, 10, 41,
+ 289, 320, 42, 73, 290, 321, 137, 168, 261, 292, 74, 105, 291, 322,
+ 200, 231, 231, 262, 106, 137, 292, 323, 169, 200, 262, 293, 10, 10,
+ 320, 320, 11, 42, 321, 352, 43, 74, 138, 169, 293, 324, 322, 353,
+ 232, 263, 75, 106, 201, 232, 263, 294, 323, 354, 170, 201, 294, 325,
+ 107, 138, 324, 355, 11, 11, 352, 352, 12, 43, 233, 264, 264, 295,
+ 353, 384, 139, 170, 325, 356, 44, 75, 354, 385, 202, 233, 295, 326,
+ 76, 107, 355, 386, 171, 202, 326, 357, 108, 139, 356, 387, 265, 296,
+ 234, 265, 296, 327, 12, 12, 140, 171, 357, 388, 384, 384, 13, 44,
+ 203, 234, 327, 358, 385, 416, 45, 76, 386, 417, 77, 108, 387, 418,
+ 172, 203, 358, 389, 266, 297, 297, 328, 109, 140, 235, 266, 328, 359,
+ 388, 419, 204, 235, 359, 390, 141, 172, 389, 420, 13, 13, 416, 416,
+ 14, 45, 417, 448, 46, 77, 298, 329, 418, 449, 267, 298, 329, 360,
+ 78, 109, 173, 204, 390, 421, 419, 450, 236, 267, 360, 391, 110, 141,
+ 420, 451, 205, 236, 391, 422, 142, 173, 299, 330, 330, 361, 421, 452,
+ 14, 14, 268, 299, 361, 392, 448, 448, 15, 46, 449, 480, 47, 78,
+ 450, 481, 174, 205, 422, 453, 237, 268, 392, 423, 79, 110, 451, 482,
+ 111, 142, 452, 483, 331, 362, 300, 331, 362, 393, 206, 237, 423, 454,
+ 143, 174, 269, 300, 393, 424, 453, 484, 480, 480, 481, 512, 238, 269,
+ 424, 455, 482, 513, 175, 206, 454, 485, 332, 363, 363, 394, 483, 514,
+ 301, 332, 394, 425, 484, 515, 207, 238, 455, 486, 270, 301, 425, 456,
+ 485, 516, 364, 395, 239, 270, 456, 487, 512, 512, 333, 364, 395, 426,
+ 513, 544, 486, 517, 514, 545, 302, 333, 426, 457, 515, 546, 487, 518,
+ 516, 547, 271, 302, 457, 488, 365, 396, 396, 427, 517, 548, 334, 365,
+ 427, 458, 488, 519, 544, 544, 303, 334, 458, 489, 518, 549, 545, 576,
+ 546, 577, 547, 578, 489, 520, 397, 428, 519, 550, 366, 397, 428, 459,
+ 548, 579, 335, 366, 459, 490, 549, 580, 520, 551, 490, 521, 550, 581,
+ 576, 576, 577, 608, 398, 429, 429, 460, 578, 609, 367, 398, 460, 491,
+ 521, 552, 579, 610, 551, 582, 491, 522, 580, 611, 581, 612, 552, 583,
+ 522, 553, 430, 461, 399, 430, 461, 492, 582, 613, 492, 523, 608, 608,
+ 609, 640, 610, 641, 553, 584, 611, 642, 523, 554, 583, 614, 612, 643,
+ 431, 462, 462, 493, 554, 585, 493, 524, 584, 615, 613, 644, 524, 555,
+ 614, 645, 640, 640, 585, 616, 641, 672, 555, 586, 642, 673, 615, 646,
+ 463, 494, 643, 674, 494, 525, 644, 675, 525, 556, 586, 617, 616, 647,
+ 645, 676, 556, 587, 646, 677, 495, 526, 617, 648, 587, 618, 672, 672,
+ 526, 557, 673, 704, 674, 705, 647, 678, 557, 588, 675, 706, 618, 649,
+ 676, 707, 588, 619, 648, 679, 677, 708, 527, 558, 558, 589, 678, 709,
+ 619, 650, 649, 680, 704, 704, 589, 620, 705, 736, 679, 710, 706, 737,
+ 707, 738, 650, 681, 620, 651, 708, 739, 680, 711, 559, 590, 709, 740,
+ 590, 621, 651, 682, 681, 712, 710, 741, 621, 652, 736, 736, 737, 768,
+ 711, 742, 738, 769, 682, 713, 652, 683, 739, 770, 591, 622, 740, 771,
+ 712, 743, 622, 653, 741, 772, 683, 714, 653, 684, 713, 744, 742, 773,
+ 623, 654, 743, 774, 768, 768, 769, 800, 684, 715, 714, 745, 770, 801,
+ 771, 802, 654, 685, 744, 775, 772, 803, 715, 746, 773, 804, 685, 716,
+ 745, 776, 774, 805, 655, 686, 716, 747, 775, 806, 746, 777, 800, 800,
+ 801, 832, 686, 717, 802, 833, 803, 834, 776, 807, 804, 835, 747, 778,
+ 717, 748, 805, 836, 777, 808, 687, 718, 806, 837, 748, 779, 718, 749,
+ 778, 809, 807, 838, 832, 832, 833, 864, 834, 865, 835, 866, 808, 839,
+ 749, 780, 836, 867, 779, 810, 719, 750, 837, 868, 809, 840, 838, 869,
+ 780, 811, 750, 781, 810, 841, 839, 870, 864, 864, 865, 896, 866, 897,
+ 840, 871, 867, 898, 781, 812, 811, 842, 868, 899, 751, 782, 869, 900,
+ 841, 872, 812, 843, 870, 901, 782, 813, 842, 873, 871, 902, 896, 896,
+ 897, 928, 813, 844, 898, 929, 872, 903, 783, 814, 843, 874, 899, 930,
+ 900, 931, 873, 904, 901, 932, 814, 845, 844, 875, 902, 933, 874, 905,
+ 903, 934, 845, 876, 928, 928, 815, 846, 929, 960, 930, 961, 875, 906,
+ 904, 935, 931, 962, 932, 963, 905, 936, 846, 877, 933, 964, 876, 907,
+ 934, 965, 906, 937, 935, 966, 877, 908, 847, 878, 960, 960, 907, 938,
+ 961, 992, 936, 967, 962, 993, 963, 994, 964, 995, 878, 909, 937, 968,
+ 908, 939, 965, 996, 966, 997, 938, 969, 879, 910, 909, 940, 967, 998,
+ 939, 970, 968, 999, 910, 941, 969, 1000, 940, 971, 970, 1001, 911, 942,
+ 941, 972, 971, 1002, 942, 973, 972, 1003, 943, 974, 973, 1004, 974, 1005,
+ 975, 1006, 15, 15, 16, 47, 48, 79, 80, 111, 112, 143, 144, 175,
+ 16, 16, 17, 48, 176, 207, 49, 80, 81, 112, 113, 144, 208, 239,
+ 145, 176, 240, 271, 17, 17, 18, 49, 177, 208, 50, 81, 82, 113,
+ 272, 303, 209, 240, 114, 145, 146, 177, 241, 272, 304, 335, 178, 209,
+ 18, 18, 19, 50, 51, 82, 83, 114, 273, 304, 210, 241, 115, 146,
+ 336, 367, 147, 178, 242, 273, 305, 336, 179, 210, 19, 19, 368, 399,
+ 20, 51, 52, 83, 274, 305, 84, 115, 211, 242, 337, 368, 116, 147,
+ 306, 337, 148, 179, 243, 274, 400, 431, 369, 400, 180, 211, 20, 20,
+ 21, 52, 275, 306, 53, 84, 338, 369, 212, 243, 85, 116, 432, 463,
+ 117, 148, 401, 432, 307, 338, 244, 275, 149, 180, 370, 401, 181, 212,
+ 276, 307, 464, 495, 339, 370, 21, 21, 22, 53, 433, 464, 54, 85,
+ 213, 244, 86, 117, 402, 433, 118, 149, 308, 339, 245, 276, 371, 402,
+ 150, 181, 496, 527, 465, 496, 182, 213, 434, 465, 340, 371, 277, 308,
+ 22, 22, 23, 54, 403, 434, 55, 86, 214, 245, 87, 118, 309, 340,
+ 372, 403, 119, 150, 497, 528, 528, 559, 246, 277, 466, 497, 151, 182,
+ 435, 466, 341, 372, 183, 214, 278, 309, 404, 435, 23, 23, 24, 55,
+ 215, 246, 529, 560, 56, 87, 498, 529, 560, 591, 310, 341, 88, 119,
+ 373, 404, 467, 498, 120, 151, 247, 278, 436, 467, 152, 183, 342, 373,
+ 279, 310, 405, 436, 184, 215, 530, 561, 561, 592, 499, 530, 592, 623,
+ 24, 24, 216, 247, 468, 499, 25, 56, 374, 405, 57, 88, 311, 342,
+ 89, 120, 437, 468, 248, 279, 121, 152, 562, 593, 153, 184, 343, 374,
+ 531, 562, 593, 624, 406, 437, 500, 531, 624, 655, 280, 311, 185, 216,
+ 469, 500, 375, 406, 217, 248, 25, 25, 312, 343, 26, 57, 58, 89,
+ 438, 469, 90, 121, 563, 594, 594, 625, 249, 280, 532, 563, 625, 656,
+ 122, 153, 344, 375, 501, 532, 656, 687, 407, 438, 154, 185, 281, 312,
+ 470, 501, 186, 217, 376, 407, 595, 626, 564, 595, 626, 657, 218, 249,
+ 313, 344, 439, 470, 26, 26, 27, 58, 533, 564, 657, 688, 59, 90,
+ 91, 122, 250, 281, 502, 533, 688, 719, 123, 154, 408, 439, 345, 376,
+ 155, 186, 471, 502, 282, 313, 596, 627, 627, 658, 187, 218, 565, 596,
+ 658, 689, 377, 408, 440, 471, 534, 565, 689, 720, 314, 345, 219, 250,
+ 27, 27, 28, 59, 503, 534, 720, 751, 60, 91, 92, 123, 251, 282,
+ 409, 440, 346, 377, 124, 155, 628, 659, 472, 503, 597, 628, 659, 690,
+ 566, 597, 690, 721, 156, 187, 283, 314, 535, 566, 721, 752, 188, 219,
+ 378, 409, 441, 472, 315, 346, 504, 535, 752, 783, 220, 251, 28, 28,
+ 629, 660, 660, 691, 29, 60, 61, 92, 410, 441, 598, 629, 691, 722,
+ 252, 283, 93, 124, 347, 378, 473, 504, 567, 598, 722, 753, 125, 156,
+ 284, 315, 536, 567, 753, 784, 157, 188, 442, 473, 379, 410, 189, 220,
+ 505, 536, 784, 815, 661, 692, 316, 347, 630, 661, 692, 723, 221, 252,
+ 599, 630, 723, 754, 411, 442, 29, 29, 568, 599, 754, 785, 30, 61,
+ 474, 505, 62, 93, 253, 284, 348, 379, 94, 125, 537, 568, 785, 816,
+ 126, 157, 285, 316, 158, 189, 443, 474, 662, 693, 693, 724, 380, 411,
+ 631, 662, 724, 755, 506, 537, 816, 847, 190, 221, 600, 631, 755, 786,
+ 317, 348, 222, 253, 569, 600, 786, 817, 412, 443, 475, 506, 30, 30,
+ 31, 62, 349, 380, 254, 285, 63, 94, 538, 569, 817, 848, 694, 725,
+ 95, 126, 663, 694, 725, 756, 632, 663, 756, 787, 127, 158, 444, 475,
+ 286, 317, 381, 412, 507, 538, 848, 879, 159, 190, 601, 632, 787, 818,
+ 191, 222, 318, 349, 570, 601, 818, 849, 476, 507, 223, 254, 413, 444,
+ 695, 726, 726, 757, 664, 695, 757, 788, 539, 570, 849, 880, 350, 381,
+ 255, 286, 633, 664, 788, 819, 445, 476, 602, 633, 819, 850, 508, 539,
+ 880, 911, 287, 318, 382, 413, 571, 602, 850, 881, 727, 758, 696, 727,
+ 758, 789, 319, 350, 477, 508, 665, 696, 789, 820, 414, 445, 540, 571,
+ 881, 912, 634, 665, 820, 851, 351, 382, 603, 634, 851, 882, 446, 477,
+ 509, 540, 912, 943, 383, 414, 728, 759, 759, 790, 572, 603, 882, 913,
+ 697, 728, 790, 821, 666, 697, 821, 852, 478, 509, 635, 666, 852, 883,
+ 415, 446, 541, 572, 913, 944, 604, 635, 883, 914, 760, 791, 729, 760,
+ 791, 822, 510, 541, 944, 975, 447, 478, 698, 729, 822, 853, 573, 604,
+ 914, 945, 667, 698, 853, 884, 636, 667, 884, 915, 479, 510, 542, 573,
+ 945, 976, 761, 792, 792, 823, 605, 636, 915, 946, 730, 761, 823, 854,
+ 699, 730, 854, 885, 511, 542, 976, 1007, 574, 605, 946, 977, 668, 699,
+ 885, 916, 637, 668, 916, 947, 543, 574, 793, 824, 977, 1008, 762, 793,
+ 824, 855, 731, 762, 855, 886, 606, 637, 947, 978, 700, 731, 886, 917,
+ 669, 700, 917, 948, 575, 606, 978, 1009, 638, 669, 948, 979, 794, 825,
+ 825, 856, 763, 794, 856, 887, 732, 763, 887, 918, 607, 638, 979, 1010,
+ 701, 732, 918, 949, 670, 701, 949, 980, 826, 857, 795, 826, 857, 888,
+ 764, 795, 888, 919, 639, 670, 980, 1011, 733, 764, 919, 950, 702, 733,
+ 950, 981, 671, 702, 981, 1012, 827, 858, 858, 889, 796, 827, 889, 920,
+ 765, 796, 920, 951, 734, 765, 951, 982, 703, 734, 982, 1013, 859, 890,
+ 828, 859, 890, 921, 797, 828, 921, 952, 766, 797, 952, 983, 735, 766,
+ 983, 1014, 860, 891, 891, 922, 829, 860, 922, 953, 798, 829, 953, 984,
+ 767, 798, 984, 1015, 892, 923, 861, 892, 923, 954, 830, 861, 954, 985,
+ 799, 830, 985, 1016, 893, 924, 924, 955, 862, 893, 955, 986, 831, 862,
+ 986, 1017, 925, 956, 894, 925, 956, 987, 863, 894, 987, 1018, 926, 957,
+ 957, 988, 895, 926, 988, 1019, 958, 989, 927, 958, 989, 1020, 959, 990,
+ 990, 1021, 991, 1022, 0, 0,
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ h2_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 0, 0, 1, 32, 1, 1, 32, 32, 2, 33,
+ 33, 64, 34, 65, 2, 2, 64, 64, 3, 34, 65, 96, 35, 66,
+ 66, 97, 3, 3, 96, 96, 4, 35, 97, 128, 67, 98, 36, 67,
+ 98, 129, 4, 4, 68, 99, 99, 130, 128, 128, 5, 36, 129, 160,
+ 37, 68, 130, 161, 100, 131, 69, 100, 131, 162, 5, 5, 160, 160,
+ 6, 37, 161, 192, 38, 69, 162, 193, 101, 132, 132, 163, 70, 101,
+ 163, 194, 6, 6, 192, 192, 7, 38, 133, 164, 193, 224, 102, 133,
+ 164, 195, 39, 70, 194, 225, 71, 102, 195, 226, 134, 165, 165, 196,
+ 7, 7, 224, 224, 8, 39, 103, 134, 196, 227, 225, 256, 40, 71,
+ 226, 257, 166, 197, 72, 103, 227, 258, 135, 166, 197, 228, 104, 135,
+ 228, 259, 8, 8, 256, 256, 9, 40, 257, 288, 41, 72, 167, 198,
+ 198, 229, 258, 289, 136, 167, 229, 260, 73, 104, 259, 290, 105, 136,
+ 260, 291, 199, 230, 9, 9, 168, 199, 230, 261, 288, 288, 10, 41,
+ 289, 320, 42, 73, 290, 321, 137, 168, 261, 292, 74, 105, 291, 322,
+ 200, 231, 231, 262, 106, 137, 292, 323, 169, 200, 262, 293, 10, 10,
+ 320, 320, 11, 42, 321, 352, 43, 74, 138, 169, 293, 324, 322, 353,
+ 232, 263, 75, 106, 201, 232, 263, 294, 323, 354, 170, 201, 294, 325,
+ 107, 138, 324, 355, 11, 11, 352, 352, 12, 43, 233, 264, 264, 295,
+ 353, 384, 139, 170, 325, 356, 44, 75, 354, 385, 202, 233, 295, 326,
+ 76, 107, 355, 386, 171, 202, 326, 357, 108, 139, 356, 387, 265, 296,
+ 234, 265, 296, 327, 12, 12, 140, 171, 357, 388, 384, 384, 13, 44,
+ 203, 234, 327, 358, 385, 416, 45, 76, 386, 417, 77, 108, 387, 418,
+ 172, 203, 358, 389, 266, 297, 297, 328, 109, 140, 235, 266, 328, 359,
+ 388, 419, 204, 235, 359, 390, 141, 172, 389, 420, 13, 13, 416, 416,
+ 14, 45, 417, 448, 46, 77, 298, 329, 418, 449, 267, 298, 329, 360,
+ 78, 109, 173, 204, 390, 421, 419, 450, 236, 267, 360, 391, 110, 141,
+ 420, 451, 205, 236, 391, 422, 142, 173, 299, 330, 330, 361, 421, 452,
+ 14, 14, 268, 299, 361, 392, 448, 448, 15, 46, 449, 480, 47, 78,
+ 450, 481, 174, 205, 422, 453, 237, 268, 392, 423, 79, 110, 451, 482,
+ 111, 142, 452, 483, 331, 362, 300, 331, 362, 393, 206, 237, 423, 454,
+ 143, 174, 269, 300, 393, 424, 453, 484, 15, 15, 16, 47, 48, 79,
+ 238, 269, 424, 455, 175, 206, 454, 485, 80, 111, 332, 363, 363, 394,
+ 301, 332, 394, 425, 112, 143, 207, 238, 455, 486, 270, 301, 425, 456,
+ 144, 175, 364, 395, 16, 16, 239, 270, 456, 487, 17, 48, 333, 364,
+ 395, 426, 176, 207, 49, 80, 302, 333, 426, 457, 81, 112, 113, 144,
+ 208, 239, 271, 302, 457, 488, 365, 396, 396, 427, 145, 176, 334, 365,
+ 427, 458, 240, 271, 17, 17, 18, 49, 177, 208, 303, 334, 458, 489,
+ 50, 81, 82, 113, 272, 303, 209, 240, 397, 428, 114, 145, 366, 397,
+ 428, 459, 335, 366, 459, 490, 146, 177, 241, 272, 304, 335, 178, 209,
+ 18, 18, 19, 50, 51, 82, 398, 429, 429, 460, 367, 398, 460, 491,
+ 83, 114, 273, 304, 210, 241, 115, 146, 336, 367, 147, 178, 242, 273,
+ 305, 336, 430, 461, 399, 430, 461, 492, 179, 210, 19, 19, 368, 399,
+ 20, 51, 52, 83, 274, 305, 84, 115, 211, 242, 337, 368, 116, 147,
+ 431, 462, 462, 493, 306, 337, 148, 179, 243, 274, 400, 431, 369, 400,
+ 180, 211, 20, 20, 21, 52, 275, 306, 53, 84, 338, 369, 212, 243,
+ 85, 116, 463, 494, 432, 463, 117, 148, 401, 432, 307, 338, 244, 275,
+ 149, 180, 370, 401, 181, 212, 276, 307, 464, 495, 339, 370, 21, 21,
+ 22, 53, 433, 464, 54, 85, 213, 244, 86, 117, 402, 433, 118, 149,
+ 308, 339, 245, 276, 371, 402, 150, 181, 465, 496, 182, 213, 434, 465,
+ 340, 371, 277, 308, 22, 22, 23, 54, 403, 434, 55, 86, 214, 245,
+ 87, 118, 309, 340, 372, 403, 119, 150, 246, 277, 466, 497, 151, 182,
+ 435, 466, 341, 372, 183, 214, 278, 309, 404, 435, 23, 23, 24, 55,
+ 215, 246, 56, 87, 310, 341, 88, 119, 373, 404, 467, 498, 120, 151,
+ 247, 278, 436, 467, 152, 183, 342, 373, 279, 310, 405, 436, 184, 215,
+ 24, 24, 216, 247, 468, 499, 25, 56, 374, 405, 57, 88, 311, 342,
+ 89, 120, 437, 468, 248, 279, 121, 152, 153, 184, 343, 374, 406, 437,
+ 280, 311, 185, 216, 469, 500, 375, 406, 217, 248, 25, 25, 312, 343,
+ 26, 57, 58, 89, 438, 469, 90, 121, 249, 280, 122, 153, 344, 375,
+ 407, 438, 154, 185, 281, 312, 470, 501, 186, 217, 376, 407, 218, 249,
+ 313, 344, 439, 470, 26, 26, 27, 58, 59, 90, 91, 122, 250, 281,
+ 123, 154, 408, 439, 345, 376, 155, 186, 471, 502, 282, 313, 187, 218,
+ 377, 408, 440, 471, 314, 345, 219, 250, 27, 27, 28, 59, 60, 91,
+ 92, 123, 251, 282, 409, 440, 346, 377, 124, 155, 472, 503, 156, 187,
+ 283, 314, 188, 219, 378, 409, 441, 472, 315, 346, 220, 251, 28, 28,
+ 29, 60, 61, 92, 410, 441, 252, 283, 93, 124, 347, 378, 473, 504,
+ 125, 156, 284, 315, 157, 188, 442, 473, 379, 410, 189, 220, 316, 347,
+ 221, 252, 411, 442, 29, 29, 30, 61, 474, 505, 62, 93, 253, 284,
+ 348, 379, 94, 125, 126, 157, 285, 316, 158, 189, 443, 474, 380, 411,
+ 190, 221, 317, 348, 222, 253, 412, 443, 475, 506, 30, 30, 31, 62,
+ 349, 380, 254, 285, 63, 94, 95, 126, 127, 158, 444, 475, 286, 317,
+ 381, 412, 159, 190, 191, 222, 318, 349, 476, 507, 223, 254, 413, 444,
+ 350, 381, 255, 286, 445, 476, 287, 318, 382, 413, 319, 350, 477, 508,
+ 414, 445, 351, 382, 446, 477, 383, 414, 478, 509, 415, 446, 447, 478,
+ 479, 510, 480, 480, 481, 512, 482, 513, 483, 514, 484, 515, 485, 516,
+ 512, 512, 513, 544, 486, 517, 514, 545, 515, 546, 487, 518, 516, 547,
+ 517, 548, 488, 519, 544, 544, 518, 549, 545, 576, 546, 577, 547, 578,
+ 489, 520, 519, 550, 548, 579, 549, 580, 520, 551, 490, 521, 550, 581,
+ 576, 576, 577, 608, 578, 609, 521, 552, 579, 610, 551, 582, 491, 522,
+ 580, 611, 581, 612, 552, 583, 522, 553, 582, 613, 492, 523, 608, 608,
+ 609, 640, 610, 641, 553, 584, 611, 642, 523, 554, 583, 614, 612, 643,
+ 554, 585, 493, 524, 584, 615, 613, 644, 524, 555, 614, 645, 640, 640,
+ 585, 616, 641, 672, 555, 586, 642, 673, 615, 646, 643, 674, 494, 525,
+ 644, 675, 525, 556, 586, 617, 616, 647, 645, 676, 556, 587, 646, 677,
+ 495, 526, 617, 648, 587, 618, 672, 672, 526, 557, 673, 704, 674, 705,
+ 647, 678, 557, 588, 675, 706, 618, 649, 676, 707, 588, 619, 648, 679,
+ 677, 708, 496, 527, 527, 558, 558, 589, 678, 709, 619, 650, 649, 680,
+ 704, 704, 589, 620, 705, 736, 679, 710, 706, 737, 707, 738, 650, 681,
+ 620, 651, 497, 528, 528, 559, 708, 739, 680, 711, 559, 590, 709, 740,
+ 590, 621, 651, 682, 681, 712, 710, 741, 621, 652, 736, 736, 737, 768,
+ 529, 560, 711, 742, 498, 529, 560, 591, 738, 769, 682, 713, 652, 683,
+ 739, 770, 591, 622, 740, 771, 712, 743, 622, 653, 741, 772, 683, 714,
+ 653, 684, 713, 744, 742, 773, 530, 561, 561, 592, 499, 530, 592, 623,
+ 623, 654, 743, 774, 768, 768, 769, 800, 684, 715, 714, 745, 770, 801,
+ 771, 802, 654, 685, 744, 775, 772, 803, 562, 593, 531, 562, 593, 624,
+ 715, 746, 773, 804, 685, 716, 500, 531, 624, 655, 745, 776, 774, 805,
+ 655, 686, 716, 747, 775, 806, 746, 777, 800, 800, 801, 832, 686, 717,
+ 802, 833, 563, 594, 594, 625, 803, 834, 532, 563, 625, 656, 776, 807,
+ 804, 835, 501, 532, 656, 687, 747, 778, 717, 748, 805, 836, 777, 808,
+ 687, 718, 806, 837, 748, 779, 595, 626, 564, 595, 626, 657, 718, 749,
+ 778, 809, 807, 838, 832, 832, 533, 564, 657, 688, 833, 864, 834, 865,
+ 835, 866, 502, 533, 688, 719, 808, 839, 749, 780, 836, 867, 779, 810,
+ 719, 750, 837, 868, 809, 840, 596, 627, 627, 658, 565, 596, 658, 689,
+ 838, 869, 780, 811, 750, 781, 534, 565, 689, 720, 810, 841, 839, 870,
+ 864, 864, 503, 534, 720, 751, 865, 896, 866, 897, 840, 871, 867, 898,
+ 781, 812, 811, 842, 628, 659, 868, 899, 751, 782, 597, 628, 659, 690,
+ 566, 597, 690, 721, 869, 900, 841, 872, 535, 566, 721, 752, 812, 843,
+ 870, 901, 782, 813, 842, 873, 504, 535, 752, 783, 871, 902, 629, 660,
+ 660, 691, 896, 896, 897, 928, 598, 629, 691, 722, 813, 844, 898, 929,
+ 872, 903, 783, 814, 843, 874, 899, 930, 567, 598, 722, 753, 900, 931,
+ 536, 567, 753, 784, 873, 904, 901, 932, 814, 845, 844, 875, 902, 933,
+ 505, 536, 784, 815, 661, 692, 630, 661, 692, 723, 874, 905, 599, 630,
+ 723, 754, 903, 934, 845, 876, 568, 599, 754, 785, 928, 928, 815, 846,
+ 929, 960, 930, 961, 875, 906, 904, 935, 931, 962, 537, 568, 785, 816,
+ 932, 963, 905, 936, 662, 693, 693, 724, 846, 877, 933, 964, 876, 907,
+ 631, 662, 724, 755, 506, 537, 816, 847, 934, 965, 600, 631, 755, 786,
+ 906, 937, 569, 600, 786, 817, 935, 966, 877, 908, 847, 878, 960, 960,
+ 907, 938, 961, 992, 936, 967, 538, 569, 817, 848, 962, 993, 694, 725,
+ 663, 694, 725, 756, 963, 994, 632, 663, 756, 787, 964, 995, 878, 909,
+ 937, 968, 507, 538, 848, 879, 908, 939, 601, 632, 787, 818, 965, 996,
+ 966, 997, 570, 601, 818, 849, 938, 969, 879, 910, 909, 940, 967, 998,
+ 695, 726, 726, 757, 664, 695, 757, 788, 539, 570, 849, 880, 939, 970,
+ 633, 664, 788, 819, 968, 999, 602, 633, 819, 850, 910, 941, 508, 539,
+ 880, 911, 969, 1000, 940, 971, 571, 602, 850, 881, 727, 758, 696, 727,
+ 758, 789, 970, 1001, 665, 696, 789, 820, 911, 942, 941, 972, 540, 571,
+ 881, 912, 634, 665, 820, 851, 971, 1002, 603, 634, 851, 882, 942, 973,
+ 509, 540, 912, 943, 728, 759, 759, 790, 972, 1003, 572, 603, 882, 913,
+ 697, 728, 790, 821, 666, 697, 821, 852, 943, 974, 635, 666, 852, 883,
+ 541, 572, 913, 944, 973, 1004, 604, 635, 883, 914, 760, 791, 729, 760,
+ 791, 822, 510, 541, 944, 975, 974, 1005, 698, 729, 822, 853, 573, 604,
+ 914, 945, 667, 698, 853, 884, 636, 667, 884, 915, 975, 1006, 542, 573,
+ 945, 976, 761, 792, 792, 823, 605, 636, 915, 946, 730, 761, 823, 854,
+ 699, 730, 854, 885, 511, 542, 976, 1007, 574, 605, 946, 977, 668, 699,
+ 885, 916, 637, 668, 916, 947, 543, 574, 793, 824, 977, 1008, 762, 793,
+ 824, 855, 731, 762, 855, 886, 606, 637, 947, 978, 700, 731, 886, 917,
+ 669, 700, 917, 948, 575, 606, 978, 1009, 638, 669, 948, 979, 794, 825,
+ 825, 856, 763, 794, 856, 887, 732, 763, 887, 918, 607, 638, 979, 1010,
+ 701, 732, 918, 949, 670, 701, 949, 980, 826, 857, 795, 826, 857, 888,
+ 764, 795, 888, 919, 639, 670, 980, 1011, 733, 764, 919, 950, 702, 733,
+ 950, 981, 671, 702, 981, 1012, 827, 858, 858, 889, 796, 827, 889, 920,
+ 765, 796, 920, 951, 734, 765, 951, 982, 703, 734, 982, 1013, 859, 890,
+ 828, 859, 890, 921, 797, 828, 921, 952, 766, 797, 952, 983, 735, 766,
+ 983, 1014, 860, 891, 891, 922, 829, 860, 922, 953, 798, 829, 953, 984,
+ 767, 798, 984, 1015, 892, 923, 861, 892, 923, 954, 830, 861, 954, 985,
+ 799, 830, 985, 1016, 893, 924, 924, 955, 862, 893, 955, 986, 831, 862,
+ 986, 1017, 925, 956, 894, 925, 956, 987, 863, 894, 987, 1018, 926, 957,
+ 957, 988, 895, 926, 988, 1019, 958, 989, 927, 958, 989, 1020, 959, 990,
+ 990, 1021, 991, 1022, 0, 0
+};
+
+DECLARE_ALIGNED(16, static const int16_t,
+ qtr_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = {
+ 0, 0, 0, 0, 0, 0, 1, 32, 1, 1, 32, 32, 2, 33,
+ 33, 64, 34, 65, 2, 2, 64, 64, 3, 34, 65, 96, 35, 66,
+ 66, 97, 3, 3, 96, 96, 4, 35, 97, 128, 67, 98, 36, 67,
+ 98, 129, 4, 4, 68, 99, 99, 130, 128, 128, 5, 36, 129, 160,
+ 37, 68, 130, 161, 100, 131, 69, 100, 131, 162, 5, 5, 160, 160,
+ 6, 37, 161, 192, 38, 69, 162, 193, 101, 132, 132, 163, 70, 101,
+ 163, 194, 6, 6, 192, 192, 7, 38, 133, 164, 193, 224, 102, 133,
+ 164, 195, 39, 70, 194, 225, 71, 102, 195, 226, 134, 165, 165, 196,
+ 7, 7, 224, 224, 8, 39, 103, 134, 196, 227, 225, 256, 40, 71,
+ 226, 257, 166, 197, 72, 103, 227, 258, 135, 166, 197, 228, 104, 135,
+ 228, 259, 8, 8, 256, 256, 9, 40, 257, 288, 41, 72, 167, 198,
+ 198, 229, 258, 289, 136, 167, 229, 260, 73, 104, 259, 290, 105, 136,
+ 260, 291, 199, 230, 9, 9, 168, 199, 230, 261, 288, 288, 10, 41,
+ 289, 320, 42, 73, 290, 321, 137, 168, 261, 292, 74, 105, 291, 322,
+ 200, 231, 231, 262, 106, 137, 292, 323, 169, 200, 262, 293, 10, 10,
+ 320, 320, 11, 42, 321, 352, 43, 74, 138, 169, 293, 324, 322, 353,
+ 232, 263, 75, 106, 201, 232, 263, 294, 323, 354, 170, 201, 294, 325,
+ 107, 138, 324, 355, 11, 11, 352, 352, 12, 43, 233, 264, 264, 295,
+ 353, 384, 139, 170, 325, 356, 44, 75, 354, 385, 202, 233, 295, 326,
+ 76, 107, 355, 386, 171, 202, 326, 357, 108, 139, 356, 387, 265, 296,
+ 234, 265, 296, 327, 12, 12, 140, 171, 357, 388, 384, 384, 13, 44,
+ 203, 234, 327, 358, 385, 416, 45, 76, 386, 417, 77, 108, 387, 418,
+ 172, 203, 358, 389, 266, 297, 297, 328, 109, 140, 235, 266, 328, 359,
+ 388, 419, 204, 235, 359, 390, 141, 172, 389, 420, 13, 13, 416, 416,
+ 14, 45, 417, 448, 46, 77, 298, 329, 418, 449, 267, 298, 329, 360,
+ 78, 109, 173, 204, 390, 421, 419, 450, 236, 267, 360, 391, 110, 141,
+ 420, 451, 205, 236, 391, 422, 142, 173, 299, 330, 330, 361, 421, 452,
+ 14, 14, 268, 299, 361, 392, 448, 448, 15, 46, 449, 480, 47, 78,
+ 450, 481, 174, 205, 422, 453, 237, 268, 392, 423, 79, 110, 451, 482,
+ 111, 142, 452, 483, 331, 362, 300, 331, 362, 393, 206, 237, 423, 454,
+ 143, 174, 269, 300, 393, 424, 453, 484, 238, 269, 424, 455, 175, 206,
+ 454, 485, 332, 363, 363, 394, 301, 332, 394, 425, 207, 238, 455, 486,
+ 270, 301, 425, 456, 364, 395, 239, 270, 456, 487, 333, 364, 395, 426,
+ 302, 333, 426, 457, 271, 302, 457, 488, 365, 396, 396, 427, 334, 365,
+ 427, 458, 303, 334, 458, 489, 397, 428, 366, 397, 428, 459, 335, 366,
+ 459, 490, 398, 429, 429, 460, 367, 398, 460, 491, 430, 461, 399, 430,
+ 461, 492, 431, 462, 462, 493, 463, 494, 15, 15, 480, 480, 16, 47,
+ 481, 512, 48, 79, 482, 513, 80, 111, 483, 514, 112, 143, 484, 515,
+ 144, 175, 485, 516, 16, 16, 512, 512, 17, 48, 513, 544, 176, 207,
+ 486, 517, 49, 80, 514, 545, 81, 112, 515, 546, 113, 144, 208, 239,
+ 487, 518, 516, 547, 145, 176, 517, 548, 240, 271, 488, 519, 17, 17,
+ 544, 544, 18, 49, 177, 208, 518, 549, 545, 576, 50, 81, 546, 577,
+ 82, 113, 547, 578, 272, 303, 489, 520, 209, 240, 519, 550, 114, 145,
+ 548, 579, 146, 177, 549, 580, 241, 272, 520, 551, 304, 335, 490, 521,
+ 178, 209, 550, 581, 18, 18, 576, 576, 19, 50, 577, 608, 51, 82,
+ 578, 609, 83, 114, 273, 304, 521, 552, 579, 610, 210, 241, 551, 582,
+ 115, 146, 336, 367, 491, 522, 580, 611, 147, 178, 581, 612, 242, 273,
+ 552, 583, 305, 336, 522, 553, 179, 210, 582, 613, 19, 19, 368, 399,
+ 492, 523, 608, 608, 20, 51, 609, 640, 52, 83, 610, 641, 274, 305,
+ 553, 584, 84, 115, 611, 642, 211, 242, 337, 368, 523, 554, 583, 614,
+ 116, 147, 612, 643, 306, 337, 554, 585, 148, 179, 243, 274, 400, 431,
+ 493, 524, 584, 615, 613, 644, 369, 400, 524, 555, 180, 211, 614, 645,
+ 20, 20, 640, 640, 21, 52, 275, 306, 585, 616, 641, 672, 53, 84,
+ 338, 369, 555, 586, 642, 673, 212, 243, 615, 646, 85, 116, 643, 674,
+ 432, 463, 494, 525, 117, 148, 644, 675, 401, 432, 525, 556, 307, 338,
+ 586, 617, 244, 275, 616, 647, 149, 180, 645, 676, 370, 401, 556, 587,
+ 181, 212, 646, 677, 276, 307, 464, 495, 495, 526, 617, 648, 339, 370,
+ 587, 618, 21, 21, 672, 672, 22, 53, 433, 464, 526, 557, 673, 704,
+ 54, 85, 674, 705, 213, 244, 647, 678, 86, 117, 402, 433, 557, 588,
+ 675, 706, 118, 149, 308, 339, 618, 649, 676, 707, 245, 276, 371, 402,
+ 588, 619, 648, 679, 150, 181, 677, 708, 496, 527, 465, 496, 527, 558,
+ 182, 213, 434, 465, 558, 589, 678, 709, 340, 371, 619, 650, 277, 308,
+ 649, 680, 22, 22, 704, 704, 23, 54, 403, 434, 589, 620, 705, 736,
+ 55, 86, 214, 245, 679, 710, 706, 737, 87, 118, 707, 738, 309, 340,
+ 650, 681, 372, 403, 620, 651, 119, 150, 497, 528, 528, 559, 708, 739,
+ 246, 277, 680, 711, 466, 497, 559, 590, 151, 182, 709, 740, 435, 466,
+ 590, 621, 341, 372, 651, 682, 183, 214, 278, 309, 681, 712, 710, 741,
+ 404, 435, 621, 652, 23, 23, 736, 736, 24, 55, 737, 768, 215, 246,
+ 529, 560, 711, 742, 56, 87, 498, 529, 560, 591, 738, 769, 310, 341,
+ 682, 713, 88, 119, 373, 404, 652, 683, 739, 770, 467, 498, 591, 622,
+ 120, 151, 740, 771, 247, 278, 712, 743, 436, 467, 622, 653, 152, 183,
+ 741, 772, 342, 373, 683, 714, 279, 310, 405, 436, 653, 684, 713, 744,
+ 184, 215, 742, 773, 530, 561, 561, 592, 499, 530, 592, 623, 24, 24,
+ 216, 247, 468, 499, 623, 654, 743, 774, 768, 768, 25, 56, 769, 800,
+ 374, 405, 684, 715, 57, 88, 311, 342, 714, 745, 770, 801, 89, 120,
+ 771, 802, 437, 468, 654, 685, 248, 279, 744, 775, 121, 152, 772, 803,
+ 562, 593, 153, 184, 343, 374, 531, 562, 593, 624, 715, 746, 773, 804,
+ 406, 437, 685, 716, 500, 531, 624, 655, 280, 311, 745, 776, 185, 216,
+ 774, 805, 469, 500, 655, 686, 375, 406, 716, 747, 217, 248, 775, 806,
+ 25, 25, 312, 343, 746, 777, 800, 800, 26, 57, 801, 832, 58, 89,
+ 438, 469, 686, 717, 802, 833, 90, 121, 563, 594, 594, 625, 803, 834,
+ 249, 280, 532, 563, 625, 656, 776, 807, 122, 153, 804, 835, 344, 375,
+ 501, 532, 656, 687, 747, 778, 407, 438, 717, 748, 154, 185, 805, 836,
+ 281, 312, 777, 808, 470, 501, 687, 718, 186, 217, 806, 837, 376, 407,
+ 748, 779, 595, 626, 564, 595, 626, 657, 218, 249, 313, 344, 439, 470,
+ 718, 749, 778, 809, 807, 838, 26, 26, 832, 832, 27, 58, 533, 564,
+ 657, 688, 833, 864, 59, 90, 834, 865, 91, 122, 835, 866, 250, 281,
+ 502, 533, 688, 719, 808, 839, 123, 154, 408, 439, 749, 780, 836, 867,
+ 345, 376, 779, 810, 155, 186, 471, 502, 719, 750, 837, 868, 282, 313,
+ 809, 840, 596, 627, 627, 658, 187, 218, 565, 596, 658, 689, 838, 869,
+ 377, 408, 780, 811, 440, 471, 750, 781, 534, 565, 689, 720, 314, 345,
+ 810, 841, 219, 250, 839, 870, 27, 27, 864, 864, 28, 59, 503, 534,
+ 720, 751, 865, 896, 60, 91, 866, 897, 92, 123, 251, 282, 840, 871,
+ 867, 898, 409, 440, 781, 812, 346, 377, 811, 842, 124, 155, 628, 659,
+ 868, 899, 472, 503, 751, 782, 597, 628, 659, 690, 566, 597, 690, 721,
+ 156, 187, 869, 900, 283, 314, 841, 872, 535, 566, 721, 752, 188, 219,
+ 378, 409, 812, 843, 870, 901, 441, 472, 782, 813, 315, 346, 842, 873,
+ 504, 535, 752, 783, 220, 251, 871, 902, 28, 28, 629, 660, 660, 691,
+ 896, 896, 29, 60, 897, 928, 61, 92, 410, 441, 598, 629, 691, 722,
+ 813, 844, 898, 929, 252, 283, 872, 903, 93, 124, 347, 378, 473, 504,
+ 783, 814, 843, 874, 899, 930, 567, 598, 722, 753, 125, 156, 900, 931,
+ 284, 315, 536, 567, 753, 784, 873, 904, 157, 188, 901, 932, 442, 473,
+ 814, 845, 379, 410, 844, 875, 189, 220, 902, 933, 505, 536, 784, 815,
+ 661, 692, 316, 347, 630, 661, 692, 723, 874, 905, 221, 252, 599, 630,
+ 723, 754, 903, 934, 411, 442, 845, 876, 29, 29, 568, 599, 754, 785,
+ 928, 928, 30, 61, 474, 505, 815, 846, 929, 960, 62, 93, 930, 961,
+ 253, 284, 348, 379, 875, 906, 904, 935, 94, 125, 931, 962, 537, 568,
+ 785, 816, 126, 157, 932, 963, 285, 316, 905, 936, 158, 189, 443, 474,
+ 662, 693, 693, 724, 846, 877, 933, 964, 380, 411, 876, 907, 631, 662,
+ 724, 755, 506, 537, 816, 847, 190, 221, 934, 965, 600, 631, 755, 786,
+ 317, 348, 906, 937, 222, 253, 569, 600, 786, 817, 935, 966, 412, 443,
+ 877, 908, 475, 506, 847, 878, 30, 30, 960, 960, 31, 62, 349, 380,
+ 907, 938, 961, 992, 254, 285, 936, 967, 63, 94, 538, 569, 817, 848,
+ 962, 993, 694, 725, 95, 126, 663, 694, 725, 756, 963, 994, 632, 663,
+ 756, 787, 127, 158, 964, 995, 444, 475, 878, 909, 286, 317, 937, 968,
+ 381, 412, 507, 538, 848, 879, 908, 939, 159, 190, 601, 632, 787, 818,
+ 965, 996, 191, 222, 966, 997, 318, 349, 570, 601, 818, 849, 938, 969,
+ 476, 507, 879, 910, 223, 254, 413, 444, 909, 940, 967, 998, 695, 726,
+ 726, 757, 664, 695, 757, 788, 539, 570, 849, 880, 350, 381, 939, 970,
+ 255, 286, 633, 664, 788, 819, 968, 999, 445, 476, 602, 633, 819, 850,
+ 910, 941, 508, 539, 880, 911, 287, 318, 969, 1000, 382, 413, 940, 971,
+ 571, 602, 850, 881, 727, 758, 696, 727, 758, 789, 319, 350, 970, 1001,
+ 477, 508, 665, 696, 789, 820, 911, 942, 414, 445, 941, 972, 540, 571,
+ 881, 912, 634, 665, 820, 851, 351, 382, 971, 1002, 603, 634, 851, 882,
+ 446, 477, 942, 973, 509, 540, 912, 943, 383, 414, 728, 759, 759, 790,
+ 972, 1003, 572, 603, 882, 913, 697, 728, 790, 821, 666, 697, 821, 852,
+ 478, 509, 943, 974, 635, 666, 852, 883, 415, 446, 541, 572, 913, 944,
+ 973, 1004, 604, 635, 883, 914, 760, 791, 729, 760, 791, 822, 510, 541,
+ 944, 975, 447, 478, 974, 1005, 698, 729, 822, 853, 573, 604, 914, 945,
+ 667, 698, 853, 884, 636, 667, 884, 915, 479, 510, 975, 1006, 542, 573,
+ 945, 976, 761, 792, 792, 823, 605, 636, 915, 946, 730, 761, 823, 854,
+ 699, 730, 854, 885, 511, 542, 976, 1007, 574, 605, 946, 977, 668, 699,
+ 885, 916, 637, 668, 916, 947, 543, 574, 793, 824, 977, 1008, 762, 793,
+ 824, 855, 731, 762, 855, 886, 606, 637, 947, 978, 700, 731, 886, 917,
+ 669, 700, 917, 948, 575, 606, 978, 1009, 638, 669, 948, 979, 794, 825,
+ 825, 856, 763, 794, 856, 887, 732, 763, 887, 918, 607, 638, 979, 1010,
+ 701, 732, 918, 949, 670, 701, 949, 980, 826, 857, 795, 826, 857, 888,
+ 764, 795, 888, 919, 639, 670, 980, 1011, 733, 764, 919, 950, 702, 733,
+ 950, 981, 671, 702, 981, 1012, 827, 858, 858, 889, 796, 827, 889, 920,
+ 765, 796, 920, 951, 734, 765, 951, 982, 703, 734, 982, 1013, 859, 890,
+ 828, 859, 890, 921, 797, 828, 921, 952, 766, 797, 952, 983, 735, 766,
+ 983, 1014, 860, 891, 891, 922, 829, 860, 922, 953, 798, 829, 953, 984,
+ 767, 798, 984, 1015, 892, 923, 861, 892, 923, 954, 830, 861, 954, 985,
+ 799, 830, 985, 1016, 893, 924, 924, 955, 862, 893, 955, 986, 831, 862,
+ 986, 1017, 925, 956, 894, 925, 956, 987, 863, 894, 987, 1018, 926, 957,
+ 957, 988, 895, 926, 988, 1019, 958, 989, 927, 958, 989, 1020, 959, 990,
+ 990, 1021, 991, 1022, 0, 0
+};
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_4x4[16]) = {
+ 0, 2, 5, 8, 1, 3, 9, 12, 4, 7, 11, 14, 6, 10, 13, 15,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_4x4[16]) = {
+ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_4x4[16]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_col_iscan_4x4[16]) = {
+ 0, 3, 7, 11, 1, 5, 9, 12, 2, 6, 10, 14, 4, 8, 13, 15,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_row_iscan_4x4[16]) = {
+ 0, 1, 3, 5, 2, 4, 6, 9, 7, 8, 11, 13, 10, 12, 14, 15,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_4x8[32]) = {
+ 0, 1, 4, 9, 2, 3, 6, 11, 5, 7, 8, 13, 10, 12, 14, 17,
+ 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_4x8[32]) = {
+ 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27,
+ 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_4x8[32]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_8x4[32]) = {
+ 0, 1, 4, 9, 15, 19, 24, 28, 2, 3, 6, 11, 16, 21, 25, 29,
+ 5, 7, 8, 13, 18, 22, 26, 30, 10, 12, 14, 17, 20, 23, 27, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_8x4[32]) = {
+ 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29,
+ 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_8x4[32]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+};
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_8x8[64]) = {
+ 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57,
+ 2, 10, 18, 26, 34, 42, 50, 58, 3, 11, 19, 27, 35, 43, 51, 59,
+ 4, 12, 20, 28, 36, 44, 52, 60, 5, 13, 21, 29, 37, 45, 53, 61,
+ 6, 14, 22, 30, 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_8x8[64]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+};
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_col_iscan_8x8[64]) = {
+ 0, 3, 8, 15, 22, 32, 40, 47, 1, 5, 11, 18, 26, 34, 44, 51,
+ 2, 7, 13, 20, 28, 38, 46, 54, 4, 10, 16, 24, 31, 41, 50, 56,
+ 6, 12, 21, 27, 35, 43, 52, 58, 9, 17, 25, 33, 39, 48, 55, 60,
+ 14, 23, 30, 37, 45, 53, 59, 62, 19, 29, 36, 42, 49, 57, 61, 63,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_row_iscan_8x8[64]) = {
+ 0, 1, 2, 5, 8, 12, 19, 24, 3, 4, 7, 10, 15, 20, 30, 39,
+ 6, 9, 13, 16, 21, 27, 37, 46, 11, 14, 17, 23, 28, 34, 44, 52,
+ 18, 22, 25, 31, 35, 41, 50, 57, 26, 29, 33, 38, 43, 49, 55, 59,
+ 32, 36, 42, 47, 51, 54, 60, 61, 40, 45, 48, 53, 56, 58, 62, 63,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_8x8[64]) = {
+ 0, 2, 5, 9, 14, 22, 31, 37, 1, 4, 8, 13, 19, 26, 38, 44,
+ 3, 6, 10, 17, 24, 30, 42, 49, 7, 11, 15, 21, 29, 36, 47, 53,
+ 12, 16, 20, 27, 34, 43, 52, 57, 18, 23, 28, 35, 41, 48, 56, 60,
+ 25, 32, 39, 45, 50, 55, 59, 62, 33, 40, 46, 51, 54, 58, 61, 63,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_8x16[128]) = {
+ 0, 1, 3, 6, 10, 15, 21, 28, 2, 4, 7, 11, 16, 22, 29, 36,
+ 5, 8, 12, 17, 23, 30, 37, 44, 9, 13, 18, 24, 31, 38, 45, 52,
+ 14, 19, 25, 32, 39, 46, 53, 60, 20, 26, 33, 40, 47, 54, 61, 68,
+ 27, 34, 41, 48, 55, 62, 69, 76, 35, 42, 49, 56, 63, 70, 77, 84,
+ 43, 50, 57, 64, 71, 78, 85, 92, 51, 58, 65, 72, 79, 86, 93, 100,
+ 59, 66, 73, 80, 87, 94, 101, 107, 67, 74, 81, 88, 95, 102, 108, 113,
+ 75, 82, 89, 96, 103, 109, 114, 118, 83, 90, 97, 104, 110, 115, 119, 122,
+ 91, 98, 105, 111, 116, 120, 123, 125, 99, 106, 112, 117, 121, 124, 126, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_16x8[128]) = {
+ 0, 1, 3, 6, 10, 15, 21, 28, 36, 44, 52, 60, 68, 76, 84, 92,
+ 2, 4, 7, 11, 16, 22, 29, 37, 45, 53, 61, 69, 77, 85, 93, 100,
+ 5, 8, 12, 17, 23, 30, 38, 46, 54, 62, 70, 78, 86, 94, 101, 107,
+ 9, 13, 18, 24, 31, 39, 47, 55, 63, 71, 79, 87, 95, 102, 108, 113,
+ 14, 19, 25, 32, 40, 48, 56, 64, 72, 80, 88, 96, 103, 109, 114, 118,
+ 20, 26, 33, 41, 49, 57, 65, 73, 81, 89, 97, 104, 110, 115, 119, 122,
+ 27, 34, 42, 50, 58, 66, 74, 82, 90, 98, 105, 111, 116, 120, 123, 125,
+ 35, 43, 51, 59, 67, 75, 83, 91, 99, 106, 112, 117, 121, 124, 126, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_8x16[128]) = {
+ 0, 16, 32, 48, 64, 80, 96, 112, 1, 17, 33, 49, 65, 81, 97, 113,
+ 2, 18, 34, 50, 66, 82, 98, 114, 3, 19, 35, 51, 67, 83, 99, 115,
+ 4, 20, 36, 52, 68, 84, 100, 116, 5, 21, 37, 53, 69, 85, 101, 117,
+ 6, 22, 38, 54, 70, 86, 102, 118, 7, 23, 39, 55, 71, 87, 103, 119,
+ 8, 24, 40, 56, 72, 88, 104, 120, 9, 25, 41, 57, 73, 89, 105, 121,
+ 10, 26, 42, 58, 74, 90, 106, 122, 11, 27, 43, 59, 75, 91, 107, 123,
+ 12, 28, 44, 60, 76, 92, 108, 124, 13, 29, 45, 61, 77, 93, 109, 125,
+ 14, 30, 46, 62, 78, 94, 110, 126, 15, 31, 47, 63, 79, 95, 111, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_16x8[128]) = {
+ 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120,
+ 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97, 105, 113, 121,
+ 2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122,
+ 3, 11, 19, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99, 107, 115, 123,
+ 4, 12, 20, 28, 36, 44, 52, 60, 68, 76, 84, 92, 100, 108, 116, 124,
+ 5, 13, 21, 29, 37, 45, 53, 61, 69, 77, 85, 93, 101, 109, 117, 125,
+ 6, 14, 22, 30, 38, 46, 54, 62, 70, 78, 86, 94, 102, 110, 118, 126,
+ 7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 119, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_8x16[128]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_16x8[128]) = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_16x32[512]) = {
+ 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105,
+ 120, 2, 4, 7, 11, 16, 22, 29, 37, 46, 56, 67, 79, 92, 106,
+ 121, 136, 5, 8, 12, 17, 23, 30, 38, 47, 57, 68, 80, 93, 107,
+ 122, 137, 152, 9, 13, 18, 24, 31, 39, 48, 58, 69, 81, 94, 108,
+ 123, 138, 153, 168, 14, 19, 25, 32, 40, 49, 59, 70, 82, 95, 109,
+ 124, 139, 154, 169, 184, 20, 26, 33, 41, 50, 60, 71, 83, 96, 110,
+ 125, 140, 155, 170, 185, 200, 27, 34, 42, 51, 61, 72, 84, 97, 111,
+ 126, 141, 156, 171, 186, 201, 216, 35, 43, 52, 62, 73, 85, 98, 112,
+ 127, 142, 157, 172, 187, 202, 217, 232, 44, 53, 63, 74, 86, 99, 113,
+ 128, 143, 158, 173, 188, 203, 218, 233, 248, 54, 64, 75, 87, 100, 114,
+ 129, 144, 159, 174, 189, 204, 219, 234, 249, 264, 65, 76, 88, 101, 115,
+ 130, 145, 160, 175, 190, 205, 220, 235, 250, 265, 280, 77, 89, 102, 116,
+ 131, 146, 161, 176, 191, 206, 221, 236, 251, 266, 281, 296, 90, 103, 117,
+ 132, 147, 162, 177, 192, 207, 222, 237, 252, 267, 282, 297, 312, 104, 118,
+ 133, 148, 163, 178, 193, 208, 223, 238, 253, 268, 283, 298, 313, 328, 119,
+ 134, 149, 164, 179, 194, 209, 224, 239, 254, 269, 284, 299, 314, 329, 344,
+ 135, 150, 165, 180, 195, 210, 225, 240, 255, 270, 285, 300, 315, 330, 345,
+ 360, 151, 166, 181, 196, 211, 226, 241, 256, 271, 286, 301, 316, 331, 346,
+ 361, 376, 167, 182, 197, 212, 227, 242, 257, 272, 287, 302, 317, 332, 347,
+ 362, 377, 392, 183, 198, 213, 228, 243, 258, 273, 288, 303, 318, 333, 348,
+ 363, 378, 393, 407, 199, 214, 229, 244, 259, 274, 289, 304, 319, 334, 349,
+ 364, 379, 394, 408, 421, 215, 230, 245, 260, 275, 290, 305, 320, 335, 350,
+ 365, 380, 395, 409, 422, 434, 231, 246, 261, 276, 291, 306, 321, 336, 351,
+ 366, 381, 396, 410, 423, 435, 446, 247, 262, 277, 292, 307, 322, 337, 352,
+ 367, 382, 397, 411, 424, 436, 447, 457, 263, 278, 293, 308, 323, 338, 353,
+ 368, 383, 398, 412, 425, 437, 448, 458, 467, 279, 294, 309, 324, 339, 354,
+ 369, 384, 399, 413, 426, 438, 449, 459, 468, 476, 295, 310, 325, 340, 355,
+ 370, 385, 400, 414, 427, 439, 450, 460, 469, 477, 484, 311, 326, 341, 356,
+ 371, 386, 401, 415, 428, 440, 451, 461, 470, 478, 485, 491, 327, 342, 357,
+ 372, 387, 402, 416, 429, 441, 452, 462, 471, 479, 486, 492, 497, 343, 358,
+ 373, 388, 403, 417, 430, 442, 453, 463, 472, 480, 487, 493, 498, 502, 359,
+ 374, 389, 404, 418, 431, 443, 454, 464, 473, 481, 488, 494, 499, 503, 506,
+ 375, 390, 405, 419, 432, 444, 455, 465, 474, 482, 489, 495, 500, 504, 507,
+ 509, 391, 406, 420, 433, 445, 456, 466, 475, 483, 490, 496, 501, 505, 508,
+ 510, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_32x16[512]) = {  // Indexed r * 32 + c; values number the anti-diagonals: (0,0)=0, (0,1)=1, (1,0)=2, (0,2)=3, ...
+ 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105,
+ 120, 136, 152, 168, 184, 200, 216, 232, 248, 264, 280, 296, 312, 328, 344,
+ 360, 376, 2, 4, 7, 11, 16, 22, 29, 37, 46, 56, 67, 79, 92,
+ 106, 121, 137, 153, 169, 185, 201, 217, 233, 249, 265, 281, 297, 313, 329,
+ 345, 361, 377, 392, 5, 8, 12, 17, 23, 30, 38, 47, 57, 68, 80,
+ 93, 107, 122, 138, 154, 170, 186, 202, 218, 234, 250, 266, 282, 298, 314,
+ 330, 346, 362, 378, 393, 407, 9, 13, 18, 24, 31, 39, 48, 58, 69,
+ 81, 94, 108, 123, 139, 155, 171, 187, 203, 219, 235, 251, 267, 283, 299,
+ 315, 331, 347, 363, 379, 394, 408, 421, 14, 19, 25, 32, 40, 49, 59,
+ 70, 82, 95, 109, 124, 140, 156, 172, 188, 204, 220, 236, 252, 268, 284,
+ 300, 316, 332, 348, 364, 380, 395, 409, 422, 434, 20, 26, 33, 41, 50,
+ 60, 71, 83, 96, 110, 125, 141, 157, 173, 189, 205, 221, 237, 253, 269,
+ 285, 301, 317, 333, 349, 365, 381, 396, 410, 423, 435, 446, 27, 34, 42,
+ 51, 61, 72, 84, 97, 111, 126, 142, 158, 174, 190, 206, 222, 238, 254,
+ 270, 286, 302, 318, 334, 350, 366, 382, 397, 411, 424, 436, 447, 457, 35,
+ 43, 52, 62, 73, 85, 98, 112, 127, 143, 159, 175, 191, 207, 223, 239,
+ 255, 271, 287, 303, 319, 335, 351, 367, 383, 398, 412, 425, 437, 448, 458,
+ 467, 44, 53, 63, 74, 86, 99, 113, 128, 144, 160, 176, 192, 208, 224,
+ 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 399, 413, 426, 438, 449,
+ 459, 468, 476, 54, 64, 75, 87, 100, 114, 129, 145, 161, 177, 193, 209,
+ 225, 241, 257, 273, 289, 305, 321, 337, 353, 369, 385, 400, 414, 427, 439,
+ 450, 460, 469, 477, 484, 65, 76, 88, 101, 115, 130, 146, 162, 178, 194,
+ 210, 226, 242, 258, 274, 290, 306, 322, 338, 354, 370, 386, 401, 415, 428,
+ 440, 451, 461, 470, 478, 485, 491, 77, 89, 102, 116, 131, 147, 163, 179,
+ 195, 211, 227, 243, 259, 275, 291, 307, 323, 339, 355, 371, 387, 402, 416,
+ 429, 441, 452, 462, 471, 479, 486, 492, 497, 90, 103, 117, 132, 148, 164,
+ 180, 196, 212, 228, 244, 260, 276, 292, 308, 324, 340, 356, 372, 388, 403,
+ 417, 430, 442, 453, 463, 472, 480, 487, 493, 498, 502, 104, 118, 133, 149,
+ 165, 181, 197, 213, 229, 245, 261, 277, 293, 309, 325, 341, 357, 373, 389,
+ 404, 418, 431, 443, 454, 464, 473, 481, 488, 494, 499, 503, 506, 119, 134,
+ 150, 166, 182, 198, 214, 230, 246, 262, 278, 294, 310, 326, 342, 358, 374,
+ 390, 405, 419, 432, 444, 455, 465, 474, 482, 489, 495, 500, 504, 507, 509,
+ 135, 151, 167, 183, 199, 215, 231, 247, 263, 279, 295, 311, 327, 343, 359,
+ 375, 391, 406, 420, 433, 445, 456, 466, 475, 483, 490, 496, 501, 505, 508,
+ 510, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_16x32[512]) = {  // Transposed raster order: entry [r * 16 + c] == c * 32 + r (one table row per line below).
+ 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 480,
+ 1, 33, 65, 97, 129, 161, 193, 225, 257, 289, 321, 353, 385, 417, 449, 481,
+ 2, 34, 66, 98, 130, 162, 194, 226, 258, 290, 322, 354, 386, 418, 450, 482,
+ 3, 35, 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483,
+ 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356, 388, 420, 452, 484,
+ 5, 37, 69, 101, 133, 165, 197, 229, 261, 293, 325, 357, 389, 421, 453, 485,
+ 6, 38, 70, 102, 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486,
+ 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423, 455, 487,
+ 8, 40, 72, 104, 136, 168, 200, 232, 264, 296, 328, 360, 392, 424, 456, 488,
+ 9, 41, 73, 105, 137, 169, 201, 233, 265, 297, 329, 361, 393, 425, 457, 489,
+ 10, 42, 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490,
+ 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363, 395, 427, 459, 491,
+ 12, 44, 76, 108, 140, 172, 204, 236, 268, 300, 332, 364, 396, 428, 460, 492,
+ 13, 45, 77, 109, 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493,
+ 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430, 462, 494,
+ 15, 47, 79, 111, 143, 175, 207, 239, 271, 303, 335, 367, 399, 431, 463, 495,
+ 16, 48, 80, 112, 144, 176, 208, 240, 272, 304, 336, 368, 400, 432, 464, 496,
+ 17, 49, 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497,
+ 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370, 402, 434, 466, 498,
+ 19, 51, 83, 115, 147, 179, 211, 243, 275, 307, 339, 371, 403, 435, 467, 499,
+ 20, 52, 84, 116, 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500,
+ 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437, 469, 501,
+ 22, 54, 86, 118, 150, 182, 214, 246, 278, 310, 342, 374, 406, 438, 470, 502,
+ 23, 55, 87, 119, 151, 183, 215, 247, 279, 311, 343, 375, 407, 439, 471, 503,
+ 24, 56, 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504,
+ 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377, 409, 441, 473, 505,
+ 26, 58, 90, 122, 154, 186, 218, 250, 282, 314, 346, 378, 410, 442, 474, 506,
+ 27, 59, 91, 123, 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507,
+ 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444, 476, 508,
+ 29, 61, 93, 125, 157, 189, 221, 253, 285, 317, 349, 381, 413, 445, 477, 509,
+ 30, 62, 94, 126, 158, 190, 222, 254, 286, 318, 350, 382, 414, 446, 478, 510,
+ 31, 63, 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_32x16[512]) = {  // Transposed raster order: entry [r * 32 + c] == c * 16 + r (text lines below do not align with table rows).
+ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224,
+ 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464,
+ 480, 496, 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193,
+ 209, 225, 241, 257, 273, 289, 305, 321, 337, 353, 369, 385, 401, 417, 433,
+ 449, 465, 481, 497, 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162,
+ 178, 194, 210, 226, 242, 258, 274, 290, 306, 322, 338, 354, 370, 386, 402,
+ 418, 434, 450, 466, 482, 498, 3, 19, 35, 51, 67, 83, 99, 115, 131,
+ 147, 163, 179, 195, 211, 227, 243, 259, 275, 291, 307, 323, 339, 355, 371,
+ 387, 403, 419, 435, 451, 467, 483, 499, 4, 20, 36, 52, 68, 84, 100,
+ 116, 132, 148, 164, 180, 196, 212, 228, 244, 260, 276, 292, 308, 324, 340,
+ 356, 372, 388, 404, 420, 436, 452, 468, 484, 500, 5, 21, 37, 53, 69,
+ 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245, 261, 277, 293, 309,
+ 325, 341, 357, 373, 389, 405, 421, 437, 453, 469, 485, 501, 6, 22, 38,
+ 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246, 262, 278,
+ 294, 310, 326, 342, 358, 374, 390, 406, 422, 438, 454, 470, 486, 502, 7,
+ 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
+ 263, 279, 295, 311, 327, 343, 359, 375, 391, 407, 423, 439, 455, 471, 487,
+ 503, 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216,
+ 232, 248, 264, 280, 296, 312, 328, 344, 360, 376, 392, 408, 424, 440, 456,
+ 472, 488, 504, 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185,
+ 201, 217, 233, 249, 265, 281, 297, 313, 329, 345, 361, 377, 393, 409, 425,
+ 441, 457, 473, 489, 505, 10, 26, 42, 58, 74, 90, 106, 122, 138, 154,
+ 170, 186, 202, 218, 234, 250, 266, 282, 298, 314, 330, 346, 362, 378, 394,
+ 410, 426, 442, 458, 474, 490, 506, 11, 27, 43, 59, 75, 91, 107, 123,
+ 139, 155, 171, 187, 203, 219, 235, 251, 267, 283, 299, 315, 331, 347, 363,
+ 379, 395, 411, 427, 443, 459, 475, 491, 507, 12, 28, 44, 60, 76, 92,
+ 108, 124, 140, 156, 172, 188, 204, 220, 236, 252, 268, 284, 300, 316, 332,
+ 348, 364, 380, 396, 412, 428, 444, 460, 476, 492, 508, 13, 29, 45, 61,
+ 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253, 269, 285, 301,
+ 317, 333, 349, 365, 381, 397, 413, 429, 445, 461, 477, 493, 509, 14, 30,
+ 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254, 270,
+ 286, 302, 318, 334, 350, 366, 382, 398, 414, 430, 446, 462, 478, 494, 510,
+ 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239,
+ 255, 271, 287, 303, 319, 335, 351, 367, 383, 399, 415, 431, 447, 463, 479,
+ 495, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_16x32[512]) = {  // Identity map: entry [i] == i for every i in 0..511.
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+ 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+ 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+ 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
+ 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
+ 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
+ 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
+ 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
+ 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
+ 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
+ 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,
+ 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
+ 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
+ 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
+ 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
+ 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
+ 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464,
+ 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
+ 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
+ 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
+ 510, 511,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_32x16[512]) = {  // Identity map: entry [i] == i for every i in 0..511.
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+ 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+ 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+ 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269,
+ 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
+ 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299,
+ 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
+ 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329,
+ 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
+ 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359,
+ 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374,
+ 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
+ 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
+ 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419,
+ 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
+ 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449,
+ 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464,
+ 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
+ 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
+ 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
+ 510, 511,
+};
+
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_16x16[256]) = {  // Transposed raster order: entry [r * 16 + c] == c * 16 + r (one table row per line below).
+ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
+ 1, 17, 33, 49, 65, 81, 97, 113, 129, 145, 161, 177, 193, 209, 225, 241,
+ 2, 18, 34, 50, 66, 82, 98, 114, 130, 146, 162, 178, 194, 210, 226, 242,
+ 3, 19, 35, 51, 67, 83, 99, 115, 131, 147, 163, 179, 195, 211, 227, 243,
+ 4, 20, 36, 52, 68, 84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244,
+ 5, 21, 37, 53, 69, 85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245,
+ 6, 22, 38, 54, 70, 86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246,
+ 7, 23, 39, 55, 71, 87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
+ 8, 24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248,
+ 9, 25, 41, 57, 73, 89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249,
+ 10, 26, 42, 58, 74, 90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250,
+ 11, 27, 43, 59, 75, 91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251,
+ 12, 28, 44, 60, 76, 92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252,
+ 13, 29, 45, 61, 77, 93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253,
+ 14, 30, 46, 62, 78, 94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254,
+ 15, 31, 47, 63, 79, 95, 111, 127, 143, 159, 175, 191, 207, 223, 239, 255,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_16x16[256]) = {  // Identity map: entry [i] == i for every i in 0..255.
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+ 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+ 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+ 255,
+};
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_col_iscan_16x16[256]) = {  // No closed form; laid out 16 entries per line. NOTE(review): presumably the inverse of the 16x16 column scan order - confirm.
+ 0, 4, 11, 20, 31, 43, 59, 75, 85, 109, 130, 150, 165, 181, 195, 198,
+ 1, 6, 14, 23, 34, 47, 64, 81, 95, 114, 135, 153, 171, 188, 201, 212,
+ 2, 8, 16, 25, 38, 52, 67, 83, 101, 116, 136, 157, 172, 190, 205, 216,
+ 3, 10, 18, 29, 41, 55, 71, 89, 103, 119, 141, 159, 176, 194, 208, 218,
+ 5, 12, 21, 32, 45, 58, 74, 93, 104, 123, 144, 164, 179, 196, 210, 223,
+ 7, 15, 26, 37, 49, 63, 78, 96, 112, 129, 146, 166, 182, 200, 215, 228,
+ 9, 19, 28, 39, 54, 69, 86, 102, 117, 132, 151, 170, 187, 206, 220, 230,
+ 13, 24, 35, 46, 60, 73, 91, 108, 122, 137, 154, 174, 189, 207, 224, 235,
+ 17, 30, 40, 53, 66, 82, 98, 115, 126, 142, 161, 180, 197, 213, 227, 237,
+ 22, 36, 48, 62, 76, 92, 105, 120, 133, 147, 167, 186, 203, 219, 232, 240,
+ 27, 44, 56, 70, 84, 99, 113, 127, 140, 156, 175, 193, 209, 226, 236, 244,
+ 33, 51, 68, 79, 94, 110, 125, 138, 149, 162, 184, 202, 217, 229, 241, 247,
+ 42, 61, 77, 90, 106, 121, 134, 148, 160, 173, 191, 211, 225, 238, 245, 251,
+ 50, 72, 87, 100, 118, 128, 145, 158, 168, 183, 204, 222, 233, 242, 249, 253,
+ 57, 80, 97, 111, 131, 143, 155, 169, 178, 192, 214, 231, 239, 246, 250, 254,
+ 65, 88, 107, 124, 139, 152, 163, 177, 185, 199, 221, 234, 243, 248, 252, 255,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_row_iscan_16x16[256]) = {  // No closed form; text lines below hold 15 entries and do not align with 16-wide rows. NOTE(review): presumably the inverse of the 16x16 row scan order - confirm.
+ 0, 1, 2, 4, 6, 9, 12, 17, 22, 29, 36, 43, 54, 64, 76,
+ 86, 3, 5, 7, 11, 15, 19, 25, 32, 38, 48, 59, 68, 84, 99,
+ 115, 130, 8, 10, 13, 18, 23, 27, 33, 42, 51, 60, 72, 88, 103,
+ 119, 142, 167, 14, 16, 20, 26, 31, 37, 44, 53, 61, 73, 85, 100,
+ 116, 135, 161, 185, 21, 24, 30, 35, 40, 47, 55, 65, 74, 81, 94,
+ 112, 133, 154, 179, 205, 28, 34, 39, 45, 50, 58, 67, 77, 87, 96,
+ 106, 121, 146, 169, 196, 212, 41, 46, 49, 56, 63, 70, 79, 90, 98,
+ 107, 122, 138, 159, 182, 207, 222, 52, 57, 62, 69, 75, 83, 93, 102,
+ 110, 120, 134, 150, 176, 195, 215, 226, 66, 71, 78, 82, 91, 97, 108,
+ 113, 127, 136, 148, 168, 188, 202, 221, 232, 80, 89, 92, 101, 105, 114,
+ 125, 131, 139, 151, 162, 177, 192, 208, 223, 234, 95, 104, 109, 117, 123,
+ 128, 143, 144, 155, 165, 175, 190, 206, 219, 233, 239, 111, 118, 124, 129,
+ 140, 147, 157, 164, 170, 181, 191, 203, 224, 230, 240, 243, 126, 132, 137,
+ 145, 153, 160, 174, 178, 184, 197, 204, 216, 231, 237, 244, 246, 141, 149,
+ 156, 166, 172, 180, 189, 199, 200, 210, 220, 228, 238, 242, 249, 251, 152,
+ 163, 171, 183, 186, 193, 201, 211, 214, 218, 227, 236, 245, 247, 252, 253,
+ 158, 173, 187, 194, 198, 209, 213, 217, 225, 229, 235, 241, 248, 250, 254,
+ 255,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_16x16[256]) = {  // No closed form; text lines below hold 15 entries and do not align with 16-wide rows. NOTE(review): presumably the inverse of the default 16x16 scan order - confirm.
+ 0, 2, 5, 9, 17, 24, 36, 44, 55, 72, 88, 104, 128, 143, 166,
+ 179, 1, 4, 8, 13, 20, 30, 40, 54, 66, 79, 96, 113, 141, 154,
+ 178, 196, 3, 7, 11, 18, 25, 33, 46, 57, 71, 86, 101, 119, 148,
+ 164, 186, 201, 6, 12, 16, 23, 31, 39, 53, 64, 78, 92, 110, 127,
+ 153, 169, 193, 208, 10, 14, 19, 28, 37, 47, 58, 67, 84, 98, 114,
+ 133, 161, 176, 198, 214, 15, 21, 26, 34, 43, 52, 65, 77, 91, 106,
+ 120, 140, 165, 185, 205, 221, 22, 27, 32, 41, 48, 60, 73, 85, 99,
+ 116, 130, 151, 175, 190, 211, 225, 29, 35, 42, 49, 59, 69, 81, 95,
+ 108, 125, 139, 155, 182, 197, 217, 229, 38, 45, 51, 61, 68, 80, 93,
+ 105, 118, 134, 150, 168, 191, 207, 223, 234, 50, 56, 63, 74, 83, 94,
+ 109, 117, 129, 147, 163, 177, 199, 213, 228, 238, 62, 70, 76, 87, 97,
+ 107, 122, 131, 145, 159, 172, 188, 210, 222, 235, 242, 75, 82, 90, 102,
+ 112, 124, 138, 146, 157, 173, 187, 202, 219, 230, 240, 245, 89, 100, 111,
+ 123, 132, 142, 156, 167, 180, 189, 203, 216, 231, 237, 246, 250, 103, 115,
+ 126, 136, 149, 162, 171, 183, 194, 204, 215, 224, 236, 241, 248, 252, 121,
+ 135, 144, 158, 170, 181, 192, 200, 209, 218, 227, 233, 243, 244, 251, 254,
+ 137, 152, 160, 174, 184, 195, 206, 212, 220, 226, 232, 239, 247, 249, 253,
+ 255,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_mcol_iscan_32x32[1024]) = {  // Transposed raster order: entry [r * 32 + c] == c * 32 + r (text lines below do not align with table rows).
+ 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416,
+ 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864,
+ 896, 928, 960, 992, 1, 33, 65, 97, 129, 161, 193, 225, 257, 289,
+ 321, 353, 385, 417, 449, 481, 513, 545, 577, 609, 641, 673, 705, 737,
+ 769, 801, 833, 865, 897, 929, 961, 993, 2, 34, 66, 98, 130, 162,
+ 194, 226, 258, 290, 322, 354, 386, 418, 450, 482, 514, 546, 578, 610,
+ 642, 674, 706, 738, 770, 802, 834, 866, 898, 930, 962, 994, 3, 35,
+ 67, 99, 131, 163, 195, 227, 259, 291, 323, 355, 387, 419, 451, 483,
+ 515, 547, 579, 611, 643, 675, 707, 739, 771, 803, 835, 867, 899, 931,
+ 963, 995, 4, 36, 68, 100, 132, 164, 196, 228, 260, 292, 324, 356,
+ 388, 420, 452, 484, 516, 548, 580, 612, 644, 676, 708, 740, 772, 804,
+ 836, 868, 900, 932, 964, 996, 5, 37, 69, 101, 133, 165, 197, 229,
+ 261, 293, 325, 357, 389, 421, 453, 485, 517, 549, 581, 613, 645, 677,
+ 709, 741, 773, 805, 837, 869, 901, 933, 965, 997, 6, 38, 70, 102,
+ 134, 166, 198, 230, 262, 294, 326, 358, 390, 422, 454, 486, 518, 550,
+ 582, 614, 646, 678, 710, 742, 774, 806, 838, 870, 902, 934, 966, 998,
+ 7, 39, 71, 103, 135, 167, 199, 231, 263, 295, 327, 359, 391, 423,
+ 455, 487, 519, 551, 583, 615, 647, 679, 711, 743, 775, 807, 839, 871,
+ 903, 935, 967, 999, 8, 40, 72, 104, 136, 168, 200, 232, 264, 296,
+ 328, 360, 392, 424, 456, 488, 520, 552, 584, 616, 648, 680, 712, 744,
+ 776, 808, 840, 872, 904, 936, 968, 1000, 9, 41, 73, 105, 137, 169,
+ 201, 233, 265, 297, 329, 361, 393, 425, 457, 489, 521, 553, 585, 617,
+ 649, 681, 713, 745, 777, 809, 841, 873, 905, 937, 969, 1001, 10, 42,
+ 74, 106, 138, 170, 202, 234, 266, 298, 330, 362, 394, 426, 458, 490,
+ 522, 554, 586, 618, 650, 682, 714, 746, 778, 810, 842, 874, 906, 938,
+ 970, 1002, 11, 43, 75, 107, 139, 171, 203, 235, 267, 299, 331, 363,
+ 395, 427, 459, 491, 523, 555, 587, 619, 651, 683, 715, 747, 779, 811,
+ 843, 875, 907, 939, 971, 1003, 12, 44, 76, 108, 140, 172, 204, 236,
+ 268, 300, 332, 364, 396, 428, 460, 492, 524, 556, 588, 620, 652, 684,
+ 716, 748, 780, 812, 844, 876, 908, 940, 972, 1004, 13, 45, 77, 109,
+ 141, 173, 205, 237, 269, 301, 333, 365, 397, 429, 461, 493, 525, 557,
+ 589, 621, 653, 685, 717, 749, 781, 813, 845, 877, 909, 941, 973, 1005,
+ 14, 46, 78, 110, 142, 174, 206, 238, 270, 302, 334, 366, 398, 430,
+ 462, 494, 526, 558, 590, 622, 654, 686, 718, 750, 782, 814, 846, 878,
+ 910, 942, 974, 1006, 15, 47, 79, 111, 143, 175, 207, 239, 271, 303,
+ 335, 367, 399, 431, 463, 495, 527, 559, 591, 623, 655, 687, 719, 751,
+ 783, 815, 847, 879, 911, 943, 975, 1007, 16, 48, 80, 112, 144, 176,
+ 208, 240, 272, 304, 336, 368, 400, 432, 464, 496, 528, 560, 592, 624,
+ 656, 688, 720, 752, 784, 816, 848, 880, 912, 944, 976, 1008, 17, 49,
+ 81, 113, 145, 177, 209, 241, 273, 305, 337, 369, 401, 433, 465, 497,
+ 529, 561, 593, 625, 657, 689, 721, 753, 785, 817, 849, 881, 913, 945,
+ 977, 1009, 18, 50, 82, 114, 146, 178, 210, 242, 274, 306, 338, 370,
+ 402, 434, 466, 498, 530, 562, 594, 626, 658, 690, 722, 754, 786, 818,
+ 850, 882, 914, 946, 978, 1010, 19, 51, 83, 115, 147, 179, 211, 243,
+ 275, 307, 339, 371, 403, 435, 467, 499, 531, 563, 595, 627, 659, 691,
+ 723, 755, 787, 819, 851, 883, 915, 947, 979, 1011, 20, 52, 84, 116,
+ 148, 180, 212, 244, 276, 308, 340, 372, 404, 436, 468, 500, 532, 564,
+ 596, 628, 660, 692, 724, 756, 788, 820, 852, 884, 916, 948, 980, 1012,
+ 21, 53, 85, 117, 149, 181, 213, 245, 277, 309, 341, 373, 405, 437,
+ 469, 501, 533, 565, 597, 629, 661, 693, 725, 757, 789, 821, 853, 885,
+ 917, 949, 981, 1013, 22, 54, 86, 118, 150, 182, 214, 246, 278, 310,
+ 342, 374, 406, 438, 470, 502, 534, 566, 598, 630, 662, 694, 726, 758,
+ 790, 822, 854, 886, 918, 950, 982, 1014, 23, 55, 87, 119, 151, 183,
+ 215, 247, 279, 311, 343, 375, 407, 439, 471, 503, 535, 567, 599, 631,
+ 663, 695, 727, 759, 791, 823, 855, 887, 919, 951, 983, 1015, 24, 56,
+ 88, 120, 152, 184, 216, 248, 280, 312, 344, 376, 408, 440, 472, 504,
+ 536, 568, 600, 632, 664, 696, 728, 760, 792, 824, 856, 888, 920, 952,
+ 984, 1016, 25, 57, 89, 121, 153, 185, 217, 249, 281, 313, 345, 377,
+ 409, 441, 473, 505, 537, 569, 601, 633, 665, 697, 729, 761, 793, 825,
+ 857, 889, 921, 953, 985, 1017, 26, 58, 90, 122, 154, 186, 218, 250,
+ 282, 314, 346, 378, 410, 442, 474, 506, 538, 570, 602, 634, 666, 698,
+ 730, 762, 794, 826, 858, 890, 922, 954, 986, 1018, 27, 59, 91, 123,
+ 155, 187, 219, 251, 283, 315, 347, 379, 411, 443, 475, 507, 539, 571,
+ 603, 635, 667, 699, 731, 763, 795, 827, 859, 891, 923, 955, 987, 1019,
+ 28, 60, 92, 124, 156, 188, 220, 252, 284, 316, 348, 380, 412, 444,
+ 476, 508, 540, 572, 604, 636, 668, 700, 732, 764, 796, 828, 860, 892,
+ 924, 956, 988, 1020, 29, 61, 93, 125, 157, 189, 221, 253, 285, 317,
+ 349, 381, 413, 445, 477, 509, 541, 573, 605, 637, 669, 701, 733, 765,
+ 797, 829, 861, 893, 925, 957, 989, 1021, 30, 62, 94, 126, 158, 190,
+ 222, 254, 286, 318, 350, 382, 414, 446, 478, 510, 542, 574, 606, 638,
+ 670, 702, 734, 766, 798, 830, 862, 894, 926, 958, 990, 1022, 31, 63,
+ 95, 127, 159, 191, 223, 255, 287, 319, 351, 383, 415, 447, 479, 511,
+ 543, 575, 607, 639, 671, 703, 735, 767, 799, 831, 863, 895, 927, 959,
+ 991, 1023,
+};
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_mrow_iscan_32x32[1024]) = {  // Identity map: entry [i] == i for every i in 0..1023.
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
+ 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
+ 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+ 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
+ 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
+ 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
+ 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
+ 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
+ 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
+ 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
+ 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233,
+ 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246,
+ 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259,
+ 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272,
+ 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285,
+ 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298,
+ 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311,
+ 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
+ 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337,
+ 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350,
+ 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363,
+ 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376,
+ 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389,
+ 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402,
+ 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415,
+ 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428,
+ 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441,
+ 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454,
+ 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467,
+ 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480,
+ 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493,
+ 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506,
+ 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519,
+ 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532,
+ 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545,
+ 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558,
+ 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571,
+ 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584,
+ 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597,
+ 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610,
+ 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623,
+ 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636,
+ 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649,
+ 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662,
+ 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675,
+ 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688,
+ 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701,
+ 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714,
+ 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727,
+ 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740,
+ 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753,
+ 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766,
+ 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779,
+ 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792,
+ 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805,
+ 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818,
+ 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831,
+ 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844,
+ 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857,
+ 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870,
+ 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883,
+ 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896,
+ 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909,
+ 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922,
+ 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935,
+ 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948,
+ 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961,
+ 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974,
+ 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987,
+ 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000,
+ 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013,
+ 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
+};
+#endif // CONFIG_EXT_TX
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_default_iscan_32x32[1024]) = {
+ 0, 2, 5, 10, 17, 25, 38, 47, 62, 83, 101, 121, 145,
+ 170, 193, 204, 210, 219, 229, 233, 245, 257, 275, 299, 342, 356,
+ 377, 405, 455, 471, 495, 527, 1, 4, 8, 15, 22, 30, 45,
+ 58, 74, 92, 112, 133, 158, 184, 203, 215, 222, 228, 234, 237,
+ 256, 274, 298, 317, 355, 376, 404, 426, 470, 494, 526, 551, 3,
+ 7, 12, 18, 28, 36, 52, 64, 82, 102, 118, 142, 164, 189,
+ 208, 217, 224, 231, 235, 238, 273, 297, 316, 329, 375, 403, 425,
+ 440, 493, 525, 550, 567, 6, 11, 16, 23, 31, 43, 60, 73,
+ 90, 109, 126, 150, 173, 196, 211, 220, 226, 232, 236, 239, 296,
+ 315, 328, 335, 402, 424, 439, 447, 524, 549, 566, 575, 9, 14,
+ 19, 29, 37, 50, 65, 78, 95, 116, 134, 157, 179, 201, 214,
+ 223, 244, 255, 272, 295, 341, 354, 374, 401, 454, 469, 492, 523,
+ 582, 596, 617, 645, 13, 20, 26, 35, 44, 54, 72, 85, 105,
+ 123, 140, 163, 182, 205, 216, 225, 254, 271, 294, 314, 353, 373,
+ 400, 423, 468, 491, 522, 548, 595, 616, 644, 666, 21, 27, 33,
+ 42, 53, 63, 80, 94, 113, 132, 151, 172, 190, 209, 218, 227,
+ 270, 293, 313, 327, 372, 399, 422, 438, 490, 521, 547, 565, 615,
+ 643, 665, 680, 24, 32, 39, 48, 57, 71, 88, 104, 120, 139,
+ 159, 178, 197, 212, 221, 230, 292, 312, 326, 334, 398, 421, 437,
+ 446, 520, 546, 564, 574, 642, 664, 679, 687, 34, 40, 46, 56,
+ 68, 81, 96, 111, 130, 147, 167, 186, 243, 253, 269, 291, 340,
+ 352, 371, 397, 453, 467, 489, 519, 581, 594, 614, 641, 693, 705,
+ 723, 747, 41, 49, 55, 67, 77, 91, 107, 124, 138, 161, 177,
+ 194, 252, 268, 290, 311, 351, 370, 396, 420, 466, 488, 518, 545,
+ 593, 613, 640, 663, 704, 722, 746, 765, 51, 59, 66, 76, 89,
+ 99, 119, 131, 149, 168, 181, 200, 267, 289, 310, 325, 369, 395,
+ 419, 436, 487, 517, 544, 563, 612, 639, 662, 678, 721, 745, 764,
+ 777, 61, 69, 75, 87, 100, 114, 129, 144, 162, 180, 191, 207,
+ 288, 309, 324, 333, 394, 418, 435, 445, 516, 543, 562, 573, 638,
+ 661, 677, 686, 744, 763, 776, 783, 70, 79, 86, 97, 108, 122,
+ 137, 155, 242, 251, 266, 287, 339, 350, 368, 393, 452, 465, 486,
+ 515, 580, 592, 611, 637, 692, 703, 720, 743, 788, 798, 813, 833,
+ 84, 93, 103, 110, 125, 141, 154, 171, 250, 265, 286, 308, 349,
+ 367, 392, 417, 464, 485, 514, 542, 591, 610, 636, 660, 702, 719,
+ 742, 762, 797, 812, 832, 848, 98, 106, 115, 127, 143, 156, 169,
+ 185, 264, 285, 307, 323, 366, 391, 416, 434, 484, 513, 541, 561,
+ 609, 635, 659, 676, 718, 741, 761, 775, 811, 831, 847, 858, 117,
+ 128, 136, 148, 160, 175, 188, 198, 284, 306, 322, 332, 390, 415,
+ 433, 444, 512, 540, 560, 572, 634, 658, 675, 685, 740, 760, 774,
+ 782, 830, 846, 857, 863, 135, 146, 152, 165, 241, 249, 263, 283,
+ 338, 348, 365, 389, 451, 463, 483, 511, 579, 590, 608, 633, 691,
+ 701, 717, 739, 787, 796, 810, 829, 867, 875, 887, 903, 153, 166,
+ 174, 183, 248, 262, 282, 305, 347, 364, 388, 414, 462, 482, 510,
+ 539, 589, 607, 632, 657, 700, 716, 738, 759, 795, 809, 828, 845,
+ 874, 886, 902, 915, 176, 187, 195, 202, 261, 281, 304, 321, 363,
+ 387, 413, 432, 481, 509, 538, 559, 606, 631, 656, 674, 715, 737,
+ 758, 773, 808, 827, 844, 856, 885, 901, 914, 923, 192, 199, 206,
+ 213, 280, 303, 320, 331, 386, 412, 431, 443, 508, 537, 558, 571,
+ 630, 655, 673, 684, 736, 757, 772, 781, 826, 843, 855, 862, 900,
+ 913, 922, 927, 240, 247, 260, 279, 337, 346, 362, 385, 450, 461,
+ 480, 507, 578, 588, 605, 629, 690, 699, 714, 735, 786, 794, 807,
+ 825, 866, 873, 884, 899, 930, 936, 945, 957, 246, 259, 278, 302,
+ 345, 361, 384, 411, 460, 479, 506, 536, 587, 604, 628, 654, 698,
+ 713, 734, 756, 793, 806, 824, 842, 872, 883, 898, 912, 935, 944,
+ 956, 966, 258, 277, 301, 319, 360, 383, 410, 430, 478, 505, 535,
+ 557, 603, 627, 653, 672, 712, 733, 755, 771, 805, 823, 841, 854,
+ 882, 897, 911, 921, 943, 955, 965, 972, 276, 300, 318, 330, 382,
+ 409, 429, 442, 504, 534, 556, 570, 626, 652, 671, 683, 732, 754,
+ 770, 780, 822, 840, 853, 861, 896, 910, 920, 926, 954, 964, 971,
+ 975, 336, 344, 359, 381, 449, 459, 477, 503, 577, 586, 602, 625,
+ 689, 697, 711, 731, 785, 792, 804, 821, 865, 871, 881, 895, 929,
+ 934, 942, 953, 977, 981, 987, 995, 343, 358, 380, 408, 458, 476,
+ 502, 533, 585, 601, 624, 651, 696, 710, 730, 753, 791, 803, 820,
+ 839, 870, 880, 894, 909, 933, 941, 952, 963, 980, 986, 994, 1001,
+ 357, 379, 407, 428, 475, 501, 532, 555, 600, 623, 650, 670, 709,
+ 729, 752, 769, 802, 819, 838, 852, 879, 893, 908, 919, 940, 951,
+ 962, 970, 985, 993, 1000, 1005, 378, 406, 427, 441, 500, 531, 554,
+ 569, 622, 649, 669, 682, 728, 751, 768, 779, 818, 837, 851, 860,
+ 892, 907, 918, 925, 950, 961, 969, 974, 992, 999, 1004, 1007, 448,
+ 457, 474, 499, 576, 584, 599, 621, 688, 695, 708, 727, 784, 790,
+ 801, 817, 864, 869, 878, 891, 928, 932, 939, 949, 976, 979, 984,
+ 991, 1008, 1010, 1013, 1017, 456, 473, 498, 530, 583, 598, 620, 648,
+ 694, 707, 726, 750, 789, 800, 816, 836, 868, 877, 890, 906, 931,
+ 938, 948, 960, 978, 983, 990, 998, 1009, 1012, 1016, 1020, 472, 497,
+ 529, 553, 597, 619, 647, 668, 706, 725, 749, 767, 799, 815, 835,
+ 850, 876, 889, 905, 917, 937, 947, 959, 968, 982, 989, 997, 1003,
+ 1011, 1015, 1019, 1022, 496, 528, 552, 568, 618, 646, 667, 681, 724,
+ 748, 766, 778, 814, 834, 849, 859, 888, 904, 916, 924, 946, 958,
+ 967, 973, 988, 996, 1002, 1006, 1014, 1018, 1021, 1023,
+};
+
+#if CONFIG_EXT_TX
+DECLARE_ALIGNED(16, static const int16_t, vp10_v2_iscan_32x32[1024]) = {  // Declared with 1024 int16_t entries; the list begins at 0 and ends at 1023.
+ 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, 121,
+ 142, 166, 189, 512, 518, 527, 539, 551, 566, 584, 602, 621, 644,
+ 668, 695, 721, 748, 780, 811, 2, 3, 6, 11, 17, 26, 35,
+ 45, 58, 73, 90, 106, 123, 146, 168, 193, 513, 519, 528, 540,
+ 553, 567, 585, 603, 622, 647, 670, 696, 722, 751, 783, 812, 5,
+ 7, 8, 13, 20, 28, 37, 50, 62, 75, 92, 108, 129, 150,
+ 170, 195, 514, 521, 530, 541, 554, 569, 587, 605, 625, 649, 671,
+ 699, 725, 752, 785, 815, 10, 12, 14, 19, 23, 31, 41, 52,
+ 65, 81, 96, 113, 133, 152, 175, 201, 515, 522, 531, 542, 556,
+ 572, 589, 607, 629, 651, 673, 700, 726, 757, 788, 819, 16, 18,
+ 21, 24, 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181,
+ 203, 516, 523, 534, 545, 559, 574, 591, 610, 632, 654, 679, 704,
+ 730, 762, 791, 824, 25, 27, 29, 32, 40, 46, 54, 67, 79,
+ 94, 109, 127, 143, 164, 185, 210, 517, 525, 535, 547, 561, 578,
+ 595, 615, 635, 656, 684, 707, 737, 766, 793, 830, 34, 36, 38,
+ 42, 49, 55, 64, 76, 87, 102, 117, 135, 154, 176, 197, 219,
+ 520, 529, 538, 550, 565, 580, 598, 618, 639, 664, 687, 712, 741,
+ 769, 802, 833, 44, 47, 51, 53, 60, 68, 77, 85, 98, 114,
+ 131, 147, 162, 183, 208, 227, 524, 533, 544, 557, 571, 588, 606,
+ 623, 645, 667, 692, 720, 747, 776, 806, 838, 57, 61, 63, 66,
+ 70, 80, 88, 99, 112, 124, 140, 159, 179, 199, 216, 233, 526,
+ 536, 548, 562, 577, 593, 613, 633, 653, 676, 701, 727, 756, 786,
+ 814, 847, 72, 74, 78, 82, 84, 95, 103, 115, 125, 139, 156,
+ 173, 190, 211, 229, 246, 532, 543, 555, 568, 581, 601, 619, 637,
+ 663, 685, 709, 738, 763, 792, 826, 855, 89, 91, 93, 97, 101,
+ 110, 118, 132, 141, 157, 171, 186, 206, 224, 241, 255, 537, 549,
+ 560, 576, 592, 608, 628, 650, 669, 693, 719, 744, 773, 805, 834,
+ 862, 105, 107, 111, 116, 120, 128, 136, 148, 160, 174, 187, 205,
+ 221, 236, 251, 267, 546, 558, 570, 583, 600, 617, 636, 657, 680,
+ 706, 729, 758, 787, 813, 846, 871, 122, 126, 130, 134, 138, 144,
+ 155, 163, 180, 191, 207, 222, 232, 248, 264, 278, 552, 564, 579,
+ 594, 609, 630, 648, 666, 688, 715, 742, 768, 797, 827, 856, 877,
+ 145, 149, 151, 153, 161, 165, 177, 184, 200, 212, 225, 237, 249,
+ 262, 275, 289, 563, 575, 590, 604, 620, 638, 660, 683, 705, 728,
+ 753, 779, 809, 839, 866, 889, 167, 169, 172, 178, 182, 188, 198,
+ 209, 217, 230, 242, 252, 265, 276, 288, 301, 573, 586, 599, 616,
+ 634, 652, 672, 694, 716, 743, 767, 794, 825, 850, 874, 899, 192,
+ 194, 196, 202, 204, 213, 220, 228, 234, 247, 256, 268, 279, 290,
+ 302, 315, 582, 597, 614, 631, 646, 665, 686, 708, 732, 759, 784,
+ 810, 837, 863, 886, 908, 214, 215, 218, 223, 226, 231, 239, 244,
+ 253, 261, 271, 283, 292, 304, 317, 325, 596, 611, 626, 642, 661,
+ 681, 702, 723, 745, 770, 800, 828, 853, 875, 897, 919, 235, 238,
+ 240, 243, 245, 250, 257, 263, 270, 280, 287, 298, 307, 319, 329,
+ 340, 612, 624, 640, 658, 677, 697, 717, 739, 764, 789, 816, 844,
+ 867, 890, 909, 927, 254, 258, 259, 260, 266, 269, 272, 282, 286,
+ 296, 303, 312, 323, 333, 341, 355, 627, 641, 655, 674, 690, 713,
+ 735, 760, 781, 807, 835, 857, 880, 902, 921, 940, 273, 274, 277,
+ 281, 284, 285, 291, 299, 305, 310, 320, 327, 337, 346, 357, 369,
+ 643, 659, 675, 689, 710, 733, 754, 777, 803, 831, 851, 872, 892,
+ 913, 934, 950, 293, 294, 295, 297, 300, 306, 308, 314, 321, 326,
+ 335, 343, 352, 361, 372, 378, 662, 678, 691, 711, 731, 749, 774,
+ 798, 822, 848, 869, 887, 906, 925, 942, 961, 309, 311, 313, 316,
+ 318, 322, 324, 332, 338, 344, 351, 358, 367, 375, 386, 394, 682,
+ 698, 714, 734, 750, 772, 795, 820, 842, 864, 884, 904, 923, 938,
+ 954, 967, 328, 330, 331, 334, 336, 339, 342, 348, 354, 359, 366,
+ 374, 382, 391, 400, 409, 703, 718, 736, 755, 775, 796, 818, 840,
+ 860, 882, 900, 917, 936, 952, 965, 977, 345, 347, 349, 350, 353,
+ 356, 360, 364, 371, 376, 383, 389, 395, 406, 412, 423, 724, 740,
+ 761, 778, 799, 821, 841, 859, 878, 895, 915, 932, 948, 963, 975,
+ 986, 362, 363, 365, 368, 370, 373, 377, 379, 387, 392, 397, 405,
+ 411, 420, 428, 439, 746, 765, 782, 804, 823, 843, 861, 879, 894,
+ 911, 930, 946, 959, 973, 984, 994, 380, 381, 384, 385, 388, 390,
+ 393, 396, 403, 408, 413, 422, 427, 436, 444, 452, 771, 790, 808,
+ 832, 849, 865, 883, 896, 912, 928, 944, 957, 971, 982, 992, 1001,
+ 398, 399, 401, 402, 404, 407, 410, 414, 419, 425, 429, 437, 442,
+ 449, 458, 465, 801, 817, 836, 852, 870, 885, 901, 916, 931, 945,
+ 956, 969, 980, 990, 999, 1007, 415, 416, 417, 418, 421, 424, 426,
+ 430, 434, 441, 445, 453, 459, 463, 473, 480, 829, 845, 858, 873,
+ 888, 905, 918, 933, 947, 958, 970, 979, 988, 997, 1005, 1012, 431,
+ 432, 433, 435, 438, 440, 443, 446, 451, 456, 461, 468, 475, 479,
+ 488, 494, 854, 868, 881, 893, 907, 924, 937, 949, 960, 972, 981,
+ 989, 996, 1003, 1010, 1016, 447, 448, 450, 454, 455, 457, 460, 462,
+ 469, 472, 477, 482, 490, 495, 499, 503, 876, 891, 903, 914, 926,
+ 939, 953, 964, 974, 983, 991, 998, 1004, 1009, 1014, 1019, 464, 466,
+ 467, 470, 471, 474, 476, 478, 484, 489, 493, 497, 501, 504, 506,
+ 508, 898, 910, 922, 935, 943, 955, 966, 976, 985, 993, 1000, 1006,
+ 1011, 1015, 1018, 1021, 481, 483, 485, 486, 487, 491, 492, 496, 498,
+ 500, 502, 505, 507, 509, 510, 511, 920, 929, 941, 951, 962, 968,
+ 978, 987, 995, 1002, 1008, 1013, 1017, 1020, 1022, 1023,
+};  // NOTE(review): read as 32 rows of 32, each row looks like 16 values < 512 followed by 16 values >= 512 - confirm.
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_h2_iscan_32x32[1024]) = {  // Declared with 1024 int16_t entries; the list begins at 0 and ends at 1023.
+ 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, 121,
+ 142, 166, 189, 214, 233, 254, 273, 292, 309, 328, 345, 362, 378,
+ 397, 415, 431, 447, 464, 481, 2, 3, 6, 11, 17, 26, 35,
+ 45, 58, 73, 90, 106, 123, 146, 168, 193, 215, 236, 255, 274,
+ 294, 310, 329, 346, 363, 381, 399, 416, 432, 448, 465, 482, 5,
+ 7, 8, 13, 20, 28, 37, 50, 62, 75, 92, 108, 129, 150,
+ 170, 195, 216, 240, 259, 275, 295, 312, 331, 348, 365, 383, 400,
+ 417, 433, 449, 467, 485, 10, 12, 14, 19, 23, 31, 41, 52,
+ 65, 81, 96, 113, 133, 152, 175, 201, 221, 243, 260, 280, 297,
+ 315, 333, 350, 367, 385, 402, 418, 434, 452, 470, 486, 16, 18,
+ 21, 24, 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181,
+ 203, 226, 244, 264, 283, 300, 318, 335, 353, 370, 388, 404, 420,
+ 438, 455, 471, 487, 25, 27, 29, 32, 40, 46, 54, 67, 79,
+ 94, 109, 127, 143, 164, 185, 210, 231, 250, 269, 285, 304, 322,
+ 339, 356, 373, 389, 407, 423, 440, 457, 473, 491, 34, 36, 38,
+ 42, 49, 55, 64, 76, 87, 102, 117, 135, 154, 176, 197, 219,
+ 239, 256, 272, 291, 308, 324, 341, 359, 377, 393, 410, 426, 442,
+ 460, 476, 492, 44, 47, 51, 53, 60, 68, 77, 85, 98, 114,
+ 131, 147, 162, 183, 208, 227, 245, 262, 282, 298, 314, 332, 349,
+ 364, 379, 396, 412, 430, 446, 462, 478, 495, 57, 61, 63, 66,
+ 70, 80, 88, 99, 112, 124, 140, 159, 179, 199, 217, 234, 253,
+ 270, 286, 305, 321, 337, 354, 371, 387, 403, 419, 435, 451, 468,
+ 484, 498, 72, 74, 78, 82, 84, 95, 103, 115, 125, 139, 156,
+ 173, 190, 211, 229, 246, 261, 281, 296, 311, 325, 344, 360, 375,
+ 392, 408, 425, 441, 456, 472, 489, 500, 89, 91, 93, 97, 101,
+ 110, 118, 132, 141, 157, 171, 186, 206, 224, 241, 257, 271, 287,
+ 303, 320, 336, 351, 366, 384, 398, 413, 429, 445, 461, 477, 493,
+ 502, 105, 107, 111, 116, 120, 128, 136, 148, 160, 174, 187, 205,
+ 222, 237, 251, 267, 284, 299, 313, 327, 343, 358, 374, 390, 405,
+ 422, 437, 453, 469, 483, 497, 505, 122, 126, 130, 134, 138, 144,
+ 155, 163, 180, 191, 207, 223, 232, 248, 265, 278, 293, 307, 323,
+ 338, 352, 368, 382, 395, 411, 427, 443, 459, 475, 490, 501, 507,
+ 145, 149, 151, 153, 161, 165, 177, 184, 200, 212, 225, 238, 249,
+ 263, 276, 289, 306, 319, 334, 347, 361, 376, 391, 406, 421, 436,
+ 450, 463, 479, 496, 504, 509, 167, 169, 172, 178, 182, 188, 198,
+ 209, 218, 230, 242, 252, 266, 277, 288, 301, 317, 330, 342, 357,
+ 372, 386, 401, 414, 428, 444, 458, 474, 488, 499, 506, 510, 192,
+ 194, 196, 202, 204, 213, 220, 228, 235, 247, 258, 268, 279, 290,
+ 302, 316, 326, 340, 355, 369, 380, 394, 409, 424, 439, 454, 466,
+ 480, 494, 503, 508, 511, 512, 513, 514, 515, 516, 517, 520, 523,
+ 526, 532, 537, 545, 551, 561, 573, 581, 596, 610, 625, 642, 661,
+ 680, 701, 722, 745, 770, 800, 827, 853, 875, 897, 919, 518, 519,
+ 521, 522, 524, 525, 528, 533, 536, 542, 549, 557, 564, 575, 585,
+ 597, 611, 623, 640, 656, 676, 696, 717, 739, 763, 789, 815, 844,
+ 867, 889, 909, 927, 527, 529, 530, 531, 534, 535, 538, 544, 548,
+ 555, 560, 569, 579, 589, 598, 614, 626, 641, 655, 673, 690, 712,
+ 735, 760, 780, 806, 834, 857, 880, 902, 921, 940, 539, 540, 541,
+ 543, 546, 547, 550, 558, 562, 567, 576, 583, 593, 603, 616, 631,
+ 643, 657, 674, 689, 710, 733, 752, 776, 803, 830, 850, 872, 892,
+ 913, 934, 950, 552, 553, 554, 556, 559, 563, 565, 571, 577, 582,
+ 591, 600, 609, 620, 634, 644, 662, 677, 691, 711, 730, 748, 773,
+ 798, 822, 847, 869, 887, 906, 925, 942, 961, 566, 568, 570, 572,
+ 574, 578, 580, 588, 594, 601, 608, 617, 629, 637, 652, 665, 681,
+ 697, 713, 734, 749, 772, 793, 819, 842, 863, 884, 904, 923, 938,
+ 954, 967, 584, 586, 587, 590, 592, 595, 599, 605, 613, 618, 628,
+ 636, 648, 660, 671, 686, 702, 718, 736, 753, 774, 794, 818, 840,
+ 860, 882, 900, 917, 936, 952, 965, 977, 602, 604, 606, 607, 612,
+ 615, 619, 624, 633, 638, 649, 658, 666, 683, 692, 707, 723, 740,
+ 761, 777, 799, 820, 841, 859, 877, 895, 915, 932, 948, 963, 975,
+ 986, 621, 622, 627, 630, 632, 635, 639, 645, 653, 663, 668, 682,
+ 688, 704, 716, 732, 746, 764, 781, 804, 823, 843, 861, 878, 894,
+ 911, 930, 946, 959, 973, 984, 994, 646, 647, 650, 651, 654, 659,
+ 664, 667, 678, 685, 693, 706, 715, 728, 743, 757, 771, 790, 807,
+ 831, 848, 864, 883, 896, 912, 928, 944, 957, 971, 982, 992, 1001,
+ 669, 670, 672, 675, 679, 684, 687, 694, 703, 709, 719, 729, 741,
+ 754, 767, 783, 801, 816, 835, 851, 870, 885, 901, 916, 931, 945,
+ 956, 969, 980, 990, 999, 1007, 695, 698, 699, 700, 705, 708, 714,
+ 720, 726, 738, 744, 758, 768, 779, 795, 810, 828, 845, 858, 873,
+ 888, 905, 918, 933, 947, 958, 970, 979, 988, 997, 1005, 1012, 721,
+ 724, 725, 727, 731, 737, 742, 747, 756, 765, 775, 786, 797, 809,
+ 825, 837, 854, 868, 881, 893, 907, 924, 937, 949, 960, 972, 981,
+ 989, 996, 1003, 1010, 1016, 750, 751, 755, 759, 762, 766, 769, 778,
+ 787, 792, 805, 812, 829, 838, 852, 865, 876, 890, 903, 914, 926,
+ 939, 953, 964, 974, 983, 991, 998, 1004, 1009, 1014, 1019, 782, 784,
+ 785, 788, 791, 796, 802, 808, 814, 826, 836, 846, 856, 866, 874,
+ 886, 898, 910, 922, 935, 943, 955, 966, 976, 985, 993, 1000, 1006,
+ 1011, 1015, 1018, 1021, 811, 813, 817, 821, 824, 832, 833, 839, 849,
+ 855, 862, 871, 879, 891, 899, 908, 920, 929, 941, 951, 962, 968,
+ 978, 987, 995, 1002, 1008, 1013, 1017, 1020, 1022, 1023,
+};  // NOTE(review): the first 512 entries look < 512 and the remaining 512 look >= 512 (two 16-row halves if read as 32 rows of 32) - confirm.
+
+DECLARE_ALIGNED(16, static const int16_t, vp10_qtr_iscan_32x32[1024]) = {  // Declared with 1024 int16_t entries; the list begins at 0 and ends at 1023.
+ 0, 1, 4, 9, 15, 22, 33, 43, 56, 71, 86, 104, 121,
+ 142, 166, 189, 256, 268, 286, 310, 334, 364, 400, 435, 471, 510,
+ 553, 598, 640, 683, 732, 780, 2, 3, 6, 11, 17, 26, 35,
+ 45, 58, 73, 90, 106, 123, 146, 168, 193, 258, 270, 288, 312,
+ 338, 366, 402, 437, 473, 516, 557, 600, 642, 687, 736, 782, 5,
+ 7, 8, 13, 20, 28, 37, 50, 62, 75, 92, 108, 129, 150,
+ 170, 195, 260, 274, 292, 314, 340, 370, 406, 441, 478, 520, 559,
+ 604, 646, 689, 740, 788, 10, 12, 14, 19, 23, 31, 41, 52,
+ 65, 81, 96, 113, 133, 152, 175, 201, 262, 276, 294, 316, 344,
+ 376, 410, 445, 484, 524, 563, 606, 648, 697, 746, 793, 16, 18,
+ 21, 24, 30, 39, 48, 59, 69, 83, 100, 119, 137, 158, 181,
+ 203, 264, 278, 300, 322, 350, 380, 414, 451, 490, 530, 571, 612,
+ 656, 705, 750, 799, 25, 27, 29, 32, 40, 46, 54, 67, 79,
+ 94, 109, 127, 143, 164, 185, 210, 266, 282, 302, 326, 354, 388,
+ 422, 459, 496, 533, 579, 618, 665, 711, 754, 809, 34, 36, 38,
+ 42, 49, 55, 64, 76, 87, 102, 117, 135, 154, 176, 197, 216,
+ 272, 289, 308, 332, 362, 392, 427, 465, 504, 545, 585, 626, 671,
+ 717, 766, 813, 44, 47, 51, 53, 60, 68, 77, 85, 98, 114,
+ 131, 147, 162, 183, 208, 222, 279, 298, 320, 346, 374, 408, 442,
+ 475, 511, 551, 592, 638, 681, 726, 772, 821, 57, 61, 63, 66,
+ 70, 80, 88, 99, 112, 124, 140, 159, 179, 199, 214, 227, 284,
+ 304, 328, 355, 386, 418, 455, 492, 528, 567, 608, 649, 695, 742,
+ 786, 833, 72, 74, 78, 82, 84, 95, 103, 115, 125, 139, 156,
+ 173, 190, 211, 224, 233, 296, 317, 342, 367, 394, 433, 466, 500,
+ 543, 581, 622, 667, 707, 752, 803, 843, 89, 91, 93, 97, 101,
+ 110, 118, 132, 141, 157, 171, 186, 206, 220, 231, 239, 306, 330,
+ 352, 384, 415, 447, 482, 521, 554, 593, 636, 677, 722, 770, 815,
+ 852, 105, 107, 111, 116, 120, 128, 136, 148, 160, 174, 187, 205,
+ 218, 229, 237, 244, 323, 347, 371, 398, 431, 463, 498, 534, 573,
+ 616, 654, 698, 743, 783, 831, 864, 122, 126, 130, 134, 138, 144,
+ 155, 163, 180, 191, 207, 219, 226, 235, 242, 248, 335, 360, 390,
+ 419, 449, 485, 518, 549, 587, 630, 672, 715, 760, 805, 845, 872,
+ 145, 149, 151, 153, 161, 165, 177, 184, 200, 212, 221, 230, 236,
+ 241, 246, 251, 356, 382, 411, 438, 469, 501, 539, 577, 613, 652,
+ 690, 730, 776, 822, 858, 886, 167, 169, 172, 178, 182, 188, 198,
+ 209, 215, 225, 232, 238, 243, 247, 250, 253, 378, 403, 428, 461,
+ 494, 526, 560, 594, 632, 675, 713, 755, 801, 837, 868, 897, 192,
+ 194, 196, 202, 204, 213, 217, 223, 228, 234, 240, 245, 249, 252,
+ 254, 255, 395, 425, 457, 488, 512, 547, 583, 619, 659, 699, 737,
+ 778, 819, 854, 882, 907, 257, 259, 261, 263, 265, 267, 273, 280,
+ 285, 297, 307, 324, 336, 357, 379, 396, 424, 452, 479, 508, 541,
+ 574, 609, 643, 679, 719, 764, 806, 841, 870, 895, 919, 269, 271,
+ 275, 277, 281, 283, 290, 299, 305, 318, 331, 348, 361, 383, 404,
+ 426, 453, 476, 506, 535, 568, 601, 634, 669, 708, 748, 789, 829,
+ 860, 887, 909, 927, 287, 291, 293, 295, 301, 303, 309, 321, 329,
+ 343, 353, 372, 391, 412, 429, 458, 480, 507, 532, 564, 590, 627,
+ 663, 703, 733, 773, 816, 847, 876, 901, 921, 940, 311, 313, 315,
+ 319, 325, 327, 333, 349, 358, 368, 385, 399, 420, 439, 462, 489,
+ 509, 536, 565, 589, 624, 661, 691, 727, 768, 810, 838, 866, 890,
+ 913, 934, 950, 337, 339, 341, 345, 351, 359, 363, 375, 387, 397,
+ 416, 432, 450, 470, 495, 513, 542, 569, 591, 625, 657, 684, 723,
+ 762, 797, 834, 862, 884, 905, 925, 942, 961, 365, 369, 373, 377,
+ 381, 389, 393, 409, 421, 434, 448, 464, 486, 502, 527, 548, 575,
+ 602, 628, 662, 685, 721, 756, 794, 827, 855, 880, 903, 923, 938,
+ 954, 967, 401, 405, 407, 413, 417, 423, 430, 443, 456, 467, 483,
+ 499, 519, 540, 561, 584, 610, 635, 664, 692, 724, 757, 792, 825,
+ 850, 878, 899, 917, 936, 952, 965, 977, 436, 440, 444, 446, 454,
+ 460, 468, 477, 493, 503, 522, 537, 550, 578, 595, 620, 644, 670,
+ 704, 728, 763, 795, 826, 849, 873, 893, 915, 932, 948, 963, 975,
+ 986, 472, 474, 481, 487, 491, 497, 505, 514, 529, 544, 555, 576,
+ 588, 614, 633, 660, 680, 709, 734, 769, 798, 828, 851, 874, 892,
+ 911, 930, 946, 959, 973, 984, 994, 515, 517, 523, 525, 531, 538,
+ 546, 552, 570, 582, 596, 617, 631, 653, 676, 700, 720, 749, 774,
+ 811, 835, 856, 879, 894, 912, 928, 944, 957, 971, 982, 992, 1001,
+ 556, 558, 562, 566, 572, 580, 586, 597, 611, 623, 637, 655, 673,
+ 693, 714, 738, 765, 790, 817, 839, 863, 881, 900, 916, 931, 945,
+ 956, 969, 980, 990, 999, 1007, 599, 603, 605, 607, 615, 621, 629,
+ 639, 650, 668, 678, 701, 716, 731, 758, 779, 807, 830, 848, 867,
+ 885, 904, 918, 933, 947, 958, 970, 979, 988, 997, 1005, 1012, 641,
+ 645, 647, 651, 658, 666, 674, 682, 696, 710, 725, 744, 761, 777,
+ 802, 820, 842, 861, 877, 891, 906, 924, 937, 949, 960, 972, 981,
+ 989, 996, 1003, 1010, 1016, 686, 688, 694, 702, 706, 712, 718, 729,
+ 745, 753, 771, 784, 808, 823, 840, 857, 871, 888, 902, 914, 926,
+ 939, 953, 964, 974, 983, 991, 998, 1004, 1009, 1014, 1019, 735, 739,
+ 741, 747, 751, 759, 767, 775, 787, 804, 818, 832, 846, 859, 869,
+ 883, 896, 910, 922, 935, 943, 955, 966, 976, 985, 993, 1000, 1006,
+ 1011, 1015, 1018, 1021, 781, 785, 791, 796, 800, 812, 814, 824, 836,
+ 844, 853, 865, 875, 889, 898, 908, 920, 929, 941, 951, 962, 968,
+ 978, 987, 995, 1002, 1008, 1013, 1017, 1020, 1022, 1023,
+};  // NOTE(review): read as 32 rows of 32, values 0..255 appear confined to the first 16 entries of the first 16 rows - confirm.
+#endif // CONFIG_EXT_TX
+
+const scan_order vp10_default_scan_orders[TX_SIZES] = {  // One entry per listed size; each names a default_scan_*, a vp10_default_iscan_* and a *_neighbors table.
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },  // 4x4
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },  // 8x8
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },  // 16x16
+ { default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors },  // 32x32
+};
+
+#if CONFIG_EXT_TX
+const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = {  // Outer groups are labelled by TX size; each inner entry names a *_scan_*, a vp10_*_iscan_* and a *_neighbors table.
+ {
+ // TX_4X4: 16 initializers drawn from the default, row, col and mrow 4x4 tables.
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors },
+ { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+ { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors },
+ { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors },
+ { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors },
+ { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors },
+ { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors },
+ { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors },
+ },
+ {
+ // TX_8X8: 16 initializers drawn from the default, row, col and mrow 8x8 tables.
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors },
+ { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
+ { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors },
+ { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors },
+ { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors },
+ { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors },
+ { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors },
+ { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors },
+ },
+ {
+ // TX_16X16: 16 initializers drawn from the default, row, col and mrow 16x16 tables.
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors },
+ { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
+ { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors },
+ { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors },
+ { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors },
+ { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors },
+ { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors },
+ { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors },
+ },
+ {
+ // TX_32X32: 16 initializers drawn from the default, h2, v2, qtr, mrow and mcol 32x32 tables (no row_scan/col_scan tables are referenced here).
+ { default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { h2_scan_32x32, vp10_h2_iscan_32x32, h2_scan_32x32_neighbors },
+ { v2_scan_32x32, vp10_v2_iscan_32x32, v2_scan_32x32_neighbors },
+ { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+ { h2_scan_32x32, vp10_h2_iscan_32x32, h2_scan_32x32_neighbors },
+ { v2_scan_32x32, vp10_v2_iscan_32x32, v2_scan_32x32_neighbors },
+ { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+ { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+ { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+ { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+ { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+ { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+ }
+};
+
+const scan_order vp10_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
+ {
+ // TX_4X4
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+ { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+ { mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
+ { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+ { mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
+ { mrow_scan_4x4, vp10_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+ { mcol_scan_4x4, vp10_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
+ },
+ {
+ // TX_8X8
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
+ { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
+ { mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
+ { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
+ { mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
+ { mrow_scan_8x8, vp10_mrow_iscan_8x8, mrow_scan_8x8_neighbors },
+ { mcol_scan_8x8, vp10_mcol_iscan_8x8, mcol_scan_8x8_neighbors },
+ },
+ {
+ // TX_16X16
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
+ { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
+ { mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
+ { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
+ { mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
+ { mrow_scan_16x16, vp10_mrow_iscan_16x16, mrow_scan_16x16_neighbors },
+ { mcol_scan_16x16, vp10_mcol_iscan_16x16, mcol_scan_16x16_neighbors },
+ },
+ {
+ // TX_32X32
+ { default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { h2_scan_32x32, vp10_h2_iscan_32x32, h2_scan_32x32_neighbors },
+ { v2_scan_32x32, vp10_v2_iscan_32x32, v2_scan_32x32_neighbors },
+ { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+ { h2_scan_32x32, vp10_h2_iscan_32x32, h2_scan_32x32_neighbors },
+ { v2_scan_32x32, vp10_v2_iscan_32x32, v2_scan_32x32_neighbors },
+ { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+ { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+ { qtr_scan_32x32, vp10_qtr_iscan_32x32, qtr_scan_32x32_neighbors },
+ { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+ { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+ { mrow_scan_32x32, vp10_mrow_iscan_32x32, mrow_scan_32x32_neighbors },
+ { mcol_scan_32x32, vp10_mcol_iscan_32x32, mcol_scan_32x32_neighbors },
+ },
+ {
+ // TX_4X8
+ { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+ { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+ { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+ { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+ { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+ { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+ { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+ { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+ { default_scan_4x8, vp10_default_iscan_4x8, default_scan_4x8_neighbors },
+ { mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+ { mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+ { mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
+ { mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+ { mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
+ { mrow_scan_4x8, vp10_mrow_iscan_4x8, mrow_scan_4x8_neighbors },
+ { mcol_scan_4x8, vp10_mcol_iscan_4x8, mcol_scan_4x8_neighbors },
+ },
+ {
+ // TX_8X4
+ { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+ { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+ { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+ { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+ { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+ { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+ { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+ { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+ { default_scan_8x4, vp10_default_iscan_8x4, default_scan_8x4_neighbors },
+ { mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+ { mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+ { mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
+ { mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+ { mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
+ { mrow_scan_8x4, vp10_mrow_iscan_8x4, mrow_scan_8x4_neighbors },
+ { mcol_scan_8x4, vp10_mcol_iscan_8x4, mcol_scan_8x4_neighbors },
+ },
+ {
+ // TX_8X16
+ { default_scan_8x16, vp10_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { default_scan_8x16, vp10_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { default_scan_8x16, vp10_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { default_scan_8x16, vp10_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { default_scan_8x16, vp10_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { default_scan_8x16, vp10_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { default_scan_8x16, vp10_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { default_scan_8x16, vp10_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { default_scan_8x16, vp10_default_iscan_8x16,
+ default_scan_8x16_neighbors },
+ { mrow_scan_8x16, vp10_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+ { mrow_scan_8x16, vp10_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+ { mcol_scan_8x16, vp10_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
+ { mrow_scan_8x16, vp10_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+ { mcol_scan_8x16, vp10_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
+ { mrow_scan_8x16, vp10_mrow_iscan_8x16, mrow_scan_8x16_neighbors },
+ { mcol_scan_8x16, vp10_mcol_iscan_8x16, mcol_scan_8x16_neighbors },
+ },
+ {
+ // TX_16X8
+ { default_scan_16x8, vp10_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { default_scan_16x8, vp10_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { default_scan_16x8, vp10_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { default_scan_16x8, vp10_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { default_scan_16x8, vp10_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { default_scan_16x8, vp10_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { default_scan_16x8, vp10_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { default_scan_16x8, vp10_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { default_scan_16x8, vp10_default_iscan_16x8,
+ default_scan_16x8_neighbors },
+ { mrow_scan_16x8, vp10_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+ { mrow_scan_16x8, vp10_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+ { mcol_scan_16x8, vp10_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
+ { mrow_scan_16x8, vp10_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+ { mcol_scan_16x8, vp10_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
+ { mrow_scan_16x8, vp10_mrow_iscan_16x8, mrow_scan_16x8_neighbors },
+ { mcol_scan_16x8, vp10_mcol_iscan_16x8, mcol_scan_16x8_neighbors },
+ },
+ {
+ // TX_16X32
+ { default_scan_16x32, vp10_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, vp10_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, vp10_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, vp10_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, vp10_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, vp10_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, vp10_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, vp10_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { default_scan_16x32, vp10_default_iscan_16x32,
+ default_scan_16x32_neighbors },
+ { mrow_scan_16x32, vp10_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { mrow_scan_16x32, vp10_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { mcol_scan_16x32, vp10_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+ { mrow_scan_16x32, vp10_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { mcol_scan_16x32, vp10_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+ { mrow_scan_16x32, vp10_mrow_iscan_16x32, mrow_scan_16x32_neighbors },
+ { mcol_scan_16x32, vp10_mcol_iscan_16x32, mcol_scan_16x32_neighbors },
+ },
+ {
+ // TX_32X16
+ { default_scan_32x16, vp10_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, vp10_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, vp10_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, vp10_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, vp10_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, vp10_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, vp10_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, vp10_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { default_scan_32x16, vp10_default_iscan_32x16,
+ default_scan_32x16_neighbors },
+ { mrow_scan_32x16, vp10_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { mrow_scan_32x16, vp10_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { mcol_scan_32x16, vp10_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
+ { mrow_scan_32x16, vp10_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { mcol_scan_32x16, vp10_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
+ { mrow_scan_32x16, vp10_mrow_iscan_32x16, mrow_scan_32x16_neighbors },
+ { mcol_scan_32x16, vp10_mcol_iscan_32x16, mcol_scan_32x16_neighbors },
+ }
+};
+
+#else // CONFIG_EXT_TX
+
+const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+ { // TX_4X4: one { scan, iscan, neighbors } entry per tx type
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors },
+ { row_scan_4x4, vp10_row_iscan_4x4, row_scan_4x4_neighbors },
+ { col_scan_4x4, vp10_col_iscan_4x4, col_scan_4x4_neighbors },
+ { default_scan_4x4, vp10_default_iscan_4x4, default_scan_4x4_neighbors } },
+ { // TX_8X8
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors },
+ { row_scan_8x8, vp10_row_iscan_8x8, row_scan_8x8_neighbors },
+ { col_scan_8x8, vp10_col_iscan_8x8, col_scan_8x8_neighbors },
+ { default_scan_8x8, vp10_default_iscan_8x8, default_scan_8x8_neighbors } },
+ { // TX_16X16
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors },
+ { row_scan_16x16, vp10_row_iscan_16x16, row_scan_16x16_neighbors },
+ { col_scan_16x16, vp10_col_iscan_16x16, col_scan_16x16_neighbors },
+ { default_scan_16x16, vp10_default_iscan_16x16,
+ default_scan_16x16_neighbors } },
+ {
+ // TX_32X32: every entry uses the default scan
+ { default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ { default_scan_32x32, vp10_default_iscan_32x32,
+ default_scan_32x32_neighbors },
+ }
+};
+#endif // CONFIG_EXT_TX
diff --git a/av1/common/scan.h b/av1/common/scan.h
new file mode 100644
index 0000000..d2d9f35
--- /dev/null
+++ b/av1/common/scan.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_SCAN_H_
+#define VP10_COMMON_SCAN_H_
+
+#include "aom/vpx_integer.h"
+#include "aom_ports/mem.h"
+
+#include "av1/common/enums.h"
+#include "av1/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_NEIGHBORS 2
+
+typedef struct {
+ const int16_t *scan;  // presumably scan index -> coefficient position
+ const int16_t *iscan;  // presumably the inverse of scan -- TODO confirm
+ const int16_t *neighbors;  // MAX_NEIGHBORS token_cache indices per position
+} scan_order;
+
+extern const scan_order vp10_default_scan_orders[TX_SIZES];
+extern const scan_order vp10_intra_scan_orders[TX_SIZES][TX_TYPES];
+
+static INLINE int get_coef_context(const int16_t *neighbors,
+                                   const uint8_t *token_cache, int c) {
+  // Rounded mean of the token_cache entries named by position c's neighbors.
+  const int16_t *const nb = &neighbors[MAX_NEIGHBORS * c];
+  return (1 + token_cache[nb[0]] + token_cache[nb[1]]) >> 1;
+}
+
+static INLINE const scan_order *get_intra_scan(TX_SIZE tx_size,
+                                               TX_TYPE tx_type) {
+  return vp10_intra_scan_orders[tx_size] + tx_type;  // &table[size][type]
+}
+
+#if CONFIG_EXT_TX
+extern const scan_order vp10_inter_scan_orders[TX_SIZES_ALL][TX_TYPES];
+
+static INLINE const scan_order *get_inter_scan(TX_SIZE tx_size,
+                                               TX_TYPE tx_type) {
+  return vp10_inter_scan_orders[tx_size] + tx_type;  // &table[size][type]
+}
+#endif // CONFIG_EXT_TX
+
+static INLINE const scan_order *get_scan(TX_SIZE tx_size, TX_TYPE tx_type,
+                                         int is_inter) {
+#if CONFIG_EXT_TX
+  // Inter blocks have a dedicated table; everything else uses the intra one.
+  if (is_inter) return &vp10_inter_scan_orders[tx_size][tx_type];
+#else
+  (void)is_inter;  // only the intra table exists without CONFIG_EXT_TX
+#endif  // CONFIG_EXT_TX
+  return &vp10_intra_scan_orders[tx_size][tx_type];
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_SCAN_H_
diff --git a/av1/common/seg_common.c b/av1/common/seg_common.c
new file mode 100644
index 0000000..f131c7b
--- /dev/null
+++ b/av1/common/seg_common.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/blockd.h"
+#include "av1/common/loopfilter.h"
+#include "av1/common/seg_common.h"
+#include "av1/common/quant_common.h"
+
+static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 };
+// Both tables are indexed by a SEG_LVL_FEATURES value.
+static const int seg_feature_data_max[SEG_LVL_MAX] = { MAXQ, MAX_LOOP_FILTER, 3,
+ 0 };
+
+// These functions provide access to new segment level features.
+// Eventually these functions may be "optimized out" but for the moment,
+// the coding mechanism is still subject to change so these provide a
+// convenient single point of change.
+
+void vp10_clearall_segfeatures(struct segmentation *seg) {
+  vp10_zero(seg->feature_mask);  // no feature enabled on any segment
+  vp10_zero(seg->feature_data);  // and no leftover per-feature values
+}
+
+void vp10_enable_segfeature(struct segmentation *seg, int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ seg->feature_mask[segment_id] |= 1 << feature_id;  // one bit per feature
+}
+
+int vp10_seg_feature_data_max(SEG_LVL_FEATURES feature_id) {
+ return seg_feature_data_max[feature_id];  // feature_id is not range-checked
+}
+
+int vp10_is_segfeature_signed(SEG_LVL_FEATURES feature_id) {
+ return seg_feature_data_signed[feature_id];  // 1 = negative data allowed
+}
+
+void vp10_set_segdata(struct segmentation *seg, int segment_id,
+ SEG_LVL_FEATURES feature_id, int seg_data) {
+ assert(seg_data <= seg_feature_data_max[feature_id]);
+ if (seg_data < 0) {
+ assert(seg_feature_data_signed[feature_id]);  // only signed features go < 0
+ assert(-seg_data <= seg_feature_data_max[feature_id]);
+ }
+ // The range checks above are asserts only; with NDEBUG nothing is verified.
+ seg->feature_data[segment_id][feature_id] = seg_data;
+}
+
+const vpx_tree_index vp10_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = {
+ 2, 4, 6, 8, 10, 12, 0, -1, -2, -3, -4, -5, -6, -7
+};
+
+// TBD? Functions to read and write segment data with range / validity checking
diff --git a/av1/common/seg_common.h b/av1/common/seg_common.h
new file mode 100644
index 0000000..7a8fa8f
--- /dev/null
+++ b/av1/common/seg_common.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_SEG_COMMON_H_
+#define VP10_COMMON_SEG_COMMON_H_
+
+#include "aom_dsp/prob.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SEGMENT_DELTADATA 0
+#define SEGMENT_ABSDATA 1
+
+#define MAX_SEGMENTS 8
+#define SEG_TREE_PROBS (MAX_SEGMENTS - 1)
+
+#define PREDICTION_PROBS 3
+
+// Segment level features.
+typedef enum {
+ SEG_LVL_ALT_Q = 0, // Use alternate quantizer
+ SEG_LVL_ALT_LF = 1, // Use alternate loop filter value
+ SEG_LVL_REF_FRAME = 2, // Optional segment reference frame
+ SEG_LVL_SKIP = 3, // Optional segment (0,0) + skip mode
+ SEG_LVL_MAX = 4 // Number of features supported
+} SEG_LVL_FEATURES;
+
+struct segmentation {
+ uint8_t enabled;  // when zero, segfeature_active() reports every feature off
+ uint8_t update_map;
+ uint8_t update_data;
+ uint8_t abs_delta;
+ uint8_t temporal_update;
+
+ int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];  // [segment][feature]
+ unsigned int feature_mask[MAX_SEGMENTS];  // bit (1 << feature) per segment
+};
+
+struct segmentation_probs {
+ vpx_prob tree_probs[SEG_TREE_PROBS];
+ vpx_prob pred_probs[PREDICTION_PROBS];
+};
+
+// Nonzero iff segmentation is enabled and feature_id's bit is set for segment.
+static INLINE int segfeature_active(const struct segmentation *seg,
+    int segment_id, SEG_LVL_FEATURES feature_id) {
+  return seg->enabled && ((seg->feature_mask[segment_id] >> feature_id) & 1);
+}
+
+void vp10_clearall_segfeatures(struct segmentation *seg);
+
+void vp10_enable_segfeature(struct segmentation *seg, int segment_id,
+ SEG_LVL_FEATURES feature_id);
+
+int vp10_seg_feature_data_max(SEG_LVL_FEATURES feature_id);
+
+int vp10_is_segfeature_signed(SEG_LVL_FEATURES feature_id);
+
+void vp10_set_segdata(struct segmentation *seg, int segment_id,
+ SEG_LVL_FEATURES feature_id, int seg_data);
+
+static INLINE int get_segdata(const struct segmentation *seg, int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ return seg->feature_data[segment_id][feature_id];  // int16_t widened to int
+}
+
+extern const vpx_tree_index vp10_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_SEG_COMMON_H_
diff --git a/av1/common/thread_common.c b/av1/common/thread_common.c
new file mode 100644
index 0000000..ba91a46
--- /dev/null
+++ b/av1/common/thread_common.c
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/thread_common.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/loopfilter.h"
+
+#if CONFIG_MULTITHREAD
+static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
+  // Spins on pthread_mutex_trylock() for up to kMaxTryLocks attempts and only
+  // falls back to a blocking pthread_mutex_lock() if none of them succeeds.
+  const int kMaxTryLocks = 4000;
+  int attempt;
+
+  for (attempt = 0; attempt < kMaxTryLocks; ++attempt) {
+    if (pthread_mutex_trylock(mutex) == 0) {
+      return;  // acquired while spinning
+    }
+  }
+
+  pthread_mutex_lock(mutex);
+}
+#endif // CONFIG_MULTITHREAD
+
+static INLINE void sync_read(VP10LfSync *const lf_sync, int r, int c) {
+#if CONFIG_MULTITHREAD
+  // Waits until the row above (r - 1) has published enough progress for column
+  // c of row r to be filtered. Row 0 never waits; other rows only check when
+  // (c & (sync_range - 1)) is zero.
+  const int nsync = lf_sync->sync_range;
+  const int above = r - 1;
+  if (r == 0 || (c & (nsync - 1)) != 0) return;
+  mutex_lock(&lf_sync->mutex_[above]);
+  while (c > lf_sync->cur_sb_col[above] - nsync) {
+    pthread_cond_wait(&lf_sync->cond_[above], &lf_sync->mutex_[above]);
+  }
+  pthread_mutex_unlock(&lf_sync->mutex_[above]);
+#else
+  (void)lf_sync;
+  (void)r;
+  (void)c;
+#endif  // CONFIG_MULTITHREAD
+}
+
+static INLINE void sync_write(VP10LfSync *const lf_sync, int r, int c,
+                              const int sb_cols) {
+#if CONFIG_MULTITHREAD
+  // Publishes the progress of superblock row r after column c is filtered.
+  //
+  // Columns before the last one publish their own index, and only when c is
+  // a multiple of sync_range. The last column always publishes, and stores
+  // sb_cols + sync_range so the wait condition in sync_read() is false for
+  // every column of the row.
+  const int nsync = lf_sync->sync_range;
+  const int is_last_col = c >= sb_cols - 1;
+  const int progress = is_last_col ? sb_cols + nsync : c;
+
+  // Not a reporting column: nothing to publish.
+  if (!is_last_col && (c % nsync) != 0) return;
+
+  // Update under the row's mutex, then signal the row's condition variable.
+  mutex_lock(&lf_sync->mutex_[r]);
+
+  lf_sync->cur_sb_col[r] = progress;
+
+  pthread_cond_signal(&lf_sync->cond_[r]);
+  pthread_mutex_unlock(&lf_sync->mutex_[r]);
+#else
+  (void)lf_sync;
+  (void)r;
+  (void)c;
+  (void)sb_cols;
+#endif  // CONFIG_MULTITHREAD
+}
+
+// Per-thread row loopfilter: every num_workers-th SB row in [start, stop).
+static INLINE void thread_loop_filter_rows(
+ const YV12_BUFFER_CONFIG *const frame_buffer, VP10_COMMON *const cm,
+ struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop,
+ int y_only, VP10LfSync *const lf_sync) {
+ const int num_planes = y_only ? 1 : MAX_MB_PLANE;  // plane 0 only if y_only
+ const int sb_cols = mi_cols_aligned_to_sb(cm) >> cm->mib_size_log2;
+ int mi_row, mi_col;
+#if !CONFIG_EXT_PARTITION_TYPES
+ enum lf_path path;
+ LOOP_FILTER_MASK lfm;
+ if (y_only)
+ path = LF_PATH_444;
+ else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
+ path = LF_PATH_420;
+ else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
+ path = LF_PATH_444;
+ else
+ path = LF_PATH_SLOW;
+#endif // !CONFIG_EXT_PARTITION_TYPES
+
+#if CONFIG_EXT_PARTITION
+ printf(
+ "STOPPING: This code has not been modified to work with the "
+ "extended coding unit size experiment");
+ exit(EXIT_FAILURE);
+#endif // CONFIG_EXT_PARTITION
+
+ for (mi_row = start; mi_row < stop;
+ mi_row += lf_sync->num_workers * cm->mib_size) {  // rows are interleaved
+ MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
+
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) {
+ const int r = mi_row >> cm->mib_size_log2;
+ const int c = mi_col >> cm->mib_size_log2;
+ int plane;
+
+ sync_read(lf_sync, r, c);  // wait on the progress of the row above
+
+ vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
+
+#if CONFIG_EXT_PARTITION_TYPES
+ for (plane = 0; plane < num_planes; ++plane)
+ vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, mi_row,
+ mi_col);
+#else
+ // TODO(JBB): Make setup_mask work for non 420.
+ vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
+
+ vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
+ for (plane = 1; plane < num_planes; ++plane) {
+ switch (path) {
+ case LF_PATH_420:
+ vp10_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
+ break;
+ case LF_PATH_444:
+ vp10_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
+ break;
+ case LF_PATH_SLOW:
+ vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
+ mi_row, mi_col);
+ break;
+ }
+ }
+#endif // CONFIG_EXT_PARTITION_TYPES
+ sync_write(lf_sync, r, c, sb_cols);  // publish progress of row r
+ }
+ }
+}
+
+// Row-based multi-threaded loopfilter hook; always returns 1.
+static int loop_filter_row_worker(VP10LfSync *const lf_sync,
+ LFWorkerData *const lf_data) {
+ thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
+ lf_data->start, lf_data->stop, lf_data->y_only,
+ lf_sync);
+ return 1;
+}
+
+// Loop filters mi rows [start, stop) using min(nworkers, tile_cols) workers.
+static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                                struct macroblockd_plane planes[MAX_MB_PLANE],
+                                int start, int stop, int y_only,
+                                VPxWorker *workers, int nworkers,
+                                VP10LfSync *lf_sync) {
+  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+  // Number of superblock rows.
+  const int sb_rows = mi_rows_aligned_to_sb(cm) >> cm->mib_size_log2;
+  // The decoder may allocate more threads than there are tile columns.
+  const int num_workers = VPXMIN(nworkers, cm->tile_cols);
+  int i;
+
+#if CONFIG_EXT_PARTITION
+  printf(
+      "STOPPING: This code has not been modified to work with the "
+      "extended coding unit size experiment");
+  exit(EXIT_FAILURE);
+#endif  // CONFIG_EXT_PARTITION
+
+  // Workers stride by lf_sync->num_workers superblock rows, so it must equal
+  // the number launched here; a larger stale value skips rows and deadlocks.
+  if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
+      num_workers != lf_sync->num_workers) {
+    vp10_loop_filter_dealloc(lf_sync);
+    vp10_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
+  }
+
+  // Initialize cur_sb_col to -1 for all SB rows.
+  memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
+
+  // Set up loopfilter thread data.
+  // num_workers is capped because using more loopfilter threads than there are
+  // cores has been observed to hurt performance on Android: the system only
+  // schedules the tile decode workers on as many cores as there are tile
+  // columns, so extra loopfilter threads just add contention. If the
+  // multithreading code changes in the future then the number of workers used
+  // by the loopfilter should be revisited.
+  for (i = 0; i < num_workers; ++i) {
+    VPxWorker *const worker = &workers[i];
+    LFWorkerData *const lf_data = &lf_sync->lfdata[i];
+
+    worker->hook = (VPxWorkerHook)loop_filter_row_worker;
+    worker->data1 = lf_sync;
+    worker->data2 = lf_data;
+
+    // Loopfilter data
+    vp10_loop_filter_data_reset(lf_data, frame, cm, planes);
+    lf_data->start = start + i * cm->mib_size;
+    lf_data->stop = stop;
+    lf_data->y_only = y_only;
+
+    // Start loopfiltering
+    if (i == num_workers - 1) {
+      winterface->execute(worker);
+    } else {
+      winterface->launch(worker);
+    }
+  }
+
+  // Wait till all rows are finished
+  for (i = 0; i < num_workers; ++i) {
+    winterface->sync(&workers[i]);
+  }
+}
+
+void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
+                               struct macroblockd_plane planes[MAX_MB_PLANE],
+                               int frame_filter_level, int y_only,
+                               int partial_frame, VPxWorker *workers,
+                               int num_workers, VP10LfSync *lf_sync) {
+  int first_row, row_count;
+
+  if (frame_filter_level == 0) return;  // nothing to do at level 0
+
+  // By default filter the whole frame. A partial pass over a frame taller
+  // than 8 mi rows instead starts at the middle row rounded down to a
+  // multiple of 8 and covers max(mi_rows / 8, 8) rows.
+  first_row = 0;
+  row_count = cm->mi_rows;
+  if (partial_frame && cm->mi_rows > 8) {
+    first_row = (cm->mi_rows >> 1) & 0xfffffff8;
+    row_count = VPXMAX(cm->mi_rows / 8, 8);
+  }
+  vp10_loop_filter_frame_init(cm, frame_filter_level);
+  loop_filter_rows_mt(frame, cm, planes, first_row, first_row + row_count,
+                      y_only, workers, num_workers, lf_sync);
+}
+
+// Set up nsync by width:
+//   width <  640          -> 1
+//   640  <= width <= 1280 -> 2
+//   1280 <  width <= 4096 -> 4
+//   width >  4096         -> 8
+static INLINE int get_sync_range(int width) {
+  // nsync numbers are picked by testing. For example, for 4k
+  // video, using 4 gives best performance.
+  if (width < 640) return 1;
+  if (width <= 1280) return 2;
+  if (width <= 4096) return 4;
+  return 8;
+}
+
+// Allocate memory for lf row synchronization
+void vp10_loop_filter_alloc(VP10LfSync *lf_sync, VP10_COMMON *cm, int rows,
+ int width, int num_workers) {
+ lf_sync->rows = rows;  // also the mutex_/cond_ count walked by dealloc
+#if CONFIG_MULTITHREAD
+ {
+ int i;
+
+ CHECK_MEM_ERROR(cm, lf_sync->mutex_,
+ vpx_malloc(sizeof(*lf_sync->mutex_) * rows));
+ if (lf_sync->mutex_) {
+ for (i = 0; i < rows; ++i) {
+ pthread_mutex_init(&lf_sync->mutex_[i], NULL);
+ }
+ }
+
+ CHECK_MEM_ERROR(cm, lf_sync->cond_,
+ vpx_malloc(sizeof(*lf_sync->cond_) * rows));
+ if (lf_sync->cond_) {
+ for (i = 0; i < rows; ++i) {
+ pthread_cond_init(&lf_sync->cond_[i], NULL);
+ }
+ }
+ }
+#endif // CONFIG_MULTITHREAD
+
+ CHECK_MEM_ERROR(cm, lf_sync->lfdata,
+ vpx_malloc(num_workers * sizeof(*lf_sync->lfdata)));
+ lf_sync->num_workers = num_workers;  // lfdata entries; also the row stride
+
+ CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
+ vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
+
+ // Set up nsync.
+ lf_sync->sync_range = get_sync_range(width);
+}
+
+// Deallocate lf synchronization related mutex and data
+void vp10_loop_filter_dealloc(VP10LfSync *lf_sync) {
+  if (lf_sync == NULL) return;
+
+#if CONFIG_MULTITHREAD
+  if (lf_sync->mutex_ != NULL) {
+    int row;
+    for (row = 0; row < lf_sync->rows; ++row) {
+      pthread_mutex_destroy(&lf_sync->mutex_[row]);
+    }
+    vpx_free(lf_sync->mutex_);
+  }
+  if (lf_sync->cond_ != NULL) {
+    int row;
+    for (row = 0; row < lf_sync->rows; ++row) {
+      pthread_cond_destroy(&lf_sync->cond_[row]);
+    }
+    vpx_free(lf_sync->cond_);
+  }
+#endif  // CONFIG_MULTITHREAD
+  vpx_free(lf_sync->lfdata);
+  vpx_free(lf_sync->cur_sb_col);
+  // clear the structure as the source of this call may be a resize in which
+  // case this call will be followed by an _alloc() which may fail.
+  vp10_zero(*lf_sync);
+}
+
+// Accumulate frame counts. FRAME_COUNTS consist solely of 'unsigned int'
+// members, so both structures are treated as flat arrays and every element
+// of *counts is added to the matching element of cm->counts.
+void vp10_accumulate_frame_counts(VP10_COMMON *cm, FRAME_COUNTS *counts) {
+  const unsigned int *const src = (unsigned int *)counts;
+  unsigned int *const dst = (unsigned int *)&cm->counts;
+  unsigned int remaining = sizeof(FRAME_COUNTS) / sizeof(unsigned int);
+
+  // Element order is irrelevant for the sums, so count down to zero.
+  while (remaining-- > 0) dst[remaining] += src[remaining];
+}
diff --git a/av1/common/thread_common.h b/av1/common/thread_common.h
new file mode 100644
index 0000000..3df9557
--- /dev/null
+++ b/av1/common/thread_common.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_LOOPFILTER_THREAD_H_
+#define VP10_COMMON_LOOPFILTER_THREAD_H_
+#include "./vpx_config.h"
+#include "av1/common/loopfilter.h"
+#include "aom_util/vpx_thread.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10Common;
+struct FRAME_COUNTS;
+
+// Loopfilter row synchronization
+typedef struct VP10LfSyncData {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *mutex_;  // array of `rows` mutexes
+ pthread_cond_t *cond_;  // array of `rows` condition variables
+#endif
+ // Allocate memory to store the loop-filtered superblock index in each row.
+ int *cur_sb_col;
+ // The optimal sync_range for different resolution and platform should be
+ // determined by testing. Currently, it is chosen to be a power-of-2 number.
+ int sync_range;
+ int rows;  // number of entries in mutex_, cond_ and cur_sb_col
+
+ // Row-based parallel loopfilter data
+ LFWorkerData *lfdata;  // array of num_workers entries
+ int num_workers;
+} VP10LfSync;
+
+// Allocate memory for loopfilter row synchronization.
+void vp10_loop_filter_alloc(VP10LfSync *lf_sync, struct VP10Common *cm,
+ int rows, int width, int num_workers);
+
+// Deallocate loopfilter synchronization related mutex and data.
+void vp10_loop_filter_dealloc(VP10LfSync *lf_sync);
+
+// Multi-threaded loopfilter that uses the tile threads.
+void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct VP10Common *cm,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
+ int frame_filter_level, int y_only,
+ int partial_frame, VPxWorker *workers,
+ int num_workers, VP10LfSync *lf_sync);
+
+void vp10_accumulate_frame_counts(struct VP10Common *cm,
+ struct FRAME_COUNTS *counts);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_LOOPFILTER_THREAD_H_
diff --git a/av1/common/tile_common.c b/av1/common/tile_common.c
new file mode 100644
index 0000000..e79734e
--- /dev/null
+++ b/av1/common/tile_common.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/common/tile_common.h"
+#include "av1/common/onyxc_int.h"
+#include "aom_dsp/vpx_dsp_common.h"
+
+void vp10_tile_set_row(TileInfo *tile, const VP10_COMMON *cm, int row) {
+ tile->mi_row_start = row * cm->tile_height;  // end below is capped at mi_rows
+ tile->mi_row_end = VPXMIN(tile->mi_row_start + cm->tile_height, cm->mi_rows);
+}
+
+void vp10_tile_set_col(TileInfo *tile, const VP10_COMMON *cm, int col) {
+ tile->mi_col_start = col * cm->tile_width;  // end below is capped at mi_cols
+ tile->mi_col_end = VPXMIN(tile->mi_col_start + cm->tile_width, cm->mi_cols);
+}
+
+void vp10_tile_init(TileInfo *tile, const VP10_COMMON *cm, int row, int col) {
+ vp10_tile_set_row(tile, cm, row);  // fills mi_row_start / mi_row_end
+ vp10_tile_set_col(tile, cm, col);  // fills mi_col_start / mi_col_end
+}
+
+#if !CONFIG_EXT_TILE
+
+#if CONFIG_EXT_PARTITION
+#define MIN_TILE_WIDTH_MAX_SB 2
+#define MAX_TILE_WIDTH_MAX_SB 32
+#else
+#define MIN_TILE_WIDTH_MAX_SB 4
+#define MAX_TILE_WIDTH_MAX_SB 64
+#endif // CONFIG_EXT_PARTITION
+
+static int get_min_log2_tile_cols(const int max_sb_cols) {
+  int n;  // smallest n with (MAX_TILE_WIDTH_MAX_SB << n) >= max_sb_cols
+  for (n = 0; (MAX_TILE_WIDTH_MAX_SB << n) < max_sb_cols; ++n) {}
+  return n;
+}
+
+static int get_max_log2_tile_cols(const int max_sb_cols) {
+  int n;  // first n >= 1 with (max_sb_cols >> n) < MIN_TILE_WIDTH_MAX_SB
+  for (n = 1; (max_sb_cols >> n) >= MIN_TILE_WIDTH_MAX_SB; ++n) {}
+  return n - 1;
+}
+
+void vp10_get_tile_n_bits(const int mi_cols, int *min_log2_tile_cols,
+ int *max_log2_tile_cols) {
+ const int max_sb_cols =  // mi_cols rounded up to whole max-size superblocks
+ ALIGN_POWER_OF_TWO(mi_cols, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2;
+ *min_log2_tile_cols = get_min_log2_tile_cols(max_sb_cols);
+ *max_log2_tile_cols = get_max_log2_tile_cols(max_sb_cols);
+ assert(*min_log2_tile_cols <= *max_log2_tile_cols);  // debug builds only
+}
+#endif // !CONFIG_EXT_TILE
diff --git a/av1/common/tile_common.h b/av1/common/tile_common.h
new file mode 100644
index 0000000..a502173
--- /dev/null
+++ b/av1/common/tile_common.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_TILE_COMMON_H_
+#define VP10_COMMON_TILE_COMMON_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10Common;
+
+typedef struct TileInfo {
+ int mi_row_start, mi_row_end;  // end is capped at cm->mi_rows
+ int mi_col_start, mi_col_end;  // end is capped at cm->mi_cols
+} TileInfo;
+
+// initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on
+// 'cm->tile_(height|width)' & 'cm->mi_(rows|cols)'
+void vp10_tile_init(TileInfo *tile, const struct VP10Common *cm, int row,
+ int col);
+
+void vp10_tile_set_row(TileInfo *tile, const struct VP10Common *cm, int row);
+void vp10_tile_set_col(TileInfo *tile, const struct VP10Common *cm, int col);
+
+void vp10_get_tile_n_bits(const int mi_cols, int *min_log2_tile_cols,
+ int *max_log2_tile_cols);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_TILE_COMMON_H_
diff --git a/av1/common/vp10_convolve.c b/av1/common/vp10_convolve.c
new file mode 100644
index 0000000..b62bae5
--- /dev/null
+++ b/av1/common/vp10_convolve.c
@@ -0,0 +1,353 @@
+#include <assert.h>
+#include <string.h>
+
+#include "./vp10_rtcd.h"
+#include "av1/common/vp10_convolve.h"
+#include "av1/common/filter.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_ports/mem.h"
+
+#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
+#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
+#define MAX_STEP (32)
+#define MAX_FILTER_TAP (12)
+
+void vp10_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
+                           int dst_stride, int w, int h,
+                           const InterpFilterParams filter_params,
+                           const int subpel_x_q4, int x_step_q4, int avg) {
+  // Horizontal 1-D filter over a w x h block. The position advances by
+  // x_step_q4 per output pixel and its SUBPEL_MASK bits pick the kernel.
+  // A non-zero avg blends each result with the pixel already in dst.
+  const int taps = filter_params.taps;
+  int row, col, tap;
+  src -= taps / 2 - 1;  // the tap window opens taps / 2 - 1 samples early
+  for (row = 0; row < h; ++row, src += src_stride, dst += dst_stride) {
+    int pos_q4 = subpel_x_q4;
+    for (col = 0; col < w; ++col, pos_q4 += x_step_q4) {
+      const uint8_t *const window = src + (pos_q4 >> SUBPEL_BITS);
+      const int16_t *const kernel = vp10_get_interp_filter_subpel_kernel(
+          filter_params, pos_q4 & SUBPEL_MASK);
+      int acc = 0;
+      int pixel;
+      for (tap = 0; tap < taps; ++tap) acc += window[tap] * kernel[tap];
+      pixel = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
+      if (avg) {
+        pixel = ROUND_POWER_OF_TWO(dst[col] + pixel, 1);
+      }
+      dst[col] = pixel;
+    }
+  }
+}
+
+void vp10_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
+                          int dst_stride, int w, int h,
+                          const InterpFilterParams filter_params,
+                          const int subpel_y_q4, int y_step_q4, int avg) {
+  // Vertical 1-D filter over a w x h block, processed one column at a time.
+  // The position advances by y_step_q4 per output row; taps are read
+  // src_stride apart. A non-zero avg blends with the pixel already in dst.
+  const int taps = filter_params.taps;
+  int row, col, tap;
+  src -= src_stride * (taps / 2 - 1);  // window opens taps / 2 - 1 rows up
+  for (col = 0; col < w; ++col, ++src, ++dst) {
+    int pos_q4 = subpel_y_q4;
+    for (row = 0; row < h; ++row, pos_q4 += y_step_q4) {
+      const uint8_t *const window = src + (pos_q4 >> SUBPEL_BITS) * src_stride;
+      const int16_t *const kernel = vp10_get_interp_filter_subpel_kernel(
+          filter_params, pos_q4 & SUBPEL_MASK);
+      uint8_t *const out = dst + row * dst_stride;
+      int acc = 0;
+      int pixel;
+      for (tap = 0; tap < taps; ++tap) {
+        acc += window[tap * src_stride] * kernel[tap];
+      }
+      pixel = clip_pixel(ROUND_POWER_OF_TWO(acc, FILTER_BITS));
+      // Optional rounded average with the existing destination pixel.
+      if (avg) {
+        pixel = ROUND_POWER_OF_TWO(*out + pixel, 1);
+      }
+      *out = pixel;
+    }
+  }
+}
+
+static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
+                          int dst_stride, int w, int h, int avg) {
+  // Unfiltered transfer of a w x h block from src to dst.
+  // avg == 0: each row is copied verbatim.
+  // avg != 0: each dst pixel becomes the rounded mean of itself and the
+  // matching src pixel.
+  int row, col;
+  for (row = 0; row < h; ++row) {
+    if (!avg) {
+      // One row of w bytes.
+      memcpy(dst, src, w);
+    } else {
+      for (col = 0; col < w; ++col) {
+        dst[col] = clip_pixel(ROUND_POWER_OF_TWO(dst[col] + src[col], 1));
+      }
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+#if CONFIG_DUAL_FILTER
+ const INTERP_FILTER *interp_filter,
+#else
+ const INTERP_FILTER interp_filter,
+#endif
+ const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
+ int y_step_q4, int ref_idx) {
+ int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
+ int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
+ // A step of 16 with zero sub-pixel offset means no filtering in that axis.
+ assert(w <= MAX_BLOCK_WIDTH);
+ assert(h <= MAX_BLOCK_HEIGHT);
+ assert(y_step_q4 <= MAX_STEP);
+ assert(x_step_q4 <= MAX_STEP);
+ // ref_idx is forwarded as the trailing 'avg' argument of the calls below.
+ if (ignore_horiz && ignore_vert) {
+ convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx);
+ } else if (ignore_vert) {
+#if CONFIG_DUAL_FILTER
+ InterpFilterParams filter_params =
+ vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
+#else
+ InterpFilterParams filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+#endif
+ assert(filter_params.taps <= MAX_FILTER_TAP);
+ vp10_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
+ subpel_x_q4, x_step_q4, ref_idx);
+ } else if (ignore_horiz) {
+#if CONFIG_DUAL_FILTER
+ InterpFilterParams filter_params =
+ vp10_get_interp_filter_params(interp_filter[2 * ref_idx]);
+#else
+ InterpFilterParams filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+#endif
+ assert(filter_params.taps <= MAX_FILTER_TAP);
+ vp10_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
+ subpel_y_q4, y_step_q4, ref_idx);
+ } else {
+ // temp's size is set to (maximum possible intermediate_height) *
+ // MAX_BLOCK_WIDTH
+ uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
+ MAX_FILTER_TAP) *
+ MAX_BLOCK_WIDTH];
+ int temp_stride = MAX_BLOCK_WIDTH;
+#if CONFIG_DUAL_FILTER
+ InterpFilterParams filter_params_x =
+ vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
+ InterpFilterParams filter_params_y =
+ vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
+ InterpFilterParams filter_params = filter_params_x;
+
+ // The filter size implies the required number of reference pixels for
+ // the second stage filtering. It is possible that the two directions
+ // require different filter sizes.
+ int filter_size = filter_params_y.taps;
+#else
+ InterpFilterParams filter_params =
+ vp10_get_interp_filter_params(interp_filter);
+ int filter_size = filter_params.taps;
+#endif
+ int intermediate_height =
+ (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+
+ assert(filter_params.taps <= MAX_FILTER_TAP);
+ // Pass 1: horizontal filter into temp, never averaged (avg argument is 0).
+ vp10_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
+ temp, temp_stride, w, intermediate_height,
+ filter_params, subpel_x_q4, x_step_q4, 0);
+
+#if CONFIG_DUAL_FILTER
+ filter_params = filter_params_y;
+#else
+ filter_params = vp10_get_interp_filter_params(interp_filter);
+#endif
+ filter_size = filter_params.taps;
+ assert(filter_params.taps <= MAX_FILTER_TAP);
+ // Pass 2: vertical filter from temp into dst; averaging follows ref_idx.
+ vp10_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
+ dst, dst_stride, w, h, filter_params, subpel_y_q4,
+ y_step_q4, ref_idx);
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
+                                  uint16_t *dst, int dst_stride, int w, int h,
+                                  const InterpFilterParams filter_params,
+                                  const int subpel_x_q4, int x_step_q4, int avg,
+                                  int bd) {
+  // Horizontal 1-D filter over a w x h block of 16-bit samples. The
+  // position advances by x_step_q4 per output pixel and its SUBPEL_MASK
+  // bits pick the kernel. Results are clamped with clip_pixel_highbd(.., bd);
+  // a non-zero avg blends each result with the pixel already in dst.
+  const int taps = filter_params.taps;
+  int row, col, tap;
+  src -= taps / 2 - 1;  // the tap window opens taps / 2 - 1 samples early
+  for (row = 0; row < h; ++row, src += src_stride, dst += dst_stride) {
+    int pos_q4 = subpel_x_q4;
+    for (col = 0; col < w; ++col, pos_q4 += x_step_q4) {
+      const uint16_t *const window = src + (pos_q4 >> SUBPEL_BITS);
+      const int16_t *const kernel = vp10_get_interp_filter_subpel_kernel(
+          filter_params, pos_q4 & SUBPEL_MASK);
+      int acc = 0;
+      int pixel;
+      for (tap = 0; tap < taps; ++tap) acc += window[tap] * kernel[tap];
+      pixel = clip_pixel_highbd(ROUND_POWER_OF_TWO(acc, FILTER_BITS), bd);
+      if (avg) {
+        pixel = ROUND_POWER_OF_TWO(dst[col] + pixel, 1);
+      }
+      dst[col] = pixel;
+    }
+  }
+}
+
+void vp10_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
+                                 uint16_t *dst, int dst_stride, int w, int h,
+                                 const InterpFilterParams filter_params,
+                                 const int subpel_y_q4, int y_step_q4, int avg,
+                                 int bd) {
+  // Vertical 1-D filter over a w x h block of 16-bit samples, processed one
+  // column at a time. The position advances by y_step_q4 per output row and
+  // taps are read src_stride apart. Results are clamped with
+  // clip_pixel_highbd(.., bd); a non-zero avg blends with the dst pixel.
+  const int taps = filter_params.taps;
+  int row, col, tap;
+  src -= src_stride * (taps / 2 - 1);  // window opens taps / 2 - 1 rows up
+  for (col = 0; col < w; ++col, ++src, ++dst) {
+    int pos_q4 = subpel_y_q4;
+    for (row = 0; row < h; ++row, pos_q4 += y_step_q4) {
+      const uint16_t *const base = src + (pos_q4 >> SUBPEL_BITS) * src_stride;
+      const int16_t *const kernel = vp10_get_interp_filter_subpel_kernel(
+          filter_params, pos_q4 & SUBPEL_MASK);
+      uint16_t *const out = dst + row * dst_stride;
+      int acc = 0;
+      int pixel;
+      for (tap = 0; tap < taps; ++tap) {
+        acc += base[tap * src_stride] * kernel[tap];
+      }
+      pixel = clip_pixel_highbd(ROUND_POWER_OF_TWO(acc, FILTER_BITS), bd);
+      if (avg) {
+        pixel = ROUND_POWER_OF_TWO(*out + pixel, 1);
+      }
+      *out = pixel;
+    }
+  }
+}
+
+static void highbd_convolve_copy(const uint16_t *src, int src_stride,
+                                 uint16_t *dst, int dst_stride, int w, int h,
+                                 int avg, int bd) {
+  // Unfiltered transfer of a w x h block of 16-bit samples.
+  // avg == 0: each row of w samples is copied verbatim.
+  // avg != 0: each dst sample becomes the rounded mean of itself and the
+  // matching src sample, clamped with clip_pixel_highbd(.., bd).
+  int row, col;
+  for (row = 0; row < h; ++row) {
+    if (!avg) {
+      memcpy(dst, src, w * sizeof(*src));
+    } else {
+      for (col = 0; col < w; ++col) {
+        dst[col] =
+            clip_pixel_highbd(ROUND_POWER_OF_TWO(dst[col] + src[col], 1), bd);
+      }
+    }
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
+                          int dst_stride, int w, int h,
+#if CONFIG_DUAL_FILTER
+                          const INTERP_FILTER *interp_filter,
+#else
+                          const INTERP_FILTER interp_filter,
+#endif
+                          const int subpel_x_q4, int x_step_q4,
+                          const int subpel_y_q4, int y_step_q4, int ref_idx,
+                          int bd) {
+  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
+  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
+  // Same dispatch as vp10_convolve(), on 16-bit samples clamped with 'bd'.
+  assert(w <= MAX_BLOCK_WIDTH);
+  assert(h <= MAX_BLOCK_HEIGHT);
+  assert(y_step_q4 <= MAX_STEP);
+  assert(x_step_q4 <= MAX_STEP);
+  // A direction with step 16 and no sub-pixel offset is not filtered.
+  if (ignore_horiz && ignore_vert) {
+    highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
+  } else if (ignore_vert) {
+#if CONFIG_DUAL_FILTER
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
+#else
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter);
+#endif
+    vp10_highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h,
+                               filter_params, subpel_x_q4, x_step_q4, ref_idx,
+                               bd);
+  } else if (ignore_horiz) {
+#if CONFIG_DUAL_FILTER
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
+#else
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter);
+#endif
+    vp10_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
+                              filter_params, subpel_y_q4, y_step_q4, ref_idx,
+                              bd);
+  } else {
+    // temp's size is set to (maximum possible intermediate_height) *
+    // MAX_BLOCK_WIDTH
+    uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
+                   MAX_FILTER_TAP) *
+                  MAX_BLOCK_WIDTH];
+    int temp_stride = MAX_BLOCK_WIDTH;
+
+#if CONFIG_DUAL_FILTER
+    InterpFilterParams filter_params_x =
+        vp10_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
+    InterpFilterParams filter_params_y =
+        vp10_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
+    InterpFilterParams filter_params = filter_params_x;
+    int filter_size = filter_params_y.taps;
+#else
+    InterpFilterParams filter_params =
+        vp10_get_interp_filter_params(interp_filter);
+    int filter_size = filter_params.taps;
+#endif
+
+    int intermediate_height =
+        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
+    assert(filter_size <= MAX_FILTER_TAP);  // temp is sized for this bound
+    vp10_highbd_convolve_horiz(
+        src - src_stride * (filter_size / 2 - 1), src_stride, temp, temp_stride,
+        w, intermediate_height, filter_params, subpel_x_q4, x_step_q4, 0, bd);
+
+#if CONFIG_DUAL_FILTER
+    filter_params = filter_params_y;
+#endif
+    filter_size = filter_params.taps;
+    assert(filter_params.taps <= MAX_FILTER_TAP);
+
+    vp10_highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
+                              temp_stride, dst, dst_stride, w, h, filter_params,
+                              subpel_y_q4, y_step_q4, ref_idx, bd);
+  }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/vp10_convolve.h b/av1/common/vp10_convolve.h
new file mode 100644
index 0000000..9343402
--- /dev/null
+++ b/av1/common/vp10_convolve.h
@@ -0,0 +1,35 @@
+#ifndef VP10_COMMON_VP10_CONVOLVE_H_
+#define VP10_COMMON_VP10_CONVOLVE_H_
+#include "av1/common/filter.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Block convolution. The definition in vp10_convolve.c names 'avg' ref_idx.
+void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+#if CONFIG_DUAL_FILTER
+ const INTERP_FILTER *interp_filter,
+#else
+ const INTERP_FILTER interp_filter,
+#endif
+ const int subpel_x, int xstep, const int subpel_y, int ystep,
+ int avg);
+// 16-bit-sample variant; 'bd' is forwarded to clip_pixel_highbd() there.
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+#if CONFIG_DUAL_FILTER
+ const INTERP_FILTER *interp_filter,
+#else
+ const INTERP_FILTER interp_filter,
+#endif
+ const int subpel_x, int xstep, const int subpel_y,
+ int ystep, int avg, int bd);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_COMMON_VP10_CONVOLVE_H_
diff --git a/av1/common/vp10_fwd_txfm.c b/av1/common/vp10_fwd_txfm.c
new file mode 100644
index 0000000..eb1c018
--- /dev/null
+++ b/av1/common/vp10_fwd_txfm.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "av1/common/vp10_fwd_txfm.h"
+
+void vp10_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
+ // The 2D transform is done with two passes which are actually pretty
+ // similar. In the first one, we transform the columns and transpose
+ // the results. In the second one, we transform the rows. To achieve that,
+ // as the first pass results are transposed, we transpose the columns (that
+ // is the transposed rows) and transpose the results (so that it goes back
+ // in normal/row positions).
+ int pass;
+ // We need an intermediate buffer between passes.
+ tran_low_t intermediate[4 * 4];
+ const int16_t *in_pass0 = input;
+ const tran_low_t *in = NULL;
+ tran_low_t *out = intermediate;
+ // Do the two transform/transpose passes
+ for (pass = 0; pass < 2; ++pass) {
+ tran_high_t input[4]; // canbe16; shadows the 'input' parameter
+ tran_high_t step[4]; // canbe16
+ tran_high_t temp1, temp2; // needs32
+ int i;
+ for (i = 0; i < 4; ++i) {
+ // Load inputs: pass 0 scales the samples by 16, pass 1 reads them as is.
+ if (0 == pass) {
+ input[0] = in_pass0[0 * stride] * 16;
+ input[1] = in_pass0[1 * stride] * 16;
+ input[2] = in_pass0[2 * stride] * 16;
+ input[3] = in_pass0[3 * stride] * 16;
+ if (i == 0 && input[0]) {
+ input[0] += 1; // NOTE(review): +1 bias, first column only; reason not shown
+ }
+ } else {
+ input[0] = in[0 * 4];
+ input[1] = in[1 * 4];
+ input[2] = in[2 * 4];
+ input[3] = in[3 * 4];
+ }
+ // Transform.
+ step[0] = input[0] + input[3];
+ step[1] = input[1] + input[2];
+ step[2] = input[1] - input[2];
+ step[3] = input[0] - input[3];
+ temp1 = (step[0] + step[1]) * cospi_16_64;
+ temp2 = (step[0] - step[1]) * cospi_16_64;
+ out[0] = (tran_low_t)fdct_round_shift(temp1);
+ out[2] = (tran_low_t)fdct_round_shift(temp2);
+ temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
+ temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
+ out[1] = (tran_low_t)fdct_round_shift(temp1);
+ out[3] = (tran_low_t)fdct_round_shift(temp2);
+ // Do next column (which is a transposed row in second/horizontal pass)
+ in_pass0++;
+ in++;
+ out += 4;
+ }
+ // Setup in/out for next pass.
+ in = intermediate;
+ out = output;
+ }
+ // Final scaling: every coefficient becomes (x + 1) >> 2.
+ {
+ int i, j;
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j) output[j + i * 4] = (output[j + i * 4] + 1) >> 2;
+ }
+ }
+}
+
+void vp10_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) {
+  int r, c;
+  tran_low_t sum = 0;  // running total of the 16 input samples
+  for (r = 0; r < 4; ++r)
+    for (c = 0; c < 4; ++c) sum += input[r * stride + c];
+  // Doubled with '*': '<< 1' on a negative sum is undefined behaviour in C.
+  output[0] = sum * 2;
+  output[1] = 0;
+}
+
+void vp10_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
+ int stride) {
+ int i, j;
+ tran_low_t intermediate[64];
+ int pass;
+ tran_low_t *output = intermediate;
+ const tran_low_t *in = NULL;
+
+ // Pass 0 reads the columns of 'input'; pass 1 re-runs on 'intermediate'.
+ for (pass = 0; pass < 2; ++pass) {
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
+ tran_high_t t0, t1, t2, t3; // needs32
+ tran_high_t x0, x1, x2, x3; // canbe16
+
+ int i; // shadows the function-scope i
+ for (i = 0; i < 8; i++) {
+ // stage 1
+ if (pass == 0) {
+ s0 = (input[0 * stride] + input[7 * stride]) * 4;
+ s1 = (input[1 * stride] + input[6 * stride]) * 4;
+ s2 = (input[2 * stride] + input[5 * stride]) * 4;
+ s3 = (input[3 * stride] + input[4 * stride]) * 4;
+ s4 = (input[3 * stride] - input[4 * stride]) * 4;
+ s5 = (input[2 * stride] - input[5 * stride]) * 4;
+ s6 = (input[1 * stride] - input[6 * stride]) * 4;
+ s7 = (input[0 * stride] - input[7 * stride]) * 4;
+ ++input;
+ } else {
+ s0 = in[0 * 8] + in[7 * 8];
+ s1 = in[1 * 8] + in[6 * 8];
+ s2 = in[2 * 8] + in[5 * 8];
+ s3 = in[3 * 8] + in[4 * 8];
+ s4 = in[3 * 8] - in[4 * 8];
+ s5 = in[2 * 8] - in[5 * 8];
+ s6 = in[1 * 8] - in[6 * 8];
+ s7 = in[0 * 8] - in[7 * 8];
+ ++in;
+ }
+
+ // fdct4(step, step);
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
+ t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
+ output[0] = (tran_low_t)fdct_round_shift(t0);
+ output[2] = (tran_low_t)fdct_round_shift(t2);
+ output[4] = (tran_low_t)fdct_round_shift(t1);
+ output[6] = (tran_low_t)fdct_round_shift(t3);
+
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = fdct_round_shift(t0);
+ t3 = fdct_round_shift(t1);
+
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
+
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ output[1] = (tran_low_t)fdct_round_shift(t0);
+ output[3] = (tran_low_t)fdct_round_shift(t2);
+ output[5] = (tran_low_t)fdct_round_shift(t1);
+ output[7] = (tran_low_t)fdct_round_shift(t3);
+ output += 8;
+ }
+ in = intermediate;
+ output = final_output;
+ }
+
+ // Final scaling: halve every coefficient (C division truncates toward 0).
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j) final_output[j + i * 8] /= 2;
+ }
+}
+
+void vp10_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) {
+  // DC-only 8x8 transform: output[0] is the plain sum of the 64 samples and
+  // output[1] is zeroed. Sample i sits at row i / 8, column i % 8.
+  tran_low_t total = 0;
+  int i;
+  for (i = 0; i < 64; ++i) total += input[(i >> 3) * stride + (i & 7)];
+  output[0] = total;
+  output[1] = 0;
+}
+
+void vp10_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
+ // The 2D transform is done with two passes which are actually pretty
+ // similar. In the first one, we transform the columns and transpose
+ // the results. In the second one, we transform the rows. To achieve that,
+ // as the first pass results are transposed, we transpose the columns (that
+ // is the transposed rows) and transpose the results (so that it goes back
+ // in normal/row positions).
+ int pass;
+ // We need an intermediate buffer between passes.
+ tran_low_t intermediate[256];
+ const int16_t *in_pass0 = input;
+ const tran_low_t *in = NULL;
+ tran_low_t *out = intermediate;
+ // Do the two transform/transpose passes
+ for (pass = 0; pass < 2; ++pass) {
+ tran_high_t step1[8]; // canbe16
+ tran_high_t step2[8]; // canbe16
+ tran_high_t step3[8]; // canbe16
+ tran_high_t input[8]; // canbe16; shadows the 'input' parameter
+ tran_high_t temp1, temp2; // needs32
+ int i;
+ for (i = 0; i < 16; i++) {
+ if (0 == pass) {
+ // Calculate input for the first 8 results (samples scaled by 4).
+ input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) * 4;
+ input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) * 4;
+ input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) * 4;
+ input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4;
+ input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4;
+ input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4;
+ input[6] = (in_pass0[6 * stride] + in_pass0[9 * stride]) * 4;
+ input[7] = (in_pass0[7 * stride] + in_pass0[8 * stride]) * 4;
+ // Calculate input for the next 8 results.
+ step1[0] = (in_pass0[7 * stride] - in_pass0[8 * stride]) * 4;
+ step1[1] = (in_pass0[6 * stride] - in_pass0[9 * stride]) * 4;
+ step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4;
+ step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4;
+ step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4;
+ step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) * 4;
+ step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) * 4;
+ step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) * 4;
+ } else {
+ // Calculate input for the first 8 results; each pass-0 value is first
+ // reduced with (x + 1) >> 2.
+ input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
+ input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
+ input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
+ input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
+ input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
+ input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
+ input[6] = ((in[6 * 16] + 1) >> 2) + ((in[9 * 16] + 1) >> 2);
+ input[7] = ((in[7 * 16] + 1) >> 2) + ((in[8 * 16] + 1) >> 2);
+ // Calculate input for the next 8 results.
+ step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[8 * 16] + 1) >> 2);
+ step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[9 * 16] + 1) >> 2);
+ step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
+ step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
+ step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
+ step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
+ step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
+ step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
+ }
+ // Work on the first eight values; fdct8(input, even_results);
+ {
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
+ tran_high_t t0, t1, t2, t3; // needs32
+ tran_high_t x0, x1, x2, x3; // canbe16
+
+ // stage 1
+ s0 = input[0] + input[7];
+ s1 = input[1] + input[6];
+ s2 = input[2] + input[5];
+ s3 = input[3] + input[4];
+ s4 = input[3] - input[4];
+ s5 = input[2] - input[5];
+ s6 = input[1] - input[6];
+ s7 = input[0] - input[7];
+
+ // fdct4(step, step);
+ x0 = s0 + s3;
+ x1 = s1 + s2;
+ x2 = s1 - s2;
+ x3 = s0 - s3;
+ t0 = (x0 + x1) * cospi_16_64;
+ t1 = (x0 - x1) * cospi_16_64;
+ t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
+ t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
+ out[0] = (tran_low_t)fdct_round_shift(t0);
+ out[4] = (tran_low_t)fdct_round_shift(t2);
+ out[8] = (tran_low_t)fdct_round_shift(t1);
+ out[12] = (tran_low_t)fdct_round_shift(t3);
+
+ // Stage 2
+ t0 = (s6 - s5) * cospi_16_64;
+ t1 = (s6 + s5) * cospi_16_64;
+ t2 = fdct_round_shift(t0);
+ t3 = fdct_round_shift(t1);
+
+ // Stage 3
+ x0 = s4 + t2;
+ x1 = s4 - t2;
+ x2 = s7 - t3;
+ x3 = s7 + t3;
+
+ // Stage 4
+ t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+ t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+ t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+ t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+ out[2] = (tran_low_t)fdct_round_shift(t0);
+ out[6] = (tran_low_t)fdct_round_shift(t2);
+ out[10] = (tran_low_t)fdct_round_shift(t1);
+ out[14] = (tran_low_t)fdct_round_shift(t3);
+ }
+ // Work on the next eight values; step1 -> odd_results
+ {
+ // step 2
+ temp1 = (step1[5] - step1[2]) * cospi_16_64;
+ temp2 = (step1[4] - step1[3]) * cospi_16_64;
+ step2[2] = fdct_round_shift(temp1);
+ step2[3] = fdct_round_shift(temp2);
+ temp1 = (step1[4] + step1[3]) * cospi_16_64;
+ temp2 = (step1[5] + step1[2]) * cospi_16_64;
+ step2[4] = fdct_round_shift(temp1);
+ step2[5] = fdct_round_shift(temp2);
+ // step 3
+ step3[0] = step1[0] + step2[3];
+ step3[1] = step1[1] + step2[2];
+ step3[2] = step1[1] - step2[2];
+ step3[3] = step1[0] - step2[3];
+ step3[4] = step1[7] - step2[4];
+ step3[5] = step1[6] - step2[5];
+ step3[6] = step1[6] + step2[5];
+ step3[7] = step1[7] + step2[4];
+ // step 4
+ temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
+ temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64;
+ step2[1] = fdct_round_shift(temp1);
+ step2[2] = fdct_round_shift(temp2);
+ temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64;
+ temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
+ step2[5] = fdct_round_shift(temp1);
+ step2[6] = fdct_round_shift(temp2);
+ // step 5
+ step1[0] = step3[0] + step2[1];
+ step1[1] = step3[0] - step2[1];
+ step1[2] = step3[3] + step2[2];
+ step1[3] = step3[3] - step2[2];
+ step1[4] = step3[4] - step2[5];
+ step1[5] = step3[4] + step2[5];
+ step1[6] = step3[7] - step2[6];
+ step1[7] = step3[7] + step2[6];
+ // step 6
+ temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
+ temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
+ out[1] = (tran_low_t)fdct_round_shift(temp1);
+ out[9] = (tran_low_t)fdct_round_shift(temp2);
+ temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
+ temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
+ out[5] = (tran_low_t)fdct_round_shift(temp1);
+ out[13] = (tran_low_t)fdct_round_shift(temp2);
+ temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
+ temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
+ out[3] = (tran_low_t)fdct_round_shift(temp1);
+ out[11] = (tran_low_t)fdct_round_shift(temp2);
+ temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
+ temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
+ out[7] = (tran_low_t)fdct_round_shift(temp1);
+ out[15] = (tran_low_t)fdct_round_shift(temp2);
+ }
+ // Do next column (which is a transposed row in second/horizontal pass)
+ in++;
+ in_pass0++;
+ out += 16;
+ }
+ // Setup in/out for next pass.
+ in = intermediate;
+ out = output;
+ }
+}
+
+void vp10_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) {
+  // DC-only 16x16 transform: output[0] is the sum of the 256 samples shifted
+  // right by one bit, and output[1] is zeroed.
+  tran_low_t total = 0;
+  int i;
+  for (i = 0; i < 256; ++i) total += input[(i >> 4) * stride + (i & 15)];
+  output[0] = total >> 1;
+  output[1] = 0;
+}
+
+static INLINE tran_high_t dct_32_round(tran_high_t input) {
+  // Rounding shift of 'input' by DCT_CONST_BITS (via ROUND_POWER_OF_TWO).
+  const tran_high_t rounded = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+  // TODO(debargha, peter.derivaz): Find new bounds for this assert and make
+  // them consts: assert(-131072 <= rounded && rounded <= 131071);
+  return rounded;
+}
+
+static INLINE tran_high_t half_round_shift(tran_high_t input) {
+  // Shift right by 2 after adding 1, plus one more when input is negative.
+  return (input + 1 + (input < 0)) >> 2;
+}
+
+void vp10_fdct32(const tran_high_t *input, tran_high_t *output, int round) {
+ tran_high_t step[32];
+ // Stage 1
+ step[0] = input[0] + input[(32 - 1)];
+ step[1] = input[1] + input[(32 - 2)];
+ step[2] = input[2] + input[(32 - 3)];
+ step[3] = input[3] + input[(32 - 4)];
+ step[4] = input[4] + input[(32 - 5)];
+ step[5] = input[5] + input[(32 - 6)];
+ step[6] = input[6] + input[(32 - 7)];
+ step[7] = input[7] + input[(32 - 8)];
+ step[8] = input[8] + input[(32 - 9)];
+ step[9] = input[9] + input[(32 - 10)];
+ step[10] = input[10] + input[(32 - 11)];
+ step[11] = input[11] + input[(32 - 12)];
+ step[12] = input[12] + input[(32 - 13)];
+ step[13] = input[13] + input[(32 - 14)];
+ step[14] = input[14] + input[(32 - 15)];
+ step[15] = input[15] + input[(32 - 16)];
+ step[16] = -input[16] + input[(32 - 17)];
+ step[17] = -input[17] + input[(32 - 18)];
+ step[18] = -input[18] + input[(32 - 19)];
+ step[19] = -input[19] + input[(32 - 20)];
+ step[20] = -input[20] + input[(32 - 21)];
+ step[21] = -input[21] + input[(32 - 22)];
+ step[22] = -input[22] + input[(32 - 23)];
+ step[23] = -input[23] + input[(32 - 24)];
+ step[24] = -input[24] + input[(32 - 25)];
+ step[25] = -input[25] + input[(32 - 26)];
+ step[26] = -input[26] + input[(32 - 27)];
+ step[27] = -input[27] + input[(32 - 28)];
+ step[28] = -input[28] + input[(32 - 29)];
+ step[29] = -input[29] + input[(32 - 30)];
+ step[30] = -input[30] + input[(32 - 31)];
+ step[31] = -input[31] + input[(32 - 32)];
+
+ // Stage 2
+ output[0] = step[0] + step[16 - 1];
+ output[1] = step[1] + step[16 - 2];
+ output[2] = step[2] + step[16 - 3];
+ output[3] = step[3] + step[16 - 4];
+ output[4] = step[4] + step[16 - 5];
+ output[5] = step[5] + step[16 - 6];
+ output[6] = step[6] + step[16 - 7];
+ output[7] = step[7] + step[16 - 8];
+ output[8] = -step[8] + step[16 - 9];
+ output[9] = -step[9] + step[16 - 10];
+ output[10] = -step[10] + step[16 - 11];
+ output[11] = -step[11] + step[16 - 12];
+ output[12] = -step[12] + step[16 - 13];
+ output[13] = -step[13] + step[16 - 14];
+ output[14] = -step[14] + step[16 - 15];
+ output[15] = -step[15] + step[16 - 16];
+
+ output[16] = step[16];
+ output[17] = step[17];
+ output[18] = step[18];
+ output[19] = step[19];
+
+ output[20] = dct_32_round((-step[20] + step[27]) * cospi_16_64);
+ output[21] = dct_32_round((-step[21] + step[26]) * cospi_16_64);
+ output[22] = dct_32_round((-step[22] + step[25]) * cospi_16_64);
+ output[23] = dct_32_round((-step[23] + step[24]) * cospi_16_64);
+
+ output[24] = dct_32_round((step[24] + step[23]) * cospi_16_64);
+ output[25] = dct_32_round((step[25] + step[22]) * cospi_16_64);
+ output[26] = dct_32_round((step[26] + step[21]) * cospi_16_64);
+ output[27] = dct_32_round((step[27] + step[20]) * cospi_16_64);
+
+ output[28] = step[28];
+ output[29] = step[29];
+ output[30] = step[30];
+ output[31] = step[31];
+
+ // dump the magnitude by 4, hence the intermediate values are within
+ // the range of 16 bits.
+ if (round) {
+ output[0] = half_round_shift(output[0]);
+ output[1] = half_round_shift(output[1]);
+ output[2] = half_round_shift(output[2]);
+ output[3] = half_round_shift(output[3]);
+ output[4] = half_round_shift(output[4]);
+ output[5] = half_round_shift(output[5]);
+ output[6] = half_round_shift(output[6]);
+ output[7] = half_round_shift(output[7]);
+ output[8] = half_round_shift(output[8]);
+ output[9] = half_round_shift(output[9]);
+ output[10] = half_round_shift(output[10]);
+ output[11] = half_round_shift(output[11]);
+ output[12] = half_round_shift(output[12]);
+ output[13] = half_round_shift(output[13]);
+ output[14] = half_round_shift(output[14]);
+ output[15] = half_round_shift(output[15]);
+
+ output[16] = half_round_shift(output[16]);
+ output[17] = half_round_shift(output[17]);
+ output[18] = half_round_shift(output[18]);
+ output[19] = half_round_shift(output[19]);
+ output[20] = half_round_shift(output[20]);
+ output[21] = half_round_shift(output[21]);
+ output[22] = half_round_shift(output[22]);
+ output[23] = half_round_shift(output[23]);
+ output[24] = half_round_shift(output[24]);
+ output[25] = half_round_shift(output[25]);
+ output[26] = half_round_shift(output[26]);
+ output[27] = half_round_shift(output[27]);
+ output[28] = half_round_shift(output[28]);
+ output[29] = half_round_shift(output[29]);
+ output[30] = half_round_shift(output[30]);
+ output[31] = half_round_shift(output[31]);
+ }
+
+ // Stage 3
+ step[0] = output[0] + output[(8 - 1)];
+ step[1] = output[1] + output[(8 - 2)];
+ step[2] = output[2] + output[(8 - 3)];
+ step[3] = output[3] + output[(8 - 4)];
+ step[4] = -output[4] + output[(8 - 5)];
+ step[5] = -output[5] + output[(8 - 6)];
+ step[6] = -output[6] + output[(8 - 7)];
+ step[7] = -output[7] + output[(8 - 8)];
+ step[8] = output[8];
+ step[9] = output[9];
+ step[10] = dct_32_round((-output[10] + output[13]) * cospi_16_64);
+ step[11] = dct_32_round((-output[11] + output[12]) * cospi_16_64);
+ step[12] = dct_32_round((output[12] + output[11]) * cospi_16_64);
+ step[13] = dct_32_round((output[13] + output[10]) * cospi_16_64);
+ step[14] = output[14];
+ step[15] = output[15];
+
+ step[16] = output[16] + output[23];
+ step[17] = output[17] + output[22];
+ step[18] = output[18] + output[21];
+ step[19] = output[19] + output[20];
+ step[20] = -output[20] + output[19];
+ step[21] = -output[21] + output[18];
+ step[22] = -output[22] + output[17];
+ step[23] = -output[23] + output[16];
+ step[24] = -output[24] + output[31];
+ step[25] = -output[25] + output[30];
+ step[26] = -output[26] + output[29];
+ step[27] = -output[27] + output[28];
+ step[28] = output[28] + output[27];
+ step[29] = output[29] + output[26];
+ step[30] = output[30] + output[25];
+ step[31] = output[31] + output[24];
+
+ // Stage 4
+ output[0] = step[0] + step[3];
+ output[1] = step[1] + step[2];
+ output[2] = -step[2] + step[1];
+ output[3] = -step[3] + step[0];
+ output[4] = step[4];
+ output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64);
+ output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64);
+ output[7] = step[7];
+ output[8] = step[8] + step[11];
+ output[9] = step[9] + step[10];
+ output[10] = -step[10] + step[9];
+ output[11] = -step[11] + step[8];
+ output[12] = -step[12] + step[15];
+ output[13] = -step[13] + step[14];
+ output[14] = step[14] + step[13];
+ output[15] = step[15] + step[12];
+
+ output[16] = step[16];
+ output[17] = step[17];
+ output[18] = dct_32_round(step[18] * -cospi_8_64 + step[29] * cospi_24_64);
+ output[19] = dct_32_round(step[19] * -cospi_8_64 + step[28] * cospi_24_64);
+ output[20] = dct_32_round(step[20] * -cospi_24_64 + step[27] * -cospi_8_64);
+ output[21] = dct_32_round(step[21] * -cospi_24_64 + step[26] * -cospi_8_64);
+ output[22] = step[22];
+ output[23] = step[23];
+ output[24] = step[24];
+ output[25] = step[25];
+ output[26] = dct_32_round(step[26] * cospi_24_64 + step[21] * -cospi_8_64);
+ output[27] = dct_32_round(step[27] * cospi_24_64 + step[20] * -cospi_8_64);
+ output[28] = dct_32_round(step[28] * cospi_8_64 + step[19] * cospi_24_64);
+ output[29] = dct_32_round(step[29] * cospi_8_64 + step[18] * cospi_24_64);
+ output[30] = step[30];
+ output[31] = step[31];
+
+ // Stage 5
+ step[0] = dct_32_round((output[0] + output[1]) * cospi_16_64);
+ step[1] = dct_32_round((-output[1] + output[0]) * cospi_16_64);
+ step[2] = dct_32_round(output[2] * cospi_24_64 + output[3] * cospi_8_64);
+ step[3] = dct_32_round(output[3] * cospi_24_64 - output[2] * cospi_8_64);
+ step[4] = output[4] + output[5];
+ step[5] = -output[5] + output[4];
+ step[6] = -output[6] + output[7];
+ step[7] = output[7] + output[6];
+ step[8] = output[8];
+ step[9] = dct_32_round(output[9] * -cospi_8_64 + output[14] * cospi_24_64);
+ step[10] = dct_32_round(output[10] * -cospi_24_64 + output[13] * -cospi_8_64);
+ step[11] = output[11];
+ step[12] = output[12];
+ step[13] = dct_32_round(output[13] * cospi_24_64 + output[10] * -cospi_8_64);
+ step[14] = dct_32_round(output[14] * cospi_8_64 + output[9] * cospi_24_64);
+ step[15] = output[15];
+
+ step[16] = output[16] + output[19];
+ step[17] = output[17] + output[18];
+ step[18] = -output[18] + output[17];
+ step[19] = -output[19] + output[16];
+ step[20] = -output[20] + output[23];
+ step[21] = -output[21] + output[22];
+ step[22] = output[22] + output[21];
+ step[23] = output[23] + output[20];
+ step[24] = output[24] + output[27];
+ step[25] = output[25] + output[26];
+ step[26] = -output[26] + output[25];
+ step[27] = -output[27] + output[24];
+ step[28] = -output[28] + output[31];
+ step[29] = -output[29] + output[30];
+ step[30] = output[30] + output[29];
+ step[31] = output[31] + output[28];
+
+ // Stage 6
+ output[0] = step[0];
+ output[1] = step[1];
+ output[2] = step[2];
+ output[3] = step[3];
+ output[4] = dct_32_round(step[4] * cospi_28_64 + step[7] * cospi_4_64);
+ output[5] = dct_32_round(step[5] * cospi_12_64 + step[6] * cospi_20_64);
+ output[6] = dct_32_round(step[6] * cospi_12_64 + step[5] * -cospi_20_64);
+ output[7] = dct_32_round(step[7] * cospi_28_64 + step[4] * -cospi_4_64);
+ output[8] = step[8] + step[9];
+ output[9] = -step[9] + step[8];
+ output[10] = -step[10] + step[11];
+ output[11] = step[11] + step[10];
+ output[12] = step[12] + step[13];
+ output[13] = -step[13] + step[12];
+ output[14] = -step[14] + step[15];
+ output[15] = step[15] + step[14];
+
+ output[16] = step[16];
+ output[17] = dct_32_round(step[17] * -cospi_4_64 + step[30] * cospi_28_64);
+ output[18] = dct_32_round(step[18] * -cospi_28_64 + step[29] * -cospi_4_64);
+ output[19] = step[19];
+ output[20] = step[20];
+ output[21] = dct_32_round(step[21] * -cospi_20_64 + step[26] * cospi_12_64);
+ output[22] = dct_32_round(step[22] * -cospi_12_64 + step[25] * -cospi_20_64);
+ output[23] = step[23];
+ output[24] = step[24];
+ output[25] = dct_32_round(step[25] * cospi_12_64 + step[22] * -cospi_20_64);
+ output[26] = dct_32_round(step[26] * cospi_20_64 + step[21] * cospi_12_64);
+ output[27] = step[27];
+ output[28] = step[28];
+ output[29] = dct_32_round(step[29] * cospi_28_64 + step[18] * -cospi_4_64);
+ output[30] = dct_32_round(step[30] * cospi_4_64 + step[17] * cospi_28_64);
+ output[31] = step[31];
+
+ // Stage 7
+ step[0] = output[0];
+ step[1] = output[1];
+ step[2] = output[2];
+ step[3] = output[3];
+ step[4] = output[4];
+ step[5] = output[5];
+ step[6] = output[6];
+ step[7] = output[7];
+ step[8] = dct_32_round(output[8] * cospi_30_64 + output[15] * cospi_2_64);
+ step[9] = dct_32_round(output[9] * cospi_14_64 + output[14] * cospi_18_64);
+ step[10] = dct_32_round(output[10] * cospi_22_64 + output[13] * cospi_10_64);
+ step[11] = dct_32_round(output[11] * cospi_6_64 + output[12] * cospi_26_64);
+ step[12] = dct_32_round(output[12] * cospi_6_64 + output[11] * -cospi_26_64);
+ step[13] = dct_32_round(output[13] * cospi_22_64 + output[10] * -cospi_10_64);
+ step[14] = dct_32_round(output[14] * cospi_14_64 + output[9] * -cospi_18_64);
+ step[15] = dct_32_round(output[15] * cospi_30_64 + output[8] * -cospi_2_64);
+
+ step[16] = output[16] + output[17];
+ step[17] = -output[17] + output[16];
+ step[18] = -output[18] + output[19];
+ step[19] = output[19] + output[18];
+ step[20] = output[20] + output[21];
+ step[21] = -output[21] + output[20];
+ step[22] = -output[22] + output[23];
+ step[23] = output[23] + output[22];
+ step[24] = output[24] + output[25];
+ step[25] = -output[25] + output[24];
+ step[26] = -output[26] + output[27];
+ step[27] = output[27] + output[26];
+ step[28] = output[28] + output[29];
+ step[29] = -output[29] + output[28];
+ step[30] = -output[30] + output[31];
+ step[31] = output[31] + output[30];
+
+ // Final stage --- outputs indices are bit-reversed.
+ output[0] = step[0];
+ output[16] = step[1];
+ output[8] = step[2];
+ output[24] = step[3];
+ output[4] = step[4];
+ output[20] = step[5];
+ output[12] = step[6];
+ output[28] = step[7];
+ output[2] = step[8];
+ output[18] = step[9];
+ output[10] = step[10];
+ output[26] = step[11];
+ output[6] = step[12];
+ output[22] = step[13];
+ output[14] = step[14];
+ output[30] = step[15];
+
+ output[1] = dct_32_round(step[16] * cospi_31_64 + step[31] * cospi_1_64);
+ output[17] = dct_32_round(step[17] * cospi_15_64 + step[30] * cospi_17_64);
+ output[9] = dct_32_round(step[18] * cospi_23_64 + step[29] * cospi_9_64);
+ output[25] = dct_32_round(step[19] * cospi_7_64 + step[28] * cospi_25_64);
+ output[5] = dct_32_round(step[20] * cospi_27_64 + step[27] * cospi_5_64);
+ output[21] = dct_32_round(step[21] * cospi_11_64 + step[26] * cospi_21_64);
+ output[13] = dct_32_round(step[22] * cospi_19_64 + step[25] * cospi_13_64);
+ output[29] = dct_32_round(step[23] * cospi_3_64 + step[24] * cospi_29_64);
+ output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64);
+ output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64);
+ output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64);
+ output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64);
+ output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64);
+ output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64);
+ output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64);
+ output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
+}
+
+// 2-D 32x32 forward DCT: a column pass into a scratch block, then a row pass.
+void vp10_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
+  tran_high_t pass1[32 * 32];  // column-pass results, stored row-major
+  tran_high_t src[32], res[32];
+  int row, col, k;
+
+  // Pass 1: transform each column of the input, pre-scaled by 4.
+  for (col = 0; col < 32; ++col) {
+    for (k = 0; k < 32; ++k) src[k] = input[k * stride + col] * 4;
+    vp10_fdct32(src, res, 0);
+    // Shift right by 2 after adding 2 to positive values and 1 to the rest.
+    for (k = 0; k < 32; ++k)
+      pass1[k * 32 + col] = (res[k] + 1 + (res[k] > 0)) >> 2;
+  }
+
+  // Pass 2: transform each row; here the extra rounding 1 goes to negatives.
+  for (row = 0; row < 32; ++row) {
+    for (k = 0; k < 32; ++k) src[k] = pass1[row * 32 + k];
+    vp10_fdct32(src, res, 0);
+    for (k = 0; k < 32; ++k)
+      out[row * 32 + k] = (tran_low_t)((res[k] + 1 + (res[k] < 0)) >> 2);
+  }
+}
+
+// Note that although dct_32_round is used in the dct32 computation flow,
+// this 2-D fdct32x32 for the rate-distortion optimization loop operates
+// within 16-bit precision.
+void vp10_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
+  tran_high_t pass1[32 * 32];  // column-pass results, stored row-major
+  tran_high_t src[32], res[32];
+  int row, col, k;
+
+  // Pass 1: column transforms of the input pre-scaled by 4 (round flag 0).
+  for (col = 0; col < 32; ++col) {
+    for (k = 0; k < 32; ++k) src[k] = input[k * stride + col] * 4;
+    vp10_fdct32(src, res, 0);
+    // TODO(cd): see quality impact of only doing
+    //           pass1[k * 32 + col] = (res[k] + 1) >> 2;
+    //           PS: also change code in vp10_dsp/x86/vp10_dct_sse2.c
+    for (k = 0; k < 32; ++k)
+      pass1[k * 32 + col] = (res[k] + 1 + (res[k] > 0)) >> 2;
+  }
+
+  // Pass 2: row transforms with the round flag set to 1; the results are
+  // stored as returned, with no additional shift applied here.
+  for (row = 0; row < 32; ++row) {
+    for (k = 0; k < 32; ++k) src[k] = pass1[row * 32 + k];
+    vp10_fdct32(src, res, 1);
+    for (k = 0; k < 32; ++k) out[row * 32 + k] = (tran_low_t)res[k];
+  }
+}
+
+// DC-only 32x32 transform: output[0] = (sum of all 1024 samples) >> 3.
+void vp10_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) {
+  // int accumulator: 1024 int16_t samples overflow if tran_low_t is 16 bits.
+  int r, c, sum = 0;
+  for (r = 0; r < 32; ++r)
+    for (c = 0; c < 32; ++c) sum += input[r * stride + c];
+  output[0] = (tran_low_t)(sum >> 3);  // narrow only the final, shifted value
+  output[1] = 0;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output,
+ int stride) {
+ vp10_fdct4x4_c(input, output, stride);  // highbd variant: pure pass-through
+}
+
+void vp10_highbd_fdct8x8_c(const int16_t *input, tran_low_t *final_output,
+ int stride) {
+ vp10_fdct8x8_c(input, final_output, stride);  // highbd variant: pure pass-through
+}
+
+void vp10_highbd_fdct8x8_1_c(const int16_t *input, tran_low_t *final_output,
+ int stride) {
+ vp10_fdct8x8_1_c(input, final_output, stride);  // highbd variant: pure pass-through
+}
+
+void vp10_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output,
+ int stride) {
+ vp10_fdct16x16_c(input, output, stride);  // highbd variant: pure pass-through
+}
+
+void vp10_highbd_fdct16x16_1_c(const int16_t *input, tran_low_t *output,
+ int stride) {
+ vp10_fdct16x16_1_c(input, output, stride);  // highbd variant: pure pass-through
+}
+
+void vp10_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out,
+ int stride) {
+ vp10_fdct32x32_c(input, out, stride);  // highbd variant: pure pass-through
+}
+
+void vp10_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out,
+ int stride) {
+ vp10_fdct32x32_rd_c(input, out, stride);  // highbd variant: pure pass-through
+}
+
+void vp10_highbd_fdct32x32_1_c(const int16_t *input, tran_low_t *out,
+ int stride) {
+ vp10_fdct32x32_1_c(input, out, stride);  // highbd variant: pure pass-through
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/vp10_fwd_txfm.h b/av1/common/vp10_fwd_txfm.h
new file mode 100644
index 0000000..a0481d3
--- /dev/null
+++ b/av1/common/vp10_fwd_txfm.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_VP10_FWD_TXFM_H_
+#define VP10_COMMON_VP10_FWD_TXFM_H_
+
+#include "aom_dsp/txfm_common.h"
+#include "aom_dsp/fwd_txfm.h"
+
+void vp10_fdct32(const tran_high_t *input, tran_high_t *output, int round);
+#endif // VP10_COMMON_VP10_FWD_TXFM_H_
diff --git a/av1/common/vp10_fwd_txfm1d.c b/av1/common/vp10_fwd_txfm1d.c
new file mode 100644
index 0000000..6dff077
--- /dev/null
+++ b/av1/common/vp10_fwd_txfm1d.c
@@ -0,0 +1,1530 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include "av1/common/vp10_fwd_txfm1d.h"
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+#define range_check(stage, input, buf, size, bit) \
+ { \
+ int i, j; \
+ for (i = 0; i < size; ++i) { \
+ int buf_bit = get_max_bit(abs(buf[i])) + 1; \
+ if (buf_bit > bit) { \
+ printf("======== %s %d overflow ========\n", __FILE__, __LINE__); \
+ printf("stage: %d node: %d\n", stage, i); \
+ printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \
+ printf("input:\n"); \
+ for (j = 0; j < size; j++) { \
+ printf("%d,", input[j]); \
+ } \
+ printf("\n"); \
+ assert(0); \
+ } \
+ } \
+ }
+#else
+#define range_check(stage, input, buf, size, bit) \
+ { \
+ (void) stage; \
+ (void) input; \
+ (void) buf; \
+ (void) size; \
+ (void) bit; \
+ }
+#endif
+
+void vp10_fdct4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {  // both indexed by stage
+ const int32_t size = 4;
+ const int32_t *cospi;
+ // 4-point forward DCT: three stages ping-ponging between output and step.
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;  // bf0: buffer a stage reads; bf1: buffer it writes
+ int32_t step[4];
+ // range_check() only checks when CONFIG_COEFFICIENT_RANGE_CHECKING is set.
+ // stage 0: only the raw input is range-checked; nothing is computed yet.
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1: sums and differences of mirrored input pairs, written to output.
+ stage++;
+ bf1 = output;  // NOTE(review): written while input is still read; in-place calls look unsafe
+ bf1[0] = input[0] + input[3];
+ bf1[1] = input[1] + input[2];
+ bf1[2] = -input[2] + input[1];
+ bf1[3] = -input[3] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2 (output -> step): half_btf() butterflies on pairs 0/1 and 2/3.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];  // table picked by this stage's cos_bit
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);  // presumably round((w0*x0 + w1*x1) >> cos_bit) - confirm
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3 (step -> output): bit-reversed index permutation (0, 2, 1, 3).
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[2];
+ bf1[2] = bf0[1];
+ bf1[3] = bf0[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fdct8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {  // both indexed by stage
+ const int32_t size = 8;
+ const int32_t *cospi;
+ // 8-point forward DCT: five stages ping-ponging between output and step.
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;  // bf0: buffer a stage reads; bf1: buffer it writes
+ int32_t step[8];
+ // range_check() only checks when CONFIG_COEFFICIENT_RANGE_CHECKING is set.
+ // stage 0: only the raw input is range-checked; nothing is computed yet.
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1: sums and differences of mirrored input pairs, written to output.
+ stage++;
+ bf1 = output;  // NOTE(review): written while input is still read; in-place calls look unsafe
+ bf1[0] = input[0] + input[7];
+ bf1[1] = input[1] + input[6];
+ bf1[2] = input[2] + input[5];
+ bf1[3] = input[3] + input[4];
+ bf1[4] = -input[4] + input[3];
+ bf1[5] = -input[5] + input[2];
+ bf1[6] = -input[6] + input[1];
+ bf1[7] = -input[7] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2 (output -> step): add/sub on 0..3, cospi[32] butterfly on 5/6, copy 4 and 7.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];  // table picked by this stage's cos_bit
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = -bf0[2] + bf0[1];
+ bf1[3] = -bf0[3] + bf0[0];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3 (step -> output): half_btf() butterflies on 0..3, add/sub on 4..7.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = -bf0[5] + bf0[4];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[7] + bf0[6];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4 (output -> step): copy 0..3, half_btf() butterflies on 4..7.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5 (step -> output): bit-reversed index permutation; no cosine table used.
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[4];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[6];
+ bf1[4] = bf0[1];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[3];
+ bf1[7] = bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fdct16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {  // both indexed by stage
+ const int32_t size = 16;
+ const int32_t *cospi;
+ // 16-point forward DCT: seven stages ping-ponging between output and step.
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;  // bf0: buffer a stage reads; bf1: buffer it writes
+ int32_t step[16];
+ // range_check() only checks when CONFIG_COEFFICIENT_RANGE_CHECKING is set.
+ // stage 0: only the raw input is range-checked; nothing is computed yet.
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1: sums and differences of mirrored input pairs, written to output.
+ stage++;
+ bf1 = output;  // NOTE(review): written while input is still read; in-place calls look unsafe
+ bf1[0] = input[0] + input[15];
+ bf1[1] = input[1] + input[14];
+ bf1[2] = input[2] + input[13];
+ bf1[3] = input[3] + input[12];
+ bf1[4] = input[4] + input[11];
+ bf1[5] = input[5] + input[10];
+ bf1[6] = input[6] + input[9];
+ bf1[7] = input[7] + input[8];
+ bf1[8] = -input[8] + input[7];
+ bf1[9] = -input[9] + input[6];
+ bf1[10] = -input[10] + input[5];
+ bf1[11] = -input[11] + input[4];
+ bf1[12] = -input[12] + input[3];
+ bf1[13] = -input[13] + input[2];
+ bf1[14] = -input[14] + input[1];
+ bf1[15] = -input[15] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2 (output -> step): add/sub on 0..7, cospi[32] butterflies on 10..13, copy 8, 9, 14, 15.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];  // table picked by this stage's cos_bit
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = -bf0[4] + bf0[3];
+ bf1[5] = -bf0[5] + bf0[2];
+ bf1[6] = -bf0[6] + bf0[1];
+ bf1[7] = -bf0[7] + bf0[0];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3 (step -> output): add/sub on 0..3 and 8..15, cospi[32] butterfly on 5/6, copy 4 and 7.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = -bf0[2] + bf0[1];
+ bf1[3] = -bf0[3] + bf0[0];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = -bf0[10] + bf0[9];
+ bf1[11] = -bf0[11] + bf0[8];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[14] + bf0[13];
+ bf1[15] = bf0[15] + bf0[12];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4 (output -> step): half_btf() on 0..3 and 9, 10, 13, 14; add/sub on 4..7; copy 8, 11, 12, 15.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = -bf0[5] + bf0[4];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[7] + bf0[6];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5 (step -> output): copy 0..3, half_btf() butterflies on 4..7, add/sub on 8..15.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = -bf0[9] + bf0[8];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[11] + bf0[10];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = -bf0[13] + bf0[12];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[15] + bf0[14];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6 (output -> step): copy 0..7, half_btf() butterflies on 8..15.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7 (step -> output): bit-reversed index permutation; no cosine table used.
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[8];
+ bf1[2] = bf0[4];
+ bf1[3] = bf0[12];
+ bf1[4] = bf0[2];
+ bf1[5] = bf0[10];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[14];
+ bf1[8] = bf0[1];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[5];
+ bf1[11] = bf0[13];
+ bf1[12] = bf0[3];
+ bf1[13] = bf0[11];
+ bf1[14] = bf0[7];
+ bf1[15] = bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fdct32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 32;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[32];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0] + input[31];
+ bf1[1] = input[1] + input[30];
+ bf1[2] = input[2] + input[29];
+ bf1[3] = input[3] + input[28];
+ bf1[4] = input[4] + input[27];
+ bf1[5] = input[5] + input[26];
+ bf1[6] = input[6] + input[25];
+ bf1[7] = input[7] + input[24];
+ bf1[8] = input[8] + input[23];
+ bf1[9] = input[9] + input[22];
+ bf1[10] = input[10] + input[21];
+ bf1[11] = input[11] + input[20];
+ bf1[12] = input[12] + input[19];
+ bf1[13] = input[13] + input[18];
+ bf1[14] = input[14] + input[17];
+ bf1[15] = input[15] + input[16];
+ bf1[16] = -input[16] + input[15];
+ bf1[17] = -input[17] + input[14];
+ bf1[18] = -input[18] + input[13];
+ bf1[19] = -input[19] + input[12];
+ bf1[20] = -input[20] + input[11];
+ bf1[21] = -input[21] + input[10];
+ bf1[22] = -input[22] + input[9];
+ bf1[23] = -input[23] + input[8];
+ bf1[24] = -input[24] + input[7];
+ bf1[25] = -input[25] + input[6];
+ bf1[26] = -input[26] + input[5];
+ bf1[27] = -input[27] + input[4];
+ bf1[28] = -input[28] + input[3];
+ bf1[29] = -input[29] + input[2];
+ bf1[30] = -input[30] + input[1];
+ bf1[31] = -input[31] + input[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = -bf0[8] + bf0[7];
+ bf1[9] = -bf0[9] + bf0[6];
+ bf1[10] = -bf0[10] + bf0[5];
+ bf1[11] = -bf0[11] + bf0[4];
+ bf1[12] = -bf0[12] + bf0[3];
+ bf1[13] = -bf0[13] + bf0[2];
+ bf1[14] = -bf0[14] + bf0[1];
+ bf1[15] = -bf0[15] + bf0[0];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = -bf0[4] + bf0[3];
+ bf1[5] = -bf0[5] + bf0[2];
+ bf1[6] = -bf0[6] + bf0[1];
+ bf1[7] = -bf0[7] + bf0[0];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[23];
+ bf1[17] = bf0[17] + bf0[22];
+ bf1[18] = bf0[18] + bf0[21];
+ bf1[19] = bf0[19] + bf0[20];
+ bf1[20] = -bf0[20] + bf0[19];
+ bf1[21] = -bf0[21] + bf0[18];
+ bf1[22] = -bf0[22] + bf0[17];
+ bf1[23] = -bf0[23] + bf0[16];
+ bf1[24] = -bf0[24] + bf0[31];
+ bf1[25] = -bf0[25] + bf0[30];
+ bf1[26] = -bf0[26] + bf0[29];
+ bf1[27] = -bf0[27] + bf0[28];
+ bf1[28] = bf0[28] + bf0[27];
+ bf1[29] = bf0[29] + bf0[26];
+ bf1[30] = bf0[30] + bf0[25];
+ bf1[31] = bf0[31] + bf0[24];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = -bf0[2] + bf0[1];
+ bf1[3] = -bf0[3] + bf0[0];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = -bf0[10] + bf0[9];
+ bf1[11] = -bf0[11] + bf0[8];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[14] + bf0[13];
+ bf1[15] = bf0[15] + bf0[12];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit[stage]);
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = -bf0[5] + bf0[4];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[7] + bf0[6];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[19];
+ bf1[17] = bf0[17] + bf0[18];
+ bf1[18] = -bf0[18] + bf0[17];
+ bf1[19] = -bf0[19] + bf0[16];
+ bf1[20] = -bf0[20] + bf0[23];
+ bf1[21] = -bf0[21] + bf0[22];
+ bf1[22] = bf0[22] + bf0[21];
+ bf1[23] = bf0[23] + bf0[20];
+ bf1[24] = bf0[24] + bf0[27];
+ bf1[25] = bf0[25] + bf0[26];
+ bf1[26] = -bf0[26] + bf0[25];
+ bf1[27] = -bf0[27] + bf0[24];
+ bf1[28] = -bf0[28] + bf0[31];
+ bf1[29] = -bf0[29] + bf0[30];
+ bf1[30] = bf0[30] + bf0[29];
+ bf1[31] = bf0[31] + bf0[28];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = -bf0[9] + bf0[8];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[11] + bf0[10];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = -bf0[13] + bf0[12];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[15] + bf0[14];
+ bf1[16] = bf0[16];
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit[stage]);
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit[stage]);
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit[stage]);
+ bf1[16] = bf0[16] + bf0[17];
+ bf1[17] = -bf0[17] + bf0[16];
+ bf1[18] = -bf0[18] + bf0[19];
+ bf1[19] = bf0[19] + bf0[18];
+ bf1[20] = bf0[20] + bf0[21];
+ bf1[21] = -bf0[21] + bf0[20];
+ bf1[22] = -bf0[22] + bf0[23];
+ bf1[23] = bf0[23] + bf0[22];
+ bf1[24] = bf0[24] + bf0[25];
+ bf1[25] = -bf0[25] + bf0[24];
+ bf1[26] = -bf0[26] + bf0[27];
+ bf1[27] = bf0[27] + bf0[26];
+ bf1[28] = bf0[28] + bf0[29];
+ bf1[29] = -bf0[29] + bf0[28];
+ bf1[30] = -bf0[30] + bf0[31];
+ bf1[31] = bf0[31] + bf0[30];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[16];
+ bf1[2] = bf0[8];
+ bf1[3] = bf0[24];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[20];
+ bf1[6] = bf0[12];
+ bf1[7] = bf0[28];
+ bf1[8] = bf0[2];
+ bf1[9] = bf0[18];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[26];
+ bf1[12] = bf0[6];
+ bf1[13] = bf0[22];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[30];
+ bf1[16] = bf0[1];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[9];
+ bf1[19] = bf0[25];
+ bf1[20] = bf0[5];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[13];
+ bf1[23] = bf0[29];
+ bf1[24] = bf0[3];
+ bf1[25] = bf0[19];
+ bf1[26] = bf0[11];
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[7];
+ bf1[29] = bf0[23];
+ bf1[30] = bf0[15];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fadst4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 4;  // 4-point forward ADST (TXFM_TYPE_ADST4 kernel)
+ const int32_t *cospi;  // cosine table row, re-selected from cos_bit[stage]
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;  // bf0: buffer a stage reads, bf1: buffer it writes
+ int32_t step[4];  // scratch; stages alternate between output and step
+
+ // stage 0: hand the untouched input to range_check with stage_range[0].
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1: pure reorder, output[2k] = input[3 - 2k], output[2k+1] = input[2k].
+ stage++;
+ bf1 = output;
+ bf1[0] = input[3];
+ bf1[1] = input[0];
+ bf1[2] = input[1];
+ bf1[3] = input[2];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2: half_btf on adjacent pairs (0,1) and (2,3); output -> step.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[8], bf0[1], cospi[56], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[40], bf0[3], cospi[24], bf0[2], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3: add/sub of elements 2 apart; cospi is reloaded but never read here.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = -bf0[2] + bf0[0];
+ bf1[3] = -bf0[3] + bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4: 0 and 1 are copied; 2 and 3 go through half_btf with cospi[32].
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5: final reorder step -> output; odd-indexed outputs are negated.
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[2];
+ bf1[2] = bf0[3];
+ bf1[3] = -bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fadst8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 8;  // 8-point forward ADST (TXFM_TYPE_ADST8 kernel)
+ const int32_t *cospi;  // cosine table row, re-selected from cos_bit[stage]
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;  // bf0: buffer a stage reads, bf1: buffer it writes
+ int32_t step[8];  // scratch; stages alternate between output and step
+
+ // stage 0: hand the untouched input to range_check with stage_range[0].
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1: pure reorder, output[2k] = input[7 - 2k], output[2k+1] = input[2k].
+ stage++;
+ bf1 = output;
+ bf1[0] = input[7];
+ bf1[1] = input[0];
+ bf1[2] = input[5];
+ bf1[3] = input[2];
+ bf1[4] = input[3];
+ bf1[5] = input[4];
+ bf1[6] = input[1];
+ bf1[7] = input[6];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2: half_btf on every adjacent pair (2k, 2k+1); output -> step.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[4], bf0[1], cospi[60], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[20], bf0[3], cospi[44], bf0[2], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[36], bf0[5], cospi[28], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[52], bf0[7], cospi[12], bf0[6], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3: add/sub of elements 4 apart; cospi is reloaded but never read here.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = -bf0[4] + bf0[0];
+ bf1[5] = -bf0[5] + bf0[1];
+ bf1[6] = -bf0[6] + bf0[2];
+ bf1[7] = -bf0[7] + bf0[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4: 0..3 are copied; pairs (4,5) and (6,7) use cospi[16]/cospi[48].
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5: add/sub of elements 2 apart in each half; cospi reload is unused.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = -bf0[2] + bf0[0];
+ bf1[3] = -bf0[3] + bf0[1];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = -bf0[6] + bf0[4];
+ bf1[7] = -bf0[7] + bf0[5];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6: pairs (2,3) and (6,7) go through half_btf with cospi[32]; rest copied.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7: final reorder step -> output; odd-indexed outputs are negated.
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[4];
+ bf1[2] = bf0[6];
+ bf1[3] = -bf0[2];
+ bf1[4] = bf0[3];
+ bf1[5] = -bf0[7];
+ bf1[6] = bf0[5];
+ bf1[7] = -bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fadst16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 16;  // 16-point forward ADST (TXFM_TYPE_ADST16 kernel)
+ const int32_t *cospi;  // cosine table row, re-selected from cos_bit[stage]
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;  // bf0: buffer a stage reads, bf1: buffer it writes
+ int32_t step[16];  // scratch; stages alternate between output and step
+
+ // stage 0: hand the untouched input to range_check with stage_range[0].
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1: pure reorder, output[2k] = input[15 - 2k], output[2k+1] = input[2k].
+ stage++;
+ bf1 = output;
+ bf1[0] = input[15];
+ bf1[1] = input[0];
+ bf1[2] = input[13];
+ bf1[3] = input[2];
+ bf1[4] = input[11];
+ bf1[5] = input[4];
+ bf1[6] = input[9];
+ bf1[7] = input[6];
+ bf1[8] = input[7];
+ bf1[9] = input[8];
+ bf1[10] = input[5];
+ bf1[11] = input[10];
+ bf1[12] = input[3];
+ bf1[13] = input[12];
+ bf1[14] = input[1];
+ bf1[15] = input[14];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2: half_btf on every adjacent pair (2k, 2k+1); output -> step.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[2], bf0[1], cospi[62], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[10], bf0[3], cospi[54], bf0[2], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[18], bf0[5], cospi[46], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[26], bf0[7], cospi[38], bf0[6], cos_bit[stage]);
+ bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[34], bf0[9], cospi[30], bf0[8], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[42], bf0[11], cospi[22], bf0[10], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[50], bf0[13], cospi[14], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(-cospi[58], bf0[15], cospi[6], bf0[14], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3: add/sub of elements 8 apart (sums in 0..7, differences in 8..15).
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[8];
+ bf1[1] = bf0[1] + bf0[9];
+ bf1[2] = bf0[2] + bf0[10];
+ bf1[3] = bf0[3] + bf0[11];
+ bf1[4] = bf0[4] + bf0[12];
+ bf1[5] = bf0[5] + bf0[13];
+ bf1[6] = bf0[6] + bf0[14];
+ bf1[7] = bf0[7] + bf0[15];
+ bf1[8] = -bf0[8] + bf0[0];
+ bf1[9] = -bf0[9] + bf0[1];
+ bf1[10] = -bf0[10] + bf0[2];
+ bf1[11] = -bf0[11] + bf0[3];
+ bf1[12] = -bf0[12] + bf0[4];
+ bf1[13] = -bf0[13] + bf0[5];
+ bf1[14] = -bf0[14] + bf0[6];
+ bf1[15] = -bf0[15] + bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4: 0..7 are copied; adjacent pairs in 8..15 go through half_btf.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5: add/sub of elements 4 apart inside each group of 8.
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = -bf0[4] + bf0[0];
+ bf1[5] = -bf0[5] + bf0[1];
+ bf1[6] = -bf0[6] + bf0[2];
+ bf1[7] = -bf0[7] + bf0[3];
+ bf1[8] = bf0[8] + bf0[12];
+ bf1[9] = bf0[9] + bf0[13];
+ bf1[10] = bf0[10] + bf0[14];
+ bf1[11] = bf0[11] + bf0[15];
+ bf1[12] = -bf0[12] + bf0[8];
+ bf1[13] = -bf0[13] + bf0[9];
+ bf1[14] = -bf0[14] + bf0[10];
+ bf1[15] = -bf0[15] + bf0[11];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6: 4..7 and 12..15 use half_btf with cospi[16]/cospi[48]; rest copied.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7: add/sub of elements 2 apart inside each group of 4.
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = -bf0[2] + bf0[0];
+ bf1[3] = -bf0[3] + bf0[1];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = -bf0[6] + bf0[4];
+ bf1[7] = -bf0[7] + bf0[5];
+ bf1[8] = bf0[8] + bf0[10];
+ bf1[9] = bf0[9] + bf0[11];
+ bf1[10] = -bf0[10] + bf0[8];
+ bf1[11] = -bf0[11] + bf0[9];
+ bf1[12] = bf0[12] + bf0[14];
+ bf1[13] = bf0[13] + bf0[15];
+ bf1[14] = -bf0[14] + bf0[12];
+ bf1[15] = -bf0[15] + bf0[13];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8: upper pair of each group of 4 uses half_btf with cospi[32]; rest copied.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9: final reorder step -> output; odd-indexed outputs are negated.
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[8];
+ bf1[2] = bf0[12];
+ bf1[3] = -bf0[4];
+ bf1[4] = bf0[6];
+ bf1[5] = -bf0[14];
+ bf1[6] = bf0[10];
+ bf1[7] = -bf0[2];
+ bf1[8] = bf0[3];
+ bf1[9] = -bf0[11];
+ bf1[10] = bf0[15];
+ bf1[11] = -bf0[7];
+ bf1[12] = bf0[5];
+ bf1[13] = -bf0[13];
+ bf1[14] = bf0[9];
+ bf1[15] = -bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_fadst32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 32;  // 32-point forward ADST (TXFM_TYPE_ADST32 kernel)
+ const int32_t *cospi;  // cosine table row, re-selected from cos_bit[stage]
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;  // bf0: buffer a stage reads, bf1: buffer it writes
+ int32_t step[32];  // scratch; stages alternate between output and step
+
+ // stage 0: hand the untouched input to range_check with stage_range[0].
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1: pure reorder, output[2k] = input[31 - 2k], output[2k+1] = input[2k].
+ stage++;
+ bf1 = output;
+ bf1[0] = input[31];
+ bf1[1] = input[0];
+ bf1[2] = input[29];
+ bf1[3] = input[2];
+ bf1[4] = input[27];
+ bf1[5] = input[4];
+ bf1[6] = input[25];
+ bf1[7] = input[6];
+ bf1[8] = input[23];
+ bf1[9] = input[8];
+ bf1[10] = input[21];
+ bf1[11] = input[10];
+ bf1[12] = input[19];
+ bf1[13] = input[12];
+ bf1[14] = input[17];
+ bf1[15] = input[14];
+ bf1[16] = input[15];
+ bf1[17] = input[16];
+ bf1[18] = input[13];
+ bf1[19] = input[18];
+ bf1[20] = input[11];
+ bf1[21] = input[20];
+ bf1[22] = input[9];
+ bf1[23] = input[22];
+ bf1[24] = input[7];
+ bf1[25] = input[24];
+ bf1[26] = input[5];
+ bf1[27] = input[26];
+ bf1[28] = input[3];
+ bf1[29] = input[28];
+ bf1[30] = input[1];
+ bf1[31] = input[30];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2: half_btf on every adjacent pair (2k, 2k+1); output -> step.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(-cospi[1], bf0[1], cospi[63], bf0[0], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[5], bf0[3], cospi[59], bf0[2], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[9], bf0[5], cospi[55], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[13], bf0[7], cospi[51], bf0[6], cos_bit[stage]);
+ bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[17], bf0[9], cospi[47], bf0[8], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[21], bf0[11], cospi[43], bf0[10], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[25], bf0[13], cospi[39], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(-cospi[29], bf0[15], cospi[35], bf0[14], cos_bit[stage]);
+ bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]);
+ bf1[17] = half_btf(-cospi[33], bf0[17], cospi[31], bf0[16], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[37], bf0[19], cospi[27], bf0[18], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[41], bf0[21], cospi[23], bf0[20], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[45], bf0[23], cospi[19], bf0[22], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(-cospi[49], bf0[25], cospi[15], bf0[24], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[53], bf0[27], cospi[11], bf0[26], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(-cospi[57], bf0[29], cospi[7], bf0[28], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(-cospi[61], bf0[31], cospi[3], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3: add/sub of elements 16 apart (sums in 0..15, differences in 16..31).
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[16];
+ bf1[1] = bf0[1] + bf0[17];
+ bf1[2] = bf0[2] + bf0[18];
+ bf1[3] = bf0[3] + bf0[19];
+ bf1[4] = bf0[4] + bf0[20];
+ bf1[5] = bf0[5] + bf0[21];
+ bf1[6] = bf0[6] + bf0[22];
+ bf1[7] = bf0[7] + bf0[23];
+ bf1[8] = bf0[8] + bf0[24];
+ bf1[9] = bf0[9] + bf0[25];
+ bf1[10] = bf0[10] + bf0[26];
+ bf1[11] = bf0[11] + bf0[27];
+ bf1[12] = bf0[12] + bf0[28];
+ bf1[13] = bf0[13] + bf0[29];
+ bf1[14] = bf0[14] + bf0[30];
+ bf1[15] = bf0[15] + bf0[31];
+ bf1[16] = -bf0[16] + bf0[0];
+ bf1[17] = -bf0[17] + bf0[1];
+ bf1[18] = -bf0[18] + bf0[2];
+ bf1[19] = -bf0[19] + bf0[3];
+ bf1[20] = -bf0[20] + bf0[4];
+ bf1[21] = -bf0[21] + bf0[5];
+ bf1[22] = -bf0[22] + bf0[6];
+ bf1[23] = -bf0[23] + bf0[7];
+ bf1[24] = -bf0[24] + bf0[8];
+ bf1[25] = -bf0[25] + bf0[9];
+ bf1[26] = -bf0[26] + bf0[10];
+ bf1[27] = -bf0[27] + bf0[11];
+ bf1[28] = -bf0[28] + bf0[12];
+ bf1[29] = -bf0[29] + bf0[13];
+ bf1[30] = -bf0[30] + bf0[14];
+ bf1[31] = -bf0[31] + bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4: 0..15 are copied; adjacent pairs in 16..31 go through half_btf.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]);
+ bf1[17] = half_btf(-cospi[4], bf0[17], cospi[60], bf0[16], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[20], bf0[19], cospi[44], bf0[18], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[36], bf0[21], cospi[28], bf0[20], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[52], bf0[23], cospi[12], bf0[22], cos_bit[stage]);
+ bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[60], bf0[25], cospi[4], bf0[24], cos_bit[stage]);
+ bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[44], bf0[27], cospi[20], bf0[26], cos_bit[stage]);
+ bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[28], bf0[29], cospi[36], bf0[28], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[12], bf0[31], cospi[52], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5: add/sub of elements 8 apart inside each group of 16.
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[8];
+ bf1[1] = bf0[1] + bf0[9];
+ bf1[2] = bf0[2] + bf0[10];
+ bf1[3] = bf0[3] + bf0[11];
+ bf1[4] = bf0[4] + bf0[12];
+ bf1[5] = bf0[5] + bf0[13];
+ bf1[6] = bf0[6] + bf0[14];
+ bf1[7] = bf0[7] + bf0[15];
+ bf1[8] = -bf0[8] + bf0[0];
+ bf1[9] = -bf0[9] + bf0[1];
+ bf1[10] = -bf0[10] + bf0[2];
+ bf1[11] = -bf0[11] + bf0[3];
+ bf1[12] = -bf0[12] + bf0[4];
+ bf1[13] = -bf0[13] + bf0[5];
+ bf1[14] = -bf0[14] + bf0[6];
+ bf1[15] = -bf0[15] + bf0[7];
+ bf1[16] = bf0[16] + bf0[24];
+ bf1[17] = bf0[17] + bf0[25];
+ bf1[18] = bf0[18] + bf0[26];
+ bf1[19] = bf0[19] + bf0[27];
+ bf1[20] = bf0[20] + bf0[28];
+ bf1[21] = bf0[21] + bf0[29];
+ bf1[22] = bf0[22] + bf0[30];
+ bf1[23] = bf0[23] + bf0[31];
+ bf1[24] = -bf0[24] + bf0[16];
+ bf1[25] = -bf0[25] + bf0[17];
+ bf1[26] = -bf0[26] + bf0[18];
+ bf1[27] = -bf0[27] + bf0[19];
+ bf1[28] = -bf0[28] + bf0[20];
+ bf1[29] = -bf0[29] + bf0[21];
+ bf1[30] = -bf0[30] + bf0[22];
+ bf1[31] = -bf0[31] + bf0[23];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6: adjacent pairs in 8..15 and 24..31 go through half_btf; rest copied.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(-cospi[8], bf0[9], cospi[56], bf0[8], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[40], bf0[11], cospi[24], bf0[10], cos_bit[stage]);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[56], bf0[13], cospi[8], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[24], bf0[15], cospi[40], bf0[14], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(-cospi[8], bf0[25], cospi[56], bf0[24], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[40], bf0[27], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[56], bf0[29], cospi[8], bf0[28], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[24], bf0[31], cospi[40], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7: add/sub of elements 4 apart inside each group of 8.
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = -bf0[4] + bf0[0];
+ bf1[5] = -bf0[5] + bf0[1];
+ bf1[6] = -bf0[6] + bf0[2];
+ bf1[7] = -bf0[7] + bf0[3];
+ bf1[8] = bf0[8] + bf0[12];
+ bf1[9] = bf0[9] + bf0[13];
+ bf1[10] = bf0[10] + bf0[14];
+ bf1[11] = bf0[11] + bf0[15];
+ bf1[12] = -bf0[12] + bf0[8];
+ bf1[13] = -bf0[13] + bf0[9];
+ bf1[14] = -bf0[14] + bf0[10];
+ bf1[15] = -bf0[15] + bf0[11];
+ bf1[16] = bf0[16] + bf0[20];
+ bf1[17] = bf0[17] + bf0[21];
+ bf1[18] = bf0[18] + bf0[22];
+ bf1[19] = bf0[19] + bf0[23];
+ bf1[20] = -bf0[20] + bf0[16];
+ bf1[21] = -bf0[21] + bf0[17];
+ bf1[22] = -bf0[22] + bf0[18];
+ bf1[23] = -bf0[23] + bf0[19];
+ bf1[24] = bf0[24] + bf0[28];
+ bf1[25] = bf0[25] + bf0[29];
+ bf1[26] = bf0[26] + bf0[30];
+ bf1[27] = bf0[27] + bf0[31];
+ bf1[28] = -bf0[28] + bf0[24];
+ bf1[29] = -bf0[29] + bf0[25];
+ bf1[30] = -bf0[30] + bf0[26];
+ bf1[31] = -bf0[31] + bf0[27];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8: upper half of each group of 8 uses cospi[16]/cospi[48]; rest copied.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(-cospi[16], bf0[5], cospi[48], bf0[4], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[48], bf0[7], cospi[16], bf0[6], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(-cospi[16], bf0[13], cospi[48], bf0[12], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[48], bf0[15], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[20], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[48], bf0[23], cospi[16], bf0[22], cos_bit[stage]);
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = bf0[26];
+ bf1[27] = bf0[27];
+ bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(-cospi[16], bf0[29], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[48], bf0[31], cospi[16], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9: add/sub of elements 2 apart inside each group of 4.
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = -bf0[2] + bf0[0];
+ bf1[3] = -bf0[3] + bf0[1];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = -bf0[6] + bf0[4];
+ bf1[7] = -bf0[7] + bf0[5];
+ bf1[8] = bf0[8] + bf0[10];
+ bf1[9] = bf0[9] + bf0[11];
+ bf1[10] = -bf0[10] + bf0[8];
+ bf1[11] = -bf0[11] + bf0[9];
+ bf1[12] = bf0[12] + bf0[14];
+ bf1[13] = bf0[13] + bf0[15];
+ bf1[14] = -bf0[14] + bf0[12];
+ bf1[15] = -bf0[15] + bf0[13];
+ bf1[16] = bf0[16] + bf0[18];
+ bf1[17] = bf0[17] + bf0[19];
+ bf1[18] = -bf0[18] + bf0[16];
+ bf1[19] = -bf0[19] + bf0[17];
+ bf1[20] = bf0[20] + bf0[22];
+ bf1[21] = bf0[21] + bf0[23];
+ bf1[22] = -bf0[22] + bf0[20];
+ bf1[23] = -bf0[23] + bf0[21];
+ bf1[24] = bf0[24] + bf0[26];
+ bf1[25] = bf0[25] + bf0[27];
+ bf1[26] = -bf0[26] + bf0[24];
+ bf1[27] = -bf0[27] + bf0[25];
+ bf1[28] = bf0[28] + bf0[30];
+ bf1[29] = bf0[29] + bf0[31];
+ bf1[30] = -bf0[30] + bf0[28];
+ bf1[31] = -bf0[31] + bf0[29];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 10: upper pair of each group of 4 uses half_btf with cospi[32]; rest copied.
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(-cospi[32], bf0[3], cospi[32], bf0[2], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(-cospi[32], bf0[7], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[10], cos_bit[stage]);
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(-cospi[32], bf0[15], cospi[32], bf0[14], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[32], bf0[19], cospi[32], bf0[18], cos_bit[stage]);
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[22], cos_bit[stage]);
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[32], bf0[27], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(-cospi[32], bf0[31], cospi[32], bf0[30], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 11: final reorder step -> output; odd-indexed outputs are negated.
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = -bf0[16];
+ bf1[2] = bf0[24];
+ bf1[3] = -bf0[8];
+ bf1[4] = bf0[12];
+ bf1[5] = -bf0[28];
+ bf1[6] = bf0[20];
+ bf1[7] = -bf0[4];
+ bf1[8] = bf0[6];
+ bf1[9] = -bf0[22];
+ bf1[10] = bf0[30];
+ bf1[11] = -bf0[14];
+ bf1[12] = bf0[10];
+ bf1[13] = -bf0[26];
+ bf1[14] = bf0[18];
+ bf1[15] = -bf0[2];
+ bf1[16] = bf0[3];
+ bf1[17] = -bf0[19];
+ bf1[18] = bf0[27];
+ bf1[19] = -bf0[11];
+ bf1[20] = bf0[15];
+ bf1[21] = -bf0[31];
+ bf1[22] = bf0[23];
+ bf1[23] = -bf0[7];
+ bf1[24] = bf0[5];
+ bf1[25] = -bf0[21];
+ bf1[26] = bf0[29];
+ bf1[27] = -bf0[13];
+ bf1[28] = bf0[9];
+ bf1[29] = -bf0[25];
+ bf1[30] = bf0[17];
+ bf1[31] = -bf0[1];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
diff --git a/av1/common/vp10_fwd_txfm1d.h b/av1/common/vp10_fwd_txfm1d.h
new file mode 100644
index 0000000..ab9d2ee
--- /dev/null
+++ b/av1/common/vp10_fwd_txfm1d.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_FWD_TXFM1D_H_
+#define VP10_FWD_TXFM1D_H_
+
+#include "av1/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_fdct4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct64_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+// ADST kernels; they take the same four arguments as the DCT kernels above.
+void vp10_fadst4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // VP10_FWD_TXFM1D_H_
diff --git a/av1/common/vp10_fwd_txfm2d.c b/av1/common/vp10_fwd_txfm2d.c
new file mode 100644
index 0000000..85c6b68
--- /dev/null
+++ b/av1/common/vp10_fwd_txfm2d.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vp10_rtcd.h"
+#include "av1/common/enums.h"
+#include "av1/common/vp10_fwd_txfm1d.h"
+#include "av1/common/vp10_fwd_txfm2d_cfg.h"
+#include "av1/common/vp10_txfm.h"
+
+static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
+  switch (txfm_type) {  // map a 1-D transform type to its forward kernel
+    case TXFM_TYPE_DCT4: return vp10_fdct4_new;
+    case TXFM_TYPE_DCT8: return vp10_fdct8_new;
+    case TXFM_TYPE_DCT16: return vp10_fdct16_new;
+    case TXFM_TYPE_DCT32: return vp10_fdct32_new;
+    case TXFM_TYPE_DCT64: return vp10_fdct64_new;  // used by the 64x64 config
+    case TXFM_TYPE_ADST4: return vp10_fadst4_new;
+    case TXFM_TYPE_ADST8: return vp10_fadst8_new;
+    case TXFM_TYPE_ADST16: return vp10_fadst16_new;
+    case TXFM_TYPE_ADST32: return vp10_fadst32_new;
+    default: assert(0); return NULL;  // unknown type: NULL when asserts are off
+  }
+}
+static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
+                                const int stride, const TXFM_2D_FLIP_CFG *cfg,
+                                int32_t *buf) {
+  // Forward 2-D transform of a txfm_size x txfm_size block: a column pass
+  // reads input (row pitch `stride`) and fills buf, then a row pass reads buf
+  // and fills output. cfg->cfg supplies the size, the three shifts, the
+  // cos_bit/stage_range tables and the 1-D kernel types for each pass;
+  // cfg->ud_flip mirrors the block vertically on read and cfg->lr_flip
+  // mirrors it horizontally on write. buf and output are each indexed over
+  // txfm_size * txfm_size entries.
+  const TXFM_2D_CFG *const params = cfg->cfg;
+  const int n = params->txfm_size;
+  const int8_t *const shift = params->shift;
+  const TxfmFunc col_txfm = fwd_txfm_type_to_func(params->txfm_type_col);
+  const TxfmFunc row_txfm = fwd_txfm_type_to_func(params->txfm_type_row);
+  // The first 2 * n entries of output serve as scratch during the column
+  // pass; the row pass overwrites them afterwards.
+  int32_t *const col_in = output;
+  int32_t *const col_out = output + n;
+  int col, row;
+
+  // Column pass.
+  for (col = 0; col < n; ++col) {
+    // Destination column in buf, mirrored when lr_flip is set.
+    const int dst_col = (cfg->lr_flip == 0) ? col : n - col - 1;
+    for (row = 0; row < n; ++row) {
+      // Source row, taken bottom-up when ud_flip is set.
+      const int src_row = (cfg->ud_flip == 0) ? row : n - row - 1;
+      col_in[row] = input[src_row * stride + col];
+    }
+    // The configured shifts are handed to round_shift_array negated.
+    round_shift_array(col_in, n, -shift[0]);
+    col_txfm(col_in, col_out, params->cos_bit_col, params->stage_range_col);
+    round_shift_array(col_out, n, -shift[1]);
+    for (row = 0; row < n; ++row) buf[row * n + dst_col] = col_out[row];
+  }
+
+  // Row pass.
+  for (row = 0; row < n; ++row) {
+    int32_t *const dst_row = output + row * n;
+    row_txfm(buf + row * n, dst_row, params->cos_bit_row,
+             params->stage_range_row);
+    round_shift_array(dst_row, n, -shift[2]);
+  }
+}
+
+void vp10_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride,
+                           int tx_type, int bd) {  // 4x4 forward 2-D txfm
+  int32_t col_pass_buf[4 * 4];  // scratch handed to fwd_txfm2d_c as buf
+  TXFM_2D_FLIP_CFG flip_cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_4X4);
+  (void)bd;  // not used by this implementation
+  fwd_txfm2d_c(input, output, stride, &flip_cfg, col_pass_buf);
+}
+
+void vp10_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride,
+                           int tx_type, int bd) {  // 8x8 forward 2-D txfm
+  int32_t col_pass_buf[8 * 8];  // scratch handed to fwd_txfm2d_c as buf
+  TXFM_2D_FLIP_CFG flip_cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_8X8);
+  (void)bd;  // not used by this implementation
+  fwd_txfm2d_c(input, output, stride, &flip_cfg, col_pass_buf);
+}
+
+void vp10_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride,
+                             int tx_type, int bd) {  // 16x16 forward 2-D txfm
+  int32_t col_pass_buf[16 * 16];  // scratch handed to fwd_txfm2d_c as buf
+  TXFM_2D_FLIP_CFG flip_cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_16X16);
+  (void)bd;  // not used by this implementation
+  fwd_txfm2d_c(input, output, stride, &flip_cfg, col_pass_buf);
+}
+
+void vp10_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride,
+                             int tx_type, int bd) {  // 32x32 forward 2-D txfm
+  int32_t col_pass_buf[32 * 32];  // scratch handed to fwd_txfm2d_c as buf
+  TXFM_2D_FLIP_CFG flip_cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_32X32);
+  (void)bd;  // not used by this implementation
+  fwd_txfm2d_c(input, output, stride, &flip_cfg, col_pass_buf);
+}
+
+void vp10_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
+                             int tx_type, int bd) {  // 64x64 forward 2-D txfm
+  int32_t col_pass_buf[64 * 64];  // scratch handed to fwd_txfm2d_c as buf
+  TXFM_2D_FLIP_CFG flip_cfg = vp10_get_fwd_txfm_64x64_cfg(tx_type);
+  (void)bd;  // not used by this implementation
+  fwd_txfm2d_c(input, output, stride, &flip_cfg, col_pass_buf);
+}
+
+#if CONFIG_EXT_TX  // table is indexed [tx_type][tx_size] by vp10_get_fwd_txfm_cfg
+static const TXFM_2D_CFG *fwd_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = {
+ { &fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_dct_8,
+ &fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_dct_32 },  // row 0: dct_dct
+ { &fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8,
+ &fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 },  // row 1: adst_dct
+ { &fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8,
+ &fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 },  // row 2: dct_adst
+ { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+ &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },  // row 3: adst_adst
+ { &fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8,
+ &fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 },  // row 4: repeats row 1
+ { &fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8,
+ &fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 },  // row 5: repeats row 2
+ { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+ &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },  // row 6: repeats row 3
+ { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+ &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },  // row 7: repeats row 3
+ { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+ &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },  // row 8: repeats row 3
+};  // rows 4-8 reuse rows 1-3; the flips presumably come from set_flip_cfg - confirm
+#else // CONFIG_EXT_TX
+static const TXFM_2D_CFG *fwd_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
+ { &fwd_txfm_2d_cfg_dct_dct_4, &fwd_txfm_2d_cfg_dct_dct_8,
+ &fwd_txfm_2d_cfg_dct_dct_16, &fwd_txfm_2d_cfg_dct_dct_32 },  // row 0: dct_dct
+ { &fwd_txfm_2d_cfg_adst_dct_4, &fwd_txfm_2d_cfg_adst_dct_8,
+ &fwd_txfm_2d_cfg_adst_dct_16, &fwd_txfm_2d_cfg_adst_dct_32 },  // row 1: adst_dct
+ { &fwd_txfm_2d_cfg_dct_adst_4, &fwd_txfm_2d_cfg_dct_adst_8,
+ &fwd_txfm_2d_cfg_dct_adst_16, &fwd_txfm_2d_cfg_dct_adst_32 },  // row 2: dct_adst
+ { &fwd_txfm_2d_cfg_adst_adst_4, &fwd_txfm_2d_cfg_adst_adst_8,
+ &fwd_txfm_2d_cfg_adst_adst_16, &fwd_txfm_2d_cfg_adst_adst_32 },  // row 3: adst_adst
+};  // each row lists the 4-, 8-, 16- and 32-point configs in that order
+#endif // CONFIG_EXT_TX
+
+TXFM_2D_FLIP_CFG vp10_get_fwd_txfm_cfg(int tx_type, int tx_size) {
+  TXFM_2D_FLIP_CFG result;  // returned by value
+  set_flip_cfg(tx_type, &result);  // presumably fills ud_flip/lr_flip - confirm
+  result.cfg = fwd_txfm_cfg_ls[tx_type][tx_size];  // indices are not checked
+  return result;
+}
+
+// Returns the 64x64 forward config. Only DCT_DCT is available at this size;
+// any other tx_type asserts and, when asserts are compiled out, yields a
+// NULL cfg instead of an uninitialized pointer. No flips are applied.
+TXFM_2D_FLIP_CFG vp10_get_fwd_txfm_64x64_cfg(int tx_type) {
+  TXFM_2D_FLIP_CFG cfg;
+  cfg.ud_flip = 0;
+  cfg.lr_flip = 0;
+  cfg.cfg = NULL;
+  switch (tx_type) {
+    case DCT_DCT: cfg.cfg = &fwd_txfm_2d_cfg_dct_dct_64; break;
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    default:
+      assert(0);  // no 64x64 config exists for this tx_type
+  }
+  return cfg;
+}
diff --git a/av1/common/vp10_fwd_txfm2d_cfg.h b/av1/common/vp10_fwd_txfm2d_cfg.h
new file mode 100644
index 0000000..f780b87
--- /dev/null
+++ b/av1/common/vp10_fwd_txfm2d_cfg.h
@@ -0,0 +1,443 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_FWD_TXFM2D_CFG_H_
+#define VP10_FWD_TXFM2D_CFG_H_
+#include "av1/common/enums.h"
+#include "av1/common/vp10_fwd_txfm1d.h"
+// -------- config fwd_dct_dct_4: column DCT4, row DCT4 --------
+static const int8_t fwd_shift_dct_dct_4[3] = { 2, 0, 0 };
+static const int8_t fwd_stage_range_col_dct_dct_4[4] = { 15, 16, 17, 17 };
+static const int8_t fwd_stage_range_row_dct_dct_4[4] = { 17, 18, 18, 18 };
+static const int8_t fwd_cos_bit_col_dct_dct_4[4] = { 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_dct_4[4] = { 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_4 = {
+ 4, // .txfm_size
+ 4, // .stage_num_col
+ 4, // .stage_num_row
+ // 0, // .log_scale
+ fwd_shift_dct_dct_4, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_dct_dct_4, // .stage_range_col
+ fwd_stage_range_row_dct_dct_4, // .stage_range_row
+ fwd_cos_bit_col_dct_dct_4, // .cos_bit_col
+ fwd_cos_bit_row_dct_dct_4, // .cos_bit_row
+ TXFM_TYPE_DCT4, // .txfm_type_col
+ TXFM_TYPE_DCT4 // .txfm_type_row
+};
+
+// -------- config fwd_dct_dct_8: column DCT8, row DCT8 --------
+static const int8_t fwd_shift_dct_dct_8[3] = { 2, -1, 0 };
+static const int8_t fwd_stage_range_col_dct_dct_8[6] = {
+ 15, 16, 17, 18, 18, 18
+};
+static const int8_t fwd_stage_range_row_dct_dct_8[6] = {
+ 17, 18, 19, 19, 19, 19
+};
+static const int8_t fwd_cos_bit_col_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_8 = {
+ 8, // .txfm_size
+ 6, // .stage_num_col
+ 6, // .stage_num_row
+ // 0, // .log_scale
+ fwd_shift_dct_dct_8, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_dct_dct_8, // .stage_range_col
+ fwd_stage_range_row_dct_dct_8, // .stage_range_row
+ fwd_cos_bit_col_dct_dct_8, // .cos_bit_col
+ fwd_cos_bit_row_dct_dct_8, // .cos_bit_row
+ TXFM_TYPE_DCT8, // .txfm_type_col
+ TXFM_TYPE_DCT8 // .txfm_type_row
+};
+
+// -------- config fwd_dct_dct_16: column DCT16, row DCT16 --------
+static const int8_t fwd_shift_dct_dct_16[3] = { 2, -2, 0 };
+static const int8_t fwd_stage_range_col_dct_dct_16[8] = { 15, 16, 17, 18,
+ 19, 19, 19, 19 };
+static const int8_t fwd_stage_range_row_dct_dct_16[8] = { 17, 18, 19, 20,
+ 20, 20, 20, 20 };
+static const int8_t fwd_cos_bit_col_dct_dct_16[8] = { 13, 13, 13, 13,
+ 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_dct_16[8] = { 12, 12, 12, 12,
+ 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_16 = {
+ 16, // .txfm_size
+ 8, // .stage_num_col
+ 8, // .stage_num_row
+ // 0, // .log_scale
+ fwd_shift_dct_dct_16, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_dct_dct_16, // .stage_range_col
+ fwd_stage_range_row_dct_dct_16, // .stage_range_row
+ fwd_cos_bit_col_dct_dct_16, // .cos_bit_col
+ fwd_cos_bit_row_dct_dct_16, // .cos_bit_row
+ TXFM_TYPE_DCT16, // .txfm_type_col
+ TXFM_TYPE_DCT16 // .txfm_type_row
+};
+
+// -------- config fwd_dct_dct_32: column DCT32, row DCT32 --------
+static const int8_t fwd_shift_dct_dct_32[3] = { 2, -4, 0 };
+static const int8_t fwd_stage_range_col_dct_dct_32[10] = { 15, 16, 17, 18, 19,
+ 20, 20, 20, 20, 20 };
+static const int8_t fwd_stage_range_row_dct_dct_32[10] = { 16, 17, 18, 19, 20,
+ 20, 20, 20, 20, 20 };
+static const int8_t fwd_cos_bit_col_dct_dct_32[10] = { 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12 };
+static const int8_t fwd_cos_bit_row_dct_dct_32[10] = { 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_32 = {
+ 32, // .txfm_size
+ 10, // .stage_num_col
+ 10, // .stage_num_row
+ // 1, // .log_scale
+ fwd_shift_dct_dct_32, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_dct_dct_32, // .stage_range_col
+ fwd_stage_range_row_dct_dct_32, // .stage_range_row
+ fwd_cos_bit_col_dct_dct_32, // .cos_bit_col
+ fwd_cos_bit_row_dct_dct_32, // .cos_bit_row
+ TXFM_TYPE_DCT32, // .txfm_type_col
+ TXFM_TYPE_DCT32 // .txfm_type_row
+};
+
+// -------- config fwd_dct_dct_64: column DCT64, row DCT64 --------
+static const int8_t fwd_shift_dct_dct_64[3] = { 2, -2, -2 };
+static const int8_t fwd_stage_range_col_dct_dct_64[12] = {
+ 13, 14, 15, 16, 17, 18, 19, 19, 19, 19, 19, 19
+};
+static const int8_t fwd_stage_range_row_dct_dct_64[12] = {
+ 17, 18, 19, 20, 21, 22, 22, 22, 22, 22, 22, 22
+};
+static const int8_t fwd_cos_bit_col_dct_dct_64[12] = { 15, 15, 15, 15, 15, 14,
+ 13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_dct_64[12] = { 15, 14, 13, 12, 11, 10,
+ 10, 10, 10, 10, 10, 10 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_dct_64 = {
+ 64, // .txfm_size
+ 12, // .stage_num_col
+ 12, // .stage_num_row
+ fwd_shift_dct_dct_64, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_dct_dct_64, // .stage_range_col
+ fwd_stage_range_row_dct_dct_64, // .stage_range_row
+ fwd_cos_bit_col_dct_dct_64, // .cos_bit_col
+ fwd_cos_bit_row_dct_dct_64, // .cos_bit_row
+ TXFM_TYPE_DCT64, // .txfm_type_col
+ TXFM_TYPE_DCT64 // .txfm_type_row
+};
+
+// -------- config fwd_dct_adst_4: column DCT4, row ADST4 --------
+static const int8_t fwd_shift_dct_adst_4[3] = { 2, 0, 0 };
+static const int8_t fwd_stage_range_col_dct_adst_4[4] = { 15, 16, 17, 17 };
+static const int8_t fwd_stage_range_row_dct_adst_4[6] = {
+ 17, 17, 17, 18, 18, 18
+};
+static const int8_t fwd_cos_bit_col_dct_adst_4[4] = { 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_4 = {
+ 4, // .txfm_size
+ 4, // .stage_num_col
+ 6, // .stage_num_row
+ // 0, // .log_scale
+ fwd_shift_dct_adst_4, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_dct_adst_4, // .stage_range_col
+ fwd_stage_range_row_dct_adst_4, // .stage_range_row
+ fwd_cos_bit_col_dct_adst_4, // .cos_bit_col
+ fwd_cos_bit_row_dct_adst_4, // .cos_bit_row
+ TXFM_TYPE_DCT4, // .txfm_type_col
+ TXFM_TYPE_ADST4 // .txfm_type_row
+};
+
+// -------- config fwd_dct_adst_8: column DCT8, row ADST8 --------
+static const int8_t fwd_shift_dct_adst_8[3] = { 2, -1, 0 };
+static const int8_t fwd_stage_range_col_dct_adst_8[6] = {
+ 15, 16, 17, 18, 18, 18
+};
+static const int8_t fwd_stage_range_row_dct_adst_8[8] = { 17, 17, 17, 18,
+ 18, 19, 19, 19 };
+static const int8_t fwd_cos_bit_col_dct_adst_8[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_adst_8[8] = { 13, 13, 13, 13,
+ 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_8 = {
+ 8, // .txfm_size
+ 6, // .stage_num_col
+ 8, // .stage_num_row
+ // 0, // .log_scale
+ fwd_shift_dct_adst_8, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_dct_adst_8, // .stage_range_col
+ fwd_stage_range_row_dct_adst_8, // .stage_range_row
+ fwd_cos_bit_col_dct_adst_8, // .cos_bit_col
+ fwd_cos_bit_row_dct_adst_8, // .cos_bit_row
+ TXFM_TYPE_DCT8, // .txfm_type_col
+ TXFM_TYPE_ADST8 // .txfm_type_row
+};
+
+// -------- config fwd_dct_adst_16: column DCT16, row ADST16 --------
+static const int8_t fwd_shift_dct_adst_16[3] = { 2, -2, 0 };
+static const int8_t fwd_stage_range_col_dct_adst_16[8] = { 15, 16, 17, 18,
+ 19, 19, 19, 19 };
+static const int8_t fwd_stage_range_row_dct_adst_16[10] = {
+ 17, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_cos_bit_col_dct_adst_16[8] = { 13, 13, 13, 13,
+ 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_dct_adst_16[10] = { 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_16 = {
+ 16, // .txfm_size
+ 8, // .stage_num_col
+ 10, // .stage_num_row
+ // 0, // .log_scale
+ fwd_shift_dct_adst_16, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_dct_adst_16, // .stage_range_col
+ fwd_stage_range_row_dct_adst_16, // .stage_range_row
+ fwd_cos_bit_col_dct_adst_16, // .cos_bit_col
+ fwd_cos_bit_row_dct_adst_16, // .cos_bit_row
+ TXFM_TYPE_DCT16, // .txfm_type_col
+ TXFM_TYPE_ADST16 // .txfm_type_row
+};
+
+// -------- config fwd_dct_adst_32: column DCT32, row ADST32 --------
+static const int8_t fwd_shift_dct_adst_32[3] = { 2, -4, 0 };
+static const int8_t fwd_stage_range_col_dct_adst_32[10] = {
+ 15, 16, 17, 18, 19, 20, 20, 20, 20, 20
+};
+static const int8_t fwd_stage_range_row_dct_adst_32[12] = {
+ 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_cos_bit_col_dct_adst_32[10] = { 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12 };
+static const int8_t fwd_cos_bit_row_dct_adst_32[12] = {
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
+};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_dct_adst_32 = {
+ 32, // .txfm_size
+ 10, // .stage_num_col
+ 12, // .stage_num_row
+ // 1, // .log_scale
+ fwd_shift_dct_adst_32, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_dct_adst_32, // .stage_range_col
+ fwd_stage_range_row_dct_adst_32, // .stage_range_row
+ fwd_cos_bit_col_dct_adst_32, // .cos_bit_col
+ fwd_cos_bit_row_dct_adst_32, // .cos_bit_row
+ TXFM_TYPE_DCT32, // .txfm_type_col
+ TXFM_TYPE_ADST32 // .txfm_type_row
+};
+// -------- config fwd_adst_adst_4: column ADST4, row ADST4 --------
+static const int8_t fwd_shift_adst_adst_4[3] = { 2, 0, 0 };
+static const int8_t fwd_stage_range_col_adst_adst_4[6] = { 15, 15, 16,
+ 17, 17, 17 };
+static const int8_t fwd_stage_range_row_adst_adst_4[6] = { 17, 17, 17,
+ 18, 18, 18 };
+static const int8_t fwd_cos_bit_col_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_4 = {
+ 4, // .txfm_size
+ 6, // .stage_num_col
+ 6, // .stage_num_row
+ // 0, // .log_scale
+ fwd_shift_adst_adst_4, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_adst_adst_4, // .stage_range_col
+ fwd_stage_range_row_adst_adst_4, // .stage_range_row
+ fwd_cos_bit_col_adst_adst_4, // .cos_bit_col
+ fwd_cos_bit_row_adst_adst_4, // .cos_bit_row
+ TXFM_TYPE_ADST4, // .txfm_type_col
+ TXFM_TYPE_ADST4 // .txfm_type_row
+};
+
+// -------- config fwd_adst_adst_8: column ADST8, row ADST8 --------
+static const int8_t fwd_shift_adst_adst_8[3] = { 2, -1, 0 };
+static const int8_t fwd_stage_range_col_adst_adst_8[8] = { 15, 15, 16, 17,
+ 17, 18, 18, 18 };
+static const int8_t fwd_stage_range_row_adst_adst_8[8] = { 17, 17, 17, 18,
+ 18, 19, 19, 19 };
+static const int8_t fwd_cos_bit_col_adst_adst_8[8] = { 13, 13, 13, 13,
+ 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_adst_8[8] = { 13, 13, 13, 13,
+ 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_8 = {
+ 8, // .txfm_size
+ 8, // .stage_num_col
+ 8, // .stage_num_row
+ // 0, // .log_scale
+ fwd_shift_adst_adst_8, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_adst_adst_8, // .stage_range_col
+ fwd_stage_range_row_adst_adst_8, // .stage_range_row
+ fwd_cos_bit_col_adst_adst_8, // .cos_bit_col
+ fwd_cos_bit_row_adst_adst_8, // .cos_bit_row
+ TXFM_TYPE_ADST8, // .txfm_type_col
+ TXFM_TYPE_ADST8 // .txfm_type_row
+};
+
+// -------- config fwd_adst_adst_16: column ADST16, row ADST16 --------
+static const int8_t fwd_shift_adst_adst_16[3] = { 2, -2, 0 };
+static const int8_t fwd_stage_range_col_adst_adst_16[10] = {
+ 15, 15, 16, 17, 17, 18, 18, 19, 19, 19
+};
+static const int8_t fwd_stage_range_row_adst_adst_16[10] = {
+ 17, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_cos_bit_col_adst_adst_16[10] = { 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_adst_16[10] = { 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_16 = {
+ 16, // .txfm_size
+ 10, // .stage_num_col
+ 10, // .stage_num_row
+ // 0, // .log_scale
+ fwd_shift_adst_adst_16, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_adst_adst_16, // .stage_range_col
+ fwd_stage_range_row_adst_adst_16, // .stage_range_row
+ fwd_cos_bit_col_adst_adst_16, // .cos_bit_col
+ fwd_cos_bit_row_adst_adst_16, // .cos_bit_row
+ TXFM_TYPE_ADST16, // .txfm_type_col
+ TXFM_TYPE_ADST16 // .txfm_type_row
+};
+
+// -------- config fwd_adst_adst_32: column ADST32, row ADST32 --------
+static const int8_t fwd_shift_adst_adst_32[3] = { 2, -4, 0 };
+static const int8_t fwd_stage_range_col_adst_adst_32[12] = {
+ 15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_stage_range_row_adst_adst_32[12] = {
+ 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_cos_bit_col_adst_adst_32[12] = {
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
+};
+static const int8_t fwd_cos_bit_row_adst_adst_32[12] = {
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
+};
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_adst_32 = {
+ 32, // .txfm_size
+ 12, // .stage_num_col
+ 12, // .stage_num_row
+ // 1, // .log_scale
+ fwd_shift_adst_adst_32, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_adst_adst_32, // .stage_range_col
+ fwd_stage_range_row_adst_adst_32, // .stage_range_row
+ fwd_cos_bit_col_adst_adst_32, // .cos_bit_col
+ fwd_cos_bit_row_adst_adst_32, // .cos_bit_row
+ TXFM_TYPE_ADST32, // .txfm_type_col
+ TXFM_TYPE_ADST32 // .txfm_type_row
+};
+
+// -------- config fwd_adst_dct_4: column ADST4, row DCT4 --------
+static const int8_t fwd_shift_adst_dct_4[3] = { 2, 0, 0 };
+static const int8_t fwd_stage_range_col_adst_dct_4[6] = {
+ 15, 15, 16, 17, 17, 17
+};
+static const int8_t fwd_stage_range_row_adst_dct_4[4] = { 17, 18, 18, 18 };
+static const int8_t fwd_cos_bit_col_adst_dct_4[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_dct_4[4] = { 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_4 = {
+ 4, // .txfm_size
+ 6, // .stage_num_col
+ 4, // .stage_num_row
+ // 0, // .log_scale
+ fwd_shift_adst_dct_4, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_adst_dct_4, // .stage_range_col
+ fwd_stage_range_row_adst_dct_4, // .stage_range_row
+ fwd_cos_bit_col_adst_dct_4, // .cos_bit_col
+ fwd_cos_bit_row_adst_dct_4, // .cos_bit_row
+ TXFM_TYPE_ADST4, // .txfm_type_col
+ TXFM_TYPE_DCT4 // .txfm_type_row
+};
+
+// -------- config fwd_adst_dct_8: column ADST8, row DCT8 --------
+static const int8_t fwd_shift_adst_dct_8[3] = { 2, -1, 0 };
+static const int8_t fwd_stage_range_col_adst_dct_8[8] = { 15, 15, 16, 17,
+ 17, 18, 18, 18 };
+static const int8_t fwd_stage_range_row_adst_dct_8[6] = {
+ 17, 18, 19, 19, 19, 19
+};
+static const int8_t fwd_cos_bit_col_adst_dct_8[8] = { 13, 13, 13, 13,
+ 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_8 = {
+ 8, // .txfm_size
+ 8, // .stage_num_col
+ 6, // .stage_num_row
+ // 0, // .log_scale
+ fwd_shift_adst_dct_8, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_adst_dct_8, // .stage_range_col
+ fwd_stage_range_row_adst_dct_8, // .stage_range_row
+ fwd_cos_bit_col_adst_dct_8, // .cos_bit_col
+ fwd_cos_bit_row_adst_dct_8, // .cos_bit_row
+ TXFM_TYPE_ADST8, // .txfm_type_col
+ TXFM_TYPE_DCT8 // .txfm_type_row
+};
+
+// -------- config fwd_adst_dct_16: column ADST16, row DCT16 --------
+static const int8_t fwd_shift_adst_dct_16[3] = { 2, -2, 0 };
+static const int8_t fwd_stage_range_col_adst_dct_16[10] = {
+ 15, 15, 16, 17, 17, 18, 18, 19, 19, 19
+};
+static const int8_t fwd_stage_range_row_adst_dct_16[8] = { 17, 18, 19, 20,
+ 20, 20, 20, 20 };
+static const int8_t fwd_cos_bit_col_adst_dct_16[10] = { 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13 };
+static const int8_t fwd_cos_bit_row_adst_dct_16[8] = { 12, 12, 12, 12,
+ 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_16 = {
+ 16, // .txfm_size
+ 10, // .stage_num_col
+ 8, // .stage_num_row
+ // 0, // .log_scale
+ fwd_shift_adst_dct_16, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_adst_dct_16, // .stage_range_col
+ fwd_stage_range_row_adst_dct_16, // .stage_range_row
+ fwd_cos_bit_col_adst_dct_16, // .cos_bit_col
+ fwd_cos_bit_row_adst_dct_16, // .cos_bit_row
+ TXFM_TYPE_ADST16, // .txfm_type_col
+ TXFM_TYPE_DCT16 // .txfm_type_row
+};
+
+// -------- config fwd_adst_dct_32: column ADST32, row DCT32 --------
+static const int8_t fwd_shift_adst_dct_32[3] = { 2, -4, 0 };
+static const int8_t fwd_stage_range_col_adst_dct_32[12] = {
+ 15, 15, 16, 17, 17, 18, 18, 19, 19, 20, 20, 20
+};
+static const int8_t fwd_stage_range_row_adst_dct_32[10] = {
+ 16, 17, 18, 19, 20, 20, 20, 20, 20, 20
+};
+static const int8_t fwd_cos_bit_col_adst_dct_32[12] = {
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
+};
+static const int8_t fwd_cos_bit_row_adst_dct_32[10] = { 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG fwd_txfm_2d_cfg_adst_dct_32 = {
+ 32, // .txfm_size
+ 12, // .stage_num_col
+ 10, // .stage_num_row
+ // 1, // .log_scale
+ fwd_shift_adst_dct_32, // .shift (negated in fwd_txfm2d_c)
+ fwd_stage_range_col_adst_dct_32, // .stage_range_col
+ fwd_stage_range_row_adst_dct_32, // .stage_range_row
+ fwd_cos_bit_col_adst_dct_32, // .cos_bit_col
+ fwd_cos_bit_row_adst_dct_32, // .cos_bit_row
+ TXFM_TYPE_ADST32, // .txfm_type_col
+ TXFM_TYPE_DCT32 // .txfm_type_row
+};
+#endif // VP10_FWD_TXFM2D_CFG_H_
diff --git a/av1/common/vp10_inv_txfm.c b/av1/common/vp10_inv_txfm.c
new file mode 100644
index 0000000..a74de09
--- /dev/null
+++ b/av1/common/vp10_inv_txfm.c
@@ -0,0 +1,2472 @@
+/*
+ *
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <string.h>
+
+#include "./vp10_rtcd.h"
+#include "av1/common/vp10_inv_txfm.h"
+
+void vp10_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+ /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
+ 0.5 shifts per pixel. Rows of input go to output[], its columns to dest. */
+ int i;
+ tran_low_t output[16];  // 4x4 intermediate, four values per row
+ tran_high_t a1, b1, c1, d1, e1;
+ const tran_low_t *ip = input;
+ tran_low_t *op = output;
+
+ for (i = 0; i < 4; i++) {  // pass 1: one row of input per iteration
+ a1 = ip[0] >> UNIT_QUANT_SHIFT;  // operands are loaded in a, c, d, b order
+ c1 = ip[1] >> UNIT_QUANT_SHIFT;
+ d1 = ip[2] >> UNIT_QUANT_SHIFT;
+ b1 = ip[3] >> UNIT_QUANT_SHIFT;
+ a1 += c1;
+ d1 -= b1;
+ e1 = (a1 - d1) >> 1;  // half-difference shared by the b1 and c1 updates
+ b1 = e1 - b1;
+ c1 = e1 - c1;
+ a1 -= b1;
+ d1 += c1;
+ op[0] = WRAPLOW(a1);
+ op[1] = WRAPLOW(b1);
+ op[2] = WRAPLOW(c1);
+ op[3] = WRAPLOW(d1);
+ ip += 4;
+ op += 4;
+ }
+
+ ip = output;
+ for (i = 0; i < 4; i++) {  // pass 2: one column of output[] per iteration
+ a1 = ip[4 * 0];
+ c1 = ip[4 * 1];
+ d1 = ip[4 * 2];
+ b1 = ip[4 * 3];
+ a1 += c1;
+ d1 -= b1;
+ e1 = (a1 - d1) >> 1;
+ b1 = e1 - b1;
+ c1 = e1 - c1;
+ a1 -= b1;
+ d1 += c1;
+ dest[stride * 0] = clip_pixel_add(dest[stride * 0], WRAPLOW(a1));
+ dest[stride * 1] = clip_pixel_add(dest[stride * 1], WRAPLOW(b1));
+ dest[stride * 2] = clip_pixel_add(dest[stride * 2], WRAPLOW(c1));
+ dest[stride * 3] = clip_pixel_add(dest[stride * 3], WRAPLOW(d1));
+
+ ip++;
+ dest++;  // move to the next column of dest
+ }
+}
+
+void vp10_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest,
+ int dest_stride) {
+ int i;  // single-coefficient inverse WHT: only in[0] is read
+ tran_high_t a1, e1;
+ tran_low_t tmp[4];  // first-pass values derived from in[0]
+ const tran_low_t *ip = in;
+ tran_low_t *op = tmp;
+
+ a1 = ip[0] >> UNIT_QUANT_SHIFT;
+ e1 = a1 >> 1;  // half of the shifted coefficient
+ a1 -= e1;  // the remainder stays in a1
+ op[0] = WRAPLOW(a1);
+ op[1] = op[2] = op[3] = WRAPLOW(e1);
+
+ ip = tmp;
+ for (i = 0; i < 4; i++) {  // one column of dest per iteration
+ e1 = ip[0] >> 1;
+ a1 = ip[0] - e1;
+ dest[dest_stride * 0] = clip_pixel_add(dest[dest_stride * 0], a1);  // row 0: a1
+ dest[dest_stride * 1] = clip_pixel_add(dest[dest_stride * 1], e1);  // rows 1-3: e1
+ dest[dest_stride * 2] = clip_pixel_add(dest[dest_stride * 2], e1);
+ dest[dest_stride * 3] = clip_pixel_add(dest[dest_stride * 3], e1);
+ ip++;
+ dest++;
+ }
+}
+
+void vp10_idct4_c(const tran_low_t *input, tran_low_t *output) {
+ tran_low_t step[4];  // 4-point 1-D inverse DCT; step[] holds the stage 1 results
+ tran_high_t temp1, temp2;  // products fed to dct_const_round_shift
+ // stage 1: inputs 0/2 use cospi_16_64, inputs 1/3 use cospi_8_64/cospi_24_64
+ temp1 = (input[0] + input[2]) * cospi_16_64;
+ temp2 = (input[0] - input[2]) * cospi_16_64;
+ step[0] = WRAPLOW(dct_const_round_shift(temp1));
+ step[1] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
+ temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
+ step[2] = WRAPLOW(dct_const_round_shift(temp1));
+ step[3] = WRAPLOW(dct_const_round_shift(temp2));
+
+ // stage 2: butterflies step[0] +/- step[3] and step[1] +/- step[2]
+ output[0] = WRAPLOW(step[0] + step[3]);
+ output[1] = WRAPLOW(step[1] + step[2]);
+ output[2] = WRAPLOW(step[1] - step[2]);
+ output[3] = WRAPLOW(step[0] - step[3]);
+}
+
+void vp10_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+  // Full 4x4 inverse DCT added into dest. vp10_idct4_c runs on every row of
+  // input, then on every column of that intermediate; each column result is
+  // rounded by 4 bits and accumulated into dest through clip_pixel_add.
+  // input is read as 16 consecutive coefficients, four per row.
+  tran_low_t row_pass[4 * 4];
+  tran_low_t col_in[4], col_out[4];
+  int r, c;
+
+  // Row pass: row r of input -> row r of row_pass.
+  for (r = 0; r < 4; ++r) vp10_idct4_c(input + 4 * r, row_pass + 4 * r);
+
+  // Column pass: gather column c, transform it, add it into dest.
+  for (c = 0; c < 4; ++c) {
+    for (r = 0; r < 4; ++r) col_in[r] = row_pass[r * 4 + c];
+    vp10_idct4_c(col_in, col_out);
+    for (r = 0; r < 4; ++r) {
+      // Destination pixel at row r, column c.
+      uint8_t *const px = &dest[r * stride + c];
+      *px = clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 4));
+    }
+  }
+}
+
+void vp10_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
+                          int dest_stride) {
+  // Single-coefficient 4x4 inverse DCT: only input[0] is read. It is scaled
+  // by cospi_16_64 and round-shifted twice, rounded by 4 bits, and the
+  // resulting offset is added to all 16 pixels with clip_pixel_add.
+  int row, col;
+  tran_high_t delta;
+  tran_low_t dc = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+  dc = WRAPLOW(dct_const_round_shift(dc * cospi_16_64));
+  delta = ROUND_POWER_OF_TWO(dc, 4);
+
+  for (row = 0; row < 4; ++row) {
+    for (col = 0; col < 4; ++col) dest[col] = clip_pixel_add(dest[col], delta);
+    dest += dest_stride;
+  }
+}
+
+void vp10_idct8_c(const tran_low_t *input, tran_low_t *output) {
+ tran_low_t step1[8], step2[8];  // 8-point 1-D inverse DCT work buffers
+ tran_high_t temp1, temp2;  // products fed to dct_const_round_shift
+ // stage 1: even inputs are copied (reordered); odd inputs are rotated
+ step1[0] = input[0];
+ step1[2] = input[4];
+ step1[1] = input[2];
+ step1[3] = input[6];
+ temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
+ temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
+ step1[4] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[7] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
+ temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+
+ // stage 2: step1[0..3] get the same arithmetic as stage 1 of vp10_idct4_c
+ temp1 = (step1[0] + step1[2]) * cospi_16_64;
+ temp2 = (step1[0] - step1[2]) * cospi_16_64;
+ step2[0] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[1] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[3] = WRAPLOW(dct_const_round_shift(temp2));
+ step2[4] = WRAPLOW(step1[4] + step1[5]);
+ step2[5] = WRAPLOW(step1[4] - step1[5]);
+ step2[6] = WRAPLOW(-step1[6] + step1[7]);
+ step2[7] = WRAPLOW(step1[6] + step1[7]);
+
+ // stage 3: butterflies on step2[0..3]; step2[5]/step2[6] use cospi_16_64
+ step1[0] = WRAPLOW(step2[0] + step2[3]);
+ step1[1] = WRAPLOW(step2[1] + step2[2]);
+ step1[2] = WRAPLOW(step2[1] - step2[2]);
+ step1[3] = WRAPLOW(step2[0] - step2[3]);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+ step1[7] = step2[7];
+
+ // stage 4: output[k] and output[7 - k] are step1[k] +/- step1[7 - k]
+ output[0] = WRAPLOW(step1[0] + step1[7]);
+ output[1] = WRAPLOW(step1[1] + step1[6]);
+ output[2] = WRAPLOW(step1[2] + step1[5]);
+ output[3] = WRAPLOW(step1[3] + step1[4]);
+ output[4] = WRAPLOW(step1[3] - step1[4]);
+ output[5] = WRAPLOW(step1[2] - step1[5]);
+ output[6] = WRAPLOW(step1[1] - step1[6]);
+ output[7] = WRAPLOW(step1[0] - step1[7]);
+}
+
+void vp10_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+  // Full 8x8 inverse DCT added into dest. vp10_idct8_c runs on every row of
+  // input, then on every column of that intermediate; each column result is
+  // rounded by 5 bits and accumulated into dest through clip_pixel_add.
+  // input is read as 64 consecutive coefficients, eight per row.
+  tran_low_t row_pass[8 * 8];
+  tran_low_t col_in[8], col_out[8];
+  int r, c;
+
+  // Row pass: row r of input -> row r of row_pass.
+  for (r = 0; r < 8; ++r) vp10_idct8_c(input + 8 * r, row_pass + 8 * r);
+
+  // Column pass: gather column c, transform it, add it into dest.
+  for (c = 0; c < 8; ++c) {
+    for (r = 0; r < 8; ++r) col_in[r] = row_pass[r * 8 + c];
+    vp10_idct8_c(col_in, col_out);
+    for (r = 0; r < 8; ++r) {
+      // Destination pixel at row r, column c.
+      uint8_t *const px = &dest[r * stride + c];
+      *px = clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 5));
+    }
+  }
+}
+
+void vp10_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+  // Single-coefficient 8x8 inverse DCT: input[0] is scaled by cospi_16_64 and
+  // round-shifted twice, rounded by 5 bits, then added to all 64 pixels.
+  tran_low_t dc = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+  tran_high_t delta;
+  int row, col;
+  dc = WRAPLOW(dct_const_round_shift(dc * cospi_16_64));
+  delta = ROUND_POWER_OF_TWO(dc, 5);
+  for (row = 0; row < 8; ++row, dest += stride)
+    for (col = 0; col < 8; ++col) dest[col] = clip_pixel_add(dest[col], delta);
+}
+
+void vp10_iadst4_c(const tran_low_t *input, tran_low_t *output) {
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;  // sinpi_*_9 products and sums
+
+ tran_low_t x0 = input[0];
+ tran_low_t x1 = input[1];
+ tran_low_t x2 = input[2];
+ tran_low_t x3 = input[3];
+
+ if (!(x0 | x1 | x2 | x3)) {  // all-zero input: write zeros and return early
+ output[0] = output[1] = output[2] = output[3] = 0;
+ return;
+ }
+
+ s0 = sinpi_1_9 * x0;
+ s1 = sinpi_2_9 * x0;
+ s2 = sinpi_3_9 * x1;
+ s3 = sinpi_4_9 * x2;
+ s4 = sinpi_1_9 * x2;
+ s5 = sinpi_2_9 * x3;
+ s6 = sinpi_4_9 * x3;
+ s7 = WRAPLOW(x0 - x2 + x3);  // combined term, scaled by sinpi_3_9 below
+
+ s0 = s0 + s3 + s5;
+ s1 = s1 - s4 - s6;
+ s3 = s2;  // s3 now carries sinpi_3_9 * x1; s2 is reused on the next line
+ s2 = sinpi_3_9 * s7;
+
+ // 1-D transform scaling factor is sqrt(2).
+ // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
+ // + 1b (addition) = 29b.
+ // Hence the output bit depth is 15b.
+ output[0] = WRAPLOW(dct_const_round_shift(s0 + s3));
+ output[1] = WRAPLOW(dct_const_round_shift(s1 + s3));
+ output[2] = WRAPLOW(dct_const_round_shift(s2));
+ output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3));
+}
+
+void vp10_iadst8_c(const tran_low_t *input, tran_low_t *output) {
+ int s0, s1, s2, s3, s4, s5, s6, s7;  // NOTE(review): int here, tran_high_t in vp10_iadst4_c/vp10_iadst16_c - confirm range
+
+ tran_high_t x0 = input[7];  // the 8 inputs are loaded in a permuted order
+ tran_high_t x1 = input[0];
+ tran_high_t x2 = input[5];
+ tran_high_t x3 = input[2];
+ tran_high_t x4 = input[3];
+ tran_high_t x5 = input[4];
+ tran_high_t x6 = input[1];
+ tran_high_t x7 = input[6];
+
+ if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {  // all inputs zero: emit zeros
+ output[0] = output[1] = output[2] = output[3] = output[4] = output[5] =
+ output[6] = output[7] = 0;
+ return;
+ }
+
+ // stage 1: cospi-weighted pairs, each product sum narrowed to int by the cast
+ s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1);
+ s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1);
+ s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3);
+ s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3);
+ s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5);
+ s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5);
+ s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7);
+ s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7);
+
+ x0 = WRAPLOW(dct_const_round_shift(s0 + s4));
+ x1 = WRAPLOW(dct_const_round_shift(s1 + s5));
+ x2 = WRAPLOW(dct_const_round_shift(s2 + s6));
+ x3 = WRAPLOW(dct_const_round_shift(s3 + s7));
+ x4 = WRAPLOW(dct_const_round_shift(s0 - s4));
+ x5 = WRAPLOW(dct_const_round_shift(s1 - s5));
+ x6 = WRAPLOW(dct_const_round_shift(s2 - s6));
+ x7 = WRAPLOW(dct_const_round_shift(s3 - s7));
+
+ // stage 2: x0..x3 pass through unscaled; x4..x7 are weighted by cospi_8/24
+ s0 = (int)x0;
+ s1 = (int)x1;
+ s2 = (int)x2;
+ s3 = (int)x3;
+ s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5);
+ s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5);
+ s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7);
+ s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7);
+
+ x0 = WRAPLOW(s0 + s2);  // unscaled sums need no round shift
+ x1 = WRAPLOW(s1 + s3);
+ x2 = WRAPLOW(s0 - s2);
+ x3 = WRAPLOW(s1 - s3);
+ x4 = WRAPLOW(dct_const_round_shift(s4 + s6));
+ x5 = WRAPLOW(dct_const_round_shift(s5 + s7));
+ x6 = WRAPLOW(dct_const_round_shift(s4 - s6));
+ x7 = WRAPLOW(dct_const_round_shift(s5 - s7));
+
+ // stage 3: only x2, x3, x6, x7 are updated; x0, x1, x4, x5 are left as is
+ s2 = (int)(cospi_16_64 * (x2 + x3));
+ s3 = (int)(cospi_16_64 * (x2 - x3));
+ s6 = (int)(cospi_16_64 * (x6 + x7));
+ s7 = (int)(cospi_16_64 * (x6 - x7));
+
+ x2 = WRAPLOW(dct_const_round_shift(s2));
+ x3 = WRAPLOW(dct_const_round_shift(s3));
+ x6 = WRAPLOW(dct_const_round_shift(s6));
+ x7 = WRAPLOW(dct_const_round_shift(s7));
+
+ output[0] = WRAPLOW(x0);  // outputs are written permuted, odd slots negated
+ output[1] = WRAPLOW(-x4);
+ output[2] = WRAPLOW(x6);
+ output[3] = WRAPLOW(-x2);
+ output[4] = WRAPLOW(x3);
+ output[5] = WRAPLOW(-x7);
+ output[6] = WRAPLOW(x5);
+ output[7] = WRAPLOW(-x1);
+}
+
+void vp10_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+ tran_low_t out[8 * 8] = { 0 };  // zero-filled: rows 4..7 are never written by pass 1
+ tran_low_t *outptr = out;
+ int i, j;
+ tran_low_t temp_in[8], temp_out[8];  // one column in / out for pass 2
+
+ // First transform rows.
+ // Only the first 4 rows have non-zero coefficients, so only those are run.
+ for (i = 0; i < 4; ++i) {
+ vp10_idct8_c(input, outptr);
+ input += 8;
+ outptr += 8;
+ }
+
+ // Then transform all 8 columns and add the result to dest.
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];  // gather column i
+ vp10_idct8_c(temp_in, temp_out);
+ for (j = 0; j < 8; ++j) {
+ dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+ ROUND_POWER_OF_TWO(temp_out[j], 5));  // dest row j starts at j * stride
+ }
+ }
+}
+
+void vp10_idct16_c(const tran_low_t *input, tran_low_t *output) {
+ tran_low_t step1[16], step2[16];  // the stages alternate between these two buffers
+ tran_high_t temp1, temp2;  // hold the cospi products before dct_const_round_shift
+
+ // stage 1: load the 16 inputs in permuted order (indices are written as k / 2)
+ step1[0] = input[0 / 2];
+ step1[1] = input[16 / 2];
+ step1[2] = input[8 / 2];
+ step1[3] = input[24 / 2];
+ step1[4] = input[4 / 2];
+ step1[5] = input[20 / 2];
+ step1[6] = input[12 / 2];
+ step1[7] = input[28 / 2];
+ step1[8] = input[2 / 2];
+ step1[9] = input[18 / 2];
+ step1[10] = input[10 / 2];
+ step1[11] = input[26 / 2];
+ step1[12] = input[6 / 2];
+ step1[13] = input[22 / 2];
+ step1[14] = input[14 / 2];
+ step1[15] = input[30 / 2];
+
+ // stage 2: entries 0..7 are copied; entries 8..15 are combined in pairs
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+ temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+ step2[8] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[15] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+ temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+ step2[9] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[14] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+ temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+ temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+ step2[11] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[12] = WRAPLOW(dct_const_round_shift(temp2));
+
+ // stage 3
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+ temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+ step1[4] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[7] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+ temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+
+ step1[8] = WRAPLOW(step2[8] + step2[9]);
+ step1[9] = WRAPLOW(step2[8] - step2[9]);
+ step1[10] = WRAPLOW(-step2[10] + step2[11]);
+ step1[11] = WRAPLOW(step2[10] + step2[11]);
+ step1[12] = WRAPLOW(step2[12] + step2[13]);
+ step1[13] = WRAPLOW(step2[12] - step2[13]);
+ step1[14] = WRAPLOW(-step2[14] + step2[15]);
+ step1[15] = WRAPLOW(step2[14] + step2[15]);
+
+ // stage 4
+ temp1 = (step1[0] + step1[1]) * cospi_16_64;
+ temp2 = (step1[0] - step1[1]) * cospi_16_64;
+ step2[0] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[1] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[3] = WRAPLOW(dct_const_round_shift(temp2));
+ step2[4] = WRAPLOW(step1[4] + step1[5]);
+ step2[5] = WRAPLOW(step1[4] - step1[5]);
+ step2[6] = WRAPLOW(-step1[6] + step1[7]);
+ step2[7] = WRAPLOW(step1[6] + step1[7]);
+
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+ temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+ step2[9] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[14] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+ temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2));
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ // stage 5
+ step1[0] = WRAPLOW(step2[0] + step2[3]);
+ step1[1] = WRAPLOW(step2[1] + step2[2]);
+ step1[2] = WRAPLOW(step2[1] - step2[2]);
+ step1[3] = WRAPLOW(step2[0] - step2[3]);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+ step1[7] = step2[7];
+
+ step1[8] = WRAPLOW(step2[8] + step2[11]);
+ step1[9] = WRAPLOW(step2[9] + step2[10]);
+ step1[10] = WRAPLOW(step2[9] - step2[10]);
+ step1[11] = WRAPLOW(step2[8] - step2[11]);
+ step1[12] = WRAPLOW(-step2[12] + step2[15]);
+ step1[13] = WRAPLOW(-step2[13] + step2[14]);
+ step1[14] = WRAPLOW(step2[13] + step2[14]);
+ step1[15] = WRAPLOW(step2[12] + step2[15]);
+
+ // stage 6
+ step2[0] = WRAPLOW(step1[0] + step1[7]);
+ step2[1] = WRAPLOW(step1[1] + step1[6]);
+ step2[2] = WRAPLOW(step1[2] + step1[5]);
+ step2[3] = WRAPLOW(step1[3] + step1[4]);
+ step2[4] = WRAPLOW(step1[3] - step1[4]);
+ step2[5] = WRAPLOW(step1[2] - step1[5]);
+ step2[6] = WRAPLOW(step1[1] - step1[6]);
+ step2[7] = WRAPLOW(step1[0] - step1[7]);
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+ temp2 = (step1[10] + step1[13]) * cospi_16_64;
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+ temp2 = (step1[11] + step1[12]) * cospi_16_64;
+ step2[11] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[12] = WRAPLOW(dct_const_round_shift(temp2));
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ // stage 7: output[k] and output[15 - k] are the sum / difference of step2[k] and step2[15 - k]
+ output[0] = WRAPLOW(step2[0] + step2[15]);
+ output[1] = WRAPLOW(step2[1] + step2[14]);
+ output[2] = WRAPLOW(step2[2] + step2[13]);
+ output[3] = WRAPLOW(step2[3] + step2[12]);
+ output[4] = WRAPLOW(step2[4] + step2[11]);
+ output[5] = WRAPLOW(step2[5] + step2[10]);
+ output[6] = WRAPLOW(step2[6] + step2[9]);
+ output[7] = WRAPLOW(step2[7] + step2[8]);
+ output[8] = WRAPLOW(step2[7] - step2[8]);
+ output[9] = WRAPLOW(step2[6] - step2[9]);
+ output[10] = WRAPLOW(step2[5] - step2[10]);
+ output[11] = WRAPLOW(step2[4] - step2[11]);
+ output[12] = WRAPLOW(step2[3] - step2[12]);
+ output[13] = WRAPLOW(step2[2] - step2[13]);
+ output[14] = WRAPLOW(step2[1] - step2[14]);
+ output[15] = WRAPLOW(step2[0] - step2[15]);
+}
+
+void vp10_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
+ int stride) {
+ tran_low_t out[16 * 16];  // row-pass results, 16 values per row
+ tran_low_t *outptr = out;
+ int i, j;
+ tran_low_t temp_in[16], temp_out[16];  // one column in / out for pass 2
+
+ // Pass 1: run vp10_idct16_c on each of the 16 input rows, writing into out.
+ for (i = 0; i < 16; ++i) {
+ vp10_idct16_c(input, outptr);
+ input += 16;
+ outptr += 16;
+ }
+
+ // Pass 2: run vp10_idct16_c on each column of out and add the result to dest.
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];  // gather column i
+ vp10_idct16_c(temp_in, temp_out);
+ for (j = 0; j < 16; ++j) {
+ dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+ ROUND_POWER_OF_TWO(temp_out[j], 6));  // dest row j starts at j * stride
+ }
+ }
+}
+
+void vp10_iadst16_c(const tran_low_t *input, tran_low_t *output) {
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;  // per-stage intermediates
+ tran_high_t s9, s10, s11, s12, s13, s14, s15;
+
+ tran_high_t x0 = input[15];  // the 16 inputs are loaded in a permuted order
+ tran_high_t x1 = input[0];
+ tran_high_t x2 = input[13];
+ tran_high_t x3 = input[2];
+ tran_high_t x4 = input[11];
+ tran_high_t x5 = input[4];
+ tran_high_t x6 = input[9];
+ tran_high_t x7 = input[6];
+ tran_high_t x8 = input[7];
+ tran_high_t x9 = input[8];
+ tran_high_t x10 = input[5];
+ tran_high_t x11 = input[10];
+ tran_high_t x12 = input[3];
+ tran_high_t x13 = input[12];
+ tran_high_t x14 = input[1];
+ tran_high_t x15 = input[14];
+
+ if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 |
+ x13 | x14 | x15)) {  // all inputs zero: emit zeros and skip the math
+ output[0] = output[1] = output[2] = output[3] = output[4] = output[5] =
+ output[6] = output[7] = output[8] = output[9] = output[10] =
+ output[11] = output[12] = output[13] = output[14] = output[15] = 0;
+ return;
+ }
+
+ // stage 1: every x pair is weighted by odd-numbered cospi constants
+ s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
+ s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
+ s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
+ s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
+ s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
+ s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
+ s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
+ s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
+ s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
+ s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
+ s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
+ s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
+ s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
+ s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
+ s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
+ s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
+
+ x0 = WRAPLOW(dct_const_round_shift(s0 + s8));
+ x1 = WRAPLOW(dct_const_round_shift(s1 + s9));
+ x2 = WRAPLOW(dct_const_round_shift(s2 + s10));
+ x3 = WRAPLOW(dct_const_round_shift(s3 + s11));
+ x4 = WRAPLOW(dct_const_round_shift(s4 + s12));
+ x5 = WRAPLOW(dct_const_round_shift(s5 + s13));
+ x6 = WRAPLOW(dct_const_round_shift(s6 + s14));
+ x7 = WRAPLOW(dct_const_round_shift(s7 + s15));
+ x8 = WRAPLOW(dct_const_round_shift(s0 - s8));
+ x9 = WRAPLOW(dct_const_round_shift(s1 - s9));
+ x10 = WRAPLOW(dct_const_round_shift(s2 - s10));
+ x11 = WRAPLOW(dct_const_round_shift(s3 - s11));
+ x12 = WRAPLOW(dct_const_round_shift(s4 - s12));
+ x13 = WRAPLOW(dct_const_round_shift(s5 - s13));
+ x14 = WRAPLOW(dct_const_round_shift(s6 - s14));
+ x15 = WRAPLOW(dct_const_round_shift(s7 - s15));
+
+ // stage 2: x0..x7 pass through unscaled; x8..x15 are weighted
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4;
+ s5 = x5;
+ s6 = x6;
+ s7 = x7;
+ s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
+ s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
+ s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
+ s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
+ s12 = -x12 * cospi_28_64 + x13 * cospi_4_64;
+ s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
+ s14 = -x14 * cospi_12_64 + x15 * cospi_20_64;
+ s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
+
+ x0 = WRAPLOW(s0 + s4);  // unscaled sums need no round shift
+ x1 = WRAPLOW(s1 + s5);
+ x2 = WRAPLOW(s2 + s6);
+ x3 = WRAPLOW(s3 + s7);
+ x4 = WRAPLOW(s0 - s4);
+ x5 = WRAPLOW(s1 - s5);
+ x6 = WRAPLOW(s2 - s6);
+ x7 = WRAPLOW(s3 - s7);
+ x8 = WRAPLOW(dct_const_round_shift(s8 + s12));
+ x9 = WRAPLOW(dct_const_round_shift(s9 + s13));
+ x10 = WRAPLOW(dct_const_round_shift(s10 + s14));
+ x11 = WRAPLOW(dct_const_round_shift(s11 + s15));
+ x12 = WRAPLOW(dct_const_round_shift(s8 - s12));
+ x13 = WRAPLOW(dct_const_round_shift(s9 - s13));
+ x14 = WRAPLOW(dct_const_round_shift(s10 - s14));
+ x15 = WRAPLOW(dct_const_round_shift(s11 - s15));
+
+ // stage 3: x0..x3 and x8..x11 pass through; x4..x7 and x12..x15 are weighted
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
+ s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
+ s6 = -x6 * cospi_24_64 + x7 * cospi_8_64;
+ s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
+ s8 = x8;
+ s9 = x9;
+ s10 = x10;
+ s11 = x11;
+ s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
+ s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
+ s14 = -x14 * cospi_24_64 + x15 * cospi_8_64;
+ s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
+
+ x0 = WRAPLOW(s0 + s2);
+ x1 = WRAPLOW(s1 + s3);
+ x2 = WRAPLOW(s0 - s2);
+ x3 = WRAPLOW(s1 - s3);
+ x4 = WRAPLOW(dct_const_round_shift(s4 + s6));
+ x5 = WRAPLOW(dct_const_round_shift(s5 + s7));
+ x6 = WRAPLOW(dct_const_round_shift(s4 - s6));
+ x7 = WRAPLOW(dct_const_round_shift(s5 - s7));
+ x8 = WRAPLOW(s8 + s10);
+ x9 = WRAPLOW(s9 + s11);
+ x10 = WRAPLOW(s8 - s10);
+ x11 = WRAPLOW(s9 - s11);
+ x12 = WRAPLOW(dct_const_round_shift(s12 + s14));
+ x13 = WRAPLOW(dct_const_round_shift(s13 + s15));
+ x14 = WRAPLOW(dct_const_round_shift(s12 - s14));
+ x15 = WRAPLOW(dct_const_round_shift(s13 - s15));
+
+ // stage 4: only x2, x3, x6, x7, x10, x11, x14, x15 are updated
+ s2 = (-cospi_16_64) * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (-x6 + x7);
+ s10 = cospi_16_64 * (x10 + x11);
+ s11 = cospi_16_64 * (-x10 + x11);
+ s14 = (-cospi_16_64) * (x14 + x15);
+ s15 = cospi_16_64 * (x14 - x15);
+
+ x2 = WRAPLOW(dct_const_round_shift(s2));
+ x3 = WRAPLOW(dct_const_round_shift(s3));
+ x6 = WRAPLOW(dct_const_round_shift(s6));
+ x7 = WRAPLOW(dct_const_round_shift(s7));
+ x10 = WRAPLOW(dct_const_round_shift(s10));
+ x11 = WRAPLOW(dct_const_round_shift(s11));
+ x14 = WRAPLOW(dct_const_round_shift(s14));
+ x15 = WRAPLOW(dct_const_round_shift(s15));
+
+ output[0] = WRAPLOW(x0);  // outputs are written permuted; slots 1, 3, 13, 15 are negated
+ output[1] = WRAPLOW(-x8);
+ output[2] = WRAPLOW(x12);
+ output[3] = WRAPLOW(-x4);
+ output[4] = WRAPLOW(x6);
+ output[5] = WRAPLOW(x14);
+ output[6] = WRAPLOW(x10);
+ output[7] = WRAPLOW(x2);
+ output[8] = WRAPLOW(x3);
+ output[9] = WRAPLOW(x11);
+ output[10] = WRAPLOW(x15);
+ output[11] = WRAPLOW(x7);
+ output[12] = WRAPLOW(x5);
+ output[13] = WRAPLOW(-x13);
+ output[14] = WRAPLOW(x9);
+ output[15] = WRAPLOW(-x1);
+}
+
+void vp10_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest,
+ int stride) {
+ tran_low_t out[16 * 16] = { 0 };  // zero-filled: rows 4..15 are never written by pass 1
+ tran_low_t *outptr = out;
+ int i, j;
+ tran_low_t temp_in[16], temp_out[16];  // one column in / out for pass 2
+
+ // First transform rows. Since all non-zero dct coefficients are in
+ // upper-left 4x4 area, we only need to calculate first 4 rows here.
+ for (i = 0; i < 4; ++i) {
+ vp10_idct16_c(input, outptr);
+ input += 16;
+ outptr += 16;
+ }
+
+ // Then transform all 16 columns and add the result to dest.
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];  // gather column i
+ vp10_idct16_c(temp_in, temp_out);
+ for (j = 0; j < 16; ++j) {
+ dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+ ROUND_POWER_OF_TWO(temp_out[j], 6));  // dest row j starts at j * stride
+ }
+ }
+}
+
+void vp10_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest,
+ int stride) {
+ int i, j;
+ tran_high_t a1;  // the single value added to every pixel of the 16x16 block
+ tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));  // only input[0] is read
+ out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));  // second scaling by cospi_16_64
+ a1 = ROUND_POWER_OF_TWO(out, 6);
+ for (j = 0; j < 16; ++j) {  // 16 rows, each `stride` elements apart
+ for (i = 0; i < 16; ++i) dest[i] = clip_pixel_add(dest[i], a1);
+ dest += stride;
+ }
+}
+
+void vp10_idct32_c(const tran_low_t *input, tran_low_t *output) {
+ tran_low_t step1[32], step2[32];  // the stages alternate between these two buffers
+ tran_high_t temp1, temp2;  // hold the cospi products before dct_const_round_shift
+
+ // stage 1: even input indices are copied (permuted) into step1[0..15];
+ step1[0] = input[0];
+ step1[1] = input[16];
+ step1[2] = input[8];
+ step1[3] = input[24];
+ step1[4] = input[4];
+ step1[5] = input[20];
+ step1[6] = input[12];
+ step1[7] = input[28];
+ step1[8] = input[2];
+ step1[9] = input[18];
+ step1[10] = input[10];
+ step1[11] = input[26];
+ step1[12] = input[6];
+ step1[13] = input[22];
+ step1[14] = input[14];
+ step1[15] = input[30];
+
+ temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;  // odd input indices feed step1[16..31]
+ temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
+ step1[16] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[31] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
+ temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
+ step1[17] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[30] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
+ temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
+ step1[18] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[29] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
+ temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
+ step1[19] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[28] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
+ temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
+ step1[20] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[27] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
+ temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
+ step1[21] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[26] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
+ temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
+ step1[22] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[25] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
+ temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
+ step1[23] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[24] = WRAPLOW(dct_const_round_shift(temp2));
+
+ // stage 2
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+ temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+ step2[8] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[15] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+ temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+ step2[9] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[14] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+ temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2));
+
+ temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+ temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+ step2[11] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[12] = WRAPLOW(dct_const_round_shift(temp2));
+
+ step2[16] = WRAPLOW(step1[16] + step1[17]);
+ step2[17] = WRAPLOW(step1[16] - step1[17]);
+ step2[18] = WRAPLOW(-step1[18] + step1[19]);
+ step2[19] = WRAPLOW(step1[18] + step1[19]);
+ step2[20] = WRAPLOW(step1[20] + step1[21]);
+ step2[21] = WRAPLOW(step1[20] - step1[21]);
+ step2[22] = WRAPLOW(-step1[22] + step1[23]);
+ step2[23] = WRAPLOW(step1[22] + step1[23]);
+ step2[24] = WRAPLOW(step1[24] + step1[25]);
+ step2[25] = WRAPLOW(step1[24] - step1[25]);
+ step2[26] = WRAPLOW(-step1[26] + step1[27]);
+ step2[27] = WRAPLOW(step1[26] + step1[27]);
+ step2[28] = WRAPLOW(step1[28] + step1[29]);
+ step2[29] = WRAPLOW(step1[28] - step1[29]);
+ step2[30] = WRAPLOW(-step1[30] + step1[31]);
+ step2[31] = WRAPLOW(step1[30] + step1[31]);
+
+ // stage 3
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+ temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+ step1[4] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[7] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+ temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+
+ step1[8] = WRAPLOW(step2[8] + step2[9]);
+ step1[9] = WRAPLOW(step2[8] - step2[9]);
+ step1[10] = WRAPLOW(-step2[10] + step2[11]);
+ step1[11] = WRAPLOW(step2[10] + step2[11]);
+ step1[12] = WRAPLOW(step2[12] + step2[13]);
+ step1[13] = WRAPLOW(step2[12] - step2[13]);
+ step1[14] = WRAPLOW(-step2[14] + step2[15]);
+ step1[15] = WRAPLOW(step2[14] + step2[15]);
+
+ step1[16] = step2[16];
+ step1[31] = step2[31];
+ temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
+ temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
+ step1[17] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[30] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
+ temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
+ step1[18] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[29] = WRAPLOW(dct_const_round_shift(temp2));
+ step1[19] = step2[19];
+ step1[20] = step2[20];
+ temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
+ temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
+ step1[21] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[26] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
+ temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
+ step1[22] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[25] = WRAPLOW(dct_const_round_shift(temp2));
+ step1[23] = step2[23];
+ step1[24] = step2[24];
+ step1[27] = step2[27];
+ step1[28] = step2[28];
+
+ // stage 4
+ temp1 = (step1[0] + step1[1]) * cospi_16_64;
+ temp2 = (step1[0] - step1[1]) * cospi_16_64;
+ step2[0] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[1] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[3] = WRAPLOW(dct_const_round_shift(temp2));
+ step2[4] = WRAPLOW(step1[4] + step1[5]);
+ step2[5] = WRAPLOW(step1[4] - step1[5]);
+ step2[6] = WRAPLOW(-step1[6] + step1[7]);
+ step2[7] = WRAPLOW(step1[6] + step1[7]);
+
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+ temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+ step2[9] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[14] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+ temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2));
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ step2[16] = WRAPLOW(step1[16] + step1[19]);
+ step2[17] = WRAPLOW(step1[17] + step1[18]);
+ step2[18] = WRAPLOW(step1[17] - step1[18]);
+ step2[19] = WRAPLOW(step1[16] - step1[19]);
+ step2[20] = WRAPLOW(-step1[20] + step1[23]);
+ step2[21] = WRAPLOW(-step1[21] + step1[22]);
+ step2[22] = WRAPLOW(step1[21] + step1[22]);
+ step2[23] = WRAPLOW(step1[20] + step1[23]);
+
+ step2[24] = WRAPLOW(step1[24] + step1[27]);
+ step2[25] = WRAPLOW(step1[25] + step1[26]);
+ step2[26] = WRAPLOW(step1[25] - step1[26]);
+ step2[27] = WRAPLOW(step1[24] - step1[27]);
+ step2[28] = WRAPLOW(-step1[28] + step1[31]);
+ step2[29] = WRAPLOW(-step1[29] + step1[30]);
+ step2[30] = WRAPLOW(step1[29] + step1[30]);
+ step2[31] = WRAPLOW(step1[28] + step1[31]);
+
+ // stage 5
+ step1[0] = WRAPLOW(step2[0] + step2[3]);
+ step1[1] = WRAPLOW(step2[1] + step2[2]);
+ step1[2] = WRAPLOW(step2[1] - step2[2]);
+ step1[3] = WRAPLOW(step2[0] - step2[3]);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[6] = WRAPLOW(dct_const_round_shift(temp2));
+ step1[7] = step2[7];
+
+ step1[8] = WRAPLOW(step2[8] + step2[11]);
+ step1[9] = WRAPLOW(step2[9] + step2[10]);
+ step1[10] = WRAPLOW(step2[9] - step2[10]);
+ step1[11] = WRAPLOW(step2[8] - step2[11]);
+ step1[12] = WRAPLOW(-step2[12] + step2[15]);
+ step1[13] = WRAPLOW(-step2[13] + step2[14]);
+ step1[14] = WRAPLOW(step2[13] + step2[14]);
+ step1[15] = WRAPLOW(step2[12] + step2[15]);
+
+ step1[16] = step2[16];
+ step1[17] = step2[17];
+ temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
+ temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
+ step1[18] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[29] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
+ temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
+ step1[19] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[28] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
+ temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
+ step1[20] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[27] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
+ temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
+ step1[21] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[26] = WRAPLOW(dct_const_round_shift(temp2));
+ step1[22] = step2[22];
+ step1[23] = step2[23];
+ step1[24] = step2[24];
+ step1[25] = step2[25];
+ step1[30] = step2[30];
+ step1[31] = step2[31];
+
+ // stage 6
+ step2[0] = WRAPLOW(step1[0] + step1[7]);
+ step2[1] = WRAPLOW(step1[1] + step1[6]);
+ step2[2] = WRAPLOW(step1[2] + step1[5]);
+ step2[3] = WRAPLOW(step1[3] + step1[4]);
+ step2[4] = WRAPLOW(step1[3] - step1[4]);
+ step2[5] = WRAPLOW(step1[2] - step1[5]);
+ step2[6] = WRAPLOW(step1[1] - step1[6]);
+ step2[7] = WRAPLOW(step1[0] - step1[7]);
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+ temp2 = (step1[10] + step1[13]) * cospi_16_64;
+ step2[10] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[13] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+ temp2 = (step1[11] + step1[12]) * cospi_16_64;
+ step2[11] = WRAPLOW(dct_const_round_shift(temp1));
+ step2[12] = WRAPLOW(dct_const_round_shift(temp2));
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ step2[16] = WRAPLOW(step1[16] + step1[23]);
+ step2[17] = WRAPLOW(step1[17] + step1[22]);
+ step2[18] = WRAPLOW(step1[18] + step1[21]);
+ step2[19] = WRAPLOW(step1[19] + step1[20]);
+ step2[20] = WRAPLOW(step1[19] - step1[20]);
+ step2[21] = WRAPLOW(step1[18] - step1[21]);
+ step2[22] = WRAPLOW(step1[17] - step1[22]);
+ step2[23] = WRAPLOW(step1[16] - step1[23]);
+
+ step2[24] = WRAPLOW(-step1[24] + step1[31]);
+ step2[25] = WRAPLOW(-step1[25] + step1[30]);
+ step2[26] = WRAPLOW(-step1[26] + step1[29]);
+ step2[27] = WRAPLOW(-step1[27] + step1[28]);
+ step2[28] = WRAPLOW(step1[27] + step1[28]);
+ step2[29] = WRAPLOW(step1[26] + step1[29]);
+ step2[30] = WRAPLOW(step1[25] + step1[30]);
+ step2[31] = WRAPLOW(step1[24] + step1[31]);
+
+ // stage 7
+ step1[0] = WRAPLOW(step2[0] + step2[15]);
+ step1[1] = WRAPLOW(step2[1] + step2[14]);
+ step1[2] = WRAPLOW(step2[2] + step2[13]);
+ step1[3] = WRAPLOW(step2[3] + step2[12]);
+ step1[4] = WRAPLOW(step2[4] + step2[11]);
+ step1[5] = WRAPLOW(step2[5] + step2[10]);
+ step1[6] = WRAPLOW(step2[6] + step2[9]);
+ step1[7] = WRAPLOW(step2[7] + step2[8]);
+ step1[8] = WRAPLOW(step2[7] - step2[8]);
+ step1[9] = WRAPLOW(step2[6] - step2[9]);
+ step1[10] = WRAPLOW(step2[5] - step2[10]);
+ step1[11] = WRAPLOW(step2[4] - step2[11]);
+ step1[12] = WRAPLOW(step2[3] - step2[12]);
+ step1[13] = WRAPLOW(step2[2] - step2[13]);
+ step1[14] = WRAPLOW(step2[1] - step2[14]);
+ step1[15] = WRAPLOW(step2[0] - step2[15]);
+
+ step1[16] = step2[16];
+ step1[17] = step2[17];
+ step1[18] = step2[18];
+ step1[19] = step2[19];
+ temp1 = (-step2[20] + step2[27]) * cospi_16_64;
+ temp2 = (step2[20] + step2[27]) * cospi_16_64;
+ step1[20] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[27] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = (-step2[21] + step2[26]) * cospi_16_64;
+ temp2 = (step2[21] + step2[26]) * cospi_16_64;
+ step1[21] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[26] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = (-step2[22] + step2[25]) * cospi_16_64;
+ temp2 = (step2[22] + step2[25]) * cospi_16_64;
+ step1[22] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[25] = WRAPLOW(dct_const_round_shift(temp2));
+ temp1 = (-step2[23] + step2[24]) * cospi_16_64;
+ temp2 = (step2[23] + step2[24]) * cospi_16_64;
+ step1[23] = WRAPLOW(dct_const_round_shift(temp1));
+ step1[24] = WRAPLOW(dct_const_round_shift(temp2));
+ step1[28] = step2[28];
+ step1[29] = step2[29];
+ step1[30] = step2[30];
+ step1[31] = step2[31];
+
+ // final stage: output[k] and output[31 - k] are the sum / difference of step1[k] and step1[31 - k]
+ output[0] = WRAPLOW(step1[0] + step1[31]);
+ output[1] = WRAPLOW(step1[1] + step1[30]);
+ output[2] = WRAPLOW(step1[2] + step1[29]);
+ output[3] = WRAPLOW(step1[3] + step1[28]);
+ output[4] = WRAPLOW(step1[4] + step1[27]);
+ output[5] = WRAPLOW(step1[5] + step1[26]);
+ output[6] = WRAPLOW(step1[6] + step1[25]);
+ output[7] = WRAPLOW(step1[7] + step1[24]);
+ output[8] = WRAPLOW(step1[8] + step1[23]);
+ output[9] = WRAPLOW(step1[9] + step1[22]);
+ output[10] = WRAPLOW(step1[10] + step1[21]);
+ output[11] = WRAPLOW(step1[11] + step1[20]);
+ output[12] = WRAPLOW(step1[12] + step1[19]);
+ output[13] = WRAPLOW(step1[13] + step1[18]);
+ output[14] = WRAPLOW(step1[14] + step1[17]);
+ output[15] = WRAPLOW(step1[15] + step1[16]);
+ output[16] = WRAPLOW(step1[15] - step1[16]);
+ output[17] = WRAPLOW(step1[14] - step1[17]);
+ output[18] = WRAPLOW(step1[13] - step1[18]);
+ output[19] = WRAPLOW(step1[12] - step1[19]);
+ output[20] = WRAPLOW(step1[11] - step1[20]);
+ output[21] = WRAPLOW(step1[10] - step1[21]);
+ output[22] = WRAPLOW(step1[9] - step1[22]);
+ output[23] = WRAPLOW(step1[8] - step1[23]);
+ output[24] = WRAPLOW(step1[7] - step1[24]);
+ output[25] = WRAPLOW(step1[6] - step1[25]);
+ output[26] = WRAPLOW(step1[5] - step1[26]);
+ output[27] = WRAPLOW(step1[4] - step1[27]);
+ output[28] = WRAPLOW(step1[3] - step1[28]);
+ output[29] = WRAPLOW(step1[2] - step1[29]);
+ output[30] = WRAPLOW(step1[1] - step1[30]);
+ output[31] = WRAPLOW(step1[0] - step1[31]);
+}
+
+void vp10_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
+ int stride) {
+ tran_low_t out[32 * 32];  // row-pass results; every row is written by the loop below
+ tran_low_t *outptr = out;
+ int i, j;
+ tran_low_t temp_in[32], temp_out[32];  // one column in / out for the column pass
+
+ // Rows: transform each of the 32 rows, or zero-fill it when its OR-reduction is 0.
+ for (i = 0; i < 32; ++i) {
+ int16_t zero_coeff[16];  // NOTE(review): narrows if tran_low_t is wider than 16 bits - confirm no non-zero row is missed
+ for (j = 0; j < 16; ++j) zero_coeff[j] = input[2 * j] | input[2 * j + 1];  // OR adjacent pairs: 32 -> 16
+ for (j = 0; j < 8; ++j)  // 16 -> 8
+ zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
+ for (j = 0; j < 4; ++j)  // 8 -> 4
+ zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
+ for (j = 0; j < 2; ++j)  // 4 -> 2
+ zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
+
+ if (zero_coeff[0] | zero_coeff[1])  // some bit survived the reduction: run the transform
+ vp10_idct32_c(input, outptr);
+ else
+ memset(outptr, 0, sizeof(tran_low_t) * 32);  // write a zero row instead of transforming
+ input += 32;
+ outptr += 32;
+ }
+
+ // Columns: run vp10_idct32_c on each column of out and add the result to dest.
+ for (i = 0; i < 32; ++i) {
+ for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i];  // gather column i
+ vp10_idct32_c(temp_in, temp_out);
+ for (j = 0; j < 32; ++j) {
+ dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
+ ROUND_POWER_OF_TWO(temp_out[j], 6));  // dest row j starts at j * stride
+ }
+ }
+}
+
+// 32x32 inverse DCT for inputs whose non-zero coefficients all lie in the
+// upper-left 8x8 corner. The reconstructed residual is added, with pixel
+// clipping, to the block at `dest` (rows `stride` bytes apart).
+void vp10_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,
+                             int stride) {
+  tran_low_t rows[32 * 32] = { 0 };
+  tran_low_t col_in[32], col_out[32];
+  int r, c;
+
+  // Row pass: only the first 8 rows are transformed; the other 24 rows of
+  // the intermediate buffer keep their zero initialisation.
+  for (r = 0; r < 8; ++r) {
+    vp10_idct32_c(input + 32 * r, rows + 32 * r);
+  }
+
+  // Column pass: gather a column, transform it, then round and accumulate.
+  for (c = 0; c < 32; ++c) {
+    for (r = 0; r < 32; ++r) col_in[r] = rows[r * 32 + c];
+    vp10_idct32_c(col_in, col_out);
+    for (r = 0; r < 32; ++r) {
+      uint8_t *const px = &dest[r * stride + c];
+      *px = clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 6));
+    }
+  }
+}
+
+// DC-only 32x32 inverse DCT: input[0] is scaled by cospi_16_64 once per
+// dimension, rounded, and the resulting constant is added to every pixel.
+void vp10_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest,
+                            int stride) {
+  int row, col;
+  tran_high_t delta;
+  tran_low_t dc = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+
+  dc = WRAPLOW(dct_const_round_shift(dc * cospi_16_64));
+  delta = ROUND_POWER_OF_TWO(dc, 6);
+  // One offset for the whole block; `dest` walks down one row per iteration.
+  for (row = 0; row < 32; ++row, dest += stride)
+    for (col = 0; col < 32; ++col) dest[col] = clip_pixel_add(dest[col], delta);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
+ int stride, int bd) {
+ /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
+ 0.5 shifts per pixel. Rows are transformed first, then columns. */
+ int i;
+ tran_low_t output[16];  // row-pass result, consumed by the column pass
+ tran_high_t a1, b1, c1, d1, e1;
+ const tran_low_t *ip = input;
+ tran_low_t *op = output;
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // destination as 16-bit samples
+
+ for (i = 0; i < 4; i++) {  // pass 1: each row of input -> one row of output[]
+ a1 = ip[0] >> UNIT_QUANT_SHIFT;  // every input is shifted down first
+ c1 = ip[1] >> UNIT_QUANT_SHIFT;  // note the a, c, d, b load order
+ d1 = ip[2] >> UNIT_QUANT_SHIFT;
+ b1 = ip[3] >> UNIT_QUANT_SHIFT;
+ a1 += c1;
+ d1 -= b1;
+ e1 = (a1 - d1) >> 1;
+ b1 = e1 - b1;
+ c1 = e1 - c1;
+ a1 -= b1;
+ d1 += c1;
+ op[0] = HIGHBD_WRAPLOW(a1, bd);
+ op[1] = HIGHBD_WRAPLOW(b1, bd);
+ op[2] = HIGHBD_WRAPLOW(c1, bd);
+ op[3] = HIGHBD_WRAPLOW(d1, bd);
+ ip += 4;
+ op += 4;
+ }
+
+ ip = output;  // pass 2 walks the row-pass result one column at a time
+ for (i = 0; i < 4; i++) {
+ a1 = ip[4 * 0];  // same a, c, d, b order, now with a stride of 4 elements
+ c1 = ip[4 * 1];
+ d1 = ip[4 * 2];
+ b1 = ip[4 * 3];
+ a1 += c1;
+ d1 -= b1;
+ e1 = (a1 - d1) >> 1;
+ b1 = e1 - b1;
+ c1 = e1 - c1;
+ a1 -= b1;
+ d1 += c1;
+ dest[stride * 0] =
+ highbd_clip_pixel_add(dest[stride * 0], HIGHBD_WRAPLOW(a1, bd), bd);
+ dest[stride * 1] =
+ highbd_clip_pixel_add(dest[stride * 1], HIGHBD_WRAPLOW(b1, bd), bd);
+ dest[stride * 2] =
+ highbd_clip_pixel_add(dest[stride * 2], HIGHBD_WRAPLOW(c1, bd), bd);
+ dest[stride * 3] =
+ highbd_clip_pixel_add(dest[stride * 3], HIGHBD_WRAPLOW(d1, bd), bd);
+ ip++;  // next column of the intermediate block
+ dest++;  // next column of the destination
+ }
+}
+
+// DC-only inverse Walsh-Hadamard 4x4: only in[0] is read. Its contribution
+// is split between the first row and the remaining three rows and added to
+// the 16-bit destination reached through `dest8`.
+void vp10_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
+                                 int dest_stride, int bd) {
+  int col;
+  tran_high_t first, rest;
+  tran_low_t row_pass[4];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  (void)bd;
+
+  // Horizontal pass: element 0 keeps what is left once `rest` is taken out.
+  first = in[0] >> UNIT_QUANT_SHIFT;
+  rest = first >> 1;
+  first -= rest;
+  row_pass[0] = HIGHBD_WRAPLOW(first, bd);
+  row_pass[3] = HIGHBD_WRAPLOW(rest, bd);
+  row_pass[2] = row_pass[3];
+  row_pass[1] = row_pass[2];
+
+  // Vertical pass: split each column value the same way and accumulate.
+  for (col = 0; col < 4; ++col) {
+    uint16_t *const px = dest + col;
+    rest = row_pass[col] >> 1;
+    first = row_pass[col] - rest;
+    // Row 0 receives `first`; rows 1..3 receive `rest`.
+    px[dest_stride * 0] = highbd_clip_pixel_add(px[dest_stride * 0], first, bd);
+    px[dest_stride * 1] = highbd_clip_pixel_add(px[dest_stride * 1], rest, bd);
+    px[dest_stride * 2] = highbd_clip_pixel_add(px[dest_stride * 2], rest, bd);
+    px[dest_stride * 3] = highbd_clip_pixel_add(px[dest_stride * 3], rest, bd);
+  }
+}
+// 4-point 1-D inverse DCT: reads input[0..3] and writes output[0..3].
+void vp10_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {
+ tran_low_t step[4];
+ tran_high_t temp1, temp2;
+ (void)bd;  // NOTE(review): presumably for builds where HIGHBD_WRAPLOW drops bd
+ // stage 1: every input is consumed into step[] before output[] is written
+ temp1 = (input[0] + input[2]) * cospi_16_64;
+ temp2 = (input[0] - input[2]) * cospi_16_64;
+ step[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
+ temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
+ step[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ // stage 2: sums fill output[0..1], the mirrored differences output[2..3]
+ output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd);
+ output[1] = HIGHBD_WRAPLOW(step[1] + step[2], bd);
+ output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd);
+ output[3] = HIGHBD_WRAPLOW(step[0] - step[3], bd);
+}
+
+// 4x4 inverse DCT of all 16 coefficients in `input` (row-major). The result
+// is rounded and added through highbd_clip_pixel_add() to the 16-bit samples
+// reached through `dest8`; `stride` is the row pitch in samples.
+void vp10_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
+                                  int stride, int bd) {
+  tran_low_t rows[4 * 4];
+  tran_low_t col_in[4], col_out[4];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  int r, c;
+
+  // Row pass: one 1-D transform per input row.
+  for (r = 0; r < 4; ++r) {
+    vp10_highbd_idct4_c(input + 4 * r, rows + 4 * r, bd);
+  }
+
+  // Column pass: gather a column, transform it, then round and accumulate.
+  for (c = 0; c < 4; ++c) {
+    for (r = 0; r < 4; ++r) col_in[r] = rows[r * 4 + c];
+    vp10_highbd_idct4_c(col_in, col_out, bd);
+    for (r = 0; r < 4; ++r) {
+      uint16_t *const px = &dest[r * stride + c];
+      *px = highbd_clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 4), bd);
+    }
+  }
+}
+
+// DC-only 4x4 inverse DCT: input[0] is scaled by cospi_16_64 once per
+// dimension, rounded, and the resulting constant is added to all 16 samples
+// of the 16-bit destination reached through `dest8`.
+void vp10_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
+                                 int dest_stride, int bd) {
+  int row, col;
+  tran_high_t delta;
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  tran_low_t dc =
+      HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
+
+  dc = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(dc * cospi_16_64), bd);
+  delta = ROUND_POWER_OF_TWO(dc, 4);
+  // `dest` advances by one row of samples per outer iteration.
+  for (row = 0; row < 4; ++row, dest += dest_stride) {
+    for (col = 0; col < 4; ++col)
+      dest[col] = highbd_clip_pixel_add(dest[col], delta, bd);
+  }
+}
+// 8-point 1-D inverse DCT: reads input[0..7] and writes output[0..7].
+void vp10_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {
+ tran_low_t step1[8], step2[8];  // only step2[4..7] is ever used
+ tran_high_t temp1, temp2;
+ // stage 1: even-indexed inputs go to step1[0..3], odd ones feed step1[4..7]
+ step1[0] = input[0];
+ step1[2] = input[4];
+ step1[1] = input[2];
+ step1[3] = input[6];
+ temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
+ temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
+ step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
+ temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
+ step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ // stage 2 & stage 3 - even half (in place: step1[0..3] is input and output)
+ vp10_highbd_idct4_c(step1, step1, bd);
+
+ // stage 2 - odd half
+ step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
+
+ // stage 3 - odd half (4 and 7 pass through; 5 and 6 are recombined)
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[7] = step2[7];
+
+ // stage 4: output[k] and output[7 - k] are the sum/difference of one pair
+ output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+ output[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd);
+ output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+ output[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd);
+ output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+ output[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd);
+ output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+ output[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd);
+}
+
+// 8x8 inverse DCT of all 64 coefficients in `input` (row-major). The result
+// is rounded and added through highbd_clip_pixel_add() to the 16-bit samples
+// reached through `dest8`; `stride` is the row pitch in samples.
+void vp10_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
+                                  int stride, int bd) {
+  tran_low_t rows[8 * 8];
+  tran_low_t col_in[8], col_out[8];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  int r, c;
+
+  // Row pass: one 1-D transform per input row.
+  for (r = 0; r < 8; ++r) {
+    vp10_highbd_idct8_c(input + 8 * r, rows + 8 * r, bd);
+  }
+
+  // Column pass: gather a column, transform it, then round and accumulate.
+  for (c = 0; c < 8; ++c) {
+    for (r = 0; r < 8; ++r) col_in[r] = rows[r * 8 + c];
+    vp10_highbd_idct8_c(col_in, col_out, bd);
+    for (r = 0; r < 8; ++r) {
+      uint16_t *const px = &dest[r * stride + c];
+      *px = highbd_clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 5), bd);
+    }
+  }
+}
+
+// DC-only 8x8 inverse DCT: adds one constant from input[0] to all 64 samples.
+void vp10_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,
+                                 int stride, int bd) {
+  int row, col;
+  tran_high_t delta;
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  tran_low_t dc =
+      HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
+  dc = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(dc * cospi_16_64), bd);
+  delta = ROUND_POWER_OF_TWO(dc, 5);
+  for (row = 0; row < 8; ++row, dest += stride)
+    for (col = 0; col < 8; ++col)
+      dest[col] = highbd_clip_pixel_add(dest[col], delta, bd);
+}
+// 4-point 1-D inverse ADST: reads input[0..3] and writes output[0..3].
+void vp10_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+ tran_low_t x0 = input[0];
+ tran_low_t x1 = input[1];
+ tran_low_t x2 = input[2];
+ tran_low_t x3 = input[3];
+ (void)bd;  // NOTE(review): presumably for builds where HIGHBD_WRAPLOW drops bd
+
+ if (!(x0 | x1 | x2 | x3)) {  // all four inputs zero: emit zeros and return
+ memset(output, 0, 4 * sizeof(*output));
+ return;
+ }
+
+ s0 = sinpi_1_9 * x0;
+ s1 = sinpi_2_9 * x0;
+ s2 = sinpi_3_9 * x1;
+ s3 = sinpi_4_9 * x2;
+ s4 = sinpi_1_9 * x2;
+ s5 = sinpi_2_9 * x3;
+ s6 = sinpi_4_9 * x3;
+ s7 = (tran_high_t)HIGHBD_WRAPLOW(x0 - x2 + x3, bd);
+
+ s0 = s0 + s3 + s5;
+ s1 = s1 - s4 - s6;
+ s3 = s2;  // s3 takes the old s2 (sinpi_3_9 * x1) before s2 is overwritten
+ s2 = sinpi_3_9 * s7;
+
+ // 1-D transform scaling factor is sqrt(2).
+ // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
+ // + 1b (addition) = 29b.
+ // Hence the output bit depth is 15b.
+ output[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s3), bd);
+ output[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s3), bd);
+ output[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd);
+ output[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3), bd);
+}
+// 8-point 1-D inverse ADST: reads input[0..7] and writes output[0..7].
+void vp10_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+ tran_low_t x0 = input[7];  // inputs are loaded in a permuted order
+ tran_low_t x1 = input[0];
+ tran_low_t x2 = input[5];
+ tran_low_t x3 = input[2];
+ tran_low_t x4 = input[3];
+ tran_low_t x5 = input[4];
+ tran_low_t x6 = input[1];
+ tran_low_t x7 = input[6];
+ (void)bd;  // NOTE(review): presumably for builds where HIGHBD_WRAPLOW drops bd
+
+ if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {  // all inputs zero
+ memset(output, 0, 8 * sizeof(*output));
+ return;
+ }
+
+ // stage 1: weight the pairs (x0,x1) .. (x6,x7), then combine s[k] with s[k+4]
+ s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
+ s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
+ s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
+ s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
+ s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
+ s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
+ s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
+ s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
+
+ x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s4), bd);
+ x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s5), bd);
+ x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s6), bd);
+ x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s7), bd);
+ x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s4), bd);
+ x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s5), bd);
+ x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s6), bd);
+ x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s7), bd);
+
+ // stage 2: x0..x3 pass through unscaled, so only x4..x7 are round-shifted
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
+ s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
+ s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
+ s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
+
+ x0 = HIGHBD_WRAPLOW(s0 + s2, bd);
+ x1 = HIGHBD_WRAPLOW(s1 + s3, bd);
+ x2 = HIGHBD_WRAPLOW(s0 - s2, bd);
+ x3 = HIGHBD_WRAPLOW(s1 - s3, bd);
+ x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd);
+ x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd);
+ x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd);
+ x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd);
+
+ // stage 3: only x2, x3, x6, x7 change; x0, x1, x4, x5 are already final
+ s2 = cospi_16_64 * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (x6 - x7);
+
+ x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd);
+ x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd);
+ x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd);
+ x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd);
+
+ output[0] = HIGHBD_WRAPLOW(x0, bd);  // outputs are permuted; odd ones negated
+ output[1] = HIGHBD_WRAPLOW(-x4, bd);
+ output[2] = HIGHBD_WRAPLOW(x6, bd);
+ output[3] = HIGHBD_WRAPLOW(-x2, bd);
+ output[4] = HIGHBD_WRAPLOW(x3, bd);
+ output[5] = HIGHBD_WRAPLOW(-x7, bd);
+ output[6] = HIGHBD_WRAPLOW(x5, bd);
+ output[7] = HIGHBD_WRAPLOW(-x1, bd);
+}
+
+// 8x8 inverse DCT for inputs whose non-zero coefficients are confined to the
+// first 4 rows. The result is rounded and added through
+// highbd_clip_pixel_add() to the 16-bit samples reached through `dest8`.
+void vp10_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
+                                  int stride, int bd) {
+  tran_low_t rows[8 * 8] = { 0 };
+  tran_low_t col_in[8], col_out[8];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  int r, c;
+
+  // Row pass: only the first 4 rows are transformed; the rest stay zero.
+  for (r = 0; r < 4; ++r) {
+    vp10_highbd_idct8_c(input + 8 * r, rows + 8 * r, bd);
+  }
+
+  // Column pass: gather a column, transform it, then round and accumulate.
+  for (c = 0; c < 8; ++c) {
+    for (r = 0; r < 8; ++r) col_in[r] = rows[r * 8 + c];
+    vp10_highbd_idct8_c(col_in, col_out, bd);
+    for (r = 0; r < 8; ++r) {
+      uint16_t *const px = &dest[r * stride + c];
+      *px = highbd_clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 5), bd);
+    }
+  }
+}
+// 16-point 1-D inverse DCT: reads input[0..15] and writes output[0..15].
+void vp10_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
+ tran_low_t step1[16], step2[16];  // stages alternate between the two buffers
+ tran_high_t temp1, temp2;
+ (void)bd;  // NOTE(review): presumably for builds where HIGHBD_WRAPLOW drops bd
+
+ // stage 1: load the inputs in permuted order (indices are written as n / 2)
+ step1[0] = input[0 / 2];
+ step1[1] = input[16 / 2];
+ step1[2] = input[8 / 2];
+ step1[3] = input[24 / 2];
+ step1[4] = input[4 / 2];
+ step1[5] = input[20 / 2];
+ step1[6] = input[12 / 2];
+ step1[7] = input[28 / 2];
+ step1[8] = input[2 / 2];
+ step1[9] = input[18 / 2];
+ step1[10] = input[10 / 2];
+ step1[11] = input[26 / 2];
+ step1[12] = input[6 / 2];
+ step1[13] = input[22 / 2];
+ step1[14] = input[14 / 2];
+ step1[15] = input[30 / 2];
+
+ // stage 2: [0..7] pass through; pairs (8,15) (9,14) (10,13) (11,12) combine
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+ temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+ step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+ temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+ step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+ temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+ step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+ temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+ step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ // stage 3: [0..3] pass through; (4,7) (5,6) combine; [8..15] add/subtract
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+ temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+ step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+ temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+ step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd);
+ step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd);
+ step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd);
+ step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd);
+ step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd);
+ step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd);
+ step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd);
+ step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd);
+
+ // stage 4: 8, 11, 12 and 15 pass through; every other entry is recomputed
+ temp1 = (step1[0] + step1[1]) * cospi_16_64;
+ temp2 = (step1[0] - step1[1]) * cospi_16_64;
+ step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
+
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+ temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+ step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+ temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+ step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ // stage 5: 4 and 7 pass through; 5 and 6 are recombined with cospi_16_64
+ step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd);
+ step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd);
+ step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd);
+ step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[7] = step2[7];
+
+ step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd);
+ step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd);
+ step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd);
+ step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd);
+ step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd);
+ step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd);
+ step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd);
+ step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd);
+
+ // stage 6: 8, 9, 14 and 15 pass through; (10,13) and (11,12) are recombined
+ step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+ step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd);
+ step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+ step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd);
+ step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd);
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+ temp2 = (step1[10] + step1[13]) * cospi_16_64;
+ step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+ temp2 = (step1[11] + step1[12]) * cospi_16_64;
+ step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ // stage 7: output[k] and output[15 - k] are the sum/difference of one pair
+ output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd);
+ output[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd);
+ output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd);
+ output[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd);
+ output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd);
+ output[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd);
+ output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd);
+ output[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd);
+ output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd);
+ output[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd);
+ output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd);
+ output[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd);
+ output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd);
+ output[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd);
+ output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd);
+ output[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd);
+}
+
+// 16x16 inverse DCT of all 256 coefficients in `input` (row-major). The result
+// is rounded and added through highbd_clip_pixel_add() to the 16-bit samples
+// reached through `dest8`; `stride` is the row pitch in samples.
+void vp10_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
+                                     int stride, int bd) {
+  tran_low_t rows[16 * 16];
+  tran_low_t col_in[16], col_out[16];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  int r, c;
+
+  // Row pass: one 1-D transform per input row.
+  for (r = 0; r < 16; ++r) {
+    vp10_highbd_idct16_c(input + 16 * r, rows + 16 * r, bd);
+  }
+
+  // Column pass: gather a column, transform it, then round and accumulate.
+  for (c = 0; c < 16; ++c) {
+    for (r = 0; r < 16; ++r) col_in[r] = rows[r * 16 + c];
+    vp10_highbd_idct16_c(col_in, col_out, bd);
+    for (r = 0; r < 16; ++r) {
+      uint16_t *const px = &dest[r * stride + c];
+      *px = highbd_clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 6), bd);
+    }
+  }
+}
+// 16-point 1-D inverse ADST: reads input[0..15] and writes output[0..15].
+void vp10_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
+ tran_high_t s9, s10, s11, s12, s13, s14, s15;
+
+ tran_low_t x0 = input[15];  // inputs are loaded in a permuted order
+ tran_low_t x1 = input[0];
+ tran_low_t x2 = input[13];
+ tran_low_t x3 = input[2];
+ tran_low_t x4 = input[11];
+ tran_low_t x5 = input[4];
+ tran_low_t x6 = input[9];
+ tran_low_t x7 = input[6];
+ tran_low_t x8 = input[7];
+ tran_low_t x9 = input[8];
+ tran_low_t x10 = input[5];
+ tran_low_t x11 = input[10];
+ tran_low_t x12 = input[3];
+ tran_low_t x13 = input[12];
+ tran_low_t x14 = input[1];
+ tran_low_t x15 = input[14];
+ (void)bd;  // NOTE(review): presumably for builds where HIGHBD_WRAPLOW drops bd
+
+ if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 |
+ x13 | x14 | x15)) {  // all inputs zero: emit zeros and return
+ memset(output, 0, 16 * sizeof(*output));
+ return;
+ }
+
+ // stage 1: weight the pairs (x0,x1) .. (x14,x15), then combine s[k], s[k+8]
+ s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
+ s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
+ s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
+ s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
+ s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
+ s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
+ s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
+ s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
+ s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
+ s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
+ s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
+ s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
+ s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
+ s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
+ s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
+ s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
+
+ x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s8), bd);
+ x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s9), bd);
+ x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s10), bd);
+ x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s11), bd);
+ x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s12), bd);
+ x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s13), bd);
+ x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 + s14), bd);
+ x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 + s15), bd);
+ x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s8), bd);
+ x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s9), bd);
+ x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s10), bd);
+ x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s11), bd);
+ x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s12), bd);
+ x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s13), bd);
+ x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 - s14), bd);
+ x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 - s15), bd);
+
+ // stage 2: x0..x7 pass through unscaled, so only x8..x15 are round-shifted
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4;
+ s5 = x5;
+ s6 = x6;
+ s7 = x7;
+ s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
+ s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
+ s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
+ s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
+ s12 = -x12 * cospi_28_64 + x13 * cospi_4_64;
+ s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
+ s14 = -x14 * cospi_12_64 + x15 * cospi_20_64;
+ s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
+
+ x0 = HIGHBD_WRAPLOW(s0 + s4, bd);
+ x1 = HIGHBD_WRAPLOW(s1 + s5, bd);
+ x2 = HIGHBD_WRAPLOW(s2 + s6, bd);
+ x3 = HIGHBD_WRAPLOW(s3 + s7, bd);
+ x4 = HIGHBD_WRAPLOW(s0 - s4, bd);
+ x5 = HIGHBD_WRAPLOW(s1 - s5, bd);
+ x6 = HIGHBD_WRAPLOW(s2 - s6, bd);
+ x7 = HIGHBD_WRAPLOW(s3 - s7, bd);
+ x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 + s12), bd);
+ x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 + s13), bd);
+ x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 + s14), bd);
+ x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 + s15), bd);
+ x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 - s12), bd);
+ x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 - s13), bd);
+ x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 - s14), bd);
+ x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 - s15), bd);
+
+ // stage 3: x0..x3 and x8..x11 pass through unscaled; the rest are weighted
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
+ s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
+ s6 = -x6 * cospi_24_64 + x7 * cospi_8_64;
+ s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
+ s8 = x8;
+ s9 = x9;
+ s10 = x10;
+ s11 = x11;
+ s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
+ s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
+ s14 = -x14 * cospi_24_64 + x15 * cospi_8_64;
+ s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
+
+ x0 = HIGHBD_WRAPLOW(s0 + s2, bd);
+ x1 = HIGHBD_WRAPLOW(s1 + s3, bd);
+ x2 = HIGHBD_WRAPLOW(s0 - s2, bd);
+ x3 = HIGHBD_WRAPLOW(s1 - s3, bd);
+ x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd);
+ x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd);
+ x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd);
+ x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd);
+ x8 = HIGHBD_WRAPLOW(s8 + s10, bd);
+ x9 = HIGHBD_WRAPLOW(s9 + s11, bd);
+ x10 = HIGHBD_WRAPLOW(s8 - s10, bd);
+ x11 = HIGHBD_WRAPLOW(s9 - s11, bd);
+ x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 + s14), bd);
+ x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 + s15), bd);
+ x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 - s14), bd);
+ x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 - s15), bd);
+
+ // stage 4: only x2, x3, x6, x7, x10, x11, x14, x15 change (mind the signs)
+ s2 = (-cospi_16_64) * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (-x6 + x7);
+ s10 = cospi_16_64 * (x10 + x11);
+ s11 = cospi_16_64 * (-x10 + x11);
+ s14 = (-cospi_16_64) * (x14 + x15);
+ s15 = cospi_16_64 * (x14 - x15);
+
+ x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd);
+ x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd);
+ x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd);
+ x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd);
+ x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10), bd);
+ x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11), bd);
+ x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s14), bd);
+ x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s15), bd);
+
+ output[0] = HIGHBD_WRAPLOW(x0, bd);  // outputs are permuted; 1, 3, 13, 15 negated
+ output[1] = HIGHBD_WRAPLOW(-x8, bd);
+ output[2] = HIGHBD_WRAPLOW(x12, bd);
+ output[3] = HIGHBD_WRAPLOW(-x4, bd);
+ output[4] = HIGHBD_WRAPLOW(x6, bd);
+ output[5] = HIGHBD_WRAPLOW(x14, bd);
+ output[6] = HIGHBD_WRAPLOW(x10, bd);
+ output[7] = HIGHBD_WRAPLOW(x2, bd);
+ output[8] = HIGHBD_WRAPLOW(x3, bd);
+ output[9] = HIGHBD_WRAPLOW(x11, bd);
+ output[10] = HIGHBD_WRAPLOW(x15, bd);
+ output[11] = HIGHBD_WRAPLOW(x7, bd);
+ output[12] = HIGHBD_WRAPLOW(x5, bd);
+ output[13] = HIGHBD_WRAPLOW(-x13, bd);
+ output[14] = HIGHBD_WRAPLOW(x9, bd);
+ output[15] = HIGHBD_WRAPLOW(-x1, bd);
+}
+
+// 16x16 inverse DCT for inputs whose non-zero coefficients all lie in the
+// upper-left 4x4 corner. The result is rounded and added through
+// highbd_clip_pixel_add() to the 16-bit samples reached through `dest8`.
+void vp10_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
+                                    int stride, int bd) {
+  tran_low_t rows[16 * 16] = { 0 };
+  tran_low_t col_in[16], col_out[16];
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  int r, c;
+
+  // Row pass: with everything outside the top-left 4x4 being zero, only the
+  // first 4 rows need transforming; the other rows stay zero-initialised.
+  for (r = 0; r < 4; ++r) {
+    vp10_highbd_idct16_c(input + 16 * r, rows + 16 * r, bd);
+  }
+
+  // Column pass: gather a column, transform it, then round and accumulate.
+  for (c = 0; c < 16; ++c) {
+    for (r = 0; r < 16; ++r) col_in[r] = rows[r * 16 + c];
+    vp10_highbd_idct16_c(col_in, col_out, bd);
+    for (r = 0; r < 16; ++r) {
+      uint16_t *const px = &dest[r * stride + c];
+      *px = highbd_clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 6), bd);
+    }
+  }
+}
+
+// DC-only 16x16 inverse DCT: adds one rounded constant derived from input[0]
+// to all 256 samples of the 16-bit destination reached through `dest8`.
+void vp10_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
+                                   int stride, int bd) {
+  int row, col;
+  tran_high_t delta;
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+  tran_low_t dc =
+      HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
+  dc = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(dc * cospi_16_64), bd);
+  delta = ROUND_POWER_OF_TWO(dc, 6);
+  for (row = 0; row < 16; ++row, dest += stride)
+    for (col = 0; col < 16; ++col)
+      dest[col] = highbd_clip_pixel_add(dest[col], delta, bd);
+}
+
+static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
+ int bd) {
+ tran_low_t step1[32], step2[32];  // stage buffers, written alternately
+ tran_high_t temp1, temp2;  // hold the cospi products before round-shift
+ (void)bd;  // NOTE(review): presumably silences an unused warning - confirm
+ // 32-point 1-D inverse DCT butterflies: input[0..31] -> output[0..31].
+ // stage 1: permute even inputs into step1[0..15]; weight odd input pairs
+ step1[0] = input[0];
+ step1[1] = input[16];
+ step1[2] = input[8];
+ step1[3] = input[24];
+ step1[4] = input[4];
+ step1[5] = input[20];
+ step1[6] = input[12];
+ step1[7] = input[28];
+ step1[8] = input[2];
+ step1[9] = input[18];
+ step1[10] = input[10];
+ step1[11] = input[26];
+ step1[12] = input[6];
+ step1[13] = input[22];
+ step1[14] = input[14];
+ step1[15] = input[30];
+
+ temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
+ temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
+ step1[16] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[31] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
+ temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
+ step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
+ temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
+ step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
+ temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
+ step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
+ temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
+ step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
+ temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
+ step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
+ temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
+ step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
+ temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
+ step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ // stage 2: copy [0..7]; cospi-weight pairs in [8..15]; add/sub in [16..31]
+ step2[0] = step1[0];
+ step2[1] = step1[1];
+ step2[2] = step1[2];
+ step2[3] = step1[3];
+ step2[4] = step1[4];
+ step2[5] = step1[5];
+ step2[6] = step1[6];
+ step2[7] = step1[7];
+
+ temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
+ temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
+ step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
+ temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
+ step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
+ temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
+ step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
+ temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
+ step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[17], bd);
+ step2[17] = HIGHBD_WRAPLOW(step1[16] - step1[17], bd);
+ step2[18] = HIGHBD_WRAPLOW(-step1[18] + step1[19], bd);
+ step2[19] = HIGHBD_WRAPLOW(step1[18] + step1[19], bd);
+ step2[20] = HIGHBD_WRAPLOW(step1[20] + step1[21], bd);
+ step2[21] = HIGHBD_WRAPLOW(step1[20] - step1[21], bd);
+ step2[22] = HIGHBD_WRAPLOW(-step1[22] + step1[23], bd);
+ step2[23] = HIGHBD_WRAPLOW(step1[22] + step1[23], bd);
+ step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[25], bd);
+ step2[25] = HIGHBD_WRAPLOW(step1[24] - step1[25], bd);
+ step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[27], bd);
+ step2[27] = HIGHBD_WRAPLOW(step1[26] + step1[27], bd);
+ step2[28] = HIGHBD_WRAPLOW(step1[28] + step1[29], bd);
+ step2[29] = HIGHBD_WRAPLOW(step1[28] - step1[29], bd);
+ step2[30] = HIGHBD_WRAPLOW(-step1[30] + step1[31], bd);
+ step2[31] = HIGHBD_WRAPLOW(step1[30] + step1[31], bd);
+
+ // stage 3: copy [0..3]; cospi-weight [4..7]; add/sub [8..15]; in [16..31]
+ step1[0] = step2[0];
+ step1[1] = step2[1];
+ step1[2] = step2[2];
+ step1[3] = step2[3];
+
+ temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
+ temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
+ step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
+ temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
+ step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+
+ step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd);
+ step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd);
+ step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd);
+ step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd);
+ step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd);
+ step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd);
+ step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd);
+ step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd);
+
+ step1[16] = step2[16];
+ step1[31] = step2[31];
+ temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
+ temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
+ step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
+ temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
+ step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[19] = step2[19];
+ step1[20] = step2[20];
+ temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
+ temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
+ step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
+ temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
+ step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[23] = step2[23];
+ step1[24] = step2[24];
+ step1[27] = step2[27];
+ step1[28] = step2[28];
+
+ // stage 4: cospi-weight [0..3], (9,14), (10,13); add/sub [4..7], [16..31]
+ temp1 = (step1[0] + step1[1]) * cospi_16_64;
+ temp2 = (step1[0] - step1[1]) * cospi_16_64;
+ step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd);
+
+ step2[8] = step1[8];
+ step2[15] = step1[15];
+ temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
+ temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
+ step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
+ temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
+ step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[11] = step1[11];
+ step2[12] = step1[12];
+
+ step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[19], bd);
+ step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[18], bd);
+ step2[18] = HIGHBD_WRAPLOW(step1[17] - step1[18], bd);
+ step2[19] = HIGHBD_WRAPLOW(step1[16] - step1[19], bd);
+ step2[20] = HIGHBD_WRAPLOW(-step1[20] + step1[23], bd);
+ step2[21] = HIGHBD_WRAPLOW(-step1[21] + step1[22], bd);
+ step2[22] = HIGHBD_WRAPLOW(step1[21] + step1[22], bd);
+ step2[23] = HIGHBD_WRAPLOW(step1[20] + step1[23], bd);
+
+ step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[27], bd);
+ step2[25] = HIGHBD_WRAPLOW(step1[25] + step1[26], bd);
+ step2[26] = HIGHBD_WRAPLOW(step1[25] - step1[26], bd);
+ step2[27] = HIGHBD_WRAPLOW(step1[24] - step1[27], bd);
+ step2[28] = HIGHBD_WRAPLOW(-step1[28] + step1[31], bd);
+ step2[29] = HIGHBD_WRAPLOW(-step1[29] + step1[30], bd);
+ step2[30] = HIGHBD_WRAPLOW(step1[29] + step1[30], bd);
+ step2[31] = HIGHBD_WRAPLOW(step1[28] + step1[31], bd);
+
+ // stage 5: add/sub [0..3], [8..15]; cospi-weight (5,6), (18,29)..(21,26)
+ step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd);
+ step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd);
+ step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd);
+ step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd);
+ step1[4] = step2[4];
+ temp1 = (step2[6] - step2[5]) * cospi_16_64;
+ temp2 = (step2[5] + step2[6]) * cospi_16_64;
+ step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[7] = step2[7];
+
+ step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd);
+ step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd);
+ step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd);
+ step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd);
+ step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd);
+ step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd);
+ step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd);
+ step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd);
+
+ step1[16] = step2[16];
+ step1[17] = step2[17];
+ temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
+ temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
+ step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
+ temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
+ step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
+ temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
+ step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
+ temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
+ step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[22] = step2[22];
+ step1[23] = step2[23];
+ step1[24] = step2[24];
+ step1[25] = step2[25];
+ step1[30] = step2[30];
+ step1[31] = step2[31];
+
+ // stage 6: add/sub [0..7], [16..31]; cospi_16_64 on (10,13), (11,12)
+ step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd);
+ step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd);
+ step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd);
+ step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd);
+ step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd);
+ step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd);
+ step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd);
+ step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd);
+ step2[8] = step1[8];
+ step2[9] = step1[9];
+ temp1 = (-step1[10] + step1[13]) * cospi_16_64;
+ temp2 = (step1[10] + step1[13]) * cospi_16_64;
+ step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = (-step1[11] + step1[12]) * cospi_16_64;
+ temp2 = (step1[11] + step1[12]) * cospi_16_64;
+ step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[14] = step1[14];
+ step2[15] = step1[15];
+
+ step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[23], bd);
+ step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[22], bd);
+ step2[18] = HIGHBD_WRAPLOW(step1[18] + step1[21], bd);
+ step2[19] = HIGHBD_WRAPLOW(step1[19] + step1[20], bd);
+ step2[20] = HIGHBD_WRAPLOW(step1[19] - step1[20], bd);
+ step2[21] = HIGHBD_WRAPLOW(step1[18] - step1[21], bd);
+ step2[22] = HIGHBD_WRAPLOW(step1[17] - step1[22], bd);
+ step2[23] = HIGHBD_WRAPLOW(step1[16] - step1[23], bd);
+
+ step2[24] = HIGHBD_WRAPLOW(-step1[24] + step1[31], bd);
+ step2[25] = HIGHBD_WRAPLOW(-step1[25] + step1[30], bd);
+ step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[29], bd);
+ step2[27] = HIGHBD_WRAPLOW(-step1[27] + step1[28], bd);
+ step2[28] = HIGHBD_WRAPLOW(step1[27] + step1[28], bd);
+ step2[29] = HIGHBD_WRAPLOW(step1[26] + step1[29], bd);
+ step2[30] = HIGHBD_WRAPLOW(step1[25] + step1[30], bd);
+ step2[31] = HIGHBD_WRAPLOW(step1[24] + step1[31], bd);
+
+ // stage 7: add/sub [0..15]; cospi_16_64 on (20,27)..(23,24); rest copied
+ step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd);
+ step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd);
+ step1[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd);
+ step1[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd);
+ step1[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd);
+ step1[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd);
+ step1[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd);
+ step1[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd);
+ step1[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd);
+ step1[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd);
+ step1[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd);
+ step1[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd);
+ step1[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd);
+ step1[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd);
+ step1[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd);
+ step1[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd);
+
+ step1[16] = step2[16];
+ step1[17] = step2[17];
+ step1[18] = step2[18];
+ step1[19] = step2[19];
+ temp1 = (-step2[20] + step2[27]) * cospi_16_64;
+ temp2 = (step2[20] + step2[27]) * cospi_16_64;
+ step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = (-step2[21] + step2[26]) * cospi_16_64;
+ temp2 = (step2[21] + step2[26]) * cospi_16_64;
+ step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = (-step2[22] + step2[25]) * cospi_16_64;
+ temp2 = (step2[22] + step2[25]) * cospi_16_64;
+ step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ temp1 = (-step2[23] + step2[24]) * cospi_16_64;
+ temp2 = (step2[23] + step2[24]) * cospi_16_64;
+ step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
+ step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[28] = step2[28];
+ step1[29] = step2[29];
+ step1[30] = step2[30];
+ step1[31] = step2[31];
+
+ // final stage: output[k], output[31 - k] = step1[k] +/- step1[31 - k]
+ output[0] = HIGHBD_WRAPLOW(step1[0] + step1[31], bd);
+ output[1] = HIGHBD_WRAPLOW(step1[1] + step1[30], bd);
+ output[2] = HIGHBD_WRAPLOW(step1[2] + step1[29], bd);
+ output[3] = HIGHBD_WRAPLOW(step1[3] + step1[28], bd);
+ output[4] = HIGHBD_WRAPLOW(step1[4] + step1[27], bd);
+ output[5] = HIGHBD_WRAPLOW(step1[5] + step1[26], bd);
+ output[6] = HIGHBD_WRAPLOW(step1[6] + step1[25], bd);
+ output[7] = HIGHBD_WRAPLOW(step1[7] + step1[24], bd);
+ output[8] = HIGHBD_WRAPLOW(step1[8] + step1[23], bd);
+ output[9] = HIGHBD_WRAPLOW(step1[9] + step1[22], bd);
+ output[10] = HIGHBD_WRAPLOW(step1[10] + step1[21], bd);
+ output[11] = HIGHBD_WRAPLOW(step1[11] + step1[20], bd);
+ output[12] = HIGHBD_WRAPLOW(step1[12] + step1[19], bd);
+ output[13] = HIGHBD_WRAPLOW(step1[13] + step1[18], bd);
+ output[14] = HIGHBD_WRAPLOW(step1[14] + step1[17], bd);
+ output[15] = HIGHBD_WRAPLOW(step1[15] + step1[16], bd);
+ output[16] = HIGHBD_WRAPLOW(step1[15] - step1[16], bd);
+ output[17] = HIGHBD_WRAPLOW(step1[14] - step1[17], bd);
+ output[18] = HIGHBD_WRAPLOW(step1[13] - step1[18], bd);
+ output[19] = HIGHBD_WRAPLOW(step1[12] - step1[19], bd);
+ output[20] = HIGHBD_WRAPLOW(step1[11] - step1[20], bd);
+ output[21] = HIGHBD_WRAPLOW(step1[10] - step1[21], bd);
+ output[22] = HIGHBD_WRAPLOW(step1[9] - step1[22], bd);
+ output[23] = HIGHBD_WRAPLOW(step1[8] - step1[23], bd);
+ output[24] = HIGHBD_WRAPLOW(step1[7] - step1[24], bd);
+ output[25] = HIGHBD_WRAPLOW(step1[6] - step1[25], bd);
+ output[26] = HIGHBD_WRAPLOW(step1[5] - step1[26], bd);
+ output[27] = HIGHBD_WRAPLOW(step1[4] - step1[27], bd);
+ output[28] = HIGHBD_WRAPLOW(step1[3] - step1[28], bd);
+ output[29] = HIGHBD_WRAPLOW(step1[2] - step1[29], bd);
+ output[30] = HIGHBD_WRAPLOW(step1[1] - step1[30], bd);
+ output[31] = HIGHBD_WRAPLOW(step1[0] - step1[31], bd);
+}
+
+void vp10_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
+                                      int stride, int bd) {
+  // Full 32x32 inverse transform: highbd_idct32_c runs over every row, then
+  // over every column, and the result is accumulated into the destination.
+  tran_low_t row_out[32 * 32];
+  tran_low_t col_in[32], col_out[32];
+  uint16_t *const dst = CONVERT_TO_SHORTPTR(dest8);
+  int r, c, k;
+
+  // Rows: a row whose coefficients are all zero is zero-filled directly
+  // instead of being transformed.
+  for (r = 0; r < 32; ++r) {
+    const tran_low_t *const row_in = input + 32 * r;
+    tran_low_t *const row_dst = row_out + 32 * r;
+    tran_low_t any_bits = 0;
+
+    // OR all 32 coefficients together; the result is zero only if all are.
+    for (k = 0; k < 32; ++k) any_bits |= row_in[k];
+
+    if (any_bits) {
+      highbd_idct32_c(row_in, row_dst, bd);
+    } else {
+      memset(row_dst, 0, sizeof(tran_low_t) * 32);
+    }
+  }
+
+  // Columns: gather column c, transform it, then add the value rounded with
+  // ROUND_POWER_OF_TWO(..., 6) to each destination pixel with clipping.
+  for (c = 0; c < 32; ++c) {
+    for (k = 0; k < 32; ++k) col_in[k] = row_out[k * 32 + c];
+    highbd_idct32_c(col_in, col_out, bd);
+    for (k = 0; k < 32; ++k) {
+      uint16_t *const px = &dst[k * stride + c];
+      *px = highbd_clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[k], 6), bd);
+    }
+  }
+}
+
+void vp10_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
+                                    int stride, int bd) {
+  // Reduced 32x32 inverse transform: only the first 8 rows go through the
+  // row pass; every other row of the intermediate buffer stays zero.
+  tran_low_t row_out[32 * 32] = { 0 };
+  tran_low_t col_in[32], col_out[32];
+  uint16_t *const dst = CONVERT_TO_SHORTPTR(dest8);
+  int r, c, k;
+
+  // Rows
+  // Only upper-left 8x8 has non-zero coeff.
+  for (r = 0; r < 8; ++r) {
+    highbd_idct32_c(input + 32 * r, row_out + 32 * r, bd);
+  }
+
+  // Columns: transform each column and accumulate it into the destination.
+  for (c = 0; c < 32; ++c) {
+    for (k = 0; k < 32; ++k) col_in[k] = row_out[k * 32 + c];
+    highbd_idct32_c(col_in, col_out, bd);
+    for (k = 0; k < 32; ++k) {
+      uint16_t *const px = &dst[k * stride + c];
+      *px = highbd_clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[k], 6), bd);
+    }
+  }
+}
+
+void vp10_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,
+                                   int stride, int bd) {
+  // DC-only path: input[0] is multiplied by cospi_16_64 and round-shifted
+  // twice; the resulting constant is then added to all 32x32 pixels.
+  uint16_t *row = CONVERT_TO_SHORTPTR(dest8);
+  int x, y, dc_add;
+  tran_low_t dc =
+      HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
+
+  dc = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(dc * cospi_16_64), bd);
+  dc_add = ROUND_POWER_OF_TWO(dc, 6);
+
+  for (y = 0; y < 32; ++y, row += stride) {
+    for (x = 0; x < 32; ++x) row[x] = highbd_clip_pixel_add(row[x], dc_add, bd);
+  }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/vp10_inv_txfm.h b/av1/common/vp10_inv_txfm.h
new file mode 100644
index 0000000..b53db48
--- /dev/null
+++ b/av1/common/vp10_inv_txfm.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_INV_TXFM_H_
+#define VPX_DSP_INV_TXFM_H_
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_dsp/inv_txfm.h"
+#include "aom_ports/mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static INLINE tran_high_t check_range(tran_high_t input) {
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+ // For valid input streams, intermediate stage coefficients should always
+ // stay within the range of a signed 16 bit integer. Coefficients can go out
+ // of this range for invalid/corrupt streams. However, strictly checking
+ // this range for every intermediate coefficient can be burdensome for a
+ // decoder, therefore the following assertions are only enabled when
+ // configured with --enable-coefficient-range-checking.
+ assert(INT16_MIN <= input);
+ assert(input <= INT16_MAX);
+#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
+ return input;  // The value itself is always returned unchanged.
+}
+
+// Returns |input| passed through ROUND_POWER_OF_TWO with DCT_CONST_BITS.
+static INLINE tran_high_t dct_const_round_shift(tran_high_t input) {
+  return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE tran_high_t highbd_check_range(tran_high_t input, int bd) {
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+ // For valid highbitdepth streams, intermediate stage coefficients will
+ // stay within the ranges:
+ // - 8 bit: signed 16 bit integer
+ // - 10 bit: signed 18 bit integer
+ // - 12 bit: signed 20 bit integer
+ const int32_t int_max = (1 << (7 + bd)) - 1;  // max of a signed (8+bd)-bit int
+ const int32_t int_min = -int_max - 1;
+ assert(int_min <= input);
+ assert(input <= int_max);
+ (void)int_min;  // presumably avoids an unused warning if assert() is a no-op
+#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
+ (void)bd;  // bd is not otherwise referenced when range checking is disabled
+ return input;  // The value itself is always returned unchanged.
+}
+
+// Returns |input| passed through ROUND_POWER_OF_TWO with DCT_CONST_BITS.
+static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) {
+  return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_EMULATE_HARDWARE
+// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
+// non-normative method to handle overflows. A stream that causes
+// overflows in the inverse transform is considered invalid,
+// and a hardware implementer is free to choose any reasonable
+// method to handle overflows. However to aid in hardware
+// verification they can use a specific implementation of the
+// WRAPLOW() macro below that is identical to their intended
+// hardware implementation (and also use configure options to trigger
+// the C-implementation of the transform).
+//
+// The particular WRAPLOW implementation below performs strict
+// overflow wrapping to match common hardware implementations.
+// bd of 8 uses tran_low_t with 16 bits, need to remove 16 bits
+// bd of 10 uses tran_low_t with 18 bits, need to remove 14 bits
+// bd of 12 uses tran_low_t with 20 bits, need to remove 12 bits
+// bd of x uses tran_low_t with 8+x bits, need to remove 24-x bits
+// The left shift is done on uint32_t: shifting a negative int32_t left is UB.
+#define WRAPLOW(x) ((int32_t)((uint32_t)check_range(x) << 16) >> 16)
+#if CONFIG_VP9_HIGHBITDEPTH
+#define HIGHBD_WRAPLOW(x, bd) \
+  ((int32_t)((uint32_t)highbd_check_range(x, bd) << (24 - (bd))) >> (24 - (bd)))
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#else  // CONFIG_EMULATE_HARDWARE
+
+#define WRAPLOW(x) ((int32_t)check_range(x))
+#if CONFIG_VP9_HIGHBITDEPTH
+#define HIGHBD_WRAPLOW(x, bd) ((int32_t)highbd_check_range((x), bd))
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#endif  // CONFIG_EMULATE_HARDWARE
+
+void vp10_idct4_c(const tran_low_t *input, tran_low_t *output);
+void vp10_idct8_c(const tran_low_t *input, tran_low_t *output);
+void vp10_idct16_c(const tran_low_t *input, tran_low_t *output);
+void vp10_idct32_c(const tran_low_t *input, tran_low_t *output);
+void vp10_iadst4_c(const tran_low_t *input, tran_low_t *output);
+void vp10_iadst8_c(const tran_low_t *input, tran_low_t *output);
+void vp10_iadst16_c(const tran_low_t *input, tran_low_t *output);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vp10_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vp10_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);
+
+void vp10_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vp10_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vp10_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);
+
+static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
+                                             int bd) {
+  // Wrap the residual first, then add it to the pixel and clip the sum.
+  return clip_pixel_highbd(dest + (int)HIGHBD_WRAPLOW(trans, bd), bd);
+}
+#endif
+
+static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
+  // Wrap the residual first, then add it to the pixel and clip the sum.
+  return clip_pixel(dest + (int)WRAPLOW(trans));
+}
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif // VPX_DSP_INV_TXFM_H_
diff --git a/av1/common/vp10_inv_txfm1d.c b/av1/common/vp10_inv_txfm1d.c
new file mode 100644
index 0000000..76fb623
--- /dev/null
+++ b/av1/common/vp10_inv_txfm1d.c
@@ -0,0 +1,1537 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include "av1/common/vp10_inv_txfm1d.h"
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+#define range_check(stage, input, buf, size, bit) \
+ { \
+ int i, j; \
+ for (i = 0; i < size; ++i) { \
+ int buf_bit = get_max_bit(abs(buf[i])) + 1; \
+ if (buf_bit > bit) { \
+ printf("======== %s %d overflow ========\n", __FILE__, __LINE__); \
+ printf("stage: %d node: %d\n", stage, i); \
+ printf("bit: %d buf_bit: %d buf[i]: %d\n", bit, buf_bit, buf[i]); \
+ printf("input:\n"); \
+ for (j = 0; j < size; j++) { \
+ printf("%d,", input[j]); \
+ } \
+ printf("\n"); \
+ assert(0); \
+ } \
+ } \
+ }
+#else
+#define range_check(stage, input, buf, size, bit) \
+ { \
+ (void) stage; \
+ (void) input; \
+ (void) buf; \
+ (void) size; \
+ (void) bit; \
+ }
+#endif
+
+void vp10_idct4_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range) {
+  // 4-point 1-D inverse transform; |output| and |step| alternate as buffers.
+  const int32_t size = 4;
+  const int32_t *cospi;
+  int32_t step[4];
+  int32_t *src, *dst;
+  int32_t stage = 0;
+
+  // Stage 0: range-check the raw input only.
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // Stage 1: reorder the input into |output| (source order 0, 2, 1, 3).
+  ++stage;
+  dst = output;
+  dst[0] = input[0];
+  dst[1] = input[2];
+  dst[2] = input[1];
+  dst[3] = input[3];
+  range_check(stage, input, dst, size, stage_range[stage]);
+
+  // Stage 2: combine pairs (0,1) and (2,3) through half_btf into |step|.
+  ++stage;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  src = output;
+  dst = step;
+  dst[0] = half_btf(cospi[32], src[0], cospi[32], src[1], cos_bit[stage]);
+  dst[1] = half_btf(cospi[32], src[0], -cospi[32], src[1], cos_bit[stage]);
+  dst[2] = half_btf(cospi[48], src[2], -cospi[16], src[3], cos_bit[stage]);
+  dst[3] = half_btf(cospi[16], src[2], cospi[48], src[3], cos_bit[stage]);
+  range_check(stage, input, dst, size, stage_range[stage]);
+
+  // Stage 3: mirrored add/subtract of |step| produces the final |output|.
+  ++stage;
+  src = step;
+  dst = output;
+  dst[0] = src[0] + src[3];
+  dst[1] = src[1] + src[2];
+  dst[2] = src[1] - src[2];
+  dst[3] = src[0] - src[3];
+  range_check(stage, input, dst, size, stage_range[stage]);
+}
+
+void vp10_idct8_new(const int32_t *input, int32_t *output,
+                    const int8_t *cos_bit, const int8_t *stage_range) {
+  // 8-point 1-D inverse transform; |output| and |step| alternate as buffers.
+  const int32_t size = 8;
+  const int32_t *cospi;
+  int32_t step[8];
+  int32_t *src, *dst;
+  int32_t stage = 0;
+
+  // Stage 0: range-check the raw input only.
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // Stage 1: reorder the input into |output|.
+  ++stage;
+  dst = output;
+  dst[0] = input[0];
+  dst[1] = input[4];
+  dst[2] = input[2];
+  dst[3] = input[6];
+  dst[4] = input[1];
+  dst[5] = input[5];
+  dst[6] = input[3];
+  dst[7] = input[7];
+  range_check(stage, input, dst, size, stage_range[stage]);
+
+  // Stage 2: entries 0..3 pass through; 4..7 are combined through half_btf.
+  ++stage;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  src = output;
+  dst = step;
+  dst[0] = src[0];
+  dst[1] = src[1];
+  dst[2] = src[2];
+  dst[3] = src[3];
+  dst[4] = half_btf(cospi[56], src[4], -cospi[8], src[7], cos_bit[stage]);
+  dst[5] = half_btf(cospi[24], src[5], -cospi[40], src[6], cos_bit[stage]);
+  dst[6] = half_btf(cospi[40], src[5], cospi[24], src[6], cos_bit[stage]);
+  dst[7] = half_btf(cospi[8], src[4], cospi[56], src[7], cos_bit[stage]);
+  range_check(stage, input, dst, size, stage_range[stage]);
+
+  // Stage 3: half_btf on entries 0..3; add/subtract neighbours in 4..7.
+  ++stage;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  src = step;
+  dst = output;
+  dst[0] = half_btf(cospi[32], src[0], cospi[32], src[1], cos_bit[stage]);
+  dst[1] = half_btf(cospi[32], src[0], -cospi[32], src[1], cos_bit[stage]);
+  dst[2] = half_btf(cospi[48], src[2], -cospi[16], src[3], cos_bit[stage]);
+  dst[3] = half_btf(cospi[16], src[2], cospi[48], src[3], cos_bit[stage]);
+  dst[4] = src[4] + src[5];
+  dst[5] = src[4] - src[5];
+  dst[6] = -src[6] + src[7];
+  dst[7] = src[6] + src[7];
+  range_check(stage, input, dst, size, stage_range[stage]);
+
+  // Stage 4: mirrored add/subtract on 0..3; half_btf on the pair (5,6).
+  ++stage;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  src = output;
+  dst = step;
+  dst[0] = src[0] + src[3];
+  dst[1] = src[1] + src[2];
+  dst[2] = src[1] - src[2];
+  dst[3] = src[0] - src[3];
+  dst[4] = src[4];
+  dst[5] = half_btf(-cospi[32], src[5], cospi[32], src[6], cos_bit[stage]);
+  dst[6] = half_btf(cospi[32], src[5], cospi[32], src[6], cos_bit[stage]);
+  dst[7] = src[7];
+  range_check(stage, input, dst, size, stage_range[stage]);
+
+  // Stage 5: mirrored add/subtract over all 8 entries gives the |output|.
+  ++stage;
+  src = step;
+  dst = output;
+  dst[0] = src[0] + src[7];
+  dst[1] = src[1] + src[6];
+  dst[2] = src[2] + src[5];
+  dst[3] = src[3] + src[4];
+  dst[4] = src[3] - src[4];
+  dst[5] = src[2] - src[5];
+  dst[6] = src[1] - src[6];
+  dst[7] = src[0] - src[7];
+  range_check(stage, input, dst, size, stage_range[stage]);
+}
+
+void vp10_idct16_new(const int32_t *input, int32_t *output,
+                     const int8_t *cos_bit, const int8_t *stage_range) {
+  // 16-point 1-D inverse transform; |output| and |step| alternate as buffers.
+  const int32_t size = 16;
+  const int32_t *cospi;
+  int32_t step[16];
+  int32_t *src, *dst;
+  int32_t stage = 0;
+
+  // Stage 0: range-check the raw input only.
+  range_check(stage, input, input, size, stage_range[stage]);
+
+  // Stage 1: reorder the input into |output|.
+  ++stage;
+  dst = output;
+  dst[0] = input[0];
+  dst[1] = input[8];
+  dst[2] = input[4];
+  dst[3] = input[12];
+  dst[4] = input[2];
+  dst[5] = input[10];
+  dst[6] = input[6];
+  dst[7] = input[14];
+  dst[8] = input[1];
+  dst[9] = input[9];
+  dst[10] = input[5];
+  dst[11] = input[13];
+  dst[12] = input[3];
+  dst[13] = input[11];
+  dst[14] = input[7];
+  dst[15] = input[15];
+  range_check(stage, input, dst, size, stage_range[stage]);
+
+  // Stage 2: entries 0..7 pass through; 8..15 are combined through half_btf.
+  ++stage;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  src = output;
+  dst = step;
+  dst[0] = src[0];
+  dst[1] = src[1];
+  dst[2] = src[2];
+  dst[3] = src[3];
+  dst[4] = src[4];
+  dst[5] = src[5];
+  dst[6] = src[6];
+  dst[7] = src[7];
+  dst[8] = half_btf(cospi[60], src[8], -cospi[4], src[15], cos_bit[stage]);
+  dst[9] = half_btf(cospi[28], src[9], -cospi[36], src[14], cos_bit[stage]);
+  dst[10] = half_btf(cospi[44], src[10], -cospi[20], src[13], cos_bit[stage]);
+  dst[11] = half_btf(cospi[12], src[11], -cospi[52], src[12], cos_bit[stage]);
+  dst[12] = half_btf(cospi[52], src[11], cospi[12], src[12], cos_bit[stage]);
+  dst[13] = half_btf(cospi[20], src[10], cospi[44], src[13], cos_bit[stage]);
+  dst[14] = half_btf(cospi[36], src[9], cospi[28], src[14], cos_bit[stage]);
+  dst[15] = half_btf(cospi[4], src[8], cospi[60], src[15], cos_bit[stage]);
+  range_check(stage, input, dst, size, stage_range[stage]);
+
+  // Stage 3: 0..3 pass through; half_btf on 4..7; add/subtract within 8..15.
+  ++stage;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  src = step;
+  dst = output;
+  dst[0] = src[0];
+  dst[1] = src[1];
+  dst[2] = src[2];
+  dst[3] = src[3];
+  dst[4] = half_btf(cospi[56], src[4], -cospi[8], src[7], cos_bit[stage]);
+  dst[5] = half_btf(cospi[24], src[5], -cospi[40], src[6], cos_bit[stage]);
+  dst[6] = half_btf(cospi[40], src[5], cospi[24], src[6], cos_bit[stage]);
+  dst[7] = half_btf(cospi[8], src[4], cospi[56], src[7], cos_bit[stage]);
+  dst[8] = src[8] + src[9];
+  dst[9] = src[8] - src[9];
+  dst[10] = -src[10] + src[11];
+  dst[11] = src[10] + src[11];
+  dst[12] = src[12] + src[13];
+  dst[13] = src[12] - src[13];
+  dst[14] = -src[14] + src[15];
+  dst[15] = src[14] + src[15];
+  range_check(stage, input, dst, size, stage_range[stage]);
+
+  // Stage 4: half_btf on 0..3, (9,14), (10,13); add/subtract within 4..7.
+  ++stage;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  src = output;
+  dst = step;
+  dst[0] = half_btf(cospi[32], src[0], cospi[32], src[1], cos_bit[stage]);
+  dst[1] = half_btf(cospi[32], src[0], -cospi[32], src[1], cos_bit[stage]);
+  dst[2] = half_btf(cospi[48], src[2], -cospi[16], src[3], cos_bit[stage]);
+  dst[3] = half_btf(cospi[16], src[2], cospi[48], src[3], cos_bit[stage]);
+  dst[4] = src[4] + src[5];
+  dst[5] = src[4] - src[5];
+  dst[6] = -src[6] + src[7];
+  dst[7] = src[6] + src[7];
+  dst[8] = src[8];
+  dst[9] = half_btf(-cospi[16], src[9], cospi[48], src[14], cos_bit[stage]);
+  dst[10] = half_btf(-cospi[48], src[10], -cospi[16], src[13], cos_bit[stage]);
+  dst[11] = src[11];
+  dst[12] = src[12];
+  dst[13] = half_btf(-cospi[16], src[10], cospi[48], src[13], cos_bit[stage]);
+  dst[14] = half_btf(cospi[48], src[9], cospi[16], src[14], cos_bit[stage]);
+  dst[15] = src[15];
+  range_check(stage, input, dst, size, stage_range[stage]);
+
+  // Stage 5: add/subtract on 0..3 and 8..15; half_btf on the pair (5,6).
+  ++stage;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  src = step;
+  dst = output;
+  dst[0] = src[0] + src[3];
+  dst[1] = src[1] + src[2];
+  dst[2] = src[1] - src[2];
+  dst[3] = src[0] - src[3];
+  dst[4] = src[4];
+  dst[5] = half_btf(-cospi[32], src[5], cospi[32], src[6], cos_bit[stage]);
+  dst[6] = half_btf(cospi[32], src[5], cospi[32], src[6], cos_bit[stage]);
+  dst[7] = src[7];
+  dst[8] = src[8] + src[11];
+  dst[9] = src[9] + src[10];
+  dst[10] = src[9] - src[10];
+  dst[11] = src[8] - src[11];
+  dst[12] = -src[12] + src[15];
+  dst[13] = -src[13] + src[14];
+  dst[14] = src[13] + src[14];
+  dst[15] = src[12] + src[15];
+  range_check(stage, input, dst, size, stage_range[stage]);
+
+  // Stage 6: add/subtract on 0..7; half_btf on the pairs (10,13), (11,12).
+  ++stage;
+  cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+  src = output;
+  dst = step;
+  dst[0] = src[0] + src[7];
+  dst[1] = src[1] + src[6];
+  dst[2] = src[2] + src[5];
+  dst[3] = src[3] + src[4];
+  dst[4] = src[3] - src[4];
+  dst[5] = src[2] - src[5];
+  dst[6] = src[1] - src[6];
+  dst[7] = src[0] - src[7];
+  dst[8] = src[8];
+  dst[9] = src[9];
+  dst[10] = half_btf(-cospi[32], src[10], cospi[32], src[13], cos_bit[stage]);
+  dst[11] = half_btf(-cospi[32], src[11], cospi[32], src[12], cos_bit[stage]);
+  dst[12] = half_btf(cospi[32], src[11], cospi[32], src[12], cos_bit[stage]);
+  dst[13] = half_btf(cospi[32], src[10], cospi[32], src[13], cos_bit[stage]);
+  dst[14] = src[14];
+  dst[15] = src[15];
+  range_check(stage, input, dst, size, stage_range[stage]);
+
+  // Stage 7: mirrored add/subtract over all 16 entries gives the |output|.
+  ++stage;
+  src = step;
+  dst = output;
+  dst[0] = src[0] + src[15];
+  dst[1] = src[1] + src[14];
+  dst[2] = src[2] + src[13];
+  dst[3] = src[3] + src[12];
+  dst[4] = src[4] + src[11];
+  dst[5] = src[5] + src[10];
+  dst[6] = src[6] + src[9];
+  dst[7] = src[7] + src[8];
+  dst[8] = src[7] - src[8];
+  dst[9] = src[6] - src[9];
+  dst[10] = src[5] - src[10];
+  dst[11] = src[4] - src[11];
+  dst[12] = src[3] - src[12];
+  dst[13] = src[2] - src[13];
+  dst[14] = src[1] - src[14];
+  dst[15] = src[0] - src[15];
+  range_check(stage, input, dst, size, stage_range[stage]);
+}
+
+void vp10_idct32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 32;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[32];  // scratch; stages alternate between step and output
+
+ // stage 0: range_check the unmodified input against stage_range[0]
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1: copy input to output in 5-bit bit-reversed index order
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = input[16];
+ bf1[2] = input[8];
+ bf1[3] = input[24];
+ bf1[4] = input[4];
+ bf1[5] = input[20];
+ bf1[6] = input[12];
+ bf1[7] = input[28];
+ bf1[8] = input[2];
+ bf1[9] = input[18];
+ bf1[10] = input[10];
+ bf1[11] = input[26];
+ bf1[12] = input[6];
+ bf1[13] = input[22];
+ bf1[14] = input[14];
+ bf1[15] = input[30];
+ bf1[16] = input[1];
+ bf1[17] = input[17];
+ bf1[18] = input[9];
+ bf1[19] = input[25];
+ bf1[20] = input[5];
+ bf1[21] = input[21];
+ bf1[22] = input[13];
+ bf1[23] = input[29];
+ bf1[24] = input[3];
+ bf1[25] = input[19];
+ bf1[26] = input[11];
+ bf1[27] = input[27];
+ bf1[28] = input[7];
+ bf1[29] = input[23];
+ bf1[30] = input[15];
+ bf1[31] = input[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2: 0..15 pass through; half_btf on pairs (16,31)..(23,24)
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[62], bf0[16], -cospi[2], bf0[31], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[30], bf0[17], -cospi[34], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[46], bf0[18], -cospi[18], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[14], bf0[19], -cospi[50], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[54], bf0[20], -cospi[10], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[22], bf0[21], -cospi[42], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[38], bf0[22], -cospi[26], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[6], bf0[23], -cospi[58], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[58], bf0[23], cospi[6], bf0[24], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[26], bf0[22], cospi[38], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[42], bf0[21], cospi[22], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[10], bf0[20], cospi[54], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[50], bf0[19], cospi[14], bf0[28], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[18], bf0[18], cospi[46], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[34], bf0[17], cospi[30], bf0[30], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[2], bf0[16], cospi[62], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3: 0..7 pass through; half_btf on (8,15)..(11,12); add/sub 16..31
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[60], bf0[8], -cospi[4], bf0[15], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[28], bf0[9], -cospi[36], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[44], bf0[10], -cospi[20], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[12], bf0[11], -cospi[52], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[52], bf0[11], cospi[12], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[20], bf0[10], cospi[44], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[36], bf0[9], cospi[28], bf0[14], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[4], bf0[8], cospi[60], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16] + bf0[17];
+ bf1[17] = bf0[16] - bf0[17];
+ bf1[18] = -bf0[18] + bf0[19];
+ bf1[19] = bf0[18] + bf0[19];
+ bf1[20] = bf0[20] + bf0[21];
+ bf1[21] = bf0[20] - bf0[21];
+ bf1[22] = -bf0[22] + bf0[23];
+ bf1[23] = bf0[22] + bf0[23];
+ bf1[24] = bf0[24] + bf0[25];
+ bf1[25] = bf0[24] - bf0[25];
+ bf1[26] = -bf0[26] + bf0[27];
+ bf1[27] = bf0[26] + bf0[27];
+ bf1[28] = bf0[28] + bf0[29];
+ bf1[29] = bf0[28] - bf0[29];
+ bf1[30] = -bf0[30] + bf0[31];
+ bf1[31] = bf0[30] + bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4: half_btf on (4,7),(5,6) and four pairs in 17..30; add/sub 8..15
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[56], bf0[4], -cospi[8], bf0[7], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[24], bf0[5], -cospi[40], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[40], bf0[5], cospi[24], bf0[6], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[8], bf0[4], cospi[56], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8] + bf0[9];
+ bf1[9] = bf0[8] - bf0[9];
+ bf1[10] = -bf0[10] + bf0[11];
+ bf1[11] = bf0[10] + bf0[11];
+ bf1[12] = bf0[12] + bf0[13];
+ bf1[13] = bf0[12] - bf0[13];
+ bf1[14] = -bf0[14] + bf0[15];
+ bf1[15] = bf0[14] + bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit[stage]);
+ bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit[stage]);
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit[stage]);
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = half_btf(-cospi[40], bf0[22], cospi[24], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[24], bf0[21], cospi[40], bf0[26], cos_bit[stage]);
+ bf1[27] = bf0[27];
+ bf1[28] = bf0[28];
+ bf1[29] = half_btf(-cospi[8], bf0[18], cospi[56], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[56], bf0[17], cospi[8], bf0[30], cos_bit[stage]);
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5: half_btf on (0,1),(2,3),(9,14),(10,13); add/sub 4..7 and 16..31
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[32], bf0[0], -cospi[32], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[48], bf0[2], -cospi[16], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[16], bf0[2], cospi[48], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4] + bf0[5];
+ bf1[5] = bf0[4] - bf0[5];
+ bf1[6] = -bf0[6] + bf0[7];
+ bf1[7] = bf0[6] + bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit[stage]);
+ bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = half_btf(-cospi[16], bf0[10], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[48], bf0[9], cospi[16], bf0[14], cos_bit[stage]);
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[19];
+ bf1[17] = bf0[17] + bf0[18];
+ bf1[18] = bf0[17] - bf0[18];
+ bf1[19] = bf0[16] - bf0[19];
+ bf1[20] = -bf0[20] + bf0[23];
+ bf1[21] = -bf0[21] + bf0[22];
+ bf1[22] = bf0[21] + bf0[22];
+ bf1[23] = bf0[20] + bf0[23];
+ bf1[24] = bf0[24] + bf0[27];
+ bf1[25] = bf0[25] + bf0[26];
+ bf1[26] = bf0[25] - bf0[26];
+ bf1[27] = bf0[24] - bf0[27];
+ bf1[28] = -bf0[28] + bf0[31];
+ bf1[29] = -bf0[29] + bf0[30];
+ bf1[30] = bf0[29] + bf0[30];
+ bf1[31] = bf0[28] + bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6: add/sub 0..3 and 8..15; half_btf on (5,6) and (18,29)..(21,26)
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[3];
+ bf1[1] = bf0[1] + bf0[2];
+ bf1[2] = bf0[1] - bf0[2];
+ bf1[3] = bf0[0] - bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[32], bf0[5], cospi[32], bf0[6], cos_bit[stage]);
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8] + bf0[11];
+ bf1[9] = bf0[9] + bf0[10];
+ bf1[10] = bf0[9] - bf0[10];
+ bf1[11] = bf0[8] - bf0[11];
+ bf1[12] = -bf0[12] + bf0[15];
+ bf1[13] = -bf0[13] + bf0[14];
+ bf1[14] = bf0[13] + bf0[14];
+ bf1[15] = bf0[12] + bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit[stage]);
+ bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit[stage]);
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(-cospi[16], bf0[21], cospi[48], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(-cospi[16], bf0[20], cospi[48], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[48], bf0[19], cospi[16], bf0[28], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[48], bf0[18], cospi[16], bf0[29], cos_bit[stage]);
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7: add/sub 0..7 and 16..31; half_btf on (10,13),(11,12)
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[7];
+ bf1[1] = bf0[1] + bf0[6];
+ bf1[2] = bf0[2] + bf0[5];
+ bf1[3] = bf0[3] + bf0[4];
+ bf1[4] = bf0[3] - bf0[4];
+ bf1[5] = bf0[2] - bf0[5];
+ bf1[6] = bf0[1] - bf0[6];
+ bf1[7] = bf0[0] - bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[32], bf0[11], cospi[32], bf0[12], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[32], bf0[10], cospi[32], bf0[13], cos_bit[stage]);
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = bf0[16] + bf0[23];
+ bf1[17] = bf0[17] + bf0[22];
+ bf1[18] = bf0[18] + bf0[21];
+ bf1[19] = bf0[19] + bf0[20];
+ bf1[20] = bf0[19] - bf0[20];
+ bf1[21] = bf0[18] - bf0[21];
+ bf1[22] = bf0[17] - bf0[22];
+ bf1[23] = bf0[16] - bf0[23];
+ bf1[24] = -bf0[24] + bf0[31];
+ bf1[25] = -bf0[25] + bf0[30];
+ bf1[26] = -bf0[26] + bf0[29];
+ bf1[27] = -bf0[27] + bf0[28];
+ bf1[28] = bf0[27] + bf0[28];
+ bf1[29] = bf0[26] + bf0[29];
+ bf1[30] = bf0[25] + bf0[30];
+ bf1[31] = bf0[24] + bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8: add/sub 0..15; half_btf on (20,27)..(23,24) using cospi[32]
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0] + bf0[15];
+ bf1[1] = bf0[1] + bf0[14];
+ bf1[2] = bf0[2] + bf0[13];
+ bf1[3] = bf0[3] + bf0[12];
+ bf1[4] = bf0[4] + bf0[11];
+ bf1[5] = bf0[5] + bf0[10];
+ bf1[6] = bf0[6] + bf0[9];
+ bf1[7] = bf0[7] + bf0[8];
+ bf1[8] = bf0[7] - bf0[8];
+ bf1[9] = bf0[6] - bf0[9];
+ bf1[10] = bf0[5] - bf0[10];
+ bf1[11] = bf0[4] - bf0[11];
+ bf1[12] = bf0[3] - bf0[12];
+ bf1[13] = bf0[2] - bf0[13];
+ bf1[14] = bf0[1] - bf0[14];
+ bf1[15] = bf0[0] - bf0[15];
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[32], bf0[23], cospi[32], bf0[24], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[32], bf0[22], cospi[32], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[32], bf0[21], cospi[32], bf0[26], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[20], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = bf0[30];
+ bf1[31] = bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9: final add/sub of mirrored pairs (i, 31 - i) from step into output
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[31];
+ bf1[1] = bf0[1] + bf0[30];
+ bf1[2] = bf0[2] + bf0[29];
+ bf1[3] = bf0[3] + bf0[28];
+ bf1[4] = bf0[4] + bf0[27];
+ bf1[5] = bf0[5] + bf0[26];
+ bf1[6] = bf0[6] + bf0[25];
+ bf1[7] = bf0[7] + bf0[24];
+ bf1[8] = bf0[8] + bf0[23];
+ bf1[9] = bf0[9] + bf0[22];
+ bf1[10] = bf0[10] + bf0[21];
+ bf1[11] = bf0[11] + bf0[20];
+ bf1[12] = bf0[12] + bf0[19];
+ bf1[13] = bf0[13] + bf0[18];
+ bf1[14] = bf0[14] + bf0[17];
+ bf1[15] = bf0[15] + bf0[16];
+ bf1[16] = bf0[15] - bf0[16];
+ bf1[17] = bf0[14] - bf0[17];
+ bf1[18] = bf0[13] - bf0[18];
+ bf1[19] = bf0[12] - bf0[19];
+ bf1[20] = bf0[11] - bf0[20];
+ bf1[21] = bf0[10] - bf0[21];
+ bf1[22] = bf0[9] - bf0[22];
+ bf1[23] = bf0[8] - bf0[23];
+ bf1[24] = bf0[7] - bf0[24];
+ bf1[25] = bf0[6] - bf0[25];
+ bf1[26] = bf0[5] - bf0[26];
+ bf1[27] = bf0[4] - bf0[27];
+ bf1[28] = bf0[3] - bf0[28];
+ bf1[29] = bf0[2] - bf0[29];
+ bf1[30] = bf0[1] - bf0[30];
+ bf1[31] = bf0[0] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 4;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[4];  // scratch; stages alternate between step and output
+
+ // stage 0: range_check the unmodified input against stage_range[0]
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1: permute input into output, negating every odd-indexed input
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = -input[3];
+ bf1[2] = -input[1];
+ bf1[3] = input[2];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2: 0,1 pass through; half_btf on (2,3) using cospi[32]
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3: add/sub of (0,2) and (1,3); cospi is reloaded but unused here
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = bf0[0] - bf0[2];
+ bf1[3] = bf0[1] - bf0[3];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4: half_btf on (0,1) with cospi[8]/[56], (2,3) with cospi[40]/[24]
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[8], bf0[0], cospi[56], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[56], bf0[0], -cospi[8], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[40], bf0[2], cospi[24], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[24], bf0[2], -cospi[40], bf0[3], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5: permute step into output (source order 1, 2, 3, 0)
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[1];
+ bf1[1] = bf0[2];
+ bf1[2] = bf0[3];
+ bf1[3] = bf0[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 8;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[8];  // scratch; stages alternate between step and output
+
+ // stage 0: range_check the unmodified input against stage_range[0]
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1: permute input into output, negating every odd-indexed input
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = -input[7];
+ bf1[2] = -input[3];
+ bf1[3] = input[4];
+ bf1[4] = -input[1];
+ bf1[5] = input[6];
+ bf1[6] = input[2];
+ bf1[7] = -input[5];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2: half_btf on (2,3) and (6,7) using cospi[32]; rest pass through
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3: add/sub of (0,2),(1,3),(4,6),(5,7); cospi reloaded but unused
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = bf0[0] - bf0[2];
+ bf1[3] = bf0[1] - bf0[3];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = bf0[4] - bf0[6];
+ bf1[7] = bf0[5] - bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4: 0..3 pass through; half_btf on (4,5),(6,7) with cospi[16]/[48]
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5: add/sub of (i, i + 4) for i in 0..3; cospi reloaded but unused
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = bf0[0] - bf0[4];
+ bf1[5] = bf0[1] - bf0[5];
+ bf1[6] = bf0[2] - bf0[6];
+ bf1[7] = bf0[3] - bf0[7];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6: half_btf on each adjacent pair (0,1),(2,3),(4,5),(6,7)
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7: permute step into output (source order 1, 6, 3, 4, 5, 2, 7, 0)
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[1];
+ bf1[1] = bf0[6];
+ bf1[2] = bf0[3];
+ bf1[3] = bf0[4];
+ bf1[4] = bf0[5];
+ bf1[5] = bf0[2];
+ bf1[6] = bf0[7];
+ bf1[7] = bf0[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 16;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[16];  // scratch; stages alternate between step and output
+
+ // stage 0: range_check the unmodified input against stage_range[0]
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1: permute input into output, negating every odd-indexed input
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = -input[15];
+ bf1[2] = -input[7];
+ bf1[3] = input[8];
+ bf1[4] = -input[3];
+ bf1[5] = input[12];
+ bf1[6] = input[4];
+ bf1[7] = -input[11];
+ bf1[8] = -input[1];
+ bf1[9] = input[14];
+ bf1[10] = input[6];
+ bf1[11] = -input[9];
+ bf1[12] = input[2];
+ bf1[13] = -input[13];
+ bf1[14] = -input[5];
+ bf1[15] = input[10];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2: half_btf on (2,3),(6,7),(10,11),(14,15) using cospi[32]
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]);
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3: add/sub of (i, i + 2) inside each group of 4; cospi unused here
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = bf0[0] - bf0[2];
+ bf1[3] = bf0[1] - bf0[3];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = bf0[4] - bf0[6];
+ bf1[7] = bf0[5] - bf0[7];
+ bf1[8] = bf0[8] + bf0[10];
+ bf1[9] = bf0[9] + bf0[11];
+ bf1[10] = bf0[8] - bf0[10];
+ bf1[11] = bf0[9] - bf0[11];
+ bf1[12] = bf0[12] + bf0[14];
+ bf1[13] = bf0[13] + bf0[15];
+ bf1[14] = bf0[12] - bf0[14];
+ bf1[15] = bf0[13] - bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4: half_btf on (4,5),(6,7),(12,13),(14,15) with cospi[16]/[48]
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5: add/sub of (i, i + 4) inside each group of 8; cospi unused here
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = bf0[0] - bf0[4];
+ bf1[5] = bf0[1] - bf0[5];
+ bf1[6] = bf0[2] - bf0[6];
+ bf1[7] = bf0[3] - bf0[7];
+ bf1[8] = bf0[8] + bf0[12];
+ bf1[9] = bf0[9] + bf0[13];
+ bf1[10] = bf0[10] + bf0[14];
+ bf1[11] = bf0[11] + bf0[15];
+ bf1[12] = bf0[8] - bf0[12];
+ bf1[13] = bf0[9] - bf0[13];
+ bf1[14] = bf0[10] - bf0[14];
+ bf1[15] = bf0[11] - bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6: 0..7 pass through; half_btf on adjacent pairs in 8..15
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7: add/sub of (i, i + 8) for i in 0..7; cospi unused here
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[8];
+ bf1[1] = bf0[1] + bf0[9];
+ bf1[2] = bf0[2] + bf0[10];
+ bf1[3] = bf0[3] + bf0[11];
+ bf1[4] = bf0[4] + bf0[12];
+ bf1[5] = bf0[5] + bf0[13];
+ bf1[6] = bf0[6] + bf0[14];
+ bf1[7] = bf0[7] + bf0[15];
+ bf1[8] = bf0[0] - bf0[8];
+ bf1[9] = bf0[1] - bf0[9];
+ bf1[10] = bf0[2] - bf0[10];
+ bf1[11] = bf0[3] - bf0[11];
+ bf1[12] = bf0[4] - bf0[12];
+ bf1[13] = bf0[5] - bf0[13];
+ bf1[14] = bf0[6] - bf0[14];
+ bf1[15] = bf0[7] - bf0[15];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8: half_btf on every adjacent pair (2k, 2k + 1)
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit[stage]);
+ bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9: permute step into output (source order 1, 14, 3, 12, ..., 15, 0)
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[1];
+ bf1[1] = bf0[14];
+ bf1[2] = bf0[3];
+ bf1[3] = bf0[12];
+ bf1[4] = bf0[5];
+ bf1[5] = bf0[10];
+ bf1[6] = bf0[7];
+ bf1[7] = bf0[8];
+ bf1[8] = bf0[9];
+ bf1[9] = bf0[6];
+ bf1[10] = bf0[11];
+ bf1[11] = bf0[4];
+ bf1[12] = bf0[13];
+ bf1[13] = bf0[2];
+ bf1[14] = bf0[15];
+ bf1[15] = bf0[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
+
+void vp10_iadst32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int32_t size = 32;
+ const int32_t *cospi;
+
+ int32_t stage = 0;
+ int32_t *bf0, *bf1;
+ int32_t step[32];
+
+ // stage 0;
+ range_check(stage, input, input, size, stage_range[stage]);
+
+ // stage 1;
+ stage++;
+ bf1 = output;
+ bf1[0] = input[0];
+ bf1[1] = -input[31];
+ bf1[2] = -input[15];
+ bf1[3] = input[16];
+ bf1[4] = -input[7];
+ bf1[5] = input[24];
+ bf1[6] = input[8];
+ bf1[7] = -input[23];
+ bf1[8] = -input[3];
+ bf1[9] = input[28];
+ bf1[10] = input[12];
+ bf1[11] = -input[19];
+ bf1[12] = input[4];
+ bf1[13] = -input[27];
+ bf1[14] = -input[11];
+ bf1[15] = input[20];
+ bf1[16] = -input[1];
+ bf1[17] = input[30];
+ bf1[18] = input[14];
+ bf1[19] = -input[17];
+ bf1[20] = input[6];
+ bf1[21] = -input[25];
+ bf1[22] = -input[9];
+ bf1[23] = input[22];
+ bf1[24] = input[2];
+ bf1[25] = -input[29];
+ bf1[26] = -input[13];
+ bf1[27] = input[18];
+ bf1[28] = -input[5];
+ bf1[29] = input[26];
+ bf1[30] = input[10];
+ bf1[31] = -input[21];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 2
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit[stage]);
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit[stage]);
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = half_btf(cospi[32], bf0[18], cospi[32], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[32], bf0[18], -cospi[32], bf0[19], cos_bit[stage]);
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = half_btf(cospi[32], bf0[22], cospi[32], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[32], bf0[22], -cospi[32], bf0[23], cos_bit[stage]);
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[32], bf0[26], -cospi[32], bf0[27], cos_bit[stage]);
+ bf1[28] = bf0[28];
+ bf1[29] = bf0[29];
+ bf1[30] = half_btf(cospi[32], bf0[30], cospi[32], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[32], bf0[30], -cospi[32], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 3
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[2];
+ bf1[1] = bf0[1] + bf0[3];
+ bf1[2] = bf0[0] - bf0[2];
+ bf1[3] = bf0[1] - bf0[3];
+ bf1[4] = bf0[4] + bf0[6];
+ bf1[5] = bf0[5] + bf0[7];
+ bf1[6] = bf0[4] - bf0[6];
+ bf1[7] = bf0[5] - bf0[7];
+ bf1[8] = bf0[8] + bf0[10];
+ bf1[9] = bf0[9] + bf0[11];
+ bf1[10] = bf0[8] - bf0[10];
+ bf1[11] = bf0[9] - bf0[11];
+ bf1[12] = bf0[12] + bf0[14];
+ bf1[13] = bf0[13] + bf0[15];
+ bf1[14] = bf0[12] - bf0[14];
+ bf1[15] = bf0[13] - bf0[15];
+ bf1[16] = bf0[16] + bf0[18];
+ bf1[17] = bf0[17] + bf0[19];
+ bf1[18] = bf0[16] - bf0[18];
+ bf1[19] = bf0[17] - bf0[19];
+ bf1[20] = bf0[20] + bf0[22];
+ bf1[21] = bf0[21] + bf0[23];
+ bf1[22] = bf0[20] - bf0[22];
+ bf1[23] = bf0[21] - bf0[23];
+ bf1[24] = bf0[24] + bf0[26];
+ bf1[25] = bf0[25] + bf0[27];
+ bf1[26] = bf0[24] - bf0[26];
+ bf1[27] = bf0[25] - bf0[27];
+ bf1[28] = bf0[28] + bf0[30];
+ bf1[29] = bf0[29] + bf0[31];
+ bf1[30] = bf0[28] - bf0[30];
+ bf1[31] = bf0[29] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 4
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit[stage]);
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = half_btf(cospi[16], bf0[20], cospi[48], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[48], bf0[20], -cospi[16], bf0[21], cos_bit[stage]);
+ bf1[22] = half_btf(-cospi[48], bf0[22], cospi[16], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[16], bf0[22], cospi[48], bf0[23], cos_bit[stage]);
+ bf1[24] = bf0[24];
+ bf1[25] = bf0[25];
+ bf1[26] = bf0[26];
+ bf1[27] = bf0[27];
+ bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[48], bf0[28], -cospi[16], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[48], bf0[30], cospi[16], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[16], bf0[30], cospi[48], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 5
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[4];
+ bf1[1] = bf0[1] + bf0[5];
+ bf1[2] = bf0[2] + bf0[6];
+ bf1[3] = bf0[3] + bf0[7];
+ bf1[4] = bf0[0] - bf0[4];
+ bf1[5] = bf0[1] - bf0[5];
+ bf1[6] = bf0[2] - bf0[6];
+ bf1[7] = bf0[3] - bf0[7];
+ bf1[8] = bf0[8] + bf0[12];
+ bf1[9] = bf0[9] + bf0[13];
+ bf1[10] = bf0[10] + bf0[14];
+ bf1[11] = bf0[11] + bf0[15];
+ bf1[12] = bf0[8] - bf0[12];
+ bf1[13] = bf0[9] - bf0[13];
+ bf1[14] = bf0[10] - bf0[14];
+ bf1[15] = bf0[11] - bf0[15];
+ bf1[16] = bf0[16] + bf0[20];
+ bf1[17] = bf0[17] + bf0[21];
+ bf1[18] = bf0[18] + bf0[22];
+ bf1[19] = bf0[19] + bf0[23];
+ bf1[20] = bf0[16] - bf0[20];
+ bf1[21] = bf0[17] - bf0[21];
+ bf1[22] = bf0[18] - bf0[22];
+ bf1[23] = bf0[19] - bf0[23];
+ bf1[24] = bf0[24] + bf0[28];
+ bf1[25] = bf0[25] + bf0[29];
+ bf1[26] = bf0[26] + bf0[30];
+ bf1[27] = bf0[27] + bf0[31];
+ bf1[28] = bf0[24] - bf0[28];
+ bf1[29] = bf0[25] - bf0[29];
+ bf1[30] = bf0[26] - bf0[30];
+ bf1[31] = bf0[27] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 6
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit[stage]);
+ bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit[stage]);
+ bf1[16] = bf0[16];
+ bf1[17] = bf0[17];
+ bf1[18] = bf0[18];
+ bf1[19] = bf0[19];
+ bf1[20] = bf0[20];
+ bf1[21] = bf0[21];
+ bf1[22] = bf0[22];
+ bf1[23] = bf0[23];
+ bf1[24] = half_btf(cospi[8], bf0[24], cospi[56], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[56], bf0[24], -cospi[8], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[24], bf0[26], -cospi[40], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(-cospi[56], bf0[28], cospi[8], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[8], bf0[28], cospi[56], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[24], bf0[30], cospi[40], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[40], bf0[30], cospi[24], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 7
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[8];
+ bf1[1] = bf0[1] + bf0[9];
+ bf1[2] = bf0[2] + bf0[10];
+ bf1[3] = bf0[3] + bf0[11];
+ bf1[4] = bf0[4] + bf0[12];
+ bf1[5] = bf0[5] + bf0[13];
+ bf1[6] = bf0[6] + bf0[14];
+ bf1[7] = bf0[7] + bf0[15];
+ bf1[8] = bf0[0] - bf0[8];
+ bf1[9] = bf0[1] - bf0[9];
+ bf1[10] = bf0[2] - bf0[10];
+ bf1[11] = bf0[3] - bf0[11];
+ bf1[12] = bf0[4] - bf0[12];
+ bf1[13] = bf0[5] - bf0[13];
+ bf1[14] = bf0[6] - bf0[14];
+ bf1[15] = bf0[7] - bf0[15];
+ bf1[16] = bf0[16] + bf0[24];
+ bf1[17] = bf0[17] + bf0[25];
+ bf1[18] = bf0[18] + bf0[26];
+ bf1[19] = bf0[19] + bf0[27];
+ bf1[20] = bf0[20] + bf0[28];
+ bf1[21] = bf0[21] + bf0[29];
+ bf1[22] = bf0[22] + bf0[30];
+ bf1[23] = bf0[23] + bf0[31];
+ bf1[24] = bf0[16] - bf0[24];
+ bf1[25] = bf0[17] - bf0[25];
+ bf1[26] = bf0[18] - bf0[26];
+ bf1[27] = bf0[19] - bf0[27];
+ bf1[28] = bf0[20] - bf0[28];
+ bf1[29] = bf0[21] - bf0[29];
+ bf1[30] = bf0[22] - bf0[30];
+ bf1[31] = bf0[23] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 8
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = bf0[0];
+ bf1[1] = bf0[1];
+ bf1[2] = bf0[2];
+ bf1[3] = bf0[3];
+ bf1[4] = bf0[4];
+ bf1[5] = bf0[5];
+ bf1[6] = bf0[6];
+ bf1[7] = bf0[7];
+ bf1[8] = bf0[8];
+ bf1[9] = bf0[9];
+ bf1[10] = bf0[10];
+ bf1[11] = bf0[11];
+ bf1[12] = bf0[12];
+ bf1[13] = bf0[13];
+ bf1[14] = bf0[14];
+ bf1[15] = bf0[15];
+ bf1[16] = half_btf(cospi[4], bf0[16], cospi[60], bf0[17], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[60], bf0[16], -cospi[4], bf0[17], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[20], bf0[18], cospi[44], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[44], bf0[18], -cospi[20], bf0[19], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[36], bf0[20], cospi[28], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[28], bf0[20], -cospi[36], bf0[21], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[52], bf0[22], cospi[12], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[12], bf0[22], -cospi[52], bf0[23], cos_bit[stage]);
+ bf1[24] = half_btf(-cospi[60], bf0[24], cospi[4], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[4], bf0[24], cospi[60], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(-cospi[44], bf0[26], cospi[20], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[20], bf0[26], cospi[44], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(-cospi[28], bf0[28], cospi[36], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[36], bf0[28], cospi[28], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(-cospi[12], bf0[30], cospi[52], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[52], bf0[30], cospi[12], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 9
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[0] + bf0[16];
+ bf1[1] = bf0[1] + bf0[17];
+ bf1[2] = bf0[2] + bf0[18];
+ bf1[3] = bf0[3] + bf0[19];
+ bf1[4] = bf0[4] + bf0[20];
+ bf1[5] = bf0[5] + bf0[21];
+ bf1[6] = bf0[6] + bf0[22];
+ bf1[7] = bf0[7] + bf0[23];
+ bf1[8] = bf0[8] + bf0[24];
+ bf1[9] = bf0[9] + bf0[25];
+ bf1[10] = bf0[10] + bf0[26];
+ bf1[11] = bf0[11] + bf0[27];
+ bf1[12] = bf0[12] + bf0[28];
+ bf1[13] = bf0[13] + bf0[29];
+ bf1[14] = bf0[14] + bf0[30];
+ bf1[15] = bf0[15] + bf0[31];
+ bf1[16] = bf0[0] - bf0[16];
+ bf1[17] = bf0[1] - bf0[17];
+ bf1[18] = bf0[2] - bf0[18];
+ bf1[19] = bf0[3] - bf0[19];
+ bf1[20] = bf0[4] - bf0[20];
+ bf1[21] = bf0[5] - bf0[21];
+ bf1[22] = bf0[6] - bf0[22];
+ bf1[23] = bf0[7] - bf0[23];
+ bf1[24] = bf0[8] - bf0[24];
+ bf1[25] = bf0[9] - bf0[25];
+ bf1[26] = bf0[10] - bf0[26];
+ bf1[27] = bf0[11] - bf0[27];
+ bf1[28] = bf0[12] - bf0[28];
+ bf1[29] = bf0[13] - bf0[29];
+ bf1[30] = bf0[14] - bf0[30];
+ bf1[31] = bf0[15] - bf0[31];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 10
+ stage++;
+ cospi = cospi_arr[cos_bit[stage] - cos_bit_min];
+ bf0 = output;
+ bf1 = step;
+ bf1[0] = half_btf(cospi[1], bf0[0], cospi[63], bf0[1], cos_bit[stage]);
+ bf1[1] = half_btf(cospi[63], bf0[0], -cospi[1], bf0[1], cos_bit[stage]);
+ bf1[2] = half_btf(cospi[5], bf0[2], cospi[59], bf0[3], cos_bit[stage]);
+ bf1[3] = half_btf(cospi[59], bf0[2], -cospi[5], bf0[3], cos_bit[stage]);
+ bf1[4] = half_btf(cospi[9], bf0[4], cospi[55], bf0[5], cos_bit[stage]);
+ bf1[5] = half_btf(cospi[55], bf0[4], -cospi[9], bf0[5], cos_bit[stage]);
+ bf1[6] = half_btf(cospi[13], bf0[6], cospi[51], bf0[7], cos_bit[stage]);
+ bf1[7] = half_btf(cospi[51], bf0[6], -cospi[13], bf0[7], cos_bit[stage]);
+ bf1[8] = half_btf(cospi[17], bf0[8], cospi[47], bf0[9], cos_bit[stage]);
+ bf1[9] = half_btf(cospi[47], bf0[8], -cospi[17], bf0[9], cos_bit[stage]);
+ bf1[10] = half_btf(cospi[21], bf0[10], cospi[43], bf0[11], cos_bit[stage]);
+ bf1[11] = half_btf(cospi[43], bf0[10], -cospi[21], bf0[11], cos_bit[stage]);
+ bf1[12] = half_btf(cospi[25], bf0[12], cospi[39], bf0[13], cos_bit[stage]);
+ bf1[13] = half_btf(cospi[39], bf0[12], -cospi[25], bf0[13], cos_bit[stage]);
+ bf1[14] = half_btf(cospi[29], bf0[14], cospi[35], bf0[15], cos_bit[stage]);
+ bf1[15] = half_btf(cospi[35], bf0[14], -cospi[29], bf0[15], cos_bit[stage]);
+ bf1[16] = half_btf(cospi[33], bf0[16], cospi[31], bf0[17], cos_bit[stage]);
+ bf1[17] = half_btf(cospi[31], bf0[16], -cospi[33], bf0[17], cos_bit[stage]);
+ bf1[18] = half_btf(cospi[37], bf0[18], cospi[27], bf0[19], cos_bit[stage]);
+ bf1[19] = half_btf(cospi[27], bf0[18], -cospi[37], bf0[19], cos_bit[stage]);
+ bf1[20] = half_btf(cospi[41], bf0[20], cospi[23], bf0[21], cos_bit[stage]);
+ bf1[21] = half_btf(cospi[23], bf0[20], -cospi[41], bf0[21], cos_bit[stage]);
+ bf1[22] = half_btf(cospi[45], bf0[22], cospi[19], bf0[23], cos_bit[stage]);
+ bf1[23] = half_btf(cospi[19], bf0[22], -cospi[45], bf0[23], cos_bit[stage]);
+ bf1[24] = half_btf(cospi[49], bf0[24], cospi[15], bf0[25], cos_bit[stage]);
+ bf1[25] = half_btf(cospi[15], bf0[24], -cospi[49], bf0[25], cos_bit[stage]);
+ bf1[26] = half_btf(cospi[53], bf0[26], cospi[11], bf0[27], cos_bit[stage]);
+ bf1[27] = half_btf(cospi[11], bf0[26], -cospi[53], bf0[27], cos_bit[stage]);
+ bf1[28] = half_btf(cospi[57], bf0[28], cospi[7], bf0[29], cos_bit[stage]);
+ bf1[29] = half_btf(cospi[7], bf0[28], -cospi[57], bf0[29], cos_bit[stage]);
+ bf1[30] = half_btf(cospi[61], bf0[30], cospi[3], bf0[31], cos_bit[stage]);
+ bf1[31] = half_btf(cospi[3], bf0[30], -cospi[61], bf0[31], cos_bit[stage]);
+ range_check(stage, input, bf1, size, stage_range[stage]);
+
+ // stage 11
+ stage++;
+ bf0 = step;
+ bf1 = output;
+ bf1[0] = bf0[1];
+ bf1[1] = bf0[30];
+ bf1[2] = bf0[3];
+ bf1[3] = bf0[28];
+ bf1[4] = bf0[5];
+ bf1[5] = bf0[26];
+ bf1[6] = bf0[7];
+ bf1[7] = bf0[24];
+ bf1[8] = bf0[9];
+ bf1[9] = bf0[22];
+ bf1[10] = bf0[11];
+ bf1[11] = bf0[20];
+ bf1[12] = bf0[13];
+ bf1[13] = bf0[18];
+ bf1[14] = bf0[15];
+ bf1[15] = bf0[16];
+ bf1[16] = bf0[17];
+ bf1[17] = bf0[14];
+ bf1[18] = bf0[19];
+ bf1[19] = bf0[12];
+ bf1[20] = bf0[21];
+ bf1[21] = bf0[10];
+ bf1[22] = bf0[23];
+ bf1[23] = bf0[8];
+ bf1[24] = bf0[25];
+ bf1[25] = bf0[6];
+ bf1[26] = bf0[27];
+ bf1[27] = bf0[4];
+ bf1[28] = bf0[29];
+ bf1[29] = bf0[2];
+ bf1[30] = bf0[31];
+ bf1[31] = bf0[0];
+ range_check(stage, input, bf1, size, stage_range[stage]);
+}
diff --git a/av1/common/vp10_inv_txfm1d.h b/av1/common/vp10_inv_txfm1d.h
new file mode 100644
index 0000000..21b80bf
--- /dev/null
+++ b/av1/common/vp10_inv_txfm1d.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM1D_H_
+#define VP10_INV_TXFM1D_H_
+
+#include "av1/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_idct4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);  // 4-point inverse DCT
+void vp10_idct8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);  // 8-point inverse DCT
+void vp10_idct16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);  // 16-point inverse DCT
+void vp10_idct32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);  // 32-point inverse DCT
+void vp10_idct64_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);  // 64-point inverse DCT
+
+void vp10_iadst4_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);  // 4-point inverse ADST
+void vp10_iadst8_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);  // 8-point inverse ADST
+void vp10_iadst16_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);  // 16-point inverse ADST
+void vp10_iadst32_new(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);  // 32-point inverse ADST
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // VP10_INV_TXFM1D_H_
diff --git a/av1/common/vp10_inv_txfm2d.c b/av1/common/vp10_inv_txfm2d.c
new file mode 100644
index 0000000..60606c9
--- /dev/null
+++ b/av1/common/vp10_inv_txfm2d.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "av1/common/enums.h"
+#include "av1/common/vp10_txfm.h"
+#include "av1/common/vp10_inv_txfm1d.h"
+#include "av1/common/vp10_inv_txfm2d_cfg.h"
+
+static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
+  switch (txfm_type) {  // pick the 1-D inverse kernel for this transform type
+    case TXFM_TYPE_DCT4: return vp10_idct4_new;
+    case TXFM_TYPE_DCT8: return vp10_idct8_new;
+    case TXFM_TYPE_DCT16: return vp10_idct16_new;
+    case TXFM_TYPE_DCT32: return vp10_idct32_new;
+    case TXFM_TYPE_DCT64: return vp10_idct64_new;  // used by the 64x64 config
+    case TXFM_TYPE_ADST4: return vp10_iadst4_new;
+    case TXFM_TYPE_ADST8: return vp10_iadst8_new;
+    case TXFM_TYPE_ADST16: return vp10_iadst16_new;
+    case TXFM_TYPE_ADST32: return vp10_iadst32_new;
+    default: assert(0); return NULL;  // unknown type; NULL when asserts are off
+  }
+}
+#if CONFIG_EXT_TX  // rows 4-8 repeat cfgs from rows 1-3; presumably flip types
+static const TXFM_2D_CFG *inv_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = {
+ { &inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_dct_8,
+ &inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_dct_32 },
+ { &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
+ &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
+ { &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
+ &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
+ { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+ &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+ { &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
+ &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
+ { &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
+ &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
+ { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+ &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+ { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+ &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+ { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+ &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+};
+#else  // !CONFIG_EXT_TX
+static const TXFM_2D_CFG *inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
+ { &inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_dct_8,
+ &inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_dct_32 },
+ { &inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
+ &inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
+ { &inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
+ &inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
+ { &inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
+ &inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
+};
+#endif  // CONFIG_EXT_TX -- both tables are indexed [tx_type][tx_size]
+
+TXFM_2D_FLIP_CFG vp10_get_inv_txfm_cfg(int tx_type, int tx_size) {
+  TXFM_2D_FLIP_CFG flip_cfg;  // returned by value
+  set_flip_cfg(tx_type, &flip_cfg);
+  flip_cfg.cfg = inv_txfm_cfg_ls[tx_type][tx_size];  // no range check here
+  return flip_cfg;
+}
+
+TXFM_2D_FLIP_CFG vp10_get_inv_txfm_64x64_cfg(int tx_type) {
+  // Zeroed result: this is what is returned when tx_type is not handled.
+  TXFM_2D_FLIP_CFG flip_cfg = { 0, 0, NULL };
+  if (tx_type == DCT_DCT) {  // only DCT_DCT has a 64x64 config here
+    flip_cfg.cfg = &inv_txfm_2d_cfg_dct_dct_64;
+    set_flip_cfg(tx_type, &flip_cfg);
+  } else {
+    assert(0);
+  }
+  return flip_cfg;
+}
+
+static INLINE void inv_txfm2d_add_c(const int32_t *input, int16_t *output,
+                                    int stride, TXFM_2D_FLIP_CFG *cfg,
+                                    int32_t *txfm_buf) {
+  // Runs the row transforms and then the column transforms selected by
+  // cfg->cfg over a txfm_size x txfm_size block of `input`, and adds the
+  // result into `output` (rows of `output` are `stride` elements apart).
+  // cfg->lr_flip / cfg->ud_flip select mirrored column reads / row writes.
+  const TXFM_2D_CFG *const base = cfg->cfg;
+  const int n = base->txfm_size;
+  const TxfmFunc col_txfm = inv_txfm_type_to_func(base->txfm_type_col);
+  const TxfmFunc row_txfm = inv_txfm_type_to_func(base->txfm_type_row);
+
+  // txfm_buf must hold n * n + 2 * n values. It is carved up as:
+  //   [0, n)            one gathered column (input of the column transform)
+  //   [n, 2n)           output of the column transform
+  //   [2n, 2n + n * n)  results of the row pass, stored row by row
+  int32_t *const col_in = txfm_buf;
+  int32_t *const col_out = col_in + n;
+  int32_t *const row_results = col_out + n;
+  int r, c;
+
+  // Row pass: transform every input row, then round it using -shift[0].
+  // The results land in row_results, row r at offset r * n.
+  for (r = 0; r < n; ++r) {
+    int32_t *const dst = row_results + r * n;
+    row_txfm(input + r * n, dst, base->cos_bit_row, base->stage_range_row);
+    round_shift_array(dst, n, -base->shift[0]);
+  }
+
+  // Column pass: one column at a time.
+  for (c = 0; c < n; ++c) {
+    // With lr_flip set, columns are read in mirrored (right-to-left) order.
+    const int src_col = (cfg->lr_flip == 0) ? c : (n - c - 1);
+    for (r = 0; r < n; ++r) col_in[r] = row_results[r * n + src_col];
+
+    col_txfm(col_in, col_out, base->cos_bit_col, base->stage_range_col);
+    round_shift_array(col_out, n, -base->shift[1]);
+
+    // Accumulate into output; with ud_flip set the column is written
+    // bottom-to-top.
+    // NOTE: output is int16_t while col_out is int32_t, so += narrows.
+    if (cfg->ud_flip == 0) {
+      for (r = 0; r < n; ++r) output[r * stride + c] += col_out[r];
+    } else {
+      for (r = 0; r < n; ++r) output[r * stride + c] += col_out[n - r - 1];
+    }
+  }
+}
+
+void vp10_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
+                               int stride, int tx_type, int bd) {
+  int32_t txfm_buf[4 * 4 + 4 + 4];  // int32_t: matches inv_txfm2d_add_c()
+  // Adds the 4x4 inverse transform of `input` (type tx_type) to the
+  // prediction already stored in `output`, then calls clamp_block() with the
+  // bounds 0 and (1 << bd) - 1. Assumes bd < 15, so every prediction value is
+  // also a valid int16_t and the uint16_t buffer can be used as int16_t.
+  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_4X4);
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
+  clamp_block((int16_t *)output, 4, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output,
+                               int stride, int tx_type, int bd) {
+  int32_t txfm_buf[8 * 8 + 8 + 8];  // int32_t: matches inv_txfm2d_add_c()
+  // Adds the 8x8 inverse transform of `input` (type tx_type) to the
+  // prediction already stored in `output`, then calls clamp_block() with the
+  // bounds 0 and (1 << bd) - 1. Assumes bd < 15, so every prediction value is
+  // also a valid int16_t and the uint16_t buffer can be used as int16_t.
+  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_8X8);
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
+  clamp_block((int16_t *)output, 8, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_16x16_c(const int32_t *input, uint16_t *output,
+                                 int stride, int tx_type, int bd) {
+  int32_t txfm_buf[16 * 16 + 16 + 16];  // int32_t: matches inv_txfm2d_add_c()
+  // Adds the 16x16 inverse transform of `input` (type tx_type) to the
+  // prediction already stored in `output`, then calls clamp_block() with the
+  // bounds 0 and (1 << bd) - 1. Assumes bd < 15, so every prediction value is
+  // also a valid int16_t and the uint16_t buffer can be used as int16_t.
+  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_16X16);
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
+  clamp_block((int16_t *)output, 16, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
+                                 int stride, int tx_type, int bd) {
+  int32_t txfm_buf[32 * 32 + 32 + 32];  // int32_t: matches inv_txfm2d_add_c()
+  // Adds the 32x32 inverse transform of `input` (type tx_type) to the
+  // prediction already stored in `output`, then calls clamp_block() with the
+  // bounds 0 and (1 << bd) - 1. Assumes bd < 15, so every prediction value is
+  // also a valid int16_t and the uint16_t buffer can be used as int16_t.
+  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_cfg(tx_type, TX_32X32);
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
+  clamp_block((int16_t *)output, 32, stride, 0, (1 << bd) - 1);
+}
+
+void vp10_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
+                                 int stride, int tx_type, int bd) {
+  int32_t txfm_buf[64 * 64 + 64 + 64];  // int32_t: matches inv_txfm2d_add_c()
+  // Adds the 64x64 inverse transform of `input` to the prediction already
+  // stored in `output`, then calls clamp_block() with the bounds 0 and
+  // (1 << bd) - 1. The 64x64 config getter only handles DCT_DCT. Assumes
+  // bd < 15, so the uint16_t buffer can be used as int16_t.
+  TXFM_2D_FLIP_CFG cfg = vp10_get_inv_txfm_64x64_cfg(tx_type);
+  inv_txfm2d_add_c(input, (int16_t *)output, stride, &cfg, txfm_buf);
+  clamp_block((int16_t *)output, 64, stride, 0, (1 << bd) - 1);
+}
diff --git a/av1/common/vp10_inv_txfm2d_cfg.h b/av1/common/vp10_inv_txfm2d_cfg.h
new file mode 100644
index 0000000..9bfa420
--- /dev/null
+++ b/av1/common/vp10_inv_txfm2d_cfg.h
@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_INV_TXFM2D_CFG_H_
+#define VP10_INV_TXFM2D_CFG_H_
+#include "av1/common/vp10_inv_txfm1d.h"
+// ---- config inv_dct_dct_4: size 4, column DCT4, row DCT4 ----
+static const int8_t inv_shift_dct_dct_4[2] = { 0, -4 };  // [0]: row pass, [1]: column pass
+static const int8_t inv_stage_range_col_dct_dct_4[4] = { 18, 18, 17, 17 };
+static const int8_t inv_stage_range_row_dct_dct_4[4] = { 18, 18, 18, 18 };
+static const int8_t inv_cos_bit_col_dct_dct_4[4] = { 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_dct_4[4] = { 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_4 = {
+ 4, // .txfm_size
+ 4, // .stage_num_col
+ 4, // .stage_num_row
+ // 0, // .log_scale
+ inv_shift_dct_dct_4, // .shift
+ inv_stage_range_col_dct_dct_4, // .stage_range_col
+ inv_stage_range_row_dct_dct_4, // .stage_range_row
+ inv_cos_bit_col_dct_dct_4, // .cos_bit_col
+ inv_cos_bit_row_dct_dct_4, // .cos_bit_row
+ TXFM_TYPE_DCT4, // .txfm_type_col
+ TXFM_TYPE_DCT4 // .txfm_type_row
+};
+
+// ---- config inv_dct_dct_8: size 8, column DCT8, row DCT8 ----
+static const int8_t inv_shift_dct_dct_8[2] = { 0, -5 };  // [0]: row pass, [1]: column pass
+static const int8_t inv_stage_range_col_dct_dct_8[6] = {
+ 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_dct_dct_8[6] = {
+ 19, 19, 19, 19, 19, 19
+};
+static const int8_t inv_cos_bit_col_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_8 = {
+ 8, // .txfm_size
+ 6, // .stage_num_col
+ 6, // .stage_num_row
+ // 0, // .log_scale
+ inv_shift_dct_dct_8, // .shift
+ inv_stage_range_col_dct_dct_8, // .stage_range_col
+ inv_stage_range_row_dct_dct_8, // .stage_range_row
+ inv_cos_bit_col_dct_dct_8, // .cos_bit_col
+ inv_cos_bit_row_dct_dct_8, // .cos_bit_row
+ TXFM_TYPE_DCT8, // .txfm_type_col
+ TXFM_TYPE_DCT8 // .txfm_type_row
+};
+
+// ---- config inv_dct_dct_16: size 16, column DCT16, row DCT16 ----
+static const int8_t inv_shift_dct_dct_16[2] = { -1, -5 };  // [0]: row pass, [1]: column pass
+static const int8_t inv_stage_range_col_dct_dct_16[8] = { 19, 19, 19, 19,
+ 19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_dct_dct_16[8] = { 20, 20, 20, 20,
+ 20, 20, 20, 20 };
+static const int8_t inv_cos_bit_col_dct_dct_16[8] = { 13, 13, 13, 13,
+ 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_dct_16[8] = { 12, 12, 12, 12,
+ 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_16 = {
+ 16, // .txfm_size
+ 8, // .stage_num_col
+ 8, // .stage_num_row
+ // 0, // .log_scale
+ inv_shift_dct_dct_16, // .shift
+ inv_stage_range_col_dct_dct_16, // .stage_range_col
+ inv_stage_range_row_dct_dct_16, // .stage_range_row
+ inv_cos_bit_col_dct_dct_16, // .cos_bit_col
+ inv_cos_bit_row_dct_dct_16, // .cos_bit_row
+ TXFM_TYPE_DCT16, // .txfm_type_col
+ TXFM_TYPE_DCT16 // .txfm_type_row
+};
+
+// ---- config inv_dct_dct_32: size 32, column DCT32, row DCT32 ----
+static const int8_t inv_shift_dct_dct_32[2] = { -1, -5 };  // [0]: row pass, [1]: column pass
+static const int8_t inv_stage_range_col_dct_dct_32[10] = { 19, 19, 19, 19, 19,
+ 19, 19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_dct_dct_32[10] = { 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20 };
+static const int8_t inv_cos_bit_col_dct_dct_32[10] = { 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_dct_32[10] = { 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_32 = {
+ 32, // .txfm_size
+ 10, // .stage_num_col
+ 10, // .stage_num_row
+ // 1, // .log_scale
+ inv_shift_dct_dct_32, // .shift
+ inv_stage_range_col_dct_dct_32, // .stage_range_col
+ inv_stage_range_row_dct_dct_32, // .stage_range_row
+ inv_cos_bit_col_dct_dct_32, // .cos_bit_col
+ inv_cos_bit_row_dct_dct_32, // .cos_bit_row
+ TXFM_TYPE_DCT32, // .txfm_type_col
+ TXFM_TYPE_DCT32 // .txfm_type_row
+};
+
+// ---- config inv_dct_dct_64: size 64, column DCT64, row DCT64 ----
+static const int8_t inv_shift_dct_dct_64[2] = { -1, -7 };  // [0]: row pass, [1]: column pass
+static const int8_t inv_stage_range_col_dct_dct_64[12] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_dct_dct_64[12] = {
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+static const int8_t inv_cos_bit_col_dct_dct_64[12] = { 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_dct_64[12] = { 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_dct_64 = {
+ 64, // .txfm_size
+ 12, // .stage_num_col
+ 12, // .stage_num_row
+ inv_shift_dct_dct_64, // .shift
+ inv_stage_range_col_dct_dct_64, // .stage_range_col
+ inv_stage_range_row_dct_dct_64, // .stage_range_row
+ inv_cos_bit_col_dct_dct_64, // .cos_bit_col
+ inv_cos_bit_row_dct_dct_64, // .cos_bit_row
+ TXFM_TYPE_DCT64, // .txfm_type_col
+ TXFM_TYPE_DCT64 // .txfm_type_row
+};
+
+// ---- config inv_dct_adst_4: size 4, column DCT4, row ADST4 ----
+static const int8_t inv_shift_dct_adst_4[2] = { 0, -4 };  // [0]: row pass, [1]: column pass
+static const int8_t inv_stage_range_col_dct_adst_4[4] = { 18, 18, 17, 17 };
+static const int8_t inv_stage_range_row_dct_adst_4[6] = {
+ 18, 18, 18, 18, 18, 18
+};
+static const int8_t inv_cos_bit_col_dct_adst_4[4] = { 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_4 = {
+ 4, // .txfm_size
+ 4, // .stage_num_col
+ 6, // .stage_num_row
+ // 0, // .log_scale
+ inv_shift_dct_adst_4, // .shift
+ inv_stage_range_col_dct_adst_4, // .stage_range_col
+ inv_stage_range_row_dct_adst_4, // .stage_range_row
+ inv_cos_bit_col_dct_adst_4, // .cos_bit_col
+ inv_cos_bit_row_dct_adst_4, // .cos_bit_row
+ TXFM_TYPE_DCT4, // .txfm_type_col
+ TXFM_TYPE_ADST4 // .txfm_type_row
+};
+
+// ---- config inv_dct_adst_8: size 8, column DCT8, row ADST8 ----
+static const int8_t inv_shift_dct_adst_8[2] = { 0, -5 };  // [0]: row pass, [1]: column pass
+static const int8_t inv_stage_range_col_dct_adst_8[6] = {
+ 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_dct_adst_8[8] = { 19, 19, 19, 19,
+ 19, 19, 19, 19 };
+static const int8_t inv_cos_bit_col_dct_adst_8[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_adst_8[8] = { 13, 13, 13, 13,
+ 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_8 = {
+ 8, // .txfm_size
+ 6, // .stage_num_col
+ 8, // .stage_num_row
+ // 0, // .log_scale
+ inv_shift_dct_adst_8, // .shift
+ inv_stage_range_col_dct_adst_8, // .stage_range_col
+ inv_stage_range_row_dct_adst_8, // .stage_range_row
+ inv_cos_bit_col_dct_adst_8, // .cos_bit_col
+ inv_cos_bit_row_dct_adst_8, // .cos_bit_row
+ TXFM_TYPE_DCT8, // .txfm_type_col
+ TXFM_TYPE_ADST8 // .txfm_type_row
+};
+
+// ---- config inv_dct_adst_16: size 16, column DCT16, row ADST16 ----
+static const int8_t inv_shift_dct_adst_16[2] = { -1, -5 };  // [0]: row pass, [1]: column pass
+static const int8_t inv_stage_range_col_dct_adst_16[8] = { 19, 19, 19, 19,
+ 19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_dct_adst_16[10] = {
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+static const int8_t inv_cos_bit_col_dct_adst_16[8] = { 13, 13, 13, 13,
+ 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_adst_16[10] = { 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_16 = {
+ 16, // .txfm_size
+ 8, // .stage_num_col
+ 10, // .stage_num_row
+ // 0, // .log_scale
+ inv_shift_dct_adst_16, // .shift
+ inv_stage_range_col_dct_adst_16, // .stage_range_col
+ inv_stage_range_row_dct_adst_16, // .stage_range_row
+ inv_cos_bit_col_dct_adst_16, // .cos_bit_col
+ inv_cos_bit_row_dct_adst_16, // .cos_bit_row
+ TXFM_TYPE_DCT16, // .txfm_type_col
+ TXFM_TYPE_ADST16 // .txfm_type_row
+};
+
+// ---- config inv_dct_adst_32: size 32, column DCT32, row ADST32 ----
+static const int8_t inv_shift_dct_adst_32[2] = { -1, -5 };  // [0]: row pass, [1]: column pass
+static const int8_t inv_stage_range_col_dct_adst_32[10] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_dct_adst_32[12] = {
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+static const int8_t inv_cos_bit_col_dct_adst_32[10] = { 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_dct_adst_32[12] = {
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
+};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_dct_adst_32 = {
+ 32, // .txfm_size
+ 10, // .stage_num_col
+ 12, // .stage_num_row
+ // 1, // .log_scale
+ inv_shift_dct_adst_32, // .shift
+ inv_stage_range_col_dct_adst_32, // .stage_range_col
+ inv_stage_range_row_dct_adst_32, // .stage_range_row
+ inv_cos_bit_col_dct_adst_32, // .cos_bit_col
+ inv_cos_bit_row_dct_adst_32, // .cos_bit_row
+ TXFM_TYPE_DCT32, // .txfm_type_col
+ TXFM_TYPE_ADST32 // .txfm_type_row
+};
+
+// ---- config inv_adst_adst_4: size 4, column ADST4, row ADST4 ----
+static const int8_t inv_shift_adst_adst_4[2] = { 0, -4 };  // [0]: row pass, [1]: column pass
+static const int8_t inv_stage_range_col_adst_adst_4[6] = { 18, 18, 18,
+ 18, 17, 17 };
+static const int8_t inv_stage_range_row_adst_adst_4[6] = { 18, 18, 18,
+ 18, 18, 18 };
+static const int8_t inv_cos_bit_col_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_adst_4[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_4 = {
+ 4, // .txfm_size
+ 6, // .stage_num_col
+ 6, // .stage_num_row
+ // 0, // .log_scale
+ inv_shift_adst_adst_4, // .shift
+ inv_stage_range_col_adst_adst_4, // .stage_range_col
+ inv_stage_range_row_adst_adst_4, // .stage_range_row
+ inv_cos_bit_col_adst_adst_4, // .cos_bit_col
+ inv_cos_bit_row_adst_adst_4, // .cos_bit_row
+ TXFM_TYPE_ADST4, // .txfm_type_col
+ TXFM_TYPE_ADST4 // .txfm_type_row
+};
+
+// ---- config inv_adst_adst_8: size 8, column ADST8, row ADST8 ----
+static const int8_t inv_shift_adst_adst_8[2] = { 0, -5 };  // [0]: row pass, [1]: column pass
+static const int8_t inv_stage_range_col_adst_adst_8[8] = { 19, 19, 19, 19,
+ 19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_adst_adst_8[8] = { 19, 19, 19, 19,
+ 19, 19, 19, 19 };
+static const int8_t inv_cos_bit_col_adst_adst_8[8] = { 13, 13, 13, 13,
+ 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_adst_8[8] = { 13, 13, 13, 13,
+ 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_8 = {
+ 8, // .txfm_size
+ 8, // .stage_num_col
+ 8, // .stage_num_row
+ // 0, // .log_scale
+ inv_shift_adst_adst_8, // .shift
+ inv_stage_range_col_adst_adst_8, // .stage_range_col
+ inv_stage_range_row_adst_adst_8, // .stage_range_row
+ inv_cos_bit_col_adst_adst_8, // .cos_bit_col
+ inv_cos_bit_row_adst_adst_8, // .cos_bit_row
+ TXFM_TYPE_ADST8, // .txfm_type_col
+ TXFM_TYPE_ADST8 // .txfm_type_row
+};
+
+// ---- config inv_adst_adst_16: size 16, column ADST16, row ADST16 ----
+static const int8_t inv_shift_adst_adst_16[2] = { -1, -5 };  // [0]: row pass, [1]: column pass
+static const int8_t inv_stage_range_col_adst_adst_16[10] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_adst_adst_16[10] = {
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+static const int8_t inv_cos_bit_col_adst_adst_16[10] = { 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_adst_16[10] = { 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_16 = {
+ 16, // .txfm_size
+ 10, // .stage_num_col
+ 10, // .stage_num_row
+ // 0, // .log_scale
+ inv_shift_adst_adst_16, // .shift
+ inv_stage_range_col_adst_adst_16, // .stage_range_col
+ inv_stage_range_row_adst_adst_16, // .stage_range_row
+ inv_cos_bit_col_adst_adst_16, // .cos_bit_col
+ inv_cos_bit_row_adst_adst_16, // .cos_bit_row
+ TXFM_TYPE_ADST16, // .txfm_type_col
+ TXFM_TYPE_ADST16 // .txfm_type_row
+};
+
+// ---------------- config inv_adst_adst_32 (size 32; col ADST32, 12 stages; row ADST32, 12 stages) ----------------
+static const int8_t inv_shift_adst_adst_32[2] = { -1, -5 };
+static const int8_t inv_stage_range_col_adst_adst_32[12] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_adst_adst_32[12] = {
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+static const int8_t inv_cos_bit_col_adst_adst_32[12] = {
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13
+};
+static const int8_t inv_cos_bit_row_adst_adst_32[12] = {
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
+};
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_adst_32 = {
+ 32, // .txfm_size
+ 12, // .stage_num_col
+ 12, // .stage_num_row
+ // 1, // .log_scale
+ inv_shift_adst_adst_32, // .shift
+ inv_stage_range_col_adst_adst_32, // .stage_range_col
+ inv_stage_range_row_adst_adst_32, // .stage_range_row
+ inv_cos_bit_col_adst_adst_32, // .cos_bit_col
+ inv_cos_bit_row_adst_adst_32, // .cos_bit_row
+ TXFM_TYPE_ADST32, // .txfm_type_col
+ TXFM_TYPE_ADST32 // .txfm_type_row
+};
+
+// ---------------- config inv_adst_dct_4 (size 4; col ADST4, 6 stages; row DCT4, 4 stages) ----------------
+static const int8_t inv_shift_adst_dct_4[2] = { 0, -4 };
+static const int8_t inv_stage_range_col_adst_dct_4[6] = {
+ 18, 18, 18, 18, 17, 17
+};
+static const int8_t inv_stage_range_row_adst_dct_4[4] = { 18, 18, 18, 18 };
+static const int8_t inv_cos_bit_col_adst_dct_4[6] = { 13, 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_dct_4[4] = { 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_4 = {
+ 4, // .txfm_size
+ 6, // .stage_num_col
+ 4, // .stage_num_row
+ // 0, // .log_scale
+ inv_shift_adst_dct_4, // .shift
+ inv_stage_range_col_adst_dct_4, // .stage_range_col
+ inv_stage_range_row_adst_dct_4, // .stage_range_row
+ inv_cos_bit_col_adst_dct_4, // .cos_bit_col
+ inv_cos_bit_row_adst_dct_4, // .cos_bit_row
+ TXFM_TYPE_ADST4, // .txfm_type_col
+ TXFM_TYPE_DCT4 // .txfm_type_row
+};
+
+// ---------------- config inv_adst_dct_8 (size 8; col ADST8, 8 stages; row DCT8, 6 stages) ----------------
+static const int8_t inv_shift_adst_dct_8[2] = { 0, -5 };
+static const int8_t inv_stage_range_col_adst_dct_8[8] = { 19, 19, 19, 19,
+ 19, 19, 18, 18 };
+static const int8_t inv_stage_range_row_adst_dct_8[6] = {
+ 19, 19, 19, 19, 19, 19
+};
+static const int8_t inv_cos_bit_col_adst_dct_8[8] = { 13, 13, 13, 13,
+ 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_dct_8[6] = { 13, 13, 13, 13, 13, 13 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_8 = {
+ 8, // .txfm_size
+ 8, // .stage_num_col
+ 6, // .stage_num_row
+ // 0, // .log_scale
+ inv_shift_adst_dct_8, // .shift
+ inv_stage_range_col_adst_dct_8, // .stage_range_col
+ inv_stage_range_row_adst_dct_8, // .stage_range_row
+ inv_cos_bit_col_adst_dct_8, // .cos_bit_col
+ inv_cos_bit_row_adst_dct_8, // .cos_bit_row
+ TXFM_TYPE_ADST8, // .txfm_type_col
+ TXFM_TYPE_DCT8 // .txfm_type_row
+};
+
+// ---------------- config inv_adst_dct_16 (size 16; col ADST16, 10 stages; row DCT16, 8 stages) ----------------
+static const int8_t inv_shift_adst_dct_16[2] = { -1, -5 };
+static const int8_t inv_stage_range_col_adst_dct_16[10] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_adst_dct_16[8] = { 20, 20, 20, 20,
+ 20, 20, 20, 20 };
+static const int8_t inv_cos_bit_col_adst_dct_16[10] = { 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13 };
+static const int8_t inv_cos_bit_row_adst_dct_16[8] = { 12, 12, 12, 12,
+ 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_16 = {
+ 16, // .txfm_size
+ 10, // .stage_num_col
+ 8, // .stage_num_row
+ // 0, // .log_scale
+ inv_shift_adst_dct_16, // .shift
+ inv_stage_range_col_adst_dct_16, // .stage_range_col
+ inv_stage_range_row_adst_dct_16, // .stage_range_row
+ inv_cos_bit_col_adst_dct_16, // .cos_bit_col
+ inv_cos_bit_row_adst_dct_16, // .cos_bit_row
+ TXFM_TYPE_ADST16, // .txfm_type_col
+ TXFM_TYPE_DCT16 // .txfm_type_row
+};
+
+// ---------------- config inv_adst_dct_32 (size 32; col ADST32, 12 stages; row DCT32, 10 stages) ----------------
+static const int8_t inv_shift_adst_dct_32[2] = { -1, -5 };
+static const int8_t inv_stage_range_col_adst_dct_32[12] = {
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18
+};
+static const int8_t inv_stage_range_row_adst_dct_32[10] = {
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+};
+static const int8_t inv_cos_bit_col_adst_dct_32[12] = {
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13
+};
+static const int8_t inv_cos_bit_row_adst_dct_32[10] = { 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12 };
+
+static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_32 = {
+ 32, // .txfm_size
+ 12, // .stage_num_col
+ 10, // .stage_num_row
+ // 1, // .log_scale
+ inv_shift_adst_dct_32, // .shift
+ inv_stage_range_col_adst_dct_32, // .stage_range_col
+ inv_stage_range_row_adst_dct_32, // .stage_range_row
+ inv_cos_bit_col_adst_dct_32, // .cos_bit_col
+ inv_cos_bit_row_adst_dct_32, // .cos_bit_row
+ TXFM_TYPE_ADST32, // .txfm_type_col
+ TXFM_TYPE_DCT32 // .txfm_type_row
+};
+
+#endif // VP10_INV_TXFM2D_CFG_H_
diff --git a/av1/common/vp10_rtcd.c b/av1/common/vp10_rtcd.c
new file mode 100644
index 0000000..7fce6b9
--- /dev/null
+++ b/av1/common/vp10_rtcd.c
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_config.h"
+#define RTCD_C
+#include "./vp10_rtcd.h"
+#include "aom_ports/vpx_once.h"
+
+void vp10_rtcd(void) {
+ // Hands setup_rtcd_internal to once(). TODO(JBB): Remove this call by ensuring that
+ // both the encoder and decoder setup functions are themselves protected by once();
+ once(setup_rtcd_internal);
+}
diff --git a/av1/common/vp10_rtcd_defs.pl b/av1/common/vp10_rtcd_defs.pl
new file mode 100644
index 0000000..4a16723
--- /dev/null
+++ b/av1/common/vp10_rtcd_defs.pl
@@ -0,0 +1,912 @@
+sub vp10_common_forward_decls() { # Prints the #include lines and struct/union forward declarations in the heredoc below.
+print <<EOF
+/*
+ * VP10
+ */
+
+#include "aom/vpx_integer.h"
+#include "av1/common/common.h"
+#include "av1/common/enums.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/filter.h"
+#include "av1/common/vp10_txfm.h"
+
+struct macroblockd;
+
+/* Encoder forward decls */
+struct macroblock;
+struct vpx_variance_vtable;
+struct search_site_config;
+struct mv;
+union int_mv;
+struct yv12_buffer_config;
+EOF
+}
+forward_decls qw/vp10_common_forward_decls/;
+
+# Instruction-set names for functions that are 64-bit only; each stays empty unless the arch is x86_64.
+$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
+if ($opts{arch} eq "x86_64") {
+ $mmx_x86_64 = 'mmx';
+ $sse2_x86_64 = 'sse2';
+ $ssse3_x86_64 = 'ssse3';
+ $avx_x86_64 = 'avx';
+ $avx2_x86_64 = 'avx2';
+}
+
+#
+# 10/12-tap convolution filters
+#
+add_proto qw/void vp10_convolve_horiz/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg";
+specialize qw/vp10_convolve_horiz ssse3/;
+
+add_proto qw/void vp10_convolve_vert/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg";
+specialize qw/vp10_convolve_vert ssse3/;
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vp10_highbd_convolve_horiz/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
+ specialize qw/vp10_highbd_convolve_horiz sse4_1/;
+ add_proto qw/void vp10_highbd_convolve_vert/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
+ specialize qw/vp10_highbd_convolve_vert sse4_1/;
+}
+
+#
+# dct
+#
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ # Note: as optimized versions of these functions are added, we need to add a check to ensure
+ # that when CONFIG_EMULATE_HARDWARE is on, they default to the C versions only.
+ if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+ add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht4x4_16_add/;
+
+ add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht4x8_32_add/;
+
+ add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x4_32_add/;
+
+ add_proto qw/void vp10_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x16_128_add/;
+
+ add_proto qw/void vp10_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht16x8_128_add/;
+
+ add_proto qw/void vp10_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht16x32_512_add/;
+
+ add_proto qw/void vp10_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht32x16_512_add/;
+
+ add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x8_64_add/;
+
+ add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/vp10_iht16x16_256_add/;
+
+ add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct4x4/;
+
+ add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct4x4_1/;
+
+ add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct8x8/;
+
+ add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct8x8_1/;
+
+ add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct16x16/;
+
+ add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct16x16_1/;
+
+ add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct32x32/;
+
+ add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct32x32_rd/;
+
+ add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct32x32_1/;
+
+ add_proto qw/void vp10_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct4x4/;
+
+ add_proto qw/void vp10_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct8x8/;
+
+ add_proto qw/void vp10_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct8x8_1/;
+
+ add_proto qw/void vp10_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct16x16/;
+
+ add_proto qw/void vp10_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct16x16_1/;
+
+ add_proto qw/void vp10_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct32x32/;
+
+ add_proto qw/void vp10_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct32x32_rd/;
+
+ add_proto qw/void vp10_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct32x32_1/;
+ } else {
+ add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht4x4_16_add sse2/;
+
+ add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht4x8_32_add/;
+
+ add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x4_32_add/;
+
+ add_proto qw/void vp10_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x16_128_add/;
+
+ add_proto qw/void vp10_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht16x8_128_add/;
+
+ add_proto qw/void vp10_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht16x32_512_add/;
+
+ add_proto qw/void vp10_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht32x16_512_add/;
+
+ add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x8_64_add sse2/;
+
+ add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/vp10_iht16x16_256_add sse2/;
+
+ add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct4x4 sse2/;
+
+ add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct4x4_1 sse2/;
+
+ add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct8x8 sse2/;
+
+ add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct8x8_1 sse2/;
+
+ add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct16x16 sse2/;
+
+ add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct16x16_1 sse2/;
+
+ add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct32x32 sse2/;
+
+ add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct32x32_rd sse2/;
+
+ add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct32x32_1 sse2/;
+
+ add_proto qw/void vp10_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct4x4 sse2/;
+
+ add_proto qw/void vp10_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct8x8 sse2/;
+
+ add_proto qw/void vp10_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct8x8_1/;
+
+ add_proto qw/void vp10_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct16x16 sse2/;
+
+ add_proto qw/void vp10_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct16x16_1/;
+
+ add_proto qw/void vp10_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct32x32 sse2/;
+
+ add_proto qw/void vp10_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct32x32_rd sse2/;
+
+ add_proto qw/void vp10_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fdct32x32_1/;
+ }
+} else {
+ # Force C versions if CONFIG_EMULATE_HARDWARE is enabled ("yes")
+ if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+ add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht4x4_16_add/;
+
+ add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht4x8_32_add/;
+
+ add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x4_32_add/;
+
+ add_proto qw/void vp10_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x16_128_add/;
+
+ add_proto qw/void vp10_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht16x8_128_add/;
+
+ add_proto qw/void vp10_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht16x32_512_add/;
+
+ add_proto qw/void vp10_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht32x16_512_add/;
+
+ add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x8_64_add/;
+
+ add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/vp10_iht16x16_256_add/;
+
+ add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct4x4/;
+
+ add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct4x4_1/;
+
+ add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct8x8/;
+
+ add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct8x8_1/;
+
+ add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct16x16/;
+
+ add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct16x16_1/;
+
+ add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct32x32/;
+
+ add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct32x32_rd/;
+
+ add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct32x32_1/;
+ } else {
+ add_proto qw/void vp10_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht4x4_16_add sse2 neon dspr2/;
+
+ add_proto qw/void vp10_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht4x8_32_add/;
+
+ add_proto qw/void vp10_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x4_32_add/;
+
+ add_proto qw/void vp10_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x16_128_add/;
+
+ add_proto qw/void vp10_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht16x8_128_add/;
+
+ add_proto qw/void vp10_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht16x32_512_add/;
+
+ add_proto qw/void vp10_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht32x16_512_add/;
+
+ add_proto qw/void vp10_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp10_iht8x8_64_add sse2 neon dspr2/;
+
+ add_proto qw/void vp10_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/vp10_iht16x16_256_add sse2 dspr2/;
+
+ if (vpx_config("CONFIG_EXT_TX") ne "yes") {
+ specialize qw/vp10_iht4x4_16_add msa/;
+ specialize qw/vp10_iht8x8_64_add msa/;
+ specialize qw/vp10_iht16x16_256_add msa/;
+ }
+
+ add_proto qw/void vp10_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct4x4 sse2/;
+
+ add_proto qw/void vp10_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct4x4_1 sse2/;
+
+ add_proto qw/void vp10_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct8x8 sse2/;
+
+ add_proto qw/void vp10_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct8x8_1 sse2/;
+
+ add_proto qw/void vp10_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct16x16 sse2/;
+
+ add_proto qw/void vp10_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct16x16_1 sse2/;
+
+ add_proto qw/void vp10_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct32x32 sse2/;
+
+ add_proto qw/void vp10_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct32x32_rd sse2/;
+
+ add_proto qw/void vp10_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fdct32x32_1 sse2/;
+ }
+}
+
+if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
+ add_proto qw/void quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ specialize qw/quantize_nuq/;
+
+ add_proto qw/void quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ specialize qw/quantize_fp_nuq/;
+
+ add_proto qw/void quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ specialize qw/quantize_32x32_nuq/;
+
+ add_proto qw/void quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ specialize qw/quantize_32x32_fp_nuq/;
+}
+
+# EXT_INTRA predictor functions
+if (vpx_config("CONFIG_EXT_INTRA") eq "yes") {
+ add_proto qw/void vp10_dc_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+ specialize qw/vp10_dc_filter_predictor sse4_1/;
+ add_proto qw/void vp10_v_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+ specialize qw/vp10_v_filter_predictor sse4_1/;
+ add_proto qw/void vp10_h_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+ specialize qw/vp10_h_filter_predictor sse4_1/;
+ add_proto qw/void vp10_d45_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+ specialize qw/vp10_d45_filter_predictor sse4_1/;
+ add_proto qw/void vp10_d135_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+ specialize qw/vp10_d135_filter_predictor sse4_1/;
+ add_proto qw/void vp10_d117_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+ specialize qw/vp10_d117_filter_predictor sse4_1/;
+ add_proto qw/void vp10_d153_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+ specialize qw/vp10_d153_filter_predictor sse4_1/;
+ add_proto qw/void vp10_d207_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+ specialize qw/vp10_d207_filter_predictor sse4_1/;
+ add_proto qw/void vp10_d63_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+ specialize qw/vp10_d63_filter_predictor sse4_1/;
+ add_proto qw/void vp10_tm_filter_predictor/, "uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left";
+ specialize qw/vp10_tm_filter_predictor sse4_1/;
+ # High bitdepth functions
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vp10_highbd_dc_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vp10_highbd_dc_filter_predictor sse4_1/;
+ add_proto qw/void vp10_highbd_v_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vp10_highbd_v_filter_predictor sse4_1/;
+ add_proto qw/void vp10_highbd_h_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vp10_highbd_h_filter_predictor sse4_1/;
+ add_proto qw/void vp10_highbd_d45_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vp10_highbd_d45_filter_predictor sse4_1/;
+ add_proto qw/void vp10_highbd_d135_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vp10_highbd_d135_filter_predictor sse4_1/;
+ add_proto qw/void vp10_highbd_d117_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vp10_highbd_d117_filter_predictor sse4_1/;
+ add_proto qw/void vp10_highbd_d153_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vp10_highbd_d153_filter_predictor sse4_1/;
+ add_proto qw/void vp10_highbd_d207_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vp10_highbd_d207_filter_predictor sse4_1/;
+ add_proto qw/void vp10_highbd_d63_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vp10_highbd_d63_filter_predictor sse4_1/;
+ add_proto qw/void vp10_highbd_tm_filter_predictor/, "uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vp10_highbd_tm_filter_predictor sse4_1/;
+ }
+}
+
+# High bitdepth functions
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ #
+ # Sub Pixel Filters
+ #
+ add_proto qw/void vp10_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ specialize qw/vp10_highbd_convolve_copy/;
+
+ add_proto qw/void vp10_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ specialize qw/vp10_highbd_convolve_avg/;
+
+ add_proto qw/void vp10_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ specialize qw/vp10_highbd_convolve8/, "$sse2_x86_64";
+
+ add_proto qw/void vp10_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ specialize qw/vp10_highbd_convolve8_horiz/, "$sse2_x86_64";
+
+ add_proto qw/void vp10_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ specialize qw/vp10_highbd_convolve8_vert/, "$sse2_x86_64";
+
+ add_proto qw/void vp10_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ specialize qw/vp10_highbd_convolve8_avg/, "$sse2_x86_64";
+
+ add_proto qw/void vp10_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ specialize qw/vp10_highbd_convolve8_avg_horiz/, "$sse2_x86_64";
+
+ add_proto qw/void vp10_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ specialize qw/vp10_highbd_convolve8_avg_vert/, "$sse2_x86_64";
+
+ #
+ # dct
+ #
+ # Note: as optimized versions of these functions are added, we need to add a check to ensure
+ # that when CONFIG_EMULATE_HARDWARE is on, they default to the C versions only.
+ add_proto qw/void vp10_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+ specialize qw/vp10_highbd_iht4x4_16_add/;
+
+ add_proto qw/void vp10_highbd_iht4x8_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+ specialize qw/vp10_highbd_iht4x8_32_add/;
+
+ add_proto qw/void vp10_highbd_iht8x4_32_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+ specialize qw/vp10_highbd_iht8x4_32_add/;
+
+ add_proto qw/void vp10_highbd_iht8x16_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+ specialize qw/vp10_highbd_iht8x16_128_add/;
+
+ add_proto qw/void vp10_highbd_iht16x8_128_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+ specialize qw/vp10_highbd_iht16x8_128_add/;
+
+ add_proto qw/void vp10_highbd_iht16x32_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+ specialize qw/vp10_highbd_iht16x32_512_add/;
+
+ add_proto qw/void vp10_highbd_iht32x16_512_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+ specialize qw/vp10_highbd_iht32x16_512_add/;
+
+ add_proto qw/void vp10_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
+ specialize qw/vp10_highbd_iht8x8_64_add/;
+
+ add_proto qw/void vp10_highbd_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd";
+ specialize qw/vp10_highbd_iht16x16_256_add/;
+}
+
+#
+# Encoder functions below this point.
+#
+if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
+
+# ENCODEMB INVOKE
+
+if (vpx_config("CONFIG_AOM_QM") eq "yes") {
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ # High bit-depth build: the transform coefficients are held in 32-bit values,
+ # so no optimized (assembler) vp10_block_error is specialized in this branch.
+ add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+ specialize qw/vp10_block_error/;
+
+ add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+
+ add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+
+ add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+ specialize qw/vp10_fdct8x8_quant/;
+ } else {
+ add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+ specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";
+
+ add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
+ specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";
+
+ add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+
+ add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+
+ add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
+ }
+} else {
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ # the transform coefficients are held in 32-bit
+ # values, so the assembler code for vp10_block_error can no longer be used.
+ add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+ specialize qw/vp10_block_error/;
+
+ add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_quantize_fp/;
+
+ add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_quantize_fp_32x32/;
+
+ add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_fdct8x8_quant/;
+ } else {
+ add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+ specialize qw/vp10_block_error sse2 avx2 msa/;
+
+ add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
+ specialize qw/vp10_block_error_fp neon sse2/;
+
+ add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_quantize_fp neon sse2/, "$ssse3_x86_64";
+
+ add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_quantize_fp_32x32/, "$ssse3_x86_64";
+
+ add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
+ }
+
+}
+
+# fdct functions
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht4x4 sse2/;
+
+ add_proto qw/void vp10_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht4x8/;
+
+ add_proto qw/void vp10_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht8x4/;
+
+ add_proto qw/void vp10_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht8x16/;
+
+ add_proto qw/void vp10_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht16x8/;
+
+ add_proto qw/void vp10_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht16x32/;
+
+ add_proto qw/void vp10_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht32x16/;
+
+ add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht8x8 sse2/;
+
+ add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht16x16 sse2/;
+
+ add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht32x32/;
+
+ add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fwht4x4/;
+} else {
+ add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht4x4 sse2/;
+
+ add_proto qw/void vp10_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht4x8/;
+
+ add_proto qw/void vp10_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht8x4/;
+
+ add_proto qw/void vp10_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht8x16/;
+
+ add_proto qw/void vp10_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht16x8/;
+
+ add_proto qw/void vp10_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht16x32/;
+
+ add_proto qw/void vp10_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht32x16/;
+
+ add_proto qw/void vp10_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht8x8 sse2/;
+
+ add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht16x16 sse2/;
+
+ if (vpx_config("CONFIG_EXT_TX") ne "yes") {
+ specialize qw/vp10_fht4x4 msa/;
+ specialize qw/vp10_fht8x8 msa/;
+ specialize qw/vp10_fht16x16 msa/;
+ }
+
+ add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht32x32/;
+
+ add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_fwht4x4/;
+}
+
+add_proto qw/void vp10_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type";
+specialize qw/vp10_fwd_idtx/;  # no architecture-specific versions listed
+
+# Inverse transform
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ # Note: as optimized versions of these functions are added, we need to add a check to ensure
+ # that when CONFIG_EMULATE_HARDWARE is on, they default to the C versions only.
+ add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct4x4_1_add/;
+
+ add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct4x4_16_add/;
+
+ add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct8x8_1_add/;
+
+ add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct8x8_64_add/;
+
+ add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct8x8_12_add/;
+
+ add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct16x16_1_add/;
+
+ add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct16x16_256_add/;
+
+ add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct16x16_10_add/;
+
+ add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct32x32_1024_add/;
+
+ add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct32x32_34_add/;
+
+ add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct32x32_1_add/;
+
+ add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_iwht4x4_1_add/;
+
+ add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_iwht4x4_16_add/;
+
+ add_proto qw/void vp10_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct4x4_1_add/;
+
+ add_proto qw/void vp10_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct8x8_1_add/;
+
+ add_proto qw/void vp10_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct16x16_1_add/;
+
+ add_proto qw/void vp10_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct32x32_1024_add/;
+
+ add_proto qw/void vp10_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct32x32_34_add/;
+
+ add_proto qw/void vp10_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct32x32_1_add/;
+
+ add_proto qw/void vp10_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_iwht4x4_1_add/;
+
+ add_proto qw/void vp10_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_iwht4x4_16_add/;
+
+ # Force C versions when CONFIG_EMULATE_HARDWARE is enabled ("yes").
+ if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+ add_proto qw/void vp10_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct4x4_16_add/;
+
+ add_proto qw/void vp10_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct8x8_64_add/;
+
+ add_proto qw/void vp10_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct8x8_10_add/;
+
+ add_proto qw/void vp10_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct16x16_256_add/;
+
+ add_proto qw/void vp10_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct16x16_10_add/;
+ } else {
+ add_proto qw/void vp10_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct4x4_16_add sse2/;
+
+ add_proto qw/void vp10_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct8x8_64_add sse2/;
+
+ add_proto qw/void vp10_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct8x8_10_add sse2/;
+
+ add_proto qw/void vp10_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct16x16_256_add sse2/;
+
+ add_proto qw/void vp10_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vp10_highbd_idct16x16_10_add sse2/;
+ } # CONFIG_EMULATE_HARDWARE
+} else {
+ # Force C versions when CONFIG_EMULATE_HARDWARE is enabled ("yes").
+ if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+ add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct4x4_1_add/;
+
+ add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct4x4_16_add/;
+
+ add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct8x8_1_add/;
+
+ add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct8x8_64_add/;
+
+ add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct8x8_12_add/;
+
+ add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct16x16_1_add/;
+
+ add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct16x16_256_add/;
+
+ add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct16x16_10_add/;
+
+ add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct32x32_1024_add/;
+
+ add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct32x32_34_add/;
+
+ add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct32x32_1_add/;
+
+ add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_iwht4x4_1_add/;
+
+ add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_iwht4x4_16_add/;
+ } else {
+ add_proto qw/void vp10_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct4x4_1_add sse2/;
+
+ add_proto qw/void vp10_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct4x4_16_add sse2/;
+
+ add_proto qw/void vp10_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct8x8_1_add sse2/;
+
+ add_proto qw/void vp10_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct8x8_64_add sse2/;
+
+ add_proto qw/void vp10_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct8x8_12_add sse2/;
+
+ add_proto qw/void vp10_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct16x16_1_add sse2/;
+
+ add_proto qw/void vp10_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct16x16_256_add sse2/;
+
+ add_proto qw/void vp10_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct16x16_10_add sse2/;
+
+ add_proto qw/void vp10_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct32x32_1024_add sse2/;
+
+ add_proto qw/void vp10_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct32x32_34_add sse2/;
+
+ add_proto qw/void vp10_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_idct32x32_1_add sse2/;
+
+ add_proto qw/void vp10_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_iwht4x4_1_add/;
+
+ add_proto qw/void vp10_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vp10_iwht4x4_16_add/;
+ } # CONFIG_EMULATE_HARDWARE
+} # CONFIG_VP9_HIGHBITDEPTH
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ # Forward 2D transforms (int16_t input, int32_t output); every size lists sse4_1.
+ add_proto qw/void vp10_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
+ specialize qw/vp10_fwd_txfm2d_4x4 sse4_1/;
+ add_proto qw/void vp10_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
+ specialize qw/vp10_fwd_txfm2d_8x8 sse4_1/;
+ add_proto qw/void vp10_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
+ specialize qw/vp10_fwd_txfm2d_16x16 sse4_1/;
+ add_proto qw/void vp10_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
+ specialize qw/vp10_fwd_txfm2d_32x32 sse4_1/;
+ add_proto qw/void vp10_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
+ specialize qw/vp10_fwd_txfm2d_64x64 sse4_1/;
+
+ # Inverse 2D transforms (int32_t input, uint16_t output); sse4_1 is listed for 4x4, 8x8 and 16x16 only.
+ add_proto qw/void vp10_inv_txfm2d_add_4x4/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
+ specialize qw/vp10_inv_txfm2d_add_4x4 sse4_1/;
+ add_proto qw/void vp10_inv_txfm2d_add_8x8/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
+ specialize qw/vp10_inv_txfm2d_add_8x8 sse4_1/;
+ add_proto qw/void vp10_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
+ specialize qw/vp10_inv_txfm2d_add_16x16 sse4_1/;
+ add_proto qw/void vp10_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
+ specialize qw/vp10_inv_txfm2d_add_32x32/;
+ add_proto qw/void vp10_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
+ specialize qw/vp10_inv_txfm2d_add_64x64/;
+}
+
+#
+# Motion search
+#
+add_proto qw/int vp10_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vpx_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
+specialize qw/vp10_full_search_sad sse3 sse4_1/;
+$vp10_full_search_sad_sse3=vp10_full_search_sadx3;  # NOTE(review): presumably renames the sse3 implementation symbol - confirm against the rtcd script
+$vp10_full_search_sad_sse4_1=vp10_full_search_sadx8;  # NOTE(review): presumably renames the sse4_1 implementation symbol - confirm against the rtcd script
+
+add_proto qw/int vp10_diamond_search_sad/, "struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vpx_variance_vtable *fn_ptr, const struct mv *center_mv";
+specialize qw/vp10_diamond_search_sad/;
+
+add_proto qw/int vp10_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vpx_variance_vtable *fn_ptr, const struct mv *center_mv";
+specialize qw/vp10_full_range_search/;
+
+add_proto qw/void vp10_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+specialize qw/vp10_temporal_filter_apply sse2 msa/;
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+
+ # ENCODEMB INVOKE
+ if (vpx_config("CONFIG_NEW_QUANT") eq "yes") {
+ add_proto qw/void highbd_quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ specialize qw/highbd_quantize_nuq/;
+
+ add_proto qw/void highbd_quantize_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ specialize qw/highbd_quantize_fp_nuq/;
+
+ add_proto qw/void highbd_quantize_32x32_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ specialize qw/highbd_quantize_32x32_nuq/;
+
+ add_proto qw/void highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
+ specialize qw/highbd_quantize_32x32_fp_nuq/;
+ }
+
+ add_proto qw/int64_t vp10_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
+ specialize qw/vp10_highbd_block_error sse2/;
+
+ if (vpx_config("CONFIG_AOM_QM") eq "yes") {
+ add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
+
+ add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
+ } else {
+ add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
+ specialize qw/vp10_highbd_quantize_fp sse4_1/;
+
+ add_proto qw/void vp10_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
+ specialize qw/vp10_highbd_quantize_b/;
+ }
+
+ # fdct functions
+ add_proto qw/void vp10_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht4x4 sse4_1/;
+
+ add_proto qw/void vp10_highbd_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht4x8/;
+
+ add_proto qw/void vp10_highbd_fht8x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht8x4/;
+
+ add_proto qw/void vp10_highbd_fht8x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht8x16/;
+
+ add_proto qw/void vp10_highbd_fht16x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht16x8/;
+
+ add_proto qw/void vp10_highbd_fht16x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht16x32/;
+
+ add_proto qw/void vp10_highbd_fht32x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht32x16/;
+
+ add_proto qw/void vp10_highbd_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht8x8/;
+
+ add_proto qw/void vp10_highbd_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht16x16/;
+
+ add_proto qw/void vp10_highbd_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht32x32/;
+
+ add_proto qw/void vp10_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
+ specialize qw/vp10_highbd_fwht4x4/;
+
+ add_proto qw/void vp10_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+ specialize qw/vp10_highbd_temporal_filter_apply/;
+
+}
+# End of high bit-depth encoder functions (CONFIG_VP9_HIGHBITDEPTH).
+
+if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
+ add_proto qw/uint64_t vp10_wedge_sse_from_residuals/, "const int16_t *r1, const int16_t *d, const uint8_t *m, int N";
+ specialize qw/vp10_wedge_sse_from_residuals sse2/;
+ add_proto qw/int vp10_wedge_sign_from_residuals/, "const int16_t *ds, const uint8_t *m, int N, int64_t limit";
+ specialize qw/vp10_wedge_sign_from_residuals sse2/;
+ add_proto qw/void vp10_wedge_compute_delta_squares/, "int16_t *d, const int16_t *a, const int16_t *b, int N";
+ specialize qw/vp10_wedge_compute_delta_squares sse2/;
+}
+
+}
+# end encoder functions
+1;
diff --git a/av1/common/vp10_txfm.h b/av1/common/vp10_txfm.h
new file mode 100644
index 0000000..bfeb3ea
--- /dev/null
+++ b/av1/common/vp10_txfm.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP10_TXFM_H_
+#define VP10_TXFM_H_
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "av1/common/enums.h"
+#include "aom/vpx_integer.h"
+#include "aom_dsp/vpx_dsp_common.h"
+
+// Smallest and largest cosine precisions, in bits, covered by cospi_arr:
+// row i of that table is scaled by 1 << (cos_bit_min + i).
+static const int cos_bit_min = 10;
+static const int cos_bit_max = 16;
+
+// Fixed-point cosine lookup table.
+// cospi_arr[i][j] = (int)round(cos(M_PI*j/128) * (1<<(cos_bit_min+i)));
+// Row i holds the values at (cos_bit_min + i) bits of precision, so the seven
+// rows span 10..16 bits; column j covers the angle j * pi / 128 for j = 0..63.
+static const int32_t cospi_arr[7][64] = {
+ { 1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009, 1004, 999, 993, 987, 980,
+ 972, 964, 955, 946, 936, 926, 915, 903, 891, 878, 865, 851, 837, 822, 807,
+ 792, 775, 759, 742, 724, 706, 688, 669, 650, 630, 610, 590, 569, 548, 526,
+ 505, 483, 460, 438, 415, 392, 369, 345, 321, 297, 273, 249, 224, 200, 175,
+ 150, 125, 100, 75, 50, 25 },
+ { 2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018, 2009, 1998, 1987, 1974,
+ 1960, 1945, 1928, 1911, 1892, 1872, 1851, 1829, 1806, 1782, 1757, 1730,
+ 1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483, 1448, 1412, 1375, 1338,
+ 1299, 1260, 1220, 1179, 1138, 1096, 1053, 1009, 965, 921, 876, 830, 784,
+ 737, 690, 642, 595, 546, 498, 449, 400, 350, 301, 251, 201, 151, 100, 50 },
+ { 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, 4017, 3996, 3973, 3948,
+ 3920, 3889, 3857, 3822, 3784, 3745, 3703, 3659, 3612, 3564, 3513, 3461,
+ 3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, 2896, 2824, 2751, 2675,
+ 2598, 2520, 2440, 2359, 2276, 2191, 2106, 2019, 1931, 1842, 1751, 1660,
+ 1567, 1474, 1380, 1285, 1189, 1092, 995, 897, 799, 700, 601, 501, 401, 301,
+ 201, 101 },
+ { 8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071, 8035, 7993, 7946, 7895,
+ 7839, 7779, 7713, 7643, 7568, 7489, 7405, 7317, 7225, 7128, 7027, 6921,
+ 6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933, 5793, 5649, 5501, 5351,
+ 5197, 5040, 4880, 4717, 4551, 4383, 4212, 4038, 3862, 3683, 3503, 3320,
+ 3135, 2948, 2760, 2570, 2378, 2185, 1990, 1795, 1598, 1401, 1202, 1003, 803,
+ 603, 402, 201 },
+ { 16384, 16379, 16364, 16340, 16305, 16261, 16207, 16143, 16069, 15986, 15893,
+ 15791, 15679, 15557, 15426, 15286, 15137, 14978, 14811, 14635, 14449, 14256,
+ 14053, 13842, 13623, 13395, 13160, 12916, 12665, 12406, 12140, 11866, 11585,
+ 11297, 11003, 10702, 10394, 10080, 9760, 9434, 9102, 8765, 8423, 8076, 7723,
+ 7366, 7005, 6639, 6270, 5897, 5520, 5139, 4756, 4370, 3981, 3590, 3196,
+ 2801, 2404, 2006, 1606, 1205, 804, 402 },
+ { 32768, 32758, 32729, 32679, 32610, 32522, 32413, 32286, 32138, 31972, 31786,
+ 31581, 31357, 31114, 30853, 30572, 30274, 29957, 29622, 29269, 28899, 28511,
+ 28106, 27684, 27246, 26791, 26320, 25833, 25330, 24812, 24279, 23732, 23170,
+ 22595, 22006, 21403, 20788, 20160, 19520, 18868, 18205, 17531, 16846, 16151,
+ 15447, 14733, 14010, 13279, 12540, 11793, 11039, 10279, 9512, 8740, 7962,
+ 7180, 6393, 5602, 4808, 4011, 3212, 2411, 1608, 804 },
+ { 65536, 65516, 65457, 65358, 65220, 65043, 64827, 64571, 64277, 63944, 63572,
+ 63162, 62714, 62228, 61705, 61145, 60547, 59914, 59244, 58538, 57798, 57022,
+ 56212, 55368, 54491, 53581, 52639, 51665, 50660, 49624, 48559, 47464, 46341,
+ 45190, 44011, 42806, 41576, 40320, 39040, 37736, 36410, 35062, 33692, 32303,
+ 30893, 29466, 28020, 26558, 25080, 23586, 22078, 20557, 19024, 17479, 15924,
+ 14359, 12785, 11204, 9616, 8022, 6424, 4821, 3216, 1608 }
+};
+
+// Divides `value` by 2^bit with rounding: adds half of the divisor, then
+// shifts right by `bit`.
+//
+// `bit` must be >= 1, because the rounding offset is 1 << (bit - 1) and
+// bit == 0 would shift by a negative count. The sum is formed in 64 bits so
+// that adding the offset cannot overflow for inputs close to INT32_MAX; the
+// shifted result always fits back into 32 bits.
+static INLINE int32_t round_shift(int32_t value, int bit) {
+  assert(bit >= 1);
+  return (int32_t)(((int64_t)value + ((int64_t)1 << (bit - 1))) >> bit);
+}
+
+// Rescales `size` entries of `arr` in place by 2^(-bit).
+//   bit  > 0: each entry is divided by 2^bit with rounding (round_shift).
+//   bit == 0: the array is left untouched.
+//   bit  < 0: each entry is multiplied by 2^(-bit).
+// The up-scaling path multiplies instead of shifting, because left-shifting a
+// negative value is undefined behaviour in C.
+static INLINE void round_shift_array(int32_t *arr, int size, int bit) {
+  int i;
+  if (bit == 0) return;
+  if (bit > 0) {
+    for (i = 0; i < size; i++) {
+      arr[i] = round_shift(arr[i], bit);
+    }
+  } else {
+    const int32_t scale = 1 << (-bit);
+    for (i = 0; i < size; i++) {
+      arr[i] = arr[i] * scale;
+    }
+  }
+}
+
+// Half butterfly: returns round_shift(w0 * in0 + w1 * in1, bit).
+// The weighted sum is formed in 32 bits. When
+// CONFIG_COEFFICIENT_RANGE_CHECKING is enabled the same sum is recomputed in
+// 64 bits, and a mismatch is reported and asserted on.
+static INLINE int32_t half_btf(int32_t w0, int32_t in0, int32_t w1, int32_t in1,
+                               int bit) {
+  const int32_t sum32 = w0 * in0 + w1 * in1;
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+  const int64_t sum64 = (int64_t)w0 * (int64_t)in0 + (int64_t)w1 * (int64_t)in1;
+  if (sum64 != sum32) {
+    printf("%s %d overflow result_32: %d result_64: %" PRId64
+           " w0: %d in0: %d w1: %d in1: "
+           "%d\n",
+           __FILE__, __LINE__, sum32, sum64, w0, in0, w1, in1);
+    assert(0 && "half_btf overflow");
+  }
+#endif
+  return round_shift(sum32, bit);
+}
+
+// Returns the zero-based position of the highest set bit of `x`, found by
+// counting how many right shifts it takes to reach zero. Returns -1 for
+// x == 0.
+// NOTE(review): presumably only called with x >= 0 — confirm against callers.
+static INLINE int get_max_bit(int x) {
+  int top = -1;
+  for (; x; x = x >> 1) {
+    ++top;
+  }
+  return top;
+}
+
+// TODO(angiebird): implement SSE
+// Clamps, in place, every sample of the block_size x block_size square that
+// starts at `block` (consecutive rows are `stride` elements apart) by passing
+// it through clamp() with the bounds `low` and `high`.
+static INLINE void clamp_block(int16_t *block, int block_size, int stride,
+                               int low, int high) {
+  int r, c;
+  for (r = 0; r < block_size; ++r) {
+    const int row_start = r * stride;
+    for (c = 0; c < block_size; ++c) {
+      const int pos = row_start + c;
+      block[pos] = clamp(block[pos], low, high);
+    }
+  }
+}
+
+// Pointer to a transform kernel that reads `input` and writes `output`.
+// `cos_bit` and `stage_range` are int8_t arrays; presumably they hold one
+// entry per stage of the kernel — confirm against the kernel implementations.
+typedef void (*TxfmFunc)(const int32_t *input, int32_t *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+// Identifiers for the transform kernels referenced by TXFM_2D_CFG. Going by
+// the names, each value combines a transform family (DCT or ADST) with a
+// size (4..64).
+typedef enum TXFM_TYPE {
+ TXFM_TYPE_DCT4,
+ TXFM_TYPE_DCT8,
+ TXFM_TYPE_DCT16,
+ TXFM_TYPE_DCT32,
+ TXFM_TYPE_DCT64,
+ TXFM_TYPE_ADST4,
+ TXFM_TYPE_ADST8,
+ TXFM_TYPE_ADST16,
+ TXFM_TYPE_ADST32,
+} TXFM_TYPE;
+
+// Immutable description of a 2-D transform. The column and row passes each
+// have their own stage count, stage-range table, cosine-bit table and kernel
+// type.
+// NOTE(review): the pointer members presumably reference tables with one
+// entry per stage (and `shift` one entry per rounding step) — confirm against
+// the config definitions.
+typedef struct TXFM_2D_CFG {
+ const int txfm_size;
+ const int stage_num_col;
+ const int stage_num_row;
+
+ const int8_t *shift;
+ const int8_t *stage_range_col;
+ const int8_t *stage_range_row;
+ const int8_t *cos_bit_col;
+ const int8_t *cos_bit_row;
+ const TXFM_TYPE txfm_type_col;
+ const TXFM_TYPE txfm_type_row;
+} TXFM_2D_CFG;
+
+// A 2-D transform configuration plus the two flip flags that set_flip_cfg()
+// derives from the transform type.
+typedef struct TXFM_2D_FLIP_CFG {
+ int ud_flip; // flip upside down
+ int lr_flip; // flip left to right
+ const TXFM_2D_CFG *cfg;
+} TXFM_2D_FLIP_CFG;
+
+// Fills cfg->ud_flip and cfg->lr_flip from `tx_type`; cfg->cfg is not touched.
+// The four non-flipped types clear both flags. With CONFIG_EXT_TX, a FLIPADST
+// in the first half of the type name sets ud_flip and one in the second half
+// sets lr_flip. Any other value clears both flags and trips assert(0).
+static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
+  int flip_ud = 0;
+  int flip_lr = 0;
+  switch (tx_type) {
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST: break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+    case FLIPADST_ADST: flip_ud = 1; break;
+    case DCT_FLIPADST:
+    case ADST_FLIPADST: flip_lr = 1; break;
+    case FLIPADST_FLIPADST:
+      flip_ud = 1;
+      flip_lr = 1;
+      break;
+#endif  // CONFIG_EXT_TX
+    default:
+      // Unknown type: store the cleared flags first, then trap in debug builds.
+      cfg->ud_flip = 0;
+      cfg->lr_flip = 0;
+      assert(0);
+      return;
+  }
+  cfg->ud_flip = flip_ud;
+  cfg->lr_flip = flip_lr;
+}
+
+// Forward-transform configuration lookups, declared with C linkage so the
+// header can also be included from C++ sources. Both return a
+// TXFM_2D_FLIP_CFG by value; the second takes no tx_size argument.
+#ifdef __cplusplus
+extern "C" {
+#endif
+TXFM_2D_FLIP_CFG vp10_get_fwd_txfm_cfg(int tx_type, int tx_size);
+TXFM_2D_FLIP_CFG vp10_get_fwd_txfm_64x64_cfg(int tx_type);
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif // VP10_TXFM_H_
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
new file mode 100644
index 0000000..5f76453
--- /dev/null
+++ b/av1/common/warped_motion.c
@@ -0,0 +1,642 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be
+ * found in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+#include <assert.h>
+
+#include "av1/common/warped_motion.h"
+
+// Maps a transformation type to the matching point-projection routine.
+// An unrecognised type trips assert(0) and yields NULL.
+static ProjectPointsType get_project_points_type(TransformationType type) {
+  ProjectPointsType project = NULL;
+  switch (type) {
+    case HOMOGRAPHY: project = projectPointsHomography; break;
+    case AFFINE: project = projectPointsAffine; break;
+    case ROTZOOM: project = projectPointsRotZoom; break;
+    case TRANSLATION: project = projectPointsTranslation; break;
+    default: assert(0); break;
+  }
+  return project;
+}
+
+// Projects `n` points through a pure translation model.
+//   mat[0], mat[1]: x / y offsets at WARPEDMODEL_PREC_BITS precision.
+//   points: input (x, y) pairs, `stride_points` ints apart.
+//   proj:   output (x, y) pairs, `stride_proj` ints apart, rounded down by
+//           WARPEDDIFF_PREC_BITS.
+// When an axis is subsampled, its coordinate is shifted up by one extra bit
+// before the offset is added, and one extra bit is rounded away afterwards.
+void projectPointsTranslation(int *mat, int *points, int *proj, const int n,
+                              const int stride_points, const int stride_proj,
+                              const int subsampling_x,
+                              const int subsampling_y) {
+  const int extra_x = subsampling_x ? 1 : 0;
+  const int extra_y = subsampling_y ? 1 : 0;
+  int k;
+  for (k = 0; k < n; ++k) {
+    const int x = points[0];
+    const int y = points[1];
+    const int shifted_x = (x << (WARPEDMODEL_PREC_BITS + extra_x)) + mat[0];
+    const int shifted_y = (y << (WARPEDMODEL_PREC_BITS + extra_y)) + mat[1];
+    proj[0] =
+        ROUND_POWER_OF_TWO_SIGNED(shifted_x, WARPEDDIFF_PREC_BITS + extra_x);
+    proj[1] =
+        ROUND_POWER_OF_TWO_SIGNED(shifted_y, WARPEDDIFF_PREC_BITS + extra_y);
+    points += stride_points;
+    proj += stride_proj;
+  }
+}
+
+// Projects `n` points through a rotation-zoom model with four parameters:
+//   x' = mat[2] * x + mat[3] * y + mat[0]
+//   y' = -mat[3] * x + mat[2] * y + mat[1]
+// Results are rounded down by WARPEDDIFF_PREC_BITS. Input pairs are
+// `stride_points` ints apart and output pairs `stride_proj` ints apart.
+// For a subsampled axis the linear terms are doubled, a correction of
+// (sum of that row's two coefficients - (1 << WARPEDMODEL_PREC_BITS)) / 2 is
+// added, and one extra bit is rounded away.
+void projectPointsRotZoom(int *mat, int *points, int *proj, const int n,
+ const int stride_points, const int stride_proj,
+ const int subsampling_x, const int subsampling_y) {
+ int i;
+ for (i = 0; i < n; ++i) {
+ const int x = *(points++), y = *(points++);
+ if (subsampling_x)
+ *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
+ mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
+ (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
+ WARPEDDIFF_PREC_BITS + 1);
+ else
+ *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
+ WARPEDDIFF_PREC_BITS);
+ if (subsampling_y)
+ *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
+ -mat[3] * 2 * x + mat[2] * 2 * y + mat[1] +
+ (-mat[3] + mat[2] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
+ WARPEDDIFF_PREC_BITS + 1);
+ else
+ *(proj++) = ROUND_POWER_OF_TWO_SIGNED(-mat[3] * x + mat[2] * y + mat[1],
+ WARPEDDIFF_PREC_BITS);
+ // Two ints were consumed / produced above; skip the rest of each stride.
+ points += stride_points - 2;
+ proj += stride_proj - 2;
+ }
+}
+
+// Projects `n` points through an affine model with six parameters:
+//   x' = mat[2] * x + mat[3] * y + mat[0]
+//   y' = mat[4] * x + mat[5] * y + mat[1]
+// Results are rounded down by WARPEDDIFF_PREC_BITS. Input pairs are
+// `stride_points` ints apart and output pairs `stride_proj` ints apart.
+// For a subsampled axis the linear terms are doubled, a correction of
+// (sum of that row's two coefficients - (1 << WARPEDMODEL_PREC_BITS)) / 2 is
+// added, and one extra bit is rounded away.
+void projectPointsAffine(int *mat, int *points, int *proj, const int n,
+ const int stride_points, const int stride_proj,
+ const int subsampling_x, const int subsampling_y) {
+ int i;
+ for (i = 0; i < n; ++i) {
+ const int x = *(points++), y = *(points++);
+ if (subsampling_x)
+ *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
+ mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
+ (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
+ WARPEDDIFF_PREC_BITS + 1);
+ else
+ *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
+ WARPEDDIFF_PREC_BITS);
+ if (subsampling_y)
+ *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
+ mat[4] * 2 * x + mat[5] * 2 * y + mat[1] +
+ (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
+ WARPEDDIFF_PREC_BITS + 1);
+ else
+ *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[4] * x + mat[5] * y + mat[1],
+ WARPEDDIFF_PREC_BITS);
+ // Two ints were consumed / produced above; skip the rest of each stride.
+ points += stride_points - 2;
+ proj += stride_proj - 2;
+ }
+}
+
+// Projects `n` points through a homography with eight parameters.
+// Unlike the other projectPoints* routines, the matrix is read row by row:
+// mat[0..2] form the x numerator, mat[3..5] the y numerator and mat[6..7]
+// the denominator, whose constant term is fixed at
+// 1 << WARPEDMODEL_ROW3HOMO_PREC_BITS. All arithmetic is done in 64 bits.
+// Input pairs are `stride_points` ints apart, output pairs `stride_proj`.
+void projectPointsHomography(int *mat, int *points, int *proj, const int n,
+ const int stride_points, const int stride_proj,
+ const int subsampling_x, const int subsampling_y) {
+ int i;
+ int64_t x, y, Z;
+ int64_t xp, yp;
+ for (i = 0; i < n; ++i) {
+ x = *(points++), y = *(points++);
+ // Work in doubled coordinates; a subsampled axis is doubled again and
+ // offset by one.
+ x = (subsampling_x ? 4 * x + 1 : 2 * x);
+ y = (subsampling_y ? 4 * y + 1 : 2 * y);
+
+ Z = (mat[6] * x + mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
+ xp = (mat[0] * x + mat[1] * y + 2 * mat[2])
+ << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
+ WARPEDMODEL_PREC_BITS);
+ yp = (mat[3] * x + mat[4] * y + 2 * mat[5])
+ << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
+ WARPEDMODEL_PREC_BITS);
+
+ // Divide by Z, adding or subtracting Z / 2 first according to the
+ // numerator's sign.
+ // NOTE(review): this assumes Z > 0; Z == 0 would divide by zero — confirm
+ // the model constraints guarantee it.
+ xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
+ yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;
+
+ if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
+ if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
+ *(proj++) = xp;
+ *(proj++) = yp;
+
+ // Two ints were consumed / produced above; skip the rest of each stride.
+ points += stride_points - 2;
+ proj += stride_proj - 2;
+ }
+}
+
+// 4-tap interpolation kernels, one row per sub-pixel phase
+// (WARPEDPIXEL_PREC_SHIFTS rows). Within this file the table is referenced
+// only by the commented-out do_4tap_filter().
+// NOTE(review): spot-checked rows sum to 128 (1 << WARPEDPIXEL_FILTER_BITS);
+// not verified for every row.
+static const int16_t filter_4tap[WARPEDPIXEL_PREC_SHIFTS][4] = {
+ { 0, 128, 0, 0 }, { -1, 127, 2, 0 }, { -2, 127, 4, -1 },
+ { -3, 126, 6, -1 }, { -3, 125, 8, -2 }, { -4, 124, 11, -3 },
+ { -5, 123, 13, -3 }, { -5, 121, 15, -3 }, { -6, 120, 18, -4 },
+ { -7, 119, 20, -4 }, { -7, 118, 22, -5 }, { -8, 116, 25, -5 },
+ { -8, 115, 27, -6 }, { -9, 113, 30, -6 }, { -9, 112, 32, -7 },
+ { -9, 110, 34, -7 }, { -10, 108, 37, -7 }, { -10, 107, 39, -8 },
+ { -10, 105, 41, -8 }, { -11, 103, 44, -8 }, { -11, 101, 47, -9 },
+ { -11, 99, 49, -9 }, { -11, 97, 51, -9 }, { -11, 95, 54, -10 },
+ { -11, 93, 56, -10 }, { -12, 91, 59, -10 }, { -12, 89, 61, -10 },
+ { -12, 87, 64, -11 }, { -12, 85, 66, -11 }, { -12, 82, 69, -11 },
+ { -12, 80, 71, -11 }, { -12, 78, 73, -11 }, { -11, 75, 75, -11 },
+ { -11, 73, 78, -12 }, { -11, 71, 80, -12 }, { -11, 69, 82, -12 },
+ { -11, 66, 85, -12 }, { -11, 64, 87, -12 }, { -10, 61, 89, -12 },
+ { -10, 59, 91, -12 }, { -10, 56, 93, -11 }, { -10, 54, 95, -11 },
+ { -9, 51, 97, -11 }, { -9, 49, 99, -11 }, { -9, 47, 101, -11 },
+ { -8, 44, 103, -11 }, { -8, 41, 105, -10 }, { -8, 39, 107, -10 },
+ { -7, 37, 108, -10 }, { -7, 34, 110, -9 }, { -7, 32, 112, -9 },
+ { -6, 30, 113, -9 }, { -6, 27, 115, -8 }, { -5, 25, 116, -8 },
+ { -5, 22, 118, -7 }, { -4, 20, 119, -7 }, { -4, 18, 120, -6 },
+ { -3, 15, 121, -5 }, { -3, 13, 123, -5 }, { -3, 11, 124, -4 },
+ { -2, 8, 125, -3 }, { -1, 6, 126, -3 }, { -1, 4, 127, -2 },
+ { 0, 2, 127, -1 },
+};
+
+// Interpolation kernels used by do_ntap_filter(): one row per sub-pixel phase
+// (WARPEDPIXEL_PREC_SHIFTS rows), WARPEDPIXEL_FILTER_TAPS coefficients each.
+// Row 0 is the identity kernel, with 128 on the third tap.
+static const int16_t
+ filter_ntap[WARPEDPIXEL_PREC_SHIFTS][WARPEDPIXEL_FILTER_TAPS] = {
+ { 0, 0, 128, 0, 0, 0 }, { 0, -1, 128, 2, -1, 0 },
+ { 1, -3, 127, 4, -1, 0 }, { 1, -4, 126, 6, -2, 1 },
+ { 1, -5, 126, 8, -3, 1 }, { 1, -6, 125, 11, -4, 1 },
+ { 1, -7, 124, 13, -4, 1 }, { 2, -8, 123, 15, -5, 1 },
+ { 2, -9, 122, 18, -6, 1 }, { 2, -10, 121, 20, -6, 1 },
+ { 2, -11, 120, 22, -7, 2 }, { 2, -12, 119, 25, -8, 2 },
+ { 3, -13, 117, 27, -8, 2 }, { 3, -13, 116, 29, -9, 2 },
+ { 3, -14, 114, 32, -10, 3 }, { 3, -15, 113, 35, -10, 2 },
+ { 3, -15, 111, 37, -11, 3 }, { 3, -16, 109, 40, -11, 3 },
+ { 3, -16, 108, 42, -12, 3 }, { 4, -17, 106, 45, -13, 3 },
+ { 4, -17, 104, 47, -13, 3 }, { 4, -17, 102, 50, -14, 3 },
+ { 4, -17, 100, 52, -14, 3 }, { 4, -18, 98, 55, -15, 4 },
+ { 4, -18, 96, 58, -15, 3 }, { 4, -18, 94, 60, -16, 4 },
+ { 4, -18, 91, 63, -16, 4 }, { 4, -18, 89, 65, -16, 4 },
+ { 4, -18, 87, 68, -17, 4 }, { 4, -18, 85, 70, -17, 4 },
+ { 4, -18, 82, 73, -17, 4 }, { 4, -18, 80, 75, -17, 4 },
+ { 4, -18, 78, 78, -18, 4 }, { 4, -17, 75, 80, -18, 4 },
+ { 4, -17, 73, 82, -18, 4 }, { 4, -17, 70, 85, -18, 4 },
+ { 4, -17, 68, 87, -18, 4 }, { 4, -16, 65, 89, -18, 4 },
+ { 4, -16, 63, 91, -18, 4 }, { 4, -16, 60, 94, -18, 4 },
+ { 3, -15, 58, 96, -18, 4 }, { 4, -15, 55, 98, -18, 4 },
+ { 3, -14, 52, 100, -17, 4 }, { 3, -14, 50, 102, -17, 4 },
+ { 3, -13, 47, 104, -17, 4 }, { 3, -13, 45, 106, -17, 4 },
+ { 3, -12, 42, 108, -16, 3 }, { 3, -11, 40, 109, -16, 3 },
+ { 3, -11, 37, 111, -15, 3 }, { 2, -10, 35, 113, -15, 3 },
+ { 3, -10, 32, 114, -14, 3 }, { 2, -9, 29, 116, -13, 3 },
+ { 2, -8, 27, 117, -13, 3 }, { 2, -8, 25, 119, -12, 2 },
+ { 2, -7, 22, 120, -11, 2 }, { 1, -6, 20, 121, -10, 2 },
+ { 1, -6, 18, 122, -9, 2 }, { 1, -5, 15, 123, -8, 2 },
+ { 1, -4, 13, 124, -7, 1 }, { 1, -4, 11, 125, -6, 1 },
+ { 1, -3, 8, 126, -5, 1 }, { 1, -2, 6, 126, -4, 1 },
+ { 0, -1, 4, 127, -3, 1 }, { 0, -1, 2, 128, -1, 0 },
+ };
+
+// Applies the filter_ntap kernel for sub-pixel phase `x` around `p`.
+// `p` points at the sample just left of the interpolated position; the kernel
+// reads WARPEDPIXEL_FILTER_TAPS samples starting
+// (WARPEDPIXEL_FILTER_TAPS / 2 - 1) entries before it. The returned sum is
+// not rounded or normalised.
+static int32_t do_ntap_filter(int32_t *p, int x) {
+  const int16_t *const coeffs = filter_ntap[x];
+  const int32_t *const window = p - (WARPEDPIXEL_FILTER_TAPS / 2 - 1);
+  int32_t acc = 0;
+  int t;
+  for (t = 0; t < WARPEDPIXEL_FILTER_TAPS; ++t) {
+    acc += window[t] * coeffs[t];
+  }
+  return acc;
+}
+
+// Cubic interpolation between p[0] and p[1], using p[-1] and p[2] as outer
+// support. `x` is the fractional position in units of
+// 1 / (1 << WARPEDPIXEL_PREC_BITS).
+//
+// The result is scaled by (1 << WARPEDPIXEL_FILTER_BITS) on every path. The
+// polynomial evaluates to exactly p[0] * 128 at x == 0 and p[1] * 128 at
+// x == (1 << WARPEDPIXEL_PREC_BITS), so the two shortcuts apply the same
+// scale; returning the raw sample there would make integer positions come
+// out 128 times too small after the callers' rounding shift.
+static int32_t do_cubic_filter(int32_t *p, int x) {
+  if (x == 0) {
+    return p[0] * (1 << WARPEDPIXEL_FILTER_BITS);
+  } else if (x == (1 << WARPEDPIXEL_PREC_BITS)) {
+    return p[1] * (1 << WARPEDPIXEL_FILTER_BITS);
+  } else {
+    // Promote to 64 bits before multiplying: x^3 times a combination of
+    // already-filtered samples can overflow 32-bit arithmetic.
+    const int64_t v1 = (int64_t)x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
+    const int64_t v2 =
+        (int64_t)x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
+    const int64_t v3 = (int64_t)x * (p[1] - p[-1]);
+    const int64_t v4 = 2 * (int64_t)p[0];
+    // Scale the terms by multiplying rather than shifting: they may be
+    // negative, and left-shifting a negative value is undefined.
+    const int64_t one = 1;
+    return (int32_t)ROUND_POWER_OF_TWO_SIGNED(
+        (v4 * (one << (3 * WARPEDPIXEL_PREC_BITS))) +
+            (v3 * (one << (2 * WARPEDPIXEL_PREC_BITS))) +
+            (v2 * (one << WARPEDPIXEL_PREC_BITS)) + v1,
+        3 * WARPEDPIXEL_PREC_BITS + 1 - WARPEDPIXEL_FILTER_BITS);
+  }
+}
+
+/*
+static int32_t do_linear_filter(int32_t *p, int x) {
+ int32_t sum = 0;
+ sum = p[0] * (WARPEDPIXEL_PREC_SHIFTS - x) + p[1] * x;
+ sum <<= (WARPEDPIXEL_FILTER_BITS - WARPEDPIXEL_PREC_BITS);
+ return sum;
+}
+
+static int32_t do_4tap_filter(int32_t *p, int x) {
+ int i;
+ int32_t sum = 0;
+ for (i = 0; i < 4; ++i) {
+ sum += p[i - 1] * filter_4tap[x][i];
+ }
+ return sum;
+}
+*/
+
+// Copies `taps` vertically adjacent samples from column `x` of `ref`,
+// starting at row `y_start`, into col[0..taps-1] (widened to int32_t).
+static INLINE void get_subcolumn(int taps, uint8_t *ref, int32_t *col,
+                                 int stride, int x, int y_start) {
+  int row;
+  for (row = 0; row < taps; ++row) {
+    const int src_row = y_start + row;
+    col[row] = ref[src_row * stride + x];
+  }
+}
+
+// Separable n-tap interpolation of `ref` at the sub-pixel position (x, y),
+// both given in units of 1 / (1 << WARPEDPIXEL_PREC_BITS).
+// Each of the WARPEDPIXEL_FILTER_TAPS neighbouring columns is first filtered
+// vertically with the y phase; those results are then filtered with the
+// x phase. The two filter gains are removed by one rounding shift and the
+// value is clipped to a pixel.
+static uint8_t bi_ntap_filter(uint8_t *ref, int x, int y, int stride) {
+  const int lead = WARPEDPIXEL_FILTER_TAPS / 2 - 1;
+  const int col = x >> WARPEDPIXEL_PREC_BITS;
+  const int row = y >> WARPEDPIXEL_PREC_BITS;
+  const int phase_x = x - (col << WARPEDPIXEL_PREC_BITS);
+  const int phase_y = y - (row << WARPEDPIXEL_PREC_BITS);
+  int32_t filtered_cols[WARPEDPIXEL_FILTER_TAPS];
+  int32_t result;
+  int t;
+  for (t = 0; t < WARPEDPIXEL_FILTER_TAPS; ++t) {
+    int32_t column[WARPEDPIXEL_FILTER_TAPS];
+    get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, column, stride,
+                  col + t - lead, row - lead);
+    filtered_cols[t] = do_ntap_filter(column + lead, phase_y);
+  }
+  result = do_ntap_filter(filtered_cols + lead, phase_x);
+  result = ROUND_POWER_OF_TWO_SIGNED(result, WARPEDPIXEL_FILTER_BITS * 2);
+  return (uint8_t)clip_pixel(result);
+}
+
+// Separable cubic interpolation of `ref` at the sub-pixel position (x, y),
+// both given in units of 1 / (1 << WARPEDPIXEL_PREC_BITS).
+// Four neighbouring columns (one to the left, two to the right) are filtered
+// vertically with the y phase, then those four results are filtered with the
+// x phase. The result is rounded down by 2 * WARPEDPIXEL_FILTER_BITS and
+// clipped to a pixel.
+static uint8_t bi_cubic_filter(uint8_t *ref, int x, int y, int stride) {
+  const int col = x >> WARPEDPIXEL_PREC_BITS;
+  const int row = y >> WARPEDPIXEL_PREC_BITS;
+  const int phase_x = x - (col << WARPEDPIXEL_PREC_BITS);
+  const int phase_y = y - (row << WARPEDPIXEL_PREC_BITS);
+  int32_t filtered_cols[4];
+  int32_t result;
+  int t;
+  for (t = 0; t < 4; ++t) {
+    int32_t column[4];
+    get_subcolumn(4, ref, column, stride, col + t - 1, row - 1);
+    filtered_cols[t] = do_cubic_filter(column + 1, phase_y);
+  }
+  result = do_cubic_filter(filtered_cols + 1, phase_x);
+  result = ROUND_POWER_OF_TWO_SIGNED(result, WARPEDPIXEL_FILTER_BITS * 2);
+  return (uint8_t)clip_pixel(result);
+}
+
+// Bilinear interpolation of `ref` at the sub-pixel position (x, y), both
+// given in units of 1 / (1 << WARPEDPIXEL_PREC_BITS).
+// Blends the 2x2 neighbourhood whose top-left sample is (ix, iy), weighting
+// each sample by the product of its horizontal and vertical fractional
+// weights, then rounds away 2 * WARPEDPIXEL_PREC_BITS bits and clips.
+static uint8_t bi_linear_filter(uint8_t *ref, int x, int y, int stride) {
+ const int ix = x >> WARPEDPIXEL_PREC_BITS;
+ const int iy = y >> WARPEDPIXEL_PREC_BITS;
+ // Fractional parts, in 0 .. WARPEDPIXEL_PREC_SHIFTS - 1.
+ const int sx = x - (ix << WARPEDPIXEL_PREC_BITS);
+ const int sy = y - (iy << WARPEDPIXEL_PREC_BITS);
+ int32_t val;
+ val = ROUND_POWER_OF_TWO_SIGNED(
+ ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
+ (WARPEDPIXEL_PREC_SHIFTS - sx) +
+ ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
+ ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
+ ref[(iy + 1) * stride + ix + 1] * sy * sx,
+ WARPEDPIXEL_PREC_BITS * 2);
+ return (uint8_t)clip_pixel(val);
+}
+
+// Samples `ref` (width x height, rows `stride` apart) at the sub-pixel
+// position (x, y), given in units of 1 / (1 << WARPEDPIXEL_PREC_BITS).
+// The branch order matters:
+//   1. both coordinates outside the frame -> nearest corner sample;
+//   2. one coordinate outside -> 1-D linear blend along the nearest edge
+//      row or column;
+//   3. inside -> n-tap, cubic or bilinear filter, whichever is the widest
+//      whose support fits the margins tested below.
+// NOTE(review): several branches read one sample past the tested range (e.g.
+// row iy + 1 when iy == height - 1, or column ix + 1 when ix == width - 1);
+// this presumably relies on border padding around `ref` — confirm.
+static uint8_t warp_interpolate(uint8_t *ref, int x, int y, int width,
+ int height, int stride) {
+ int ix = x >> WARPEDPIXEL_PREC_BITS;
+ int iy = y >> WARPEDPIXEL_PREC_BITS;
+ int sx = x - (ix << WARPEDPIXEL_PREC_BITS);
+ int sy = y - (iy << WARPEDPIXEL_PREC_BITS);
+ int32_t v;
+
+ // Outside in both directions: clamp to the four corners.
+ if (ix < 0 && iy < 0)
+ return ref[0];
+ else if (ix < 0 && iy > height - 1)
+ return ref[(height - 1) * stride];
+ else if (ix > width - 1 && iy < 0)
+ return ref[width - 1];
+ else if (ix > width - 1 && iy > height - 1)
+ return ref[(height - 1) * stride + (width - 1)];
+ else if (ix < 0) {
+ // Left of the frame: blend vertically within column 0.
+ v = ROUND_POWER_OF_TWO_SIGNED(
+ ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
+ ref[(iy + 1) * stride] * sy,
+ WARPEDPIXEL_PREC_BITS);
+ return clip_pixel(v);
+ } else if (iy < 0) {
+ // Above the frame: blend horizontally within row 0.
+ v = ROUND_POWER_OF_TWO_SIGNED(
+ ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
+ WARPEDPIXEL_PREC_BITS);
+ return clip_pixel(v);
+ } else if (ix > width - 1) {
+ // Right of the frame: blend vertically within the last column.
+ v = ROUND_POWER_OF_TWO_SIGNED(
+ ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
+ ref[(iy + 1) * stride + width - 1] * sy,
+ WARPEDPIXEL_PREC_BITS);
+ return clip_pixel(v);
+ } else if (iy > height - 1) {
+ // Below the frame: blend horizontally within the last row.
+ v = ROUND_POWER_OF_TWO_SIGNED(
+ ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
+ ref[(height - 1) * stride + ix + 1] * sx,
+ WARPEDPIXEL_PREC_BITS);
+ return clip_pixel(v);
+ } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
+ iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
+ ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
+ iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
+ // Enough margin on every side for the full n-tap kernel.
+ return bi_ntap_filter(ref, x, y, stride);
+ } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
+ // Enough margin for the 4-sample cubic support.
+ return bi_cubic_filter(ref, x, y, stride);
+ } else {
+ return bi_linear_filter(ref, x, y, stride);
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// 16-bit counterpart of get_subcolumn(): copies `taps` vertically adjacent
+// samples from column `x` of `ref`, starting at row `y_start`, into
+// col[0..taps-1].
+static INLINE void highbd_get_subcolumn(int taps, uint16_t *ref, int32_t *col,
+                                        int stride, int x, int y_start) {
+  int row;
+  for (row = 0; row < taps; ++row) {
+    const int src_row = y_start + row;
+    col[row] = ref[src_row * stride + x];
+  }
+}
+
+// 16-bit counterpart of bi_ntap_filter(): separable n-tap interpolation at
+// the sub-pixel position (x, y). Columns are filtered vertically with the
+// y phase, then the column results are filtered with the x phase. The result
+// is rounded down by 2 * WARPEDPIXEL_FILTER_BITS and clipped with
+// clip_pixel_highbd() for bit depth `bd`.
+static uint16_t highbd_bi_ntap_filter(uint16_t *ref, int x, int y, int stride,
+                                      int bd) {
+  const int lead = WARPEDPIXEL_FILTER_TAPS / 2 - 1;
+  const int col = x >> WARPEDPIXEL_PREC_BITS;
+  const int row = y >> WARPEDPIXEL_PREC_BITS;
+  const int phase_x = x - (col << WARPEDPIXEL_PREC_BITS);
+  const int phase_y = y - (row << WARPEDPIXEL_PREC_BITS);
+  int32_t filtered_cols[WARPEDPIXEL_FILTER_TAPS];
+  int32_t result;
+  int t;
+  for (t = 0; t < WARPEDPIXEL_FILTER_TAPS; ++t) {
+    int32_t column[WARPEDPIXEL_FILTER_TAPS];
+    highbd_get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, column, stride,
+                         col + t - lead, row - lead);
+    filtered_cols[t] = do_ntap_filter(column + lead, phase_y);
+  }
+  result = do_ntap_filter(filtered_cols + lead, phase_x);
+  result = ROUND_POWER_OF_TWO_SIGNED(result, WARPEDPIXEL_FILTER_BITS * 2);
+  return (uint16_t)clip_pixel_highbd(result, bd);
+}
+
+// 16-bit counterpart of bi_cubic_filter(): separable cubic interpolation at
+// the sub-pixel position (x, y) over a 4x4 neighbourhood. The result is
+// rounded down by 2 * WARPEDPIXEL_FILTER_BITS and clipped with
+// clip_pixel_highbd() for bit depth `bd`.
+static uint16_t highbd_bi_cubic_filter(uint16_t *ref, int x, int y, int stride,
+                                       int bd) {
+  const int col = x >> WARPEDPIXEL_PREC_BITS;
+  const int row = y >> WARPEDPIXEL_PREC_BITS;
+  const int phase_x = x - (col << WARPEDPIXEL_PREC_BITS);
+  const int phase_y = y - (row << WARPEDPIXEL_PREC_BITS);
+  int32_t filtered_cols[4];
+  int32_t result;
+  int t;
+  for (t = 0; t < 4; ++t) {
+    int32_t column[4];
+    highbd_get_subcolumn(4, ref, column, stride, col + t - 1, row - 1);
+    filtered_cols[t] = do_cubic_filter(column + 1, phase_y);
+  }
+  result = do_cubic_filter(filtered_cols + 1, phase_x);
+  result = ROUND_POWER_OF_TWO_SIGNED(result, WARPEDPIXEL_FILTER_BITS * 2);
+  return (uint16_t)clip_pixel_highbd(result, bd);
+}
+
+// 16-bit counterpart of bi_linear_filter(): bilinear blend of the 2x2
+// neighbourhood whose top-left sample is (ix, iy), weighted by the fractional
+// parts of x and y. The result is rounded down by 2 * WARPEDPIXEL_PREC_BITS
+// and clipped with clip_pixel_highbd() for bit depth `bd`.
+static uint16_t highbd_bi_linear_filter(uint16_t *ref, int x, int y, int stride,
+ int bd) {
+ const int ix = x >> WARPEDPIXEL_PREC_BITS;
+ const int iy = y >> WARPEDPIXEL_PREC_BITS;
+ // Fractional parts, in 0 .. WARPEDPIXEL_PREC_SHIFTS - 1.
+ const int sx = x - (ix << WARPEDPIXEL_PREC_BITS);
+ const int sy = y - (iy << WARPEDPIXEL_PREC_BITS);
+ int32_t val;
+ val = ROUND_POWER_OF_TWO_SIGNED(
+ ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
+ (WARPEDPIXEL_PREC_SHIFTS - sx) +
+ ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
+ ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
+ ref[(iy + 1) * stride + ix + 1] * sy * sx,
+ WARPEDPIXEL_PREC_BITS * 2);
+ return (uint16_t)clip_pixel_highbd(val, bd);
+}
+
+// 16-bit counterpart of warp_interpolate(): samples `ref` (width x height,
+// rows `stride` apart) at the sub-pixel position (x, y), given in units of
+// 1 / (1 << WARPEDPIXEL_PREC_BITS), clipping results for bit depth `bd`.
+// The branch order matters:
+//   1. both coordinates outside the frame -> nearest corner sample;
+//   2. one coordinate outside -> 1-D linear blend along the nearest edge
+//      row or column;
+//   3. inside -> n-tap, cubic or bilinear filter, whichever is the widest
+//      whose support fits the margins tested below.
+// NOTE(review): several branches read one sample past the tested range (e.g.
+// row iy + 1 when iy == height - 1); this presumably relies on border padding
+// around `ref` — confirm.
+static uint16_t highbd_warp_interpolate(uint16_t *ref, int x, int y, int width,
+ int height, int stride, int bd) {
+ int ix = x >> WARPEDPIXEL_PREC_BITS;
+ int iy = y >> WARPEDPIXEL_PREC_BITS;
+ int sx = x - (ix << WARPEDPIXEL_PREC_BITS);
+ int sy = y - (iy << WARPEDPIXEL_PREC_BITS);
+ int32_t v;
+
+ // Outside in both directions: clamp to the four corners.
+ if (ix < 0 && iy < 0)
+ return ref[0];
+ else if (ix < 0 && iy > height - 1)
+ return ref[(height - 1) * stride];
+ else if (ix > width - 1 && iy < 0)
+ return ref[width - 1];
+ else if (ix > width - 1 && iy > height - 1)
+ return ref[(height - 1) * stride + (width - 1)];
+ else if (ix < 0) {
+ // Left of the frame: blend vertically within column 0.
+ v = ROUND_POWER_OF_TWO_SIGNED(
+ ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
+ ref[(iy + 1) * stride] * sy,
+ WARPEDPIXEL_PREC_BITS);
+ return clip_pixel_highbd(v, bd);
+ } else if (iy < 0) {
+ // Above the frame: blend horizontally within row 0.
+ v = ROUND_POWER_OF_TWO_SIGNED(
+ ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
+ WARPEDPIXEL_PREC_BITS);
+ return clip_pixel_highbd(v, bd);
+ } else if (ix > width - 1) {
+ // Right of the frame: blend vertically within the last column.
+ v = ROUND_POWER_OF_TWO_SIGNED(
+ ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
+ ref[(iy + 1) * stride + width - 1] * sy,
+ WARPEDPIXEL_PREC_BITS);
+ return clip_pixel_highbd(v, bd);
+ } else if (iy > height - 1) {
+ // Below the frame: blend horizontally within the last row.
+ v = ROUND_POWER_OF_TWO_SIGNED(
+ ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
+ ref[(height - 1) * stride + ix + 1] * sx,
+ WARPEDPIXEL_PREC_BITS);
+ return clip_pixel_highbd(v, bd);
+ } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
+ iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
+ ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
+ iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
+ // Enough margin on every side for the full n-tap kernel.
+ return highbd_bi_ntap_filter(ref, x, y, stride, bd);
+ } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
+ // Enough margin for the 4-sample cubic support.
+ return highbd_bi_cubic_filter(ref, x, y, stride, bd);
+ } else {
+ return highbd_bi_linear_filter(ref, x, y, stride, bd);
+ }
+}
+
+static double highbd_warp_erroradv(WarpedMotionParams *wm, uint8_t *ref8,
+ int width, int height, int stride,
+ uint8_t *dst8, int p_col, int p_row,
+ int p_width, int p_height, int p_stride,
+ int subsampling_x, int subsampling_y,
+ int x_scale, int y_scale, int bd) {
+ int i, j;
+ ProjectPointsType projectpoints = get_project_points_type(wm->wmtype);
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ int gm_err = 0, no_gm_err = 0;
+ int gm_sumerr = 0, no_gm_sumerr = 0;
+ for (i = p_row; i < p_row + p_height; ++i) {
+ for (j = p_col; j < p_col + p_width; ++j) {
+ int in[2], out[2];
+ in[0] = j;
+ in[1] = i;
+ projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
+ out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
+ out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
+ gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
+ highbd_warp_interpolate(ref, out[0], out[1], width, height,
+ stride, bd);
+ no_gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
+ ref[(j - p_col) + (i - p_row) * stride];
+ gm_sumerr += gm_err * gm_err;
+ no_gm_sumerr += no_gm_err * no_gm_err;
+ }
+ }
+ return (double)gm_sumerr / no_gm_sumerr;
+}
+
+// High-bit-depth warp of one block.
+// For every pixel of the p_width x p_height block at (p_col, p_row), projects
+// the pixel position through the model in `wm`, rescales the projected
+// coordinates by x_scale / y_scale (rounding away 4 bits), samples the 16-bit
+// reference there and stores the result in the 16-bit prediction buffer,
+// whose rows are `p_stride` apart. Does nothing if no projector exists for
+// wm->wmtype.
+static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
+                              int height, int stride, uint8_t *pred8, int p_col,
+                              int p_row, int p_width, int p_height,
+                              int p_stride, int subsampling_x,
+                              int subsampling_y, int x_scale, int y_scale,
+                              int bd) {
+  const ProjectPointsType project = get_project_points_type(wm->wmtype);
+  uint16_t *const pred = CONVERT_TO_SHORTPTR(pred8);
+  uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
+  int row, col;
+  if (project == NULL) return;
+  for (row = 0; row < p_height; ++row) {
+    for (col = 0; col < p_width; ++col) {
+      int src_pt[2], warped[2];
+      src_pt[0] = p_col + col;
+      src_pt[1] = p_row + row;
+      project(wm->wmmat, src_pt, warped, 1, 2, 2, subsampling_x,
+              subsampling_y);
+      warped[0] = ROUND_POWER_OF_TWO_SIGNED(warped[0] * x_scale, 4);
+      warped[1] = ROUND_POWER_OF_TWO_SIGNED(warped[1] * y_scale, 4);
+      pred[col + row * p_stride] = highbd_warp_interpolate(
+          ref, warped[0], warped[1], width, height, stride, bd);
+    }
+  }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static double warp_erroradv(WarpedMotionParams *wm, uint8_t *ref, int width,
+ int height, int stride, uint8_t *dst, int p_col,
+ int p_row, int p_width, int p_height, int p_stride,
+ int subsampling_x, int subsampling_y, int x_scale,
+ int y_scale) {
+ int gm_err = 0, no_gm_err = 0;
+ int gm_sumerr = 0, no_gm_sumerr = 0;
+ int i, j;
+ ProjectPointsType projectpoints = get_project_points_type(wm->wmtype);
+ for (i = p_row; i < p_row + p_height; ++i) {
+ for (j = p_col; j < p_col + p_width; ++j) {
+ int in[2], out[2];
+ in[0] = j;
+ in[1] = i;
+ projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
+ out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
+ out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
+ gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
+ warp_interpolate(ref, out[0], out[1], width, height, stride);
+ no_gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
+ ref[(j - p_col) + (i - p_row) * stride];
+ gm_sumerr += gm_err * gm_err;
+ no_gm_sumerr += no_gm_err * no_gm_err;
+ }
+ }
+ return (double)gm_sumerr / no_gm_sumerr;
+}
+
+// Warps one block of 8-bit samples.
+// For every pixel of the p_width x p_height block at (p_col, p_row), projects
+// the pixel position through the model in `wm`, rescales the projected
+// coordinates by x_scale / y_scale (rounding away 4 bits), samples `ref`
+// there and stores the result in `pred`, whose rows are `p_stride` apart.
+// Does nothing if no projector exists for wm->wmtype.
+static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width,
+                       int height, int stride, uint8_t *pred, int p_col,
+                       int p_row, int p_width, int p_height, int p_stride,
+                       int subsampling_x, int subsampling_y, int x_scale,
+                       int y_scale) {
+  const ProjectPointsType project = get_project_points_type(wm->wmtype);
+  int row, col;
+  if (project == NULL) return;
+  for (row = 0; row < p_height; ++row) {
+    for (col = 0; col < p_width; ++col) {
+      int src_pt[2], warped[2];
+      src_pt[0] = p_col + col;
+      src_pt[1] = p_row + row;
+      project(wm->wmmat, src_pt, warped, 1, 2, 2, subsampling_x,
+              subsampling_y);
+      warped[0] = ROUND_POWER_OF_TWO_SIGNED(warped[0] * x_scale, 4);
+      warped[1] = ROUND_POWER_OF_TWO_SIGNED(warped[1] * y_scale, 4);
+      pred[col + row * p_stride] =
+          warp_interpolate(ref, warped[0], warped[1], width, height, stride);
+    }
+  }
+}
+
+// Public entry point for the warp error-advantage measure.
+// Forwards all arguments to warp_erroradv(). In CONFIG_VP9_HIGHBITDEPTH
+// builds the signature gains `use_hbd` and `bd`, and a non-zero `use_hbd`
+// routes the call to highbd_warp_erroradv() instead.
+double vp10_warp_erroradv(WarpedMotionParams *wm,
+#if CONFIG_VP9_HIGHBITDEPTH
+ int use_hbd, int bd,
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *ref, int width, int height, int stride,
+ uint8_t *dst, int p_col, int p_row, int p_width,
+ int p_height, int p_stride, int subsampling_x,
+ int subsampling_y, int x_scale, int y_scale) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (use_hbd)
+ return highbd_warp_erroradv(
+ wm, ref, width, height, stride, dst, p_col, p_row, p_width, p_height,
+ p_stride, subsampling_x, subsampling_y, x_scale, y_scale, bd);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // Also the body of the `else` above when high bit depth is compiled in.
+ return warp_erroradv(wm, ref, width, height, stride, dst, p_col, p_row,
+ p_width, p_height, p_stride, subsampling_x,
+ subsampling_y, x_scale, y_scale);
+}
+
+// Public entry point for warping a block into `pred`.
+// Forwards all arguments to warp_plane(). In CONFIG_VP9_HIGHBITDEPTH builds
+// the signature gains `use_hbd` and `bd`, and a non-zero `use_hbd` routes the
+// call to highbd_warp_plane() instead.
+void vp10_warp_plane(WarpedMotionParams *wm,
+#if CONFIG_VP9_HIGHBITDEPTH
+ int use_hbd, int bd,
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *ref, int width, int height, int stride,
+ uint8_t *pred, int p_col, int p_row, int p_width,
+ int p_height, int p_stride, int subsampling_x,
+ int subsampling_y, int x_scale, int y_scale) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (use_hbd)
+ highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
+ p_width, p_height, p_stride, subsampling_x, subsampling_y,
+ x_scale, y_scale, bd);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // Also the body of the `else` above when high bit depth is compiled in.
+ warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
+ p_height, p_stride, subsampling_x, subsampling_y, x_scale,
+ y_scale);
+}
+
+// Converts a floating-point motion model into the fixed-point parameters of
+// `wm` and records the model type.
+// The first 2 (TRANSLATION), 4 (ROTZOOM) or 6 (AFFINE, HOMOGRAPHY) entries
+// are scaled by 1 << WARPEDMODEL_PREC_BITS. HOMOGRAPHY additionally stores
+// entries 6 and 7 scaled by 1 << WARPEDMODEL_ROW3HOMO_PREC_BITS and asserts
+// that model[8] is 1. Entries beyond those listed are left untouched.
+void vp10_integerize_model(const double *model, TransformationType wmtype,
+                           WarpedMotionParams *wm) {
+  int num_model_prec = 0;  // leading entries stored at model precision
+  int k;
+  wm->wmtype = wmtype;
+  switch (wmtype) {
+    case HOMOGRAPHY:
+      assert(fabs(model[8] - 1.0) < 1e-12);
+      wm->wmmat[7] =
+          (int)lrint(model[7] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
+      wm->wmmat[6] =
+          (int)lrint(model[6] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
+      num_model_prec = 6;
+      break;
+    case AFFINE: num_model_prec = 6; break;
+    case ROTZOOM: num_model_prec = 4; break;
+    case TRANSLATION: num_model_prec = 2; break;
+    default: assert(0 && "Invalid TransformationType");
+  }
+  for (k = 0; k < num_model_prec; ++k) {
+    wm->wmmat[k] = (int)lrint(model[k] * (1 << WARPEDMODEL_PREC_BITS));
+  }
+}
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
new file mode 100644
index 0000000..a9c57f9
--- /dev/null
+++ b/av1/common/warped_motion.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be
+ * found in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_COMMON_WARPED_MOTION_H
+#define VP10_COMMON_WARPED_MOTION_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "aom_ports/mem.h"
+#include "aom_dsp/vpx_dsp_common.h"
+
+// Fractional bits of the integerized model; ROW3HOMO applies to wmmat[6..7].
+#define WARPEDMODEL_PREC_BITS 8
+#define WARPEDMODEL_ROW3HOMO_PREC_BITS 12
+
+// Bits of subpel precision for warped interpolation
+#define WARPEDPIXEL_PREC_BITS 6
+#define WARPEDPIXEL_PREC_SHIFTS (1 << WARPEDPIXEL_PREC_BITS) // 1 << 6 = 64
+
+// Number of taps of the warped interpolation filter
+#define WARPEDPIXEL_FILTER_TAPS 6
+
+// Bits of precision of the filter taps
+#define WARPEDPIXEL_FILTER_BITS 7
+// Difference between the model and subpel precisions (8 - 6 = 2 bits)
+#define WARPEDDIFF_PREC_BITS (WARPEDMODEL_PREC_BITS - WARPEDPIXEL_PREC_BITS)
+// Common signature of the projectPoints* functions declared below.
+typedef void (*ProjectPointsType)(int *mat, int *points, int *proj, const int n,
+ const int stride_points,
+ const int stride_proj,
+ const int subsampling_x,
+ const int subsampling_y);
+void projectPointsHomography(int *mat, int *points, int *proj, const int n,
+ const int stride_points, const int stride_proj,
+ const int subsampling_x, const int subsampling_y);
+void projectPointsAffine(int *mat, int *points, int *proj, const int n,
+ const int stride_points, const int stride_proj,
+ const int subsampling_x, const int subsampling_y);
+void projectPointsRotZoom(int *mat, int *points, int *proj, const int n,
+ const int stride_points, const int stride_proj,
+ const int subsampling_x, const int subsampling_y);
+void projectPointsTranslation(int *mat, int *points, int *proj, const int n,
+ const int stride_points, const int stride_proj,
+ const int subsampling_x, const int subsampling_y);
+// Warp model types, listed from the most parameters to the fewest.
+typedef enum {
+ UNKNOWN_TRANSFORM = -1, // sentinel outside the 0..TRANS_TYPES-1 range
+ HOMOGRAPHY, // homography, 8-parameter
+ AFFINE, // affine, 6-parameter
+ ROTZOOM, // simplified affine with rotation and zoom only, 4-parameter
+ TRANSLATION, // translational motion 2-parameter
+ TRANS_TYPES // count of the model types above; sizes n_trans_model_params
+} TransformationType;
+// Number of model parameters per TransformationType (indexed by the enum).
+// NOTE(review): HOMOGRAPHY lists 9, presumably counting the entry fixed to 1.
+static const int n_trans_model_params[TRANS_TYPES] = { 9, 6, 4, 2 };
+// Integerized warp model: the model type plus its fixed-point parameters.
+typedef struct {
+ TransformationType wmtype;
+ int wmmat[8]; // Homography's ninth entry (model[8]) is assumed 1, not stored
+} WarpedMotionParams;
+// NOTE(review): presumably returns a warp error measure of ref vs dst; confirm.
+double vp10_warp_erroradv(WarpedMotionParams *wm,
+#if CONFIG_VP9_HIGHBITDEPTH
+ int use_hbd, int bd, // exist only in high-bitdepth builds
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *ref, int width, int height, int stride,
+ uint8_t *dst, int p_col, int p_row, int p_width,
+ int p_height, int p_stride, int subsampling_x,
+ int subsampling_y, int x_scale, int y_scale);
+// Dispatches to the high-bitdepth warp when use_hbd is set, else the 8-bit one.
+void vp10_warp_plane(WarpedMotionParams *wm,
+#if CONFIG_VP9_HIGHBITDEPTH
+ int use_hbd, int bd, // exist only in high-bitdepth builds
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *ref, int width, int height, int stride,
+ uint8_t *pred, int p_col, int p_row, int p_width,
+ int p_height, int p_stride, int subsampling_x,
+ int subsampling_y, int x_scale, int y_scale);
+// Converts model[] to fixed point in wm->wmmat and records wmtype in wm.
+// For HOMOGRAPHY, model[8] must be 1 (asserted); only model[0..7] are stored.
+void vp10_integerize_model(const double *model, TransformationType wmtype,
+ WarpedMotionParams *wm);
+#endif // VP10_COMMON_WARPED_MOTION_H
diff --git a/av1/common/x86/highbd_inv_txfm_sse4.c b/av1/common/x86/highbd_inv_txfm_sse4.c
new file mode 100644
index 0000000..f3686eb
--- /dev/null
+++ b/av1/common/x86/highbd_inv_txfm_sse4.c
@@ -0,0 +1,1398 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <smmintrin.h> /* SSE4.1 */
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "av1/common/vp10_inv_txfm2d_cfg.h"
+#include "av1/common/x86/highbd_txfm_utility_sse4.h"
+// Loads 16 coefficients from coeff into in[0..3] using aligned 128-bit loads.
+static INLINE void load_buffer_4x4(const int32_t *coeff, __m128i *in) {
+  int row;
+  for (row = 0; row < 4; ++row) {
+    in[row] = _mm_load_si128((const __m128i *)(coeff + 4 * row));
+  }
+}
+// Transposes the 4x4 block in in[0..3], runs a 4-point butterfly, writes in[].
+static void idct4x4_sse4_1(__m128i *in, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min]; // table row chosen by bit
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); // half of 1 << bit
+ __m128i u0, u1, u2, u3;
+ __m128i v0, v1, v2, v3, x, y;
+ // 4x4 transpose of 32-bit lanes: uK gathers lane K of in[0], in[1], in[2], in[3].
+ v0 = _mm_unpacklo_epi32(in[0], in[1]);
+ v1 = _mm_unpackhi_epi32(in[0], in[1]);
+ v2 = _mm_unpacklo_epi32(in[2], in[3]);
+ v3 = _mm_unpackhi_epi32(in[2], in[3]);
+
+ u0 = _mm_unpacklo_epi64(v0, v2);
+ u1 = _mm_unpackhi_epi64(v0, v2);
+ u2 = _mm_unpacklo_epi64(v1, v3);
+ u3 = _mm_unpackhi_epi64(v1, v3);
+ // v0 = (u0 * cospi[32] + u2 * cospi[32] + rnding) >> bit
+ x = _mm_mullo_epi32(u0, cospi32);
+ y = _mm_mullo_epi32(u2, cospi32);
+ v0 = _mm_add_epi32(x, y);
+ v0 = _mm_add_epi32(v0, rnding);
+ v0 = _mm_srai_epi32(v0, bit);
+ // v1 = (u0 * cospi[32] - u2 * cospi[32] + rnding) >> bit
+ v1 = _mm_sub_epi32(x, y);
+ v1 = _mm_add_epi32(v1, rnding);
+ v1 = _mm_srai_epi32(v1, bit);
+ // v2 = (u1 * cospi[48] - u3 * cospi[16] + rnding) >> bit
+ x = _mm_mullo_epi32(u1, cospi48);
+ y = _mm_mullo_epi32(u3, cospim16);
+ v2 = _mm_add_epi32(x, y);
+ v2 = _mm_add_epi32(v2, rnding);
+ v2 = _mm_srai_epi32(v2, bit);
+ // v3 = (u1 * cospi[16] + u3 * cospi[48] + rnding) >> bit
+ x = _mm_mullo_epi32(u1, cospi16);
+ y = _mm_mullo_epi32(u3, cospi48);
+ v3 = _mm_add_epi32(x, y);
+ v3 = _mm_add_epi32(v3, rnding);
+ v3 = _mm_srai_epi32(v3, bit);
+ // Final add/subtract stage; the results overwrite in[0..3].
+ in[0] = _mm_add_epi32(v0, v3);
+ in[1] = _mm_add_epi32(v1, v2);
+ in[2] = _mm_sub_epi32(v1, v2);
+ in[3] = _mm_sub_epi32(v0, v3);
+}
+// Transposes the 4x4 block in in[0..3], runs the 4-point ADST stages, writes in[].
+static void iadst4x4_sse4_1(__m128i *in, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min]; // table row chosen by bit
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); // half of 1 << bit
+ const __m128i zero = _mm_setzero_si128();
+ __m128i u0, u1, u2, u3;
+ __m128i v0, v1, v2, v3, x, y;
+ // 4x4 transpose of 32-bit lanes: uK gathers lane K of in[0], in[1], in[2], in[3].
+ v0 = _mm_unpacklo_epi32(in[0], in[1]);
+ v1 = _mm_unpackhi_epi32(in[0], in[1]);
+ v2 = _mm_unpacklo_epi32(in[2], in[3]);
+ v3 = _mm_unpackhi_epi32(in[2], in[3]);
+
+ u0 = _mm_unpacklo_epi64(v0, v2);
+ u1 = _mm_unpackhi_epi64(v0, v2);
+ u2 = _mm_unpacklo_epi64(v1, v3);
+ u3 = _mm_unpackhi_epi64(v1, v3);
+
+ // stage 0
+ // stage 1
+ u1 = _mm_sub_epi32(zero, u1); // negate u1
+ u3 = _mm_sub_epi32(zero, u3); // negate u3
+
+ // stage 2
+ v0 = u0;
+ v1 = u3;
+ x = _mm_mullo_epi32(u1, cospi32);
+ y = _mm_mullo_epi32(u2, cospi32);
+ v2 = _mm_add_epi32(x, y);
+ v2 = _mm_add_epi32(v2, rnding);
+ v2 = _mm_srai_epi32(v2, bit);
+
+ v3 = _mm_sub_epi32(x, y);
+ v3 = _mm_add_epi32(v3, rnding);
+ v3 = _mm_srai_epi32(v3, bit);
+
+ // stage 3
+ u0 = _mm_add_epi32(v0, v2);
+ u1 = _mm_add_epi32(v1, v3);
+ u2 = _mm_sub_epi32(v0, v2);
+ u3 = _mm_sub_epi32(v1, v3);
+
+ // stage 4
+ x = _mm_mullo_epi32(u0, cospi8);
+ y = _mm_mullo_epi32(u1, cospi56);
+ in[3] = _mm_add_epi32(x, y);
+ in[3] = _mm_add_epi32(in[3], rnding);
+ in[3] = _mm_srai_epi32(in[3], bit);
+
+ x = _mm_mullo_epi32(u0, cospi56);
+ y = _mm_mullo_epi32(u1, cospim8);
+ in[0] = _mm_add_epi32(x, y);
+ in[0] = _mm_add_epi32(in[0], rnding);
+ in[0] = _mm_srai_epi32(in[0], bit);
+
+ x = _mm_mullo_epi32(u2, cospi40);
+ y = _mm_mullo_epi32(u3, cospi24);
+ in[1] = _mm_add_epi32(x, y);
+ in[1] = _mm_add_epi32(in[1], rnding);
+ in[1] = _mm_srai_epi32(in[1], bit);
+
+ x = _mm_mullo_epi32(u2, cospi24);
+ y = _mm_mullo_epi32(u3, cospim40);
+ in[2] = _mm_add_epi32(x, y);
+ in[2] = _mm_add_epi32(in[2], rnding);
+ in[2] = _mm_srai_epi32(in[2], bit);
+}
+// Rounds and arithmetically right-shifts every 32-bit lane of in[0..3].
+static INLINE void round_shift_4x4(__m128i *in, int shift) {
+  __m128i rnding;
+  int i;
+
+  // A zero shift leaves the data untouched; without this guard the rounding
+  // term would be 1 << -1, which is undefined behavior in C.
+  if (shift == 0) return;
+
+  rnding = _mm_set1_epi32(1 << (shift - 1));
+  for (i = 0; i < 4; ++i) {
+    in[i] = _mm_srai_epi32(_mm_add_epi32(in[i], rnding), shift);
+  }
+}
+// Clamps each signed 16-bit lane of u to [0, (1 << bd) - 1].
+static INLINE __m128i highbd_clamp_epi16(__m128i u, int bd) {
+  const __m128i lower = _mm_setzero_si128();
+  const __m128i unit = _mm_set1_epi16(1);
+  const __m128i upper = _mm_sub_epi16(_mm_slli_epi16(unit, bd), unit);
+  __m128i capped;
+
+  // NOTE(review): both bounds are applied as signed compares, so a lane that is
+  // >= 0x8000 (e.g. after the unsigned saturation of _mm_packus_epi32) ends up
+  // 0 rather than the maximum -- confirm that this is intended.
+  // Signed minimum against the upper bound caps lanes that exceed it.
+  capped = _mm_min_epi16(u, upper);
+  // Signed maximum against zero clears negative lanes.
+  capped = _mm_max_epi16(capped, lower);
+  return capped;
+}
+// Rounds in[0..3], adds them to the 4x4 block at output, clamps and stores.
+static void write_buffer_4x4(__m128i *in, uint16_t *output, int stride,
+ int fliplr, int flipud, int shift, int bd) {
+ const __m128i zero = _mm_setzero_si128();
+ __m128i u0, u1, u2, u3;
+ __m128i v0, v1, v2, v3;
+
+ round_shift_4x4(in, shift);
+ // Load 64 bits (four 16-bit pixels) from each of the four destination rows.
+ v0 = _mm_loadl_epi64((__m128i const *)(output + 0 * stride));
+ v1 = _mm_loadl_epi64((__m128i const *)(output + 1 * stride));
+ v2 = _mm_loadl_epi64((__m128i const *)(output + 2 * stride));
+ v3 = _mm_loadl_epi64((__m128i const *)(output + 3 * stride));
+ // Zero-extend the 16-bit pixels to 32-bit lanes.
+ v0 = _mm_unpacklo_epi16(v0, zero);
+ v1 = _mm_unpacklo_epi16(v1, zero);
+ v2 = _mm_unpacklo_epi16(v2, zero);
+ v3 = _mm_unpacklo_epi16(v3, zero);
+
+ if (fliplr) { // shuffle control 0x1B reverses the four lanes of a register
+ in[0] = _mm_shuffle_epi32(in[0], 0x1B);
+ in[1] = _mm_shuffle_epi32(in[1], 0x1B);
+ in[2] = _mm_shuffle_epi32(in[2], 0x1B);
+ in[3] = _mm_shuffle_epi32(in[3], 0x1B);
+ }
+
+ if (flipud) { // destination row k receives in[3 - k]
+ u0 = _mm_add_epi32(in[3], v0);
+ u1 = _mm_add_epi32(in[2], v1);
+ u2 = _mm_add_epi32(in[1], v2);
+ u3 = _mm_add_epi32(in[0], v3);
+ } else {
+ u0 = _mm_add_epi32(in[0], v0);
+ u1 = _mm_add_epi32(in[1], v1);
+ u2 = _mm_add_epi32(in[2], v2);
+ u3 = _mm_add_epi32(in[3], v3);
+ }
+ // Pack to 16 bits with unsigned saturation: rows 0|1 in v0, rows 2|3 in v2.
+ v0 = _mm_packus_epi32(u0, u1);
+ v2 = _mm_packus_epi32(u2, u3);
+
+ u0 = highbd_clamp_epi16(v0, bd);
+ u2 = highbd_clamp_epi16(v2, bd);
+ // Move each row into the low 64 bits so that storel can write it out.
+ v0 = _mm_unpacklo_epi64(u0, u0);
+ v1 = _mm_unpackhi_epi64(u0, u0);
+ v2 = _mm_unpacklo_epi64(u2, u2);
+ v3 = _mm_unpackhi_epi64(u2, u2);
+
+ _mm_storel_epi64((__m128i *)(output + 0 * stride), v0);
+ _mm_storel_epi64((__m128i *)(output + 1 * stride), v1);
+ _mm_storel_epi64((__m128i *)(output + 2 * stride), v2);
+ _mm_storel_epi64((__m128i *)(output + 3 * stride), v3);
+}
+// Runs two 1-D 4x4 passes chosen by tx_type and adds the result to output.
+void vp10_inv_txfm2d_add_4x4_sse4_1(const int32_t *coeff, uint16_t *output,
+ int stride, int tx_type, int bd) {
+ __m128i in[4];
+ const TXFM_2D_CFG *cfg = NULL;
+
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg = &inv_txfm_2d_cfg_dct_dct_4;
+ load_buffer_4x4(coeff, in);
+ idct4x4_sse4_1(in, cfg->cos_bit_row[2]); // first pass uses the row cos bit
+ idct4x4_sse4_1(in, cfg->cos_bit_col[2]); // second pass uses the column cos bit
+ write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
+ break;
+ case ADST_DCT:
+ cfg = &inv_txfm_2d_cfg_adst_dct_4;
+ load_buffer_4x4(coeff, in);
+ idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
+ break;
+ case DCT_ADST:
+ cfg = &inv_txfm_2d_cfg_dct_adst_4;
+ load_buffer_4x4(coeff, in);
+ iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
+ break;
+ case ADST_ADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_4;
+ load_buffer_4x4(coeff, in);
+ iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ write_buffer_4x4(in, output, stride, 0, 0, -cfg->shift[1], bd);
+ break;
+#if CONFIG_EXT_TX // flip variants reuse the ADST cfgs and mirror at write time
+ case FLIPADST_DCT:
+ cfg = &inv_txfm_2d_cfg_adst_dct_4;
+ load_buffer_4x4(coeff, in);
+ idct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ write_buffer_4x4(in, output, stride, 0, 1, -cfg->shift[1], bd);
+ break;
+ case DCT_FLIPADST:
+ cfg = &inv_txfm_2d_cfg_dct_adst_4;
+ load_buffer_4x4(coeff, in);
+ iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ idct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ write_buffer_4x4(in, output, stride, 1, 0, -cfg->shift[1], bd);
+ break;
+ case FLIPADST_FLIPADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_4;
+ load_buffer_4x4(coeff, in);
+ iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ write_buffer_4x4(in, output, stride, 1, 1, -cfg->shift[1], bd);
+ break;
+ case ADST_FLIPADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_4;
+ load_buffer_4x4(coeff, in);
+ iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ write_buffer_4x4(in, output, stride, 1, 0, -cfg->shift[1], bd);
+ break;
+ case FLIPADST_ADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_4;
+ load_buffer_4x4(coeff, in);
+ iadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+ iadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+ write_buffer_4x4(in, output, stride, 0, 1, -cfg->shift[1], bd);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); // tx_type not handled here; output is left unchanged
+ }
+}
+// 8x8
+// Loads the 64 coefficients at coeff into in[0..15], four per register.
+static void load_buffer_8x8(const int32_t *coeff, __m128i *in) {
+ in[0] = _mm_load_si128((const __m128i *)(coeff + 0)); // aligned 128-bit load
+ in[1] = _mm_load_si128((const __m128i *)(coeff + 4));
+ in[2] = _mm_load_si128((const __m128i *)(coeff + 8));
+ in[3] = _mm_load_si128((const __m128i *)(coeff + 12));
+ in[4] = _mm_load_si128((const __m128i *)(coeff + 16));
+ in[5] = _mm_load_si128((const __m128i *)(coeff + 20));
+ in[6] = _mm_load_si128((const __m128i *)(coeff + 24));
+ in[7] = _mm_load_si128((const __m128i *)(coeff + 28));
+ in[8] = _mm_load_si128((const __m128i *)(coeff + 32));
+ in[9] = _mm_load_si128((const __m128i *)(coeff + 36));
+ in[10] = _mm_load_si128((const __m128i *)(coeff + 40));
+ in[11] = _mm_load_si128((const __m128i *)(coeff + 44));
+ in[12] = _mm_load_si128((const __m128i *)(coeff + 48));
+ in[13] = _mm_load_si128((const __m128i *)(coeff + 52));
+ in[14] = _mm_load_si128((const __m128i *)(coeff + 56));
+ in[15] = _mm_load_si128((const __m128i *)(coeff + 60));
+}
+// Runs the 8-point butterfly down each of the two register columns of in[].
+static void idct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min]; // table row chosen by bit
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); // half of 1 << bit
+ __m128i u0, u1, u2, u3, u4, u5, u6, u7;
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+ __m128i x, y;
+ int col;
+
+ // Note:
+ // Even column: 0, 2, ..., 14
+ // Odd column: 1, 3, ..., 15
+ // one even column plus one odd column constructs one row (8 coeffs)
+ // total we have 8 rows (8x8).
+ for (col = 0; col < 2; ++col) { // in[r * 2 + col] is row r of this column
+ // stage 0
+ // stage 1
+ // stage 2
+ u0 = in[0 * 2 + col];
+ u1 = in[4 * 2 + col];
+ u2 = in[2 * 2 + col];
+ u3 = in[6 * 2 + col];
+
+ x = _mm_mullo_epi32(in[1 * 2 + col], cospi56);
+ y = _mm_mullo_epi32(in[7 * 2 + col], cospim8);
+ u4 = _mm_add_epi32(x, y);
+ u4 = _mm_add_epi32(u4, rnding);
+ u4 = _mm_srai_epi32(u4, bit);
+
+ x = _mm_mullo_epi32(in[1 * 2 + col], cospi8);
+ y = _mm_mullo_epi32(in[7 * 2 + col], cospi56);
+ u7 = _mm_add_epi32(x, y);
+ u7 = _mm_add_epi32(u7, rnding);
+ u7 = _mm_srai_epi32(u7, bit);
+
+ x = _mm_mullo_epi32(in[5 * 2 + col], cospi24);
+ y = _mm_mullo_epi32(in[3 * 2 + col], cospim40);
+ u5 = _mm_add_epi32(x, y);
+ u5 = _mm_add_epi32(u5, rnding);
+ u5 = _mm_srai_epi32(u5, bit);
+
+ x = _mm_mullo_epi32(in[5 * 2 + col], cospi40);
+ y = _mm_mullo_epi32(in[3 * 2 + col], cospi24);
+ u6 = _mm_add_epi32(x, y);
+ u6 = _mm_add_epi32(u6, rnding);
+ u6 = _mm_srai_epi32(u6, bit);
+
+ // stage 3
+ x = _mm_mullo_epi32(u0, cospi32);
+ y = _mm_mullo_epi32(u1, cospi32);
+ v0 = _mm_add_epi32(x, y);
+ v0 = _mm_add_epi32(v0, rnding);
+ v0 = _mm_srai_epi32(v0, bit);
+
+ v1 = _mm_sub_epi32(x, y);
+ v1 = _mm_add_epi32(v1, rnding);
+ v1 = _mm_srai_epi32(v1, bit);
+
+ x = _mm_mullo_epi32(u2, cospi48);
+ y = _mm_mullo_epi32(u3, cospim16);
+ v2 = _mm_add_epi32(x, y);
+ v2 = _mm_add_epi32(v2, rnding);
+ v2 = _mm_srai_epi32(v2, bit);
+
+ x = _mm_mullo_epi32(u2, cospi16);
+ y = _mm_mullo_epi32(u3, cospi48);
+ v3 = _mm_add_epi32(x, y);
+ v3 = _mm_add_epi32(v3, rnding);
+ v3 = _mm_srai_epi32(v3, bit);
+
+ v4 = _mm_add_epi32(u4, u5);
+ v5 = _mm_sub_epi32(u4, u5);
+ v6 = _mm_sub_epi32(u7, u6);
+ v7 = _mm_add_epi32(u6, u7);
+
+ // stage 4
+ u0 = _mm_add_epi32(v0, v3);
+ u1 = _mm_add_epi32(v1, v2);
+ u2 = _mm_sub_epi32(v1, v2);
+ u3 = _mm_sub_epi32(v0, v3);
+ u4 = v4;
+ u7 = v7;
+
+ x = _mm_mullo_epi32(v5, cospi32);
+ y = _mm_mullo_epi32(v6, cospi32);
+ u6 = _mm_add_epi32(y, x);
+ u6 = _mm_add_epi32(u6, rnding);
+ u6 = _mm_srai_epi32(u6, bit);
+
+ u5 = _mm_sub_epi32(y, x);
+ u5 = _mm_add_epi32(u5, rnding);
+ u5 = _mm_srai_epi32(u5, bit);
+
+ // stage 5
+ out[0 * 2 + col] = _mm_add_epi32(u0, u7); // results use the same layout as in[]
+ out[1 * 2 + col] = _mm_add_epi32(u1, u6);
+ out[2 * 2 + col] = _mm_add_epi32(u2, u5);
+ out[3 * 2 + col] = _mm_add_epi32(u3, u4);
+ out[4 * 2 + col] = _mm_sub_epi32(u3, u4);
+ out[5 * 2 + col] = _mm_sub_epi32(u2, u5);
+ out[6 * 2 + col] = _mm_sub_epi32(u1, u6);
+ out[7 * 2 + col] = _mm_sub_epi32(u0, u7);
+ }
+}
+// Runs the 8-point ADST stages down each of the two register columns of in[].
+static void iadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min]; // table row chosen by bit
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+ const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
+ const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
+ const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
+ const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
+ const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
+ const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
+ const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
+ const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
+ const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
+ const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
+ const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
+ const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); // half of 1 << bit
+ const __m128i zero = _mm_setzero_si128();
+ __m128i u0, u1, u2, u3, u4, u5, u6, u7;
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+ __m128i x, y;
+ int col;
+
+ // Note:
+ // Even column: 0, 2, ..., 14
+ // Odd column: 1, 3, ..., 15
+ // one even column plus one odd column constructs one row (8 coeffs)
+ // total we have 8 rows (8x8).
+ for (col = 0; col < 2; ++col) { // in[2 * r + col] is row r of this column
+ // stage 0
+ // stage 1
+ u0 = in[2 * 0 + col];
+ u1 = _mm_sub_epi32(zero, in[2 * 7 + col]); // zero - x negates the row
+ u2 = _mm_sub_epi32(zero, in[2 * 3 + col]);
+ u3 = in[2 * 4 + col];
+ u4 = _mm_sub_epi32(zero, in[2 * 1 + col]);
+ u5 = in[2 * 6 + col];
+ u6 = in[2 * 2 + col];
+ u7 = _mm_sub_epi32(zero, in[2 * 5 + col]);
+
+ // stage 2
+ v0 = u0;
+ v1 = u1;
+
+ x = _mm_mullo_epi32(u2, cospi32);
+ y = _mm_mullo_epi32(u3, cospi32);
+ v2 = _mm_add_epi32(x, y);
+ v2 = _mm_add_epi32(v2, rnding);
+ v2 = _mm_srai_epi32(v2, bit);
+
+ v3 = _mm_sub_epi32(x, y);
+ v3 = _mm_add_epi32(v3, rnding);
+ v3 = _mm_srai_epi32(v3, bit);
+
+ v4 = u4;
+ v5 = u5;
+
+ x = _mm_mullo_epi32(u6, cospi32);
+ y = _mm_mullo_epi32(u7, cospi32);
+ v6 = _mm_add_epi32(x, y);
+ v6 = _mm_add_epi32(v6, rnding);
+ v6 = _mm_srai_epi32(v6, bit);
+
+ v7 = _mm_sub_epi32(x, y);
+ v7 = _mm_add_epi32(v7, rnding);
+ v7 = _mm_srai_epi32(v7, bit);
+
+ // stage 3
+ u0 = _mm_add_epi32(v0, v2);
+ u1 = _mm_add_epi32(v1, v3);
+ u2 = _mm_sub_epi32(v0, v2);
+ u3 = _mm_sub_epi32(v1, v3);
+ u4 = _mm_add_epi32(v4, v6);
+ u5 = _mm_add_epi32(v5, v7);
+ u6 = _mm_sub_epi32(v4, v6);
+ u7 = _mm_sub_epi32(v5, v7);
+
+ // stage 4
+ v0 = u0;
+ v1 = u1;
+ v2 = u2;
+ v3 = u3;
+
+ x = _mm_mullo_epi32(u4, cospi16);
+ y = _mm_mullo_epi32(u5, cospi48);
+ v4 = _mm_add_epi32(x, y);
+ v4 = _mm_add_epi32(v4, rnding);
+ v4 = _mm_srai_epi32(v4, bit);
+
+ x = _mm_mullo_epi32(u4, cospi48);
+ y = _mm_mullo_epi32(u5, cospim16);
+ v5 = _mm_add_epi32(x, y);
+ v5 = _mm_add_epi32(v5, rnding);
+ v5 = _mm_srai_epi32(v5, bit);
+
+ x = _mm_mullo_epi32(u6, cospim48);
+ y = _mm_mullo_epi32(u7, cospi16);
+ v6 = _mm_add_epi32(x, y);
+ v6 = _mm_add_epi32(v6, rnding);
+ v6 = _mm_srai_epi32(v6, bit);
+
+ x = _mm_mullo_epi32(u6, cospi16);
+ y = _mm_mullo_epi32(u7, cospi48);
+ v7 = _mm_add_epi32(x, y);
+ v7 = _mm_add_epi32(v7, rnding);
+ v7 = _mm_srai_epi32(v7, bit);
+
+ // stage 5
+ u0 = _mm_add_epi32(v0, v4);
+ u1 = _mm_add_epi32(v1, v5);
+ u2 = _mm_add_epi32(v2, v6);
+ u3 = _mm_add_epi32(v3, v7);
+ u4 = _mm_sub_epi32(v0, v4);
+ u5 = _mm_sub_epi32(v1, v5);
+ u6 = _mm_sub_epi32(v2, v6);
+ u7 = _mm_sub_epi32(v3, v7);
+
+ // stage 6
+ x = _mm_mullo_epi32(u0, cospi4);
+ y = _mm_mullo_epi32(u1, cospi60);
+ v0 = _mm_add_epi32(x, y);
+ v0 = _mm_add_epi32(v0, rnding);
+ v0 = _mm_srai_epi32(v0, bit);
+
+ x = _mm_mullo_epi32(u0, cospi60);
+ y = _mm_mullo_epi32(u1, cospim4);
+ v1 = _mm_add_epi32(x, y);
+ v1 = _mm_add_epi32(v1, rnding);
+ v1 = _mm_srai_epi32(v1, bit);
+
+ x = _mm_mullo_epi32(u2, cospi20);
+ y = _mm_mullo_epi32(u3, cospi44);
+ v2 = _mm_add_epi32(x, y);
+ v2 = _mm_add_epi32(v2, rnding);
+ v2 = _mm_srai_epi32(v2, bit);
+
+ x = _mm_mullo_epi32(u2, cospi44);
+ y = _mm_mullo_epi32(u3, cospim20);
+ v3 = _mm_add_epi32(x, y);
+ v3 = _mm_add_epi32(v3, rnding);
+ v3 = _mm_srai_epi32(v3, bit);
+
+ x = _mm_mullo_epi32(u4, cospi36);
+ y = _mm_mullo_epi32(u5, cospi28);
+ v4 = _mm_add_epi32(x, y);
+ v4 = _mm_add_epi32(v4, rnding);
+ v4 = _mm_srai_epi32(v4, bit);
+
+ x = _mm_mullo_epi32(u4, cospi28);
+ y = _mm_mullo_epi32(u5, cospim36);
+ v5 = _mm_add_epi32(x, y);
+ v5 = _mm_add_epi32(v5, rnding);
+ v5 = _mm_srai_epi32(v5, bit);
+
+ x = _mm_mullo_epi32(u6, cospi52);
+ y = _mm_mullo_epi32(u7, cospi12);
+ v6 = _mm_add_epi32(x, y);
+ v6 = _mm_add_epi32(v6, rnding);
+ v6 = _mm_srai_epi32(v6, bit);
+
+ x = _mm_mullo_epi32(u6, cospi12);
+ y = _mm_mullo_epi32(u7, cospim52);
+ v7 = _mm_add_epi32(x, y);
+ v7 = _mm_add_epi32(v7, rnding);
+ v7 = _mm_srai_epi32(v7, bit);
+
+ // stage 7
+ out[2 * 0 + col] = v1; // stage 7 only reorders the stage 6 results
+ out[2 * 1 + col] = v6;
+ out[2 * 2 + col] = v3;
+ out[2 * 3 + col] = v4;
+ out[2 * 4 + col] = v5;
+ out[2 * 5 + col] = v2;
+ out[2 * 6 + col] = v7;
+ out[2 * 7 + col] = v0;
+ }
+}
+// Rounds and right-shifts in[0..15], four registers per round_shift_4x4 call.
+static void round_shift_8x8(__m128i *in, int shift) {
+  int first;
+  for (first = 0; first < 16; first += 4) {
+    round_shift_4x4(in + first, shift);
+  }
+}
+// Adds eight 32-bit residuals to eight 16-bit predictors and clamps to bd bits.
+// res_lo/res_hi carry the residuals for the first/last four pixels; with fliplr
+// set the residual row is mirrored before it is added.
+static __m128i get_recon_8x8(const __m128i pred, __m128i res_lo, __m128i res_hi,
+                             int fliplr, int bd) {
+  const __m128i zero = _mm_setzero_si128();
+  // Zero-extend the predictor's 16-bit lanes into two vectors of 32-bit lanes.
+  const __m128i pred_lo = _mm_unpacklo_epi16(pred, zero);
+  const __m128i pred_hi = _mm_unpackhi_epi16(pred, zero);
+  __m128i sum_lo, sum_hi;
+
+  if (!fliplr) {
+    sum_lo = _mm_add_epi32(res_lo, pred_lo);
+    sum_hi = _mm_add_epi32(res_hi, pred_hi);
+  } else {
+    // Mirror the row: reverse the lanes of each half (0x1B) and swap the halves.
+    sum_lo = _mm_add_epi32(_mm_shuffle_epi32(res_hi, 0x1B), pred_lo);
+    sum_hi = _mm_add_epi32(_mm_shuffle_epi32(res_lo, 0x1B), pred_hi);
+  }
+
+  // Narrow to 16 bits with unsigned saturation, then clamp to the bit depth.
+  return highbd_clamp_epi16(_mm_packus_epi32(sum_lo, sum_hi), bd);
+}
+// Rounds in[0..15], adds them to the 8x8 block at output, clamps and stores.
+static void write_buffer_8x8(__m128i *in, uint16_t *output, int stride,
+ int fliplr, int flipud, int shift, int bd) {
+ __m128i u0, u1, u2, u3, u4, u5, u6, u7;
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+
+ round_shift_8x8(in, shift);
+ // Aligned loads: every destination row must start on a 16-byte boundary.
+ v0 = _mm_load_si128((__m128i const *)(output + 0 * stride));
+ v1 = _mm_load_si128((__m128i const *)(output + 1 * stride));
+ v2 = _mm_load_si128((__m128i const *)(output + 2 * stride));
+ v3 = _mm_load_si128((__m128i const *)(output + 3 * stride));
+ v4 = _mm_load_si128((__m128i const *)(output + 4 * stride));
+ v5 = _mm_load_si128((__m128i const *)(output + 5 * stride));
+ v6 = _mm_load_si128((__m128i const *)(output + 6 * stride));
+ v7 = _mm_load_si128((__m128i const *)(output + 7 * stride));
+
+ if (flipud) { // destination row k receives residual row 7 - k
+ u0 = get_recon_8x8(v0, in[14], in[15], fliplr, bd);
+ u1 = get_recon_8x8(v1, in[12], in[13], fliplr, bd);
+ u2 = get_recon_8x8(v2, in[10], in[11], fliplr, bd);
+ u3 = get_recon_8x8(v3, in[8], in[9], fliplr, bd);
+ u4 = get_recon_8x8(v4, in[6], in[7], fliplr, bd);
+ u5 = get_recon_8x8(v5, in[4], in[5], fliplr, bd);
+ u6 = get_recon_8x8(v6, in[2], in[3], fliplr, bd);
+ u7 = get_recon_8x8(v7, in[0], in[1], fliplr, bd);
+ } else { // residual row k is the register pair in[2 * k], in[2 * k + 1]
+ u0 = get_recon_8x8(v0, in[0], in[1], fliplr, bd);
+ u1 = get_recon_8x8(v1, in[2], in[3], fliplr, bd);
+ u2 = get_recon_8x8(v2, in[4], in[5], fliplr, bd);
+ u3 = get_recon_8x8(v3, in[6], in[7], fliplr, bd);
+ u4 = get_recon_8x8(v4, in[8], in[9], fliplr, bd);
+ u5 = get_recon_8x8(v5, in[10], in[11], fliplr, bd);
+ u6 = get_recon_8x8(v6, in[12], in[13], fliplr, bd);
+ u7 = get_recon_8x8(v7, in[14], in[15], fliplr, bd);
+ }
+ // Aligned stores of the eight reconstructed rows.
+ _mm_store_si128((__m128i *)(output + 0 * stride), u0);
+ _mm_store_si128((__m128i *)(output + 1 * stride), u1);
+ _mm_store_si128((__m128i *)(output + 2 * stride), u2);
+ _mm_store_si128((__m128i *)(output + 3 * stride), u3);
+ _mm_store_si128((__m128i *)(output + 4 * stride), u4);
+ _mm_store_si128((__m128i *)(output + 5 * stride), u5);
+ _mm_store_si128((__m128i *)(output + 6 * stride), u6);
+ _mm_store_si128((__m128i *)(output + 7 * stride), u7);
+}
+// Runs two 1-D 8x8 passes chosen by tx_type and adds the result to output.
+void vp10_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
+ int stride, int tx_type, int bd) {
+ __m128i in[16], out[16]; // the two buffers alternate as source and destination
+ const TXFM_2D_CFG *cfg = NULL;
+
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg = &inv_txfm_2d_cfg_dct_dct_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]); // first pass uses the row cos bit
+ transpose_8x8(in, out);
+ idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]); // second pass uses the column cos bit
+ write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
+ break;
+ case DCT_ADST:
+ cfg = &inv_txfm_2d_cfg_dct_adst_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+ transpose_8x8(in, out);
+ idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
+ break;
+ case ADST_DCT:
+ cfg = &inv_txfm_2d_cfg_adst_dct_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
+ break;
+ case ADST_ADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_8x8(in, output, stride, 0, 0, -cfg->shift[1], bd);
+ break;
+#if CONFIG_EXT_TX // flip variants reuse the ADST cfgs and mirror at write time
+ case FLIPADST_DCT:
+ cfg = &inv_txfm_2d_cfg_adst_dct_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ idct8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_8x8(in, output, stride, 0, 1, -cfg->shift[1], bd);
+ break;
+ case DCT_FLIPADST:
+ cfg = &inv_txfm_2d_cfg_dct_adst_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+ transpose_8x8(in, out);
+ idct8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_8x8(in, output, stride, 1, 0, -cfg->shift[1], bd);
+ break;
+ case ADST_FLIPADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_8x8(in, output, stride, 1, 0, -cfg->shift[1], bd);
+ break;
+ case FLIPADST_FLIPADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_8x8(in, output, stride, 1, 1, -cfg->shift[1], bd);
+ break;
+ case FLIPADST_ADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_8;
+ load_buffer_8x8(coeff, in);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_row[2]);
+ transpose_8x8(in, out);
+ iadst8x8_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_8x8(in, output, stride, 0, 1, -cfg->shift[1], bd);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); // tx_type not handled here; output is left unchanged
+ }
+}
+// 16x16
+// Loads 256 coefficients into in[0..63], four per register (aligned loads).
+static void load_buffer_16x16(const int32_t *coeff, __m128i *in) {
+  int reg;
+  for (reg = 0; reg < 64; ++reg) {
+    in[reg] = _mm_load_si128((const __m128i *)coeff + reg);
+  }
+}
+// Copies eight register pairs, spaced four registers apart, from in[] to in8x8[].
+static void assign_8x8_input_from_16x16(const __m128i *in, __m128i *in8x8,
+                                        int col) {
+  const __m128i *src = in + col;  // first register pair to copy
+  int row;
+  for (row = 0; row < 8; ++row, src += 4) {
+    in8x8[2 * row] = src[0];
+    in8x8[2 * row + 1] = src[1];
+  }
+}
+// Exchanges the two pointers that output1 and output2 point to.
+static void swap_addr(uint16_t **output1, uint16_t **output2) {
+  uint16_t *const first = *output1;
+
+  *output1 = *output2;
+  *output2 = first;
+}
+// Writes a 16x16 result as four 8x8 quarters through write_buffer_8x8().
+static void write_buffer_16x16(__m128i *in, uint16_t *output, int stride,
+ int fliplr, int flipud, int shift, int bd) {
+ __m128i in8x8[16];
+ uint16_t *leftUp = &output[0];
+ uint16_t *rightUp = &output[8];
+ uint16_t *leftDown = &output[8 * stride];
+ uint16_t *rightDown = &output[8 * stride + 8];
+
+ if (fliplr) { // exchange the left and right destination quarters
+ swap_addr(&leftUp, &rightUp);
+ swap_addr(&leftDown, &rightDown);
+ }
+
+ if (flipud) { // exchange the upper and lower destination quarters
+ swap_addr(&leftUp, &leftDown);
+ swap_addr(&rightUp, &rightDown);
+ }
+
+ // Left-up quarter
+ assign_8x8_input_from_16x16(in, in8x8, 0);
+ write_buffer_8x8(in8x8, leftUp, stride, fliplr, flipud, shift, bd);
+
+ // Right-up quarter
+ assign_8x8_input_from_16x16(in, in8x8, 2);
+ write_buffer_8x8(in8x8, rightUp, stride, fliplr, flipud, shift, bd);
+
+ // Left-down quarter
+ assign_8x8_input_from_16x16(in, in8x8, 32);
+ write_buffer_8x8(in8x8, leftDown, stride, fliplr, flipud, shift, bd);
+
+ // Right-down quarter
+ assign_8x8_input_from_16x16(in, in8x8, 34);
+ write_buffer_8x8(in8x8, rightDown, stride, fliplr, flipud, shift, bd);
+}
+// 16-point inverse DCT butterfly network, run once per register column of in.
+static void idct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min]; // table row chosen by bit
+ const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
+ const __m128i cospim4 = _mm_set1_epi32(-cospi[4]);
+ const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
+ const __m128i cospim36 = _mm_set1_epi32(-cospi[36]);
+ const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
+ const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
+ const __m128i cospim20 = _mm_set1_epi32(-cospi[20]);
+ const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
+ const __m128i cospim52 = _mm_set1_epi32(-cospi[52]);
+ const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
+ const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
+ const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+ const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); // half of 1 << bit
+ __m128i u[16], v[16], x, y;
+ int col;
+ // in/out are indexed [row * 4 + col]; every op below is per 32-bit lane.
+ for (col = 0; col < 4; ++col) {
+ // stage 0 (the raw input) is folded into stage 1.
+ // stage 1: gather the 16 rows of this column in bit-reversed order
+ u[0] = in[0 * 4 + col];
+ u[1] = in[8 * 4 + col];
+ u[2] = in[4 * 4 + col];
+ u[3] = in[12 * 4 + col];
+ u[4] = in[2 * 4 + col];
+ u[5] = in[10 * 4 + col];
+ u[6] = in[6 * 4 + col];
+ u[7] = in[14 * 4 + col];
+ u[8] = in[1 * 4 + col];
+ u[9] = in[9 * 4 + col];
+ u[10] = in[5 * 4 + col];
+ u[11] = in[13 * 4 + col];
+ u[12] = in[3 * 4 + col];
+ u[13] = in[11 * 4 + col];
+ u[14] = in[7 * 4 + col];
+ u[15] = in[15 * 4 + col];
+
+ // stage 2: 0..7 pass through; 8..15 get weighted butterflies (half_btf)
+ v[0] = u[0];
+ v[1] = u[1];
+ v[2] = u[2];
+ v[3] = u[3];
+ v[4] = u[4];
+ v[5] = u[5];
+ v[6] = u[6];
+ v[7] = u[7];
+
+ v[8] = half_btf_sse4_1(cospi60, u[8], cospim4, u[15], rnding, bit);
+ v[9] = half_btf_sse4_1(cospi28, u[9], cospim36, u[14], rnding, bit);
+ v[10] = half_btf_sse4_1(cospi44, u[10], cospim20, u[13], rnding, bit);
+ v[11] = half_btf_sse4_1(cospi12, u[11], cospim52, u[12], rnding, bit);
+ v[12] = half_btf_sse4_1(cospi52, u[11], cospi12, u[12], rnding, bit);
+ v[13] = half_btf_sse4_1(cospi20, u[10], cospi44, u[13], rnding, bit);
+ v[14] = half_btf_sse4_1(cospi36, u[9], cospi28, u[14], rnding, bit);
+ v[15] = half_btf_sse4_1(cospi4, u[8], cospi60, u[15], rnding, bit);
+
+ // stage 3: 0..3 pass; weighted butterflies on 4..7; add/sub on 8..15
+ u[0] = v[0];
+ u[1] = v[1];
+ u[2] = v[2];
+ u[3] = v[3];
+ u[4] = half_btf_sse4_1(cospi56, v[4], cospim8, v[7], rnding, bit);
+ u[5] = half_btf_sse4_1(cospi24, v[5], cospim40, v[6], rnding, bit);
+ u[6] = half_btf_sse4_1(cospi40, v[5], cospi24, v[6], rnding, bit);
+ u[7] = half_btf_sse4_1(cospi8, v[4], cospi56, v[7], rnding, bit);
+ u[8] = _mm_add_epi32(v[8], v[9]);
+ u[9] = _mm_sub_epi32(v[8], v[9]);
+ u[10] = _mm_sub_epi32(v[11], v[10]);
+ u[11] = _mm_add_epi32(v[10], v[11]);
+ u[12] = _mm_add_epi32(v[12], v[13]);
+ u[13] = _mm_sub_epi32(v[12], v[13]);
+ u[14] = _mm_sub_epi32(v[15], v[14]);
+ u[15] = _mm_add_epi32(v[14], v[15]);
+
+ // stage 4: cospi32 butterfly on (0,1); weighted 2,3,9,10,13,14; add/sub 4..7
+ x = _mm_mullo_epi32(u[0], cospi32); // keeps only the low 32 product bits
+ y = _mm_mullo_epi32(u[1], cospi32);
+ v[0] = _mm_add_epi32(x, y);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ v[0] = _mm_srai_epi32(v[0], bit);
+
+ v[1] = _mm_sub_epi32(x, y);
+ v[1] = _mm_add_epi32(v[1], rnding);
+ v[1] = _mm_srai_epi32(v[1], bit);
+
+ v[2] = half_btf_sse4_1(cospi48, u[2], cospim16, u[3], rnding, bit);
+ v[3] = half_btf_sse4_1(cospi16, u[2], cospi48, u[3], rnding, bit);
+ v[4] = _mm_add_epi32(u[4], u[5]);
+ v[5] = _mm_sub_epi32(u[4], u[5]);
+ v[6] = _mm_sub_epi32(u[7], u[6]);
+ v[7] = _mm_add_epi32(u[6], u[7]);
+ v[8] = u[8];
+ v[9] = half_btf_sse4_1(cospim16, u[9], cospi48, u[14], rnding, bit);
+ v[10] = half_btf_sse4_1(cospim48, u[10], cospim16, u[13], rnding, bit);
+ v[11] = u[11];
+ v[12] = u[12];
+ v[13] = half_btf_sse4_1(cospim16, u[10], cospi48, u[13], rnding, bit);
+ v[14] = half_btf_sse4_1(cospi48, u[9], cospi16, u[14], rnding, bit);
+ v[15] = u[15];
+
+ // stage 5: add/sub on 0..3 and 8..15; cospi32 butterfly on (5,6)
+ u[0] = _mm_add_epi32(v[0], v[3]);
+ u[1] = _mm_add_epi32(v[1], v[2]);
+ u[2] = _mm_sub_epi32(v[1], v[2]);
+ u[3] = _mm_sub_epi32(v[0], v[3]);
+ u[4] = v[4];
+
+ x = _mm_mullo_epi32(v[5], cospi32);
+ y = _mm_mullo_epi32(v[6], cospi32);
+ u[5] = _mm_sub_epi32(y, x);
+ u[5] = _mm_add_epi32(u[5], rnding);
+ u[5] = _mm_srai_epi32(u[5], bit);
+
+ u[6] = _mm_add_epi32(y, x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = v[7];
+ u[8] = _mm_add_epi32(v[8], v[11]);
+ u[9] = _mm_add_epi32(v[9], v[10]);
+ u[10] = _mm_sub_epi32(v[9], v[10]);
+ u[11] = _mm_sub_epi32(v[8], v[11]);
+ u[12] = _mm_sub_epi32(v[15], v[12]);
+ u[13] = _mm_sub_epi32(v[14], v[13]);
+ u[14] = _mm_add_epi32(v[13], v[14]);
+ u[15] = _mm_add_epi32(v[12], v[15]);
+
+ // stage 6: add/sub on 0..7; cospi32 butterflies on (10,13) and (11,12)
+ v[0] = _mm_add_epi32(u[0], u[7]);
+ v[1] = _mm_add_epi32(u[1], u[6]);
+ v[2] = _mm_add_epi32(u[2], u[5]);
+ v[3] = _mm_add_epi32(u[3], u[4]);
+ v[4] = _mm_sub_epi32(u[3], u[4]);
+ v[5] = _mm_sub_epi32(u[2], u[5]);
+ v[6] = _mm_sub_epi32(u[1], u[6]);
+ v[7] = _mm_sub_epi32(u[0], u[7]);
+ v[8] = u[8];
+ v[9] = u[9];
+
+ x = _mm_mullo_epi32(u[10], cospi32);
+ y = _mm_mullo_epi32(u[13], cospi32);
+ v[10] = _mm_sub_epi32(y, x);
+ v[10] = _mm_add_epi32(v[10], rnding);
+ v[10] = _mm_srai_epi32(v[10], bit);
+
+ v[13] = _mm_add_epi32(x, y);
+ v[13] = _mm_add_epi32(v[13], rnding);
+ v[13] = _mm_srai_epi32(v[13], bit);
+
+ x = _mm_mullo_epi32(u[11], cospi32);
+ y = _mm_mullo_epi32(u[12], cospi32);
+ v[11] = _mm_sub_epi32(y, x);
+ v[11] = _mm_add_epi32(v[11], rnding);
+ v[11] = _mm_srai_epi32(v[11], bit);
+
+ v[12] = _mm_add_epi32(x, y);
+ v[12] = _mm_add_epi32(v[12], rnding);
+ v[12] = _mm_srai_epi32(v[12], bit);
+
+ v[14] = u[14];
+ v[15] = u[15];
+
+ // stage 7: final add/sub of v[k] and v[15 - k], stored in natural row order
+ out[0 * 4 + col] = _mm_add_epi32(v[0], v[15]);
+ out[1 * 4 + col] = _mm_add_epi32(v[1], v[14]);
+ out[2 * 4 + col] = _mm_add_epi32(v[2], v[13]);
+ out[3 * 4 + col] = _mm_add_epi32(v[3], v[12]);
+ out[4 * 4 + col] = _mm_add_epi32(v[4], v[11]);
+ out[5 * 4 + col] = _mm_add_epi32(v[5], v[10]);
+ out[6 * 4 + col] = _mm_add_epi32(v[6], v[9]);
+ out[7 * 4 + col] = _mm_add_epi32(v[7], v[8]);
+ out[8 * 4 + col] = _mm_sub_epi32(v[7], v[8]);
+ out[9 * 4 + col] = _mm_sub_epi32(v[6], v[9]);
+ out[10 * 4 + col] = _mm_sub_epi32(v[5], v[10]);
+ out[11 * 4 + col] = _mm_sub_epi32(v[4], v[11]);
+ out[12 * 4 + col] = _mm_sub_epi32(v[3], v[12]);
+ out[13 * 4 + col] = _mm_sub_epi32(v[2], v[13]);
+ out[14 * 4 + col] = _mm_sub_epi32(v[1], v[14]);
+ out[15 * 4 + col] = _mm_sub_epi32(v[0], v[15]);
+ }
+}
+// 16-point inverse ADST butterfly network, run once per register column of in.
+static void iadst16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min]; // table row chosen by bit
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+ const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
+ const __m128i cospim8 = _mm_set1_epi32(-cospi[8]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
+ const __m128i cospim40 = _mm_set1_epi32(-cospi[40]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
+ const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
+ const __m128i cospim2 = _mm_set1_epi32(-cospi[2]);
+ const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
+ const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
+ const __m128i cospim10 = _mm_set1_epi32(-cospi[10]);
+ const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
+ const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
+ const __m128i cospim18 = _mm_set1_epi32(-cospi[18]);
+ const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
+ const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
+ const __m128i cospim26 = _mm_set1_epi32(-cospi[26]);
+ const __m128i cospi34 = _mm_set1_epi32(cospi[34]);
+ const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
+ const __m128i cospim34 = _mm_set1_epi32(-cospi[34]);
+ const __m128i cospi42 = _mm_set1_epi32(cospi[42]);
+ const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
+ const __m128i cospim42 = _mm_set1_epi32(-cospi[42]);
+ const __m128i cospi50 = _mm_set1_epi32(cospi[50]);
+ const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
+ const __m128i cospim50 = _mm_set1_epi32(-cospi[50]);
+ const __m128i cospi58 = _mm_set1_epi32(cospi[58]);
+ const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
+ const __m128i cospim58 = _mm_set1_epi32(-cospi[58]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); // half of 1 << bit
+ const __m128i zero = _mm_setzero_si128(); // used to negate via 0 - x
+ // in/out are indexed [row * 4 + col]; every op below is per 32-bit lane.
+ __m128i u[16], v[16], x, y;
+ int col;
+
+ for (col = 0; col < 4; ++col) {
+ // stage 0 (the raw input) is folded into stage 1.
+ // stage 1: permute the 16 rows of this column and negate eight of them
+ u[0] = in[0 * 4 + col];
+ u[1] = _mm_sub_epi32(zero, in[15 * 4 + col]);
+ u[2] = _mm_sub_epi32(zero, in[7 * 4 + col]);
+ u[3] = in[8 * 4 + col];
+ u[4] = _mm_sub_epi32(zero, in[3 * 4 + col]);
+ u[5] = in[12 * 4 + col];
+ u[6] = in[4 * 4 + col];
+ u[7] = _mm_sub_epi32(zero, in[11 * 4 + col]);
+ u[8] = _mm_sub_epi32(zero, in[1 * 4 + col]);
+ u[9] = in[14 * 4 + col];
+ u[10] = in[6 * 4 + col];
+ u[11] = _mm_sub_epi32(zero, in[9 * 4 + col]);
+ u[12] = in[2 * 4 + col];
+ u[13] = _mm_sub_epi32(zero, in[13 * 4 + col]);
+ u[14] = _mm_sub_epi32(zero, in[5 * 4 + col]);
+ u[15] = in[10 * 4 + col];
+
+ // stage 2: cospi32 butterflies on (2,3) (6,7) (10,11) (14,15); rest pass
+ v[0] = u[0];
+ v[1] = u[1];
+
+ x = _mm_mullo_epi32(u[2], cospi32); // keeps only the low 32 product bits
+ y = _mm_mullo_epi32(u[3], cospi32);
+ v[2] = _mm_add_epi32(x, y);
+ v[2] = _mm_add_epi32(v[2], rnding);
+ v[2] = _mm_srai_epi32(v[2], bit);
+
+ v[3] = _mm_sub_epi32(x, y);
+ v[3] = _mm_add_epi32(v[3], rnding);
+ v[3] = _mm_srai_epi32(v[3], bit);
+
+ v[4] = u[4];
+ v[5] = u[5];
+
+ x = _mm_mullo_epi32(u[6], cospi32);
+ y = _mm_mullo_epi32(u[7], cospi32);
+ v[6] = _mm_add_epi32(x, y);
+ v[6] = _mm_add_epi32(v[6], rnding);
+ v[6] = _mm_srai_epi32(v[6], bit);
+
+ v[7] = _mm_sub_epi32(x, y);
+ v[7] = _mm_add_epi32(v[7], rnding);
+ v[7] = _mm_srai_epi32(v[7], bit);
+
+ v[8] = u[8];
+ v[9] = u[9];
+
+ x = _mm_mullo_epi32(u[10], cospi32);
+ y = _mm_mullo_epi32(u[11], cospi32);
+ v[10] = _mm_add_epi32(x, y);
+ v[10] = _mm_add_epi32(v[10], rnding);
+ v[10] = _mm_srai_epi32(v[10], bit);
+
+ v[11] = _mm_sub_epi32(x, y);
+ v[11] = _mm_add_epi32(v[11], rnding);
+ v[11] = _mm_srai_epi32(v[11], bit);
+
+ v[12] = u[12];
+ v[13] = u[13];
+
+ x = _mm_mullo_epi32(u[14], cospi32);
+ y = _mm_mullo_epi32(u[15], cospi32);
+ v[14] = _mm_add_epi32(x, y);
+ v[14] = _mm_add_epi32(v[14], rnding);
+ v[14] = _mm_srai_epi32(v[14], bit);
+
+ v[15] = _mm_sub_epi32(x, y);
+ v[15] = _mm_add_epi32(v[15], rnding);
+ v[15] = _mm_srai_epi32(v[15], bit);
+
+ // stage 3: add/sub butterflies between elements two apart
+ u[0] = _mm_add_epi32(v[0], v[2]);
+ u[1] = _mm_add_epi32(v[1], v[3]);
+ u[2] = _mm_sub_epi32(v[0], v[2]);
+ u[3] = _mm_sub_epi32(v[1], v[3]);
+ u[4] = _mm_add_epi32(v[4], v[6]);
+ u[5] = _mm_add_epi32(v[5], v[7]);
+ u[6] = _mm_sub_epi32(v[4], v[6]);
+ u[7] = _mm_sub_epi32(v[5], v[7]);
+ u[8] = _mm_add_epi32(v[8], v[10]);
+ u[9] = _mm_add_epi32(v[9], v[11]);
+ u[10] = _mm_sub_epi32(v[8], v[10]);
+ u[11] = _mm_sub_epi32(v[9], v[11]);
+ u[12] = _mm_add_epi32(v[12], v[14]);
+ u[13] = _mm_add_epi32(v[13], v[15]);
+ u[14] = _mm_sub_epi32(v[12], v[14]);
+ u[15] = _mm_sub_epi32(v[13], v[15]);
+
+ // stage 4: weighted butterflies (cospi16/cospi48) on 4..7 and 12..15
+ v[0] = u[0];
+ v[1] = u[1];
+ v[2] = u[2];
+ v[3] = u[3];
+ v[4] = half_btf_sse4_1(cospi16, u[4], cospi48, u[5], rnding, bit);
+ v[5] = half_btf_sse4_1(cospi48, u[4], cospim16, u[5], rnding, bit);
+ v[6] = half_btf_sse4_1(cospim48, u[6], cospi16, u[7], rnding, bit);
+ v[7] = half_btf_sse4_1(cospi16, u[6], cospi48, u[7], rnding, bit);
+ v[8] = u[8];
+ v[9] = u[9];
+ v[10] = u[10];
+ v[11] = u[11];
+ v[12] = half_btf_sse4_1(cospi16, u[12], cospi48, u[13], rnding, bit);
+ v[13] = half_btf_sse4_1(cospi48, u[12], cospim16, u[13], rnding, bit);
+ v[14] = half_btf_sse4_1(cospim48, u[14], cospi16, u[15], rnding, bit);
+ v[15] = half_btf_sse4_1(cospi16, u[14], cospi48, u[15], rnding, bit);
+
+ // stage 5: add/sub butterflies between elements four apart
+ u[0] = _mm_add_epi32(v[0], v[4]);
+ u[1] = _mm_add_epi32(v[1], v[5]);
+ u[2] = _mm_add_epi32(v[2], v[6]);
+ u[3] = _mm_add_epi32(v[3], v[7]);
+ u[4] = _mm_sub_epi32(v[0], v[4]);
+ u[5] = _mm_sub_epi32(v[1], v[5]);
+ u[6] = _mm_sub_epi32(v[2], v[6]);
+ u[7] = _mm_sub_epi32(v[3], v[7]);
+ u[8] = _mm_add_epi32(v[8], v[12]);
+ u[9] = _mm_add_epi32(v[9], v[13]);
+ u[10] = _mm_add_epi32(v[10], v[14]);
+ u[11] = _mm_add_epi32(v[11], v[15]);
+ u[12] = _mm_sub_epi32(v[8], v[12]);
+ u[13] = _mm_sub_epi32(v[9], v[13]);
+ u[14] = _mm_sub_epi32(v[10], v[14]);
+ u[15] = _mm_sub_epi32(v[11], v[15]);
+
+ // stage 6: 0..7 pass; weighted butterflies (cospi8/56, cospi24/40) on 8..15
+ v[0] = u[0];
+ v[1] = u[1];
+ v[2] = u[2];
+ v[3] = u[3];
+ v[4] = u[4];
+ v[5] = u[5];
+ v[6] = u[6];
+ v[7] = u[7];
+ v[8] = half_btf_sse4_1(cospi8, u[8], cospi56, u[9], rnding, bit);
+ v[9] = half_btf_sse4_1(cospi56, u[8], cospim8, u[9], rnding, bit);
+ v[10] = half_btf_sse4_1(cospi40, u[10], cospi24, u[11], rnding, bit);
+ v[11] = half_btf_sse4_1(cospi24, u[10], cospim40, u[11], rnding, bit);
+ v[12] = half_btf_sse4_1(cospim56, u[12], cospi8, u[13], rnding, bit);
+ v[13] = half_btf_sse4_1(cospi8, u[12], cospi56, u[13], rnding, bit);
+ v[14] = half_btf_sse4_1(cospim24, u[14], cospi40, u[15], rnding, bit);
+ v[15] = half_btf_sse4_1(cospi40, u[14], cospi24, u[15], rnding, bit);
+
+ // stage 7: add/sub butterflies between elements eight apart
+ u[0] = _mm_add_epi32(v[0], v[8]);
+ u[1] = _mm_add_epi32(v[1], v[9]);
+ u[2] = _mm_add_epi32(v[2], v[10]);
+ u[3] = _mm_add_epi32(v[3], v[11]);
+ u[4] = _mm_add_epi32(v[4], v[12]);
+ u[5] = _mm_add_epi32(v[5], v[13]);
+ u[6] = _mm_add_epi32(v[6], v[14]);
+ u[7] = _mm_add_epi32(v[7], v[15]);
+ u[8] = _mm_sub_epi32(v[0], v[8]);
+ u[9] = _mm_sub_epi32(v[1], v[9]);
+ u[10] = _mm_sub_epi32(v[2], v[10]);
+ u[11] = _mm_sub_epi32(v[3], v[11]);
+ u[12] = _mm_sub_epi32(v[4], v[12]);
+ u[13] = _mm_sub_epi32(v[5], v[13]);
+ u[14] = _mm_sub_epi32(v[6], v[14]);
+ u[15] = _mm_sub_epi32(v[7], v[15]);
+
+ // stage 8: weighted butterfly on every pair (2k, 2k+1), one cospi pair each
+ v[0] = half_btf_sse4_1(cospi2, u[0], cospi62, u[1], rnding, bit);
+ v[1] = half_btf_sse4_1(cospi62, u[0], cospim2, u[1], rnding, bit);
+ v[2] = half_btf_sse4_1(cospi10, u[2], cospi54, u[3], rnding, bit);
+ v[3] = half_btf_sse4_1(cospi54, u[2], cospim10, u[3], rnding, bit);
+ v[4] = half_btf_sse4_1(cospi18, u[4], cospi46, u[5], rnding, bit);
+ v[5] = half_btf_sse4_1(cospi46, u[4], cospim18, u[5], rnding, bit);
+ v[6] = half_btf_sse4_1(cospi26, u[6], cospi38, u[7], rnding, bit);
+ v[7] = half_btf_sse4_1(cospi38, u[6], cospim26, u[7], rnding, bit);
+ v[8] = half_btf_sse4_1(cospi34, u[8], cospi30, u[9], rnding, bit);
+ v[9] = half_btf_sse4_1(cospi30, u[8], cospim34, u[9], rnding, bit);
+ v[10] = half_btf_sse4_1(cospi42, u[10], cospi22, u[11], rnding, bit);
+ v[11] = half_btf_sse4_1(cospi22, u[10], cospim42, u[11], rnding, bit);
+ v[12] = half_btf_sse4_1(cospi50, u[12], cospi14, u[13], rnding, bit);
+ v[13] = half_btf_sse4_1(cospi14, u[12], cospim50, u[13], rnding, bit);
+ v[14] = half_btf_sse4_1(cospi58, u[14], cospi6, u[15], rnding, bit);
+ v[15] = half_btf_sse4_1(cospi6, u[14], cospim58, u[15], rnding, bit);
+
+ // stage 9: store the 16 results to out in permuted row order
+ out[0 * 4 + col] = v[1];
+ out[1 * 4 + col] = v[14];
+ out[2 * 4 + col] = v[3];
+ out[3 * 4 + col] = v[12];
+ out[4 * 4 + col] = v[5];
+ out[5 * 4 + col] = v[10];
+ out[6 * 4 + col] = v[7];
+ out[7 * 4 + col] = v[8];
+ out[8 * 4 + col] = v[9];
+ out[9 * 4 + col] = v[6];
+ out[10 * 4 + col] = v[11];
+ out[11 * 4 + col] = v[4];
+ out[12 * 4 + col] = v[13];
+ out[13 * 4 + col] = v[2];
+ out[14 * 4 + col] = v[15];
+ out[15 * 4 + col] = v[0];
+ }
+}
+// Applies round_shift_8x8 with the same shift to all four 16-register quarters.
+static void round_shift_16x16(__m128i *in, int shift) {
+  int base;  // index of the first register of the current quarter
+  for (base = 0; base < 64; base += 16) {
+    round_shift_8x8(&in[base], shift);
+  }
+}
+// 16x16 inverse 2-D transform of coeff, handed to write_buffer_16x16 for output.
+void vp10_inv_txfm2d_add_16x16_sse4_1(const int32_t *coeff, uint16_t *output,
+ int stride, int tx_type, int bd) {
+ __m128i in[64], out[64]; // ping-pong buffers: kernels read out, write in
+ const TXFM_2D_CFG *cfg = NULL;
+ // Every case: load, transpose, row pass, round, transpose, column pass, write.
+ switch (tx_type) {
+ case DCT_DCT:
+ cfg = &inv_txfm_2d_cfg_dct_dct_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]); // cfg shifts are negated before use
+ transpose_16x16(in, out);
+ idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
+ break;
+ case DCT_ADST:
+ cfg = &inv_txfm_2d_cfg_dct_adst_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]);
+ transpose_16x16(in, out);
+ idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
+ break;
+ case ADST_DCT:
+ cfg = &inv_txfm_2d_cfg_adst_dct_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
+ break;
+ case ADST_ADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_16x16(in, output, stride, 0, 0, -cfg->shift[1], bd);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT: // flip cases reuse the ADST kernels and flip on write
+ cfg = &inv_txfm_2d_cfg_adst_dct_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ idct16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ // NOTE(review): args 4 and 5 are presumably (fliplr, flipud) -- confirm.
+ write_buffer_16x16(in, output, stride, 0, 1, -cfg->shift[1], bd);
+ break;
+ case DCT_FLIPADST:
+ cfg = &inv_txfm_2d_cfg_dct_adst_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]);
+ transpose_16x16(in, out);
+ idct16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_16x16(in, output, stride, 1, 0, -cfg->shift[1], bd);
+ break;
+ case ADST_FLIPADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_16x16(in, output, stride, 1, 0, -cfg->shift[1], bd);
+ break;
+ case FLIPADST_FLIPADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_16x16(in, output, stride, 1, 1, -cfg->shift[1], bd);
+ break;
+ case FLIPADST_ADST:
+ cfg = &inv_txfm_2d_cfg_adst_adst_16;
+ load_buffer_16x16(coeff, in);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_row[2]);
+ round_shift_16x16(in, -cfg->shift[0]);
+ transpose_16x16(in, out);
+ iadst16x16_sse4_1(out, in, cfg->cos_bit_col[2]);
+ write_buffer_16x16(in, output, stride, 0, 1, -cfg->shift[1], bd);
+ break;
+#endif
+ default: assert(0); // unknown tx_type: nothing is written if asserts are off
+ }
+}
diff --git a/av1/common/x86/highbd_txfm_utility_sse4.h b/av1/common/x86/highbd_txfm_utility_sse4.h
new file mode 100644
index 0000000..f1e298d
--- /dev/null
+++ b/av1/common/x86/highbd_txfm_utility_sse4.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef _HIGHBD_TXFM_UTILITY_SSE4_H
+#define _HIGHBD_TXFM_UTILITY_SSE4_H
+
+#include <smmintrin.h> /* SSE4.1 */
+// 4x4 transpose of 32-bit lanes: rows x0..x3 in, transposed rows y0..y3 out.
+#define TRANSPOSE_4X4(x0, x1, x2, x3, y0, y1, y2, y3) \
+ do { \
+ __m128i u0, u1, u2, u3; /* hides any caller variables named u0..u3 */ \
+ u0 = _mm_unpacklo_epi32(x0, x1); /* each x argument is expanded twice */ \
+ u1 = _mm_unpackhi_epi32(x0, x1); \
+ u2 = _mm_unpacklo_epi32(x2, x3); \
+ u3 = _mm_unpackhi_epi32(x2, x3); \
+ y0 = _mm_unpacklo_epi64(u0, u2); /* all x are read before any y is set */ \
+ y1 = _mm_unpackhi_epi64(u0, u2); \
+ y2 = _mm_unpacklo_epi64(u1, u3); \
+ y3 = _mm_unpackhi_epi64(u1, u3); \
+ } while (0)
+// 8x8 transpose of 32-bit lanes (row r = in[2r], in[2r+1]); in != out required.
+static INLINE void transpose_8x8(const __m128i *in, __m128i *out) {
+ TRANSPOSE_4X4(in[0], in[2], in[4], in[6], out[0], out[2], out[4], out[6]);
+ TRANSPOSE_4X4(in[1], in[3], in[5], in[7], out[8], out[10], out[12], out[14]);
+ TRANSPOSE_4X4(in[8], in[10], in[12], in[14], out[1], out[3], out[5], out[7]);
+ TRANSPOSE_4X4(in[9], in[11], in[13], in[15], out[9], out[11], out[13],
+ out[15]); // diagonal 4x4 tiles stay put; off-diagonal tiles swap places
+}
+// 16x16 transpose of 32-bit lanes (row r = in[4r .. 4r+3]); in != out required.
+static INLINE void transpose_16x16(const __m128i *in, __m128i *out) {
+ // Upper left 8x8 of in -> upper left 8x8 of out
+ TRANSPOSE_4X4(in[0], in[4], in[8], in[12], out[0], out[4], out[8], out[12]);
+ TRANSPOSE_4X4(in[1], in[5], in[9], in[13], out[16], out[20], out[24],
+ out[28]);
+ TRANSPOSE_4X4(in[16], in[20], in[24], in[28], out[1], out[5], out[9],
+ out[13]);
+ TRANSPOSE_4X4(in[17], in[21], in[25], in[29], out[17], out[21], out[25],
+ out[29]);
+
+ // Upper right 8x8 of in -> lower left 8x8 of out
+ TRANSPOSE_4X4(in[2], in[6], in[10], in[14], out[32], out[36], out[40],
+ out[44]);
+ TRANSPOSE_4X4(in[3], in[7], in[11], in[15], out[48], out[52], out[56],
+ out[60]);
+ TRANSPOSE_4X4(in[18], in[22], in[26], in[30], out[33], out[37], out[41],
+ out[45]);
+ TRANSPOSE_4X4(in[19], in[23], in[27], in[31], out[49], out[53], out[57],
+ out[61]);
+
+ // Lower left 8x8 of in -> upper right 8x8 of out
+ TRANSPOSE_4X4(in[32], in[36], in[40], in[44], out[2], out[6], out[10],
+ out[14]);
+ TRANSPOSE_4X4(in[33], in[37], in[41], in[45], out[18], out[22], out[26],
+ out[30]);
+ TRANSPOSE_4X4(in[48], in[52], in[56], in[60], out[3], out[7], out[11],
+ out[15]);
+ TRANSPOSE_4X4(in[49], in[53], in[57], in[61], out[19], out[23], out[27],
+ out[31]);
+ // Lower right 8x8 of in -> lower right 8x8 of out
+ TRANSPOSE_4X4(in[34], in[38], in[42], in[46], out[34], out[38], out[42],
+ out[46]);
+ TRANSPOSE_4X4(in[35], in[39], in[43], in[47], out[50], out[54], out[58],
+ out[62]);
+ TRANSPOSE_4X4(in[50], in[54], in[58], in[62], out[35], out[39], out[43],
+ out[47]);
+ TRANSPOSE_4X4(in[51], in[55], in[59], in[63], out[51], out[55], out[59],
+ out[63]);
+}
+
+// Half butterfly per 32-bit lane: (w0 * n0 + w1 * n1 + rounding) >> bit.
+// Callers pass rounding = 1 << (bit - 1); all arithmetic wraps at 32 bits.
+static INLINE __m128i half_btf_sse4_1(__m128i w0, __m128i n0, __m128i w1,
+                                      __m128i n1, __m128i rounding, int bit) {
+  const __m128i prod0 = _mm_mullo_epi32(w0, n0);
+  const __m128i prod1 = _mm_mullo_epi32(w1, n1);
+  // Sum the two weighted terms, then bias by the rounding offset.
+  const __m128i sum = _mm_add_epi32(prod0, prod1);
+  const __m128i biased = _mm_add_epi32(sum, rounding);
+
+  // Arithmetic shift, so negative sums keep their sign.
+  return _mm_srai_epi32(biased, bit);
+}
+
+#endif // _HIGHBD_TXFM_UTILITY_SSE4_H
diff --git a/av1/common/x86/idct_intrin_sse2.c b/av1/common/x86/idct_intrin_sse2.c
new file mode 100644
index 0000000..70bf9bf
--- /dev/null
+++ b/av1/common/x86/idct_intrin_sse2.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "aom_dsp/x86/inv_txfm_sse2.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+#include "aom_ports/mem.h"
+#include "av1/common/enums.h"
+
+#if CONFIG_EXT_TX
+static INLINE void fliplr_4x4(__m128i in[2]) {
+  int i;  // 0x1b reverses the four 16-bit words inside each 64-bit half
+  for (i = 0; i < 2; ++i) {
+    in[i] = _mm_shufflehi_epi16(_mm_shufflelo_epi16(in[i], 0x1b), 0x1b);
+  }
+}
+// Applies mm_reverse_epi16 to each of the 8 registers, in place.
+static INLINE void fliplr_8x8(__m128i in[8]) {
+  // mm_reverse_epi16 is defined elsewhere; presumably it reverses the eight
+  // 16-bit lanes of a register, which mirrors one 8-pixel row left to right.
+  // Each register is processed independently, so an ascending loop is
+  // equivalent to eight explicit assignments.
+  int row;
+
+  for (row = 0; row < 8; ++row) {
+    in[row] = mm_reverse_epi16(in[row]);
+  }
+}
+// Applies fliplr_8x8 to both 8-register halves of a 16-register array.
+static INLINE void fliplr_16x8(__m128i in[16]) {
+  fliplr_8x8(in);      // registers 0..7
+  fliplr_8x8(in + 8);  // registers 8..15
+}
+// Flips both 16-register halves with fliplr_16x8, then swaps the two pointers.
+#define FLIPLR_16x16(in0, in1) \
+ do { \
+ __m128i *tmp; /* in0 and in1 must be assignable pointer variables */ \
+ fliplr_16x8(in0); \
+ fliplr_16x8(in1); \
+ tmp = (in0); /* swap which half each caller pointer refers to */ \
+ (in0) = (in1); \
+ (in1) = tmp; \
+ } while (0)
+// Moves dest to row (size - 1) and negates stride, so later rows step upward.
+#define FLIPUD_PTR(dest, stride, size) \
+ do { \
+ (dest) = (dest) + ((size)-1) * (stride); /* caller's dest is modified */ \
+ (stride) = -(stride); /* caller's stride is modified too */ \
+ } while (0)
+#endif
+// 4x4 inverse transform chosen by tx_type; the rounded result is added to dest.
+void vp10_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
+ int tx_type) {
+ __m128i in[2];
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i eight = _mm_set1_epi16(8);
+ // in[1] is loaded starting 8 coefficients after in[0].
+ in[0] = load_input_data(input);
+ in[1] = load_input_data(input + 8);
+ // Each case makes two kernel calls; any flip is applied after both of them.
+ switch (tx_type) {
+ case DCT_DCT:
+ idct4_sse2(in);
+ idct4_sse2(in);
+ break;
+ case ADST_DCT:
+ idct4_sse2(in);
+ iadst4_sse2(in);
+ break;
+ case DCT_ADST:
+ iadst4_sse2(in);
+ idct4_sse2(in);
+ break;
+ case ADST_ADST:
+ iadst4_sse2(in);
+ iadst4_sse2(in);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ idct4_sse2(in);
+ iadst4_sse2(in);
+ FLIPUD_PTR(dest, stride, 4); // store rows bottom-up from here on
+ break;
+ case DCT_FLIPADST:
+ iadst4_sse2(in);
+ idct4_sse2(in);
+ fliplr_4x4(in); // reverse each group of four 16-bit values
+ break;
+ case FLIPADST_FLIPADST:
+ iadst4_sse2(in);
+ iadst4_sse2(in);
+ FLIPUD_PTR(dest, stride, 4);
+ fliplr_4x4(in);
+ break;
+ case ADST_FLIPADST:
+ iadst4_sse2(in);
+ iadst4_sse2(in);
+ fliplr_4x4(in);
+ break;
+ case FLIPADST_ADST:
+ iadst4_sse2(in);
+ iadst4_sse2(in);
+ FLIPUD_PTR(dest, stride, 4);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break; // unknown tx_type: in is left untransformed
+ }
+
+ // Final round and shift: (x + 8) >> 4 on every 16-bit lane
+ in[0] = _mm_add_epi16(in[0], eight);
+ in[1] = _mm_add_epi16(in[1], eight);
+
+ in[0] = _mm_srai_epi16(in[0], 4);
+ in[1] = _mm_srai_epi16(in[1], 4);
+ // NOTE(review): each 4-byte dest row below is accessed through an int * cast.
+ // Reconstruction and Store: widen dest, add residual, saturate back to 8 bits
+ {
+ __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 0));
+ __m128i d1 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 1));
+ __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
+ __m128i d3 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 3));
+ d0 = _mm_unpacklo_epi32(d0, d1);
+ d2 = _mm_unpacklo_epi32(d2, d3);
+ d0 = _mm_unpacklo_epi8(d0, zero);
+ d2 = _mm_unpacklo_epi8(d2, zero);
+ d0 = _mm_add_epi16(d0, in[0]);
+ d2 = _mm_add_epi16(d2, in[1]);
+ d0 = _mm_packus_epi16(d0, d2);
+ // store result[0]
+ *(int *)dest = _mm_cvtsi128_si32(d0);
+ // store result[1]: shift the next 4 bytes down into the low dword
+ d0 = _mm_srli_si128(d0, 4);
+ *(int *)(dest + stride) = _mm_cvtsi128_si32(d0);
+ // store result[2]
+ d0 = _mm_srli_si128(d0, 4);
+ *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0);
+ // store result[3]
+ d0 = _mm_srli_si128(d0, 4);
+ *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0);
+ }
+}
+// 8x8 inverse transform chosen by tx_type, stored to dest via RECON_AND_STORE.
+void vp10_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
+ int tx_type) {
+ __m128i in[8];
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i final_rounding = _mm_set1_epi16(1 << 4);
+
+ // load input data: in[k] starts at input + 8 * k
+ in[0] = load_input_data(input);
+ in[1] = load_input_data(input + 8 * 1);
+ in[2] = load_input_data(input + 8 * 2);
+ in[3] = load_input_data(input + 8 * 3);
+ in[4] = load_input_data(input + 8 * 4);
+ in[5] = load_input_data(input + 8 * 5);
+ in[6] = load_input_data(input + 8 * 6);
+ in[7] = load_input_data(input + 8 * 7);
+ // Each case makes two kernel calls; any flip is applied after both of them.
+ switch (tx_type) {
+ case DCT_DCT:
+ idct8_sse2(in);
+ idct8_sse2(in);
+ break;
+ case ADST_DCT:
+ idct8_sse2(in);
+ iadst8_sse2(in);
+ break;
+ case DCT_ADST:
+ iadst8_sse2(in);
+ idct8_sse2(in);
+ break;
+ case ADST_ADST:
+ iadst8_sse2(in);
+ iadst8_sse2(in);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ idct8_sse2(in);
+ iadst8_sse2(in);
+ FLIPUD_PTR(dest, stride, 8); // store rows bottom-up from here on
+ break;
+ case DCT_FLIPADST:
+ iadst8_sse2(in);
+ idct8_sse2(in);
+ fliplr_8x8(in); // applies mm_reverse_epi16 to every register
+ break;
+ case FLIPADST_FLIPADST:
+ iadst8_sse2(in);
+ iadst8_sse2(in);
+ FLIPUD_PTR(dest, stride, 8);
+ fliplr_8x8(in);
+ break;
+ case ADST_FLIPADST:
+ iadst8_sse2(in);
+ iadst8_sse2(in);
+ fliplr_8x8(in);
+ break;
+ case FLIPADST_ADST:
+ iadst8_sse2(in);
+ iadst8_sse2(in);
+ FLIPUD_PTR(dest, stride, 8);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break; // unknown tx_type: in is left untransformed
+ }
+
+ // Final rounding and shift: saturating add of 16, then arithmetic >> 5
+ in[0] = _mm_adds_epi16(in[0], final_rounding);
+ in[1] = _mm_adds_epi16(in[1], final_rounding);
+ in[2] = _mm_adds_epi16(in[2], final_rounding);
+ in[3] = _mm_adds_epi16(in[3], final_rounding);
+ in[4] = _mm_adds_epi16(in[4], final_rounding);
+ in[5] = _mm_adds_epi16(in[5], final_rounding);
+ in[6] = _mm_adds_epi16(in[6], final_rounding);
+ in[7] = _mm_adds_epi16(in[7], final_rounding);
+
+ in[0] = _mm_srai_epi16(in[0], 5);
+ in[1] = _mm_srai_epi16(in[1], 5);
+ in[2] = _mm_srai_epi16(in[2], 5);
+ in[3] = _mm_srai_epi16(in[3], 5);
+ in[4] = _mm_srai_epi16(in[4], 5);
+ in[5] = _mm_srai_epi16(in[5], 5);
+ in[6] = _mm_srai_epi16(in[6], 5);
+ in[7] = _mm_srai_epi16(in[7], 5);
+ // One RECON_AND_STORE per row; the macro is defined elsewhere.
+ RECON_AND_STORE(dest + 0 * stride, in[0]);
+ RECON_AND_STORE(dest + 1 * stride, in[1]);
+ RECON_AND_STORE(dest + 2 * stride, in[2]);
+ RECON_AND_STORE(dest + 3 * stride, in[3]);
+ RECON_AND_STORE(dest + 4 * stride, in[4]);
+ RECON_AND_STORE(dest + 5 * stride, in[5]);
+ RECON_AND_STORE(dest + 6 * stride, in[6]);
+ RECON_AND_STORE(dest + 7 * stride, in[7]);
+}
+// 16x16 inverse transform chosen by tx_type, processed as two 16-register halves.
+void vp10_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
+ int stride, int tx_type) {
+ __m128i in[32];
+ __m128i *in0 = &in[0];
+ __m128i *in1 = &in[16];
+ // in1 is loaded starting 8 coefficients after in0.
+ load_buffer_8x16(input, in0);
+ input += 8;
+ load_buffer_8x16(input, in1);
+ // Each case makes two kernel calls; any flip is applied after both of them.
+ switch (tx_type) {
+ case DCT_DCT:
+ idct16_sse2(in0, in1);
+ idct16_sse2(in0, in1);
+ break;
+ case ADST_DCT:
+ idct16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ break;
+ case DCT_ADST:
+ iadst16_sse2(in0, in1);
+ idct16_sse2(in0, in1);
+ break;
+ case ADST_ADST:
+ iadst16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ idct16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ FLIPUD_PTR(dest, stride, 16); // store rows bottom-up from here on
+ break;
+ case DCT_FLIPADST:
+ iadst16_sse2(in0, in1);
+ idct16_sse2(in0, in1);
+ FLIPLR_16x16(in0, in1); // also swaps the in0/in1 pointers
+ break;
+ case FLIPADST_FLIPADST:
+ iadst16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ FLIPUD_PTR(dest, stride, 16);
+ FLIPLR_16x16(in0, in1);
+ break;
+ case ADST_FLIPADST:
+ iadst16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ FLIPLR_16x16(in0, in1);
+ break;
+ case FLIPADST_ADST:
+ iadst16_sse2(in0, in1);
+ iadst16_sse2(in0, in1);
+ FLIPUD_PTR(dest, stride, 16);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break; // unknown tx_type: data is left untransformed
+ }
+ // in0 goes to dest, in1 to dest + 8; FLIPLR_16x16 may have swapped the two.
+ write_buffer_8x16(dest, in0, stride);
+ dest += 8;
+ write_buffer_8x16(dest, in1, stride);
+}
diff --git a/av1/common/x86/reconintra_sse4.c b/av1/common/x86/reconintra_sse4.c
new file mode 100644
index 0000000..cac34a6
--- /dev/null
+++ b/av1/common/x86/reconintra_sse4.c
@@ -0,0 +1,891 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <smmintrin.h>
+
+#include "./vp10_rtcd.h"
+#include "aom_ports/mem.h"
+#include "av1/common/enums.h"
+#include "av1/common/intra_filters.h"
+
+// Zero-extends the first 8 bytes of |above| and of |left| to 16 bits and
+// writes their lane-wise sums to sum[0]. Each load still reads 16 bytes.
+static INLINE void AddPixelsSmall(const uint8_t *above, const uint8_t *left,
+                                  __m128i *sum) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i above_px = _mm_loadu_si128((const __m128i *)above);
+  const __m128i left_px = _mm_loadu_si128((const __m128i *)left);
+  const __m128i above16 = _mm_unpacklo_epi8(above_px, zero);
+  const __m128i left16 = _mm_unpacklo_epi8(left_px, zero);
+  sum[0] = _mm_add_epi16(above16, left16);
+}
+
+// Rounded mean of the 4 above and 4 left reference pixels: (sum + 4) >> 3.
+// Returns the mean and also broadcasts it to the four int32 lanes of *params.
+static INLINE int GetMeanValue4x4(const uint8_t *above, const uint8_t *left,
+                                  __m128i *params) {
+  __m128i acc;
+  uint16_t total;
+  int mean;
+
+  AddPixelsSmall(above, left, &acc);
+  // One horizontal add leaves pair sums in lanes 0..3; adding lane 1 to
+  // lane 0 then gives the sum over the first four columns only.
+  acc = _mm_hadd_epi16(acc, _mm_setzero_si128());
+  acc = _mm_add_epi16(acc, _mm_srli_si128(acc, 2));
+  total = (uint16_t)(_mm_extract_epi16(acc, 0) + 4);
+  mean = total >> 3;
+  *params = _mm_set1_epi32(mean);
+  return mean;
+}
+
+// Rounded mean of the 8 above and 8 left reference pixels: (sum + 8) >> 4.
+// Returns the mean and also broadcasts it to the four int32 lanes of *params.
+static INLINE int GetMeanValue8x8(const uint8_t *above, const uint8_t *left,
+                                  __m128i *params) {
+  const __m128i kZero = _mm_setzero_si128();
+  __m128i acc;
+  uint16_t total;
+  int mean;
+
+  AddPixelsSmall(above, left, &acc);
+  // Two horizontal adds fold the eight 16-bit lanes into lanes 0 and 1.
+  acc = _mm_hadd_epi16(acc, kZero);
+  acc = _mm_hadd_epi16(acc, kZero);
+  // Fold lane 1 into lane 0, which then holds the full sum.
+  acc = _mm_add_epi16(acc, _mm_srli_si128(acc, 2));
+  total = (uint16_t)(_mm_extract_epi16(acc, 0) + 8);
+  mean = total >> 4;
+  *params = _mm_set1_epi32(mean);
+  return mean;
+}
+
+// Sums 16 bytes of |above| and 16 bytes of |left| into eight 16-bit lanes:
+// lane i of sum[0] = above[i] + left[i] + above[i + 8] + left[i + 8].
+static INLINE void AddPixelsLarge(const uint8_t *above, const uint8_t *left,
+                                  __m128i *sum) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i above_px = _mm_loadu_si128((const __m128i *)above);
+  const __m128i left_px = _mm_loadu_si128((const __m128i *)left);
+  // Zero-extend the low and the high 8 bytes of each input to 16 bits.
+  const __m128i above_lo = _mm_unpacklo_epi8(above_px, zero);
+  const __m128i left_lo = _mm_unpacklo_epi8(left_px, zero);
+  const __m128i above_hi = _mm_unpackhi_epi8(above_px, zero);
+  const __m128i left_hi = _mm_unpackhi_epi8(left_px, zero);
+  __m128i total = _mm_add_epi16(above_lo, left_lo);
+
+  total = _mm_add_epi16(total, above_hi);
+  sum[0] = _mm_add_epi16(total, left_hi);
+}
+
+// Rounded mean of the 16 above and 16 left reference pixels: (sum + 16) >> 5.
+// Returns the mean and also broadcasts it to the four int32 lanes of *params.
+static INLINE int GetMeanValue16x16(const uint8_t *above, const uint8_t *left,
+                                    __m128i *params) {
+  const __m128i kZero = _mm_setzero_si128();
+  __m128i acc;
+  uint16_t total;
+  int mean;
+
+  AddPixelsLarge(above, left, &acc);
+  // Two horizontal adds fold the eight 16-bit lanes into lanes 0 and 1.
+  acc = _mm_hadd_epi16(acc, kZero);
+  acc = _mm_hadd_epi16(acc, kZero);
+  // Fold lane 1 into lane 0, which then holds the full sum.
+  acc = _mm_add_epi16(acc, _mm_srli_si128(acc, 2));
+  total = (uint16_t)(_mm_extract_epi16(acc, 0) + 16);
+  mean = total >> 5;
+  *params = _mm_set1_epi32(mean);
+  return mean;
+}
+
+// Rounded mean of the 32 above and 32 left reference pixels, (sum + 32) >> 6.
+// Returns the mean and also broadcasts it to the four int32 lanes of *params.
+static INLINE int GetMeanValue32x32(const uint8_t *above, const uint8_t *left,
+                                    __m128i *params) {
+  const __m128i kZero = _mm_setzero_si128();
+  __m128i first_half, second_half, acc;
+  uint16_t total;
+  int mean;
+
+  AddPixelsLarge(above, left, &first_half);
+  AddPixelsLarge(above + 16, left + 16, &second_half);
+  acc = _mm_add_epi16(first_half, second_half);
+  // Two horizontal adds fold the eight 16-bit lanes into lanes 0 and 1.
+  acc = _mm_hadd_epi16(acc, kZero);
+  acc = _mm_hadd_epi16(acc, kZero);
+  // Fold lane 1 into lane 0, which then holds the full sum.
+  acc = _mm_add_epi16(acc, _mm_srli_si128(acc, 2));
+  total = (uint16_t)(_mm_extract_epi16(acc, 0) + 32);
+  mean = total >> 6;
+  *params = _mm_set1_epi32(mean);
+  return mean;
+}
+
+// Note:
+//  params[4] : mean value, 4 int32_t repetition
+//
+// Forwards to the GetMeanValue helper for block size |bs| (4, 8, 16 or 32)
+// and returns its result. Any other size trips the assert and yields 0.
+static INLINE int CalcRefPixelsMeanValue(const uint8_t *above,
+                                         const uint8_t *left, int bs,
+                                         __m128i *params) {
+  switch (bs) {
+    case 4: return GetMeanValue4x4(above, left, params);
+    case 8: return GetMeanValue8x8(above, left, params);
+    case 16: return GetMeanValue16x16(above, left, params);
+    case 32: return GetMeanValue32x32(above, left, params);
+    default: assert(0); return 0;
+  }
+}
+
+// Note:
+//  params[0-3] : 4-tap filter coefficients (int32_t per coefficient)
+//
+// Fetches the four taps that filter_intra_taps_4 holds for this block size and
+// prediction mode, and replicates tap k across all four int32 lanes of
+// params[k].
+//
+//  bs     : block edge length; 32, 16 and 8 map to TX_32X32, TX_16X16 and
+//           TX_8X8, every other value falls back to TX_4X4.
+//  mode   : second index into filter_intra_taps_4.
+//  params : output, at least four __m128i entries.
+static INLINE void GetIntraFilterParams(int bs, int mode, __m128i *params) {
+  TX_SIZE tx_size;
+  int tap;
+
+  if (bs == 32) {
+    tx_size = TX_32X32;
+  } else if (bs == 16) {
+    tx_size = TX_16X16;
+  } else if (bs == 8) {
+    tx_size = TX_8X8;
+  } else {
+    tx_size = TX_4X4;
+  }
+  for (tap = 0; tap < 4; ++tap) {
+    params[tap] = _mm_set1_epi32(filter_intra_taps_4[tx_size][mode][tap]);
+  }
+}
+
+static const int maxBlkSize = 32;  // row pitch of pred is 2 * maxBlkSize + 1
+
+// Adds the broadcast mean in mean[0] to the 4x4 ints at the top-left of
+// |pred| (rows are 2 * maxBlkSize + 1 ints apart), saturates each value to
+// 8 bits and stores four bytes per row to |dst|.
+static INLINE void SavePred4x4(int *pred, const __m128i *mean, uint8_t *dst,
+                               ptrdiff_t stride) {
+  const int predStride = (maxBlkSize << 1) + 1;
+  __m128i row[4];
+  __m128i packed;
+  int i;
+
+  for (i = 0; i < 4; ++i) {
+    row[i] = _mm_loadu_si128((const __m128i *)(pred + i * predStride));
+    row[i] = _mm_add_epi32(row[i], mean[0]);
+  }
+
+  // 32 -> 16 -> 8 bit with unsigned saturation; row i lands in bytes
+  // 4 * i .. 4 * i + 3 of |packed|.
+  packed = _mm_packus_epi16(_mm_packus_epi32(row[0], row[1]),
+                            _mm_packus_epi32(row[2], row[3]));
+  for (i = 0; i < 4; ++i) {
+    *((int *)(dst + i * stride)) = _mm_cvtsi128_si32(packed);
+    // Bring the next row's four bytes down into the low dword.
+    packed = _mm_srli_si128(packed, 4);
+  }
+}
+
+// Adds the broadcast mean in mean[0] to the 8x8 ints at the top-left of
+// |pred|, saturates each value to 8 bits and writes eight bytes per row to
+// |dst|. Two rows are packed and stored per loop iteration.
+static void SavePred8x8(int *pred, const __m128i *mean, uint8_t *dst,
+                        ptrdiff_t stride) {
+  const int predStride = (maxBlkSize << 1) + 1;
+  int r;
+
+  for (r = 0; r < 8; r += 2) {
+    const int *even_row = pred + r * predStride;
+    const int *odd_row = even_row + predStride;
+    __m128i e_lo = _mm_loadu_si128((const __m128i *)even_row);
+    __m128i e_hi = _mm_loadu_si128((const __m128i *)(even_row + 4));
+    __m128i o_lo = _mm_loadu_si128((const __m128i *)odd_row);
+    __m128i o_hi = _mm_loadu_si128((const __m128i *)(odd_row + 4));
+    __m128i packed;
+
+    e_lo = _mm_add_epi32(e_lo, mean[0]);
+    e_hi = _mm_add_epi32(e_hi, mean[0]);
+    o_lo = _mm_add_epi32(o_lo, mean[0]);
+    o_hi = _mm_add_epi32(o_hi, mean[0]);
+
+    // Low 8 bytes of |packed| are the even row, high 8 bytes the odd row.
+    packed = _mm_packus_epi16(_mm_packus_epi32(e_lo, e_hi),
+                              _mm_packus_epi32(o_lo, o_hi));
+    _mm_storel_epi64((__m128i *)dst, packed);
+    _mm_storel_epi64((__m128i *)(dst + stride), _mm_srli_si128(packed, 8));
+    dst += 2 * stride;
+  }
+}
+
+// Adds the broadcast mean in mean[0] to the 16x16 ints at the top-left of
+// |pred|, saturates each value to 8 bits and writes 16 bytes per row to |dst|.
+static void SavePred16x16(int *pred, const __m128i *mean, uint8_t *dst,
+                          ptrdiff_t stride) {
+  const int predStride = (maxBlkSize << 1) + 1;
+  int r;
+
+  for (r = 0; r < 16; ++r) {
+    const int *src = pred + r * predStride;
+    __m128i q0 = _mm_loadu_si128((const __m128i *)src);
+    __m128i q1 = _mm_loadu_si128((const __m128i *)(src + 4));
+    __m128i q2 = _mm_loadu_si128((const __m128i *)(src + 8));
+    __m128i q3 = _mm_loadu_si128((const __m128i *)(src + 12));
+    __m128i packed;
+
+    q0 = _mm_add_epi32(q0, mean[0]);
+    q1 = _mm_add_epi32(q1, mean[0]);
+    q2 = _mm_add_epi32(q2, mean[0]);
+    q3 = _mm_add_epi32(q3, mean[0]);
+    // 32 -> 16 -> 8 bit with unsigned saturation; 16 pixels in one register.
+    packed = _mm_packus_epi16(_mm_packus_epi32(q0, q1),
+                              _mm_packus_epi32(q2, q3));
+    // The 16 bytes are written as two 8-byte halves.
+    _mm_storel_epi64((__m128i *)dst, packed);
+    _mm_storel_epi64((__m128i *)(dst + 8), _mm_srli_si128(packed, 8));
+    dst += stride;
+  }
+}
+
+// Adds the broadcast mean in mean[0] to the 32x32 ints at the top-left of
+// |pred|, saturates each value to 8 bits and writes 32 bytes per row to |dst|.
+//
+//  pred   : first int of the region; rows are 2 * maxBlkSize + 1 ints apart.
+//  mean   : mean[0] is added to every int32 lane before packing.
+//  stride : distance in bytes between consecutive rows of |dst|.
+static void SavePred32x32(int *pred, const __m128i *mean, uint8_t *dst,
+                          ptrdiff_t stride) {
+  const int predStride = (maxBlkSize << 1) + 1;
+  int r;
+
+  for (r = 0; r < 32; ++r) {
+    const int *src = pred + r * predStride;
+    __m128i left16, right16;
+    __m128i q0 = _mm_loadu_si128((const __m128i *)src);
+    __m128i q1 = _mm_loadu_si128((const __m128i *)(src + 4));
+    __m128i q2 = _mm_loadu_si128((const __m128i *)(src + 8));
+    __m128i q3 = _mm_loadu_si128((const __m128i *)(src + 12));
+    __m128i q4 = _mm_loadu_si128((const __m128i *)(src + 16));
+    __m128i q5 = _mm_loadu_si128((const __m128i *)(src + 20));
+    __m128i q6 = _mm_loadu_si128((const __m128i *)(src + 24));
+    __m128i q7 = _mm_loadu_si128((const __m128i *)(src + 28));
+
+    q0 = _mm_add_epi32(q0, mean[0]);
+    q1 = _mm_add_epi32(q1, mean[0]);
+    q2 = _mm_add_epi32(q2, mean[0]);
+    q3 = _mm_add_epi32(q3, mean[0]);
+    q4 = _mm_add_epi32(q4, mean[0]);
+    q5 = _mm_add_epi32(q5, mean[0]);
+    q6 = _mm_add_epi32(q6, mean[0]);
+    q7 = _mm_add_epi32(q7, mean[0]);
+
+    // 32 -> 16 -> 8 bit with unsigned saturation: columns 0..15 and 16..31.
+    left16 = _mm_packus_epi16(_mm_packus_epi32(q0, q1),
+                              _mm_packus_epi32(q2, q3));
+    right16 = _mm_packus_epi16(_mm_packus_epi32(q4, q5),
+                               _mm_packus_epi32(q6, q7));
+
+    // Each 16-byte result is written as two 8-byte halves.
+    _mm_storel_epi64((__m128i *)dst, left16);
+    _mm_storel_epi64((__m128i *)(dst + 8), _mm_srli_si128(left16, 8));
+    _mm_storel_epi64((__m128i *)(dst + 16), right16);
+    _mm_storel_epi64((__m128i *)(dst + 24), _mm_srli_si128(right16, 8));
+
+    dst += stride;
+  }
+}
+
+static void SavePrediction(int *pred, const __m128i *mean, int bs, uint8_t *dst,
+ ptrdiff_t stride) {
+ switch (bs) {  // forward to the store helper written for this block size
+ case 4: SavePred4x4(pred, mean, dst, stride); break;
+ case 8: SavePred8x8(pred, mean, dst, stride); break;
+ case 16: SavePred16x16(pred, mean, dst, stride); break;
+ case 32: SavePred32x32(pred, mean, dst, stride); break;
+ default: assert(0);  // no store helper exists for any other bs
+ }
+}
+
+typedef void (*ProducePixelsFunc)(__m128i *p, const __m128i *prm, int *pred,
+ const int predStride);  // common signature of Produce{One..Four}Pixels
+
+// Produces four consecutive entries of the row that follows the one |pred|
+// points into: pred[predStride + 1] .. pred[predStride + 4].
+//
+//  p          : p[0], p[1], p[2] are weighted by prm[2], prm[0] and prm[3] and
+//               summed lane by lane; ProducePixels loads them from pred,
+//               pred + 1 and pred + 2.
+//  prm        : broadcast filter taps; prm[1] weights the entry immediately
+//               to the left of the one being produced.
+//  pred       : start of the source row inside the prediction buffer.
+//  predStride : distance in ints between consecutive rows of that buffer.
+//
+// Each result is rounded with ROUND_POWER_OF_TWO_SIGNED and becomes the left
+// neighbour of the next one, so the four outputs form a serial chain.
+static void ProduceFourPixels(__m128i *p, const __m128i *prm, int *pred,
+                              const int predStride) {
+  const int left_tap = _mm_extract_epi32(prm[1], 0);
+  int *out = pred + predStride;
+  int partial[4];
+  __m128i acc;
+  int k;
+
+  // Vector part: the three taps that read the source row.
+  acc = _mm_mullo_epi32(p[0], prm[2]);
+  acc = _mm_add_epi32(acc, _mm_mullo_epi32(p[1], prm[0]));
+  acc = _mm_add_epi32(acc, _mm_mullo_epi32(p[2], prm[3]));
+  _mm_storeu_si128((__m128i *)partial, acc);
+
+  // Scalar part: out[k] is the left neighbour of out[k + 1]; out[0] is read
+  // as it stands on entry.
+  for (k = 0; k < 4; ++k) {
+    const int sum = partial[k] + left_tap * out[k];
+    out[k + 1] = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  }
+}
+
+// Produces three consecutive entries of the row that follows the one |pred|
+// points into: pred[predStride + 1] .. pred[predStride + 3].
+//
+// p[0], p[1], p[2] are weighted by prm[2], prm[0] and prm[3] and summed lane
+// by lane; prm[1] weights the entry immediately to the left of the one being
+// produced. Each result is rounded with ROUND_POWER_OF_TWO_SIGNED and becomes
+// the left neighbour of the next one. The fourth vector lane is computed but
+// not used.
+static void ProduceThreePixels(__m128i *p, const __m128i *prm, int *pred,
+                               const int predStride) {
+  const int left_tap = _mm_extract_epi32(prm[1], 0);
+  int *out = pred + predStride;
+  int partial[4];
+  __m128i acc;
+  int k;
+
+  // Vector part: the three taps that read the source row.
+  acc = _mm_mullo_epi32(p[0], prm[2]);
+  acc = _mm_add_epi32(acc, _mm_mullo_epi32(p[1], prm[0]));
+  acc = _mm_add_epi32(acc, _mm_mullo_epi32(p[2], prm[3]));
+  _mm_storeu_si128((__m128i *)partial, acc);
+
+  // Scalar part: out[k] is the left neighbour of out[k + 1]; out[0] is read
+  // as it stands on entry.
+  for (k = 0; k < 3; ++k) {
+    const int sum = partial[k] + left_tap * out[k];
+    out[k + 1] = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  }
+}
+
+// Produces pred[predStride + 1] and pred[predStride + 2] with the same tap
+// weighting as the other Produce*Pixels helpers: prm[2], prm[0], prm[3] on
+// p[0], p[1], p[2] and prm[1] on the left neighbour.
+static void ProduceTwoPixels(__m128i *p, const __m128i *prm, int *pred,
+                             const int predStride) {
+  const int left_tap = _mm_extract_epi32(prm[1], 0);
+  int *out = pred + predStride;
+  int partial[4];
+  __m128i acc;
+  int k;
+
+  // Vector part: the three taps that read the source row.
+  acc = _mm_mullo_epi32(p[0], prm[2]);
+  acc = _mm_add_epi32(acc, _mm_mullo_epi32(p[1], prm[0]));
+  acc = _mm_add_epi32(acc, _mm_mullo_epi32(p[2], prm[3]));
+  _mm_storeu_si128((__m128i *)partial, acc);
+
+  // Scalar part: out[k] is the left neighbour of out[k + 1]; out[0] is read
+  // as it stands on entry.
+  for (k = 0; k < 2; ++k) {
+    const int sum = partial[k] + left_tap * out[k];
+    out[k + 1] = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+  }
+}
+
+// Produces one entry, pred[predStride + 1], of the row that follows |pred|.
+// Lane 0 of p[0] * prm[2] + p[1] * prm[0] + p[2] * prm[3] is combined with
+// prm[1] times the left neighbour pred[predStride], then rounded with
+// ROUND_POWER_OF_TWO_SIGNED(..., FILTER_INTRA_PREC_BITS).
+static void ProduceOnePixels(__m128i *p, const __m128i *prm, int *pred,
+                             const int predStride) {
+  const int left_tap = _mm_extract_epi32(prm[1], 0);
+  int *out = pred + predStride;
+  __m128i acc;
+  int sum;
+
+  // All four lanes are computed; only lane 0 is consumed.
+  acc = _mm_mullo_epi32(p[0], prm[2]);
+  acc = _mm_add_epi32(acc, _mm_mullo_epi32(p[1], prm[0]));
+  acc = _mm_add_epi32(acc, _mm_mullo_epi32(p[2], prm[3]));
+
+  sum = _mm_extract_epi32(acc, 0) + left_tap * out[0];
+  out[1] = ROUND_POWER_OF_TWO_SIGNED(sum, FILTER_INTRA_PREC_BITS);
+}
+
+static ProducePixelsFunc prodPixelsFuncTab[4] = {
+ ProduceOnePixels, ProduceTwoPixels, ProduceThreePixels, ProduceFourPixels
+};  // entry i produces i + 1 pixels
+
+// Produces up to four entries of the row that follows the one |pred| points
+// into. |remain| is the caller's count of ints left in the source row from
+// |pred| on; remain - 2 entries are produced, capped at four per call.
+static void ProducePixels(int *pred, const __m128i *prm, int remain) {
+  const int predStride = (maxBlkSize << 1) + 1;
+  __m128i p[3];
+  int index;
+
+  // Bail out before touching memory: with two or fewer ints left nothing is
+  // produced, so the three loads below would only be wasted work.
+  if (remain <= 2) return;
+
+  p[0] = _mm_loadu_si128((const __m128i *)pred);        // above-left
+  p[1] = _mm_loadu_si128((const __m128i *)(pred + 1));  // above
+  p[2] = _mm_loadu_si128((const __m128i *)(pred + 2));  // above-right
+
+  index = (remain > 5) ? 3 : remain - 3;  // table index = entry count - 1
+  prodPixelsFuncTab[index](p, prm, pred, predStride);
+}
+
+// Note:
+//  At column index c, the remaining pixels are R = 2 * bs + 1 - r - c
+//  the number of pixels to produce is R - 2 = 2 * bs - r - c - 1
+//
+// Builds the mean-removed prediction in a local buffer: row 0 holds
+// above[-1 .. 2 * bs - 1], column 0 of rows 1 .. bs holds left[0 .. bs - 1].
+// Rows 1 .. bs are then produced from the row above, and the bs x bs block
+// starting at [1][1] is stored to |dst| with the mean in prm[4] added back.
+static void GeneratePrediction(const uint8_t *above, const uint8_t *left,
+                               const int bs, const __m128i *prm, int meanValue,
+                               uint8_t *dst, ptrdiff_t stride) {
+  int pred[33][65];
+  const uint8_t *const top = above - 1;  // top[c] == above[c - 1]
+  int r, c;
+
+  for (r = 0; r < bs; ++r) {
+    pred[r + 1][0] = (int)left[r] - meanValue;
+  }
+  for (c = 0; c <= 2 * bs; ++c) {
+    pred[0][c] = (int)top[c] - meanValue;
+  }
+
+  // Each produced row ends one column earlier than the row above it.
+  for (r = 0; r < bs; ++r) {
+    const int colBound = (bs << 1) - r;
+    for (c = 0; c < colBound; c += 4) {
+      ProducePixels(&pred[r][c], prm, colBound - c + 1);
+    }
+  }
+
+  SavePrediction(&pred[1][1], &prm[4], bs, dst, stride);
+}
+
+// Stores the reference-pixel mean in prm[4], then generates the block.
+static void FilterPrediction(const uint8_t *above, const uint8_t *left, int bs,
+                             __m128i *prm, uint8_t *dst, ptrdiff_t stride) {
+  const int mean = CalcRefPixelsMeanValue(above, left, bs, &prm[4]);
+  GeneratePrediction(above, left, bs, prm, mean, dst, stride);
+}
+
+void vp10_dc_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                     const uint8_t *above,
+                                     const uint8_t *left) {
+  __m128i params[5];  // [0..3]: DC_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, DC_PRED, params);
+  FilterPrediction(above, left, bs, params, dst, stride);
+}
+
+void vp10_v_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                    const uint8_t *above, const uint8_t *left) {
+  __m128i params[5];  // [0..3]: V_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, V_PRED, params);
+  FilterPrediction(above, left, bs, params, dst, stride);
+}
+
+void vp10_h_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                    const uint8_t *above, const uint8_t *left) {
+  __m128i params[5];  // [0..3]: H_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, H_PRED, params);
+  FilterPrediction(above, left, bs, params, dst, stride);
+}
+
+void vp10_d45_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                      const uint8_t *above,
+                                      const uint8_t *left) {
+  __m128i params[5];  // [0..3]: D45_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, D45_PRED, params);
+  FilterPrediction(above, left, bs, params, dst, stride);
+}
+
+void vp10_d135_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                       const uint8_t *above,
+                                       const uint8_t *left) {
+  __m128i params[5];  // [0..3]: D135_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, D135_PRED, params);
+  FilterPrediction(above, left, bs, params, dst, stride);
+}
+
+void vp10_d117_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                       const uint8_t *above,
+                                       const uint8_t *left) {
+  __m128i params[5];  // [0..3]: D117_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, D117_PRED, params);
+  FilterPrediction(above, left, bs, params, dst, stride);
+}
+
+void vp10_d153_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                       const uint8_t *above,
+                                       const uint8_t *left) {
+  __m128i params[5];  // [0..3]: D153_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, D153_PRED, params);
+  FilterPrediction(above, left, bs, params, dst, stride);
+}
+
+void vp10_d207_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                       const uint8_t *above,
+                                       const uint8_t *left) {
+  __m128i params[5];  // [0..3]: D207_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, D207_PRED, params);
+  FilterPrediction(above, left, bs, params, dst, stride);
+}
+
+void vp10_d63_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                      const uint8_t *above,
+                                      const uint8_t *left) {
+  __m128i params[5];  // [0..3]: D63_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, D63_PRED, params);
+  FilterPrediction(above, left, bs, params, dst, stride);
+}
+
+void vp10_tm_filter_predictor_sse4_1(uint8_t *dst, ptrdiff_t stride, int bs,
+                                     const uint8_t *above,
+                                     const uint8_t *left) {
+  __m128i params[5];  // [0..3]: TM_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, TM_PRED, params);
+  FilterPrediction(above, left, bs, params, dst, stride);
+}
+
+// ============== High Bit Depth ==============
+#if CONFIG_VP9_HIGHBITDEPTH
+// Rounded mean of the 4 above and 4 left 16-bit reference pixels:
+// (sum + 4) >> 3, accumulated in 16-bit lanes. |bd| is unused. Returns the
+// mean and also broadcasts it to the four int32 lanes of *params.
+static INLINE int HighbdGetMeanValue4x4(const uint16_t *above,
+                                        const uint16_t *left, const int bd,
+                                        __m128i *params) {
+  const __m128i above_px = _mm_loadu_si128((const __m128i *)above);
+  const __m128i left_px = _mm_loadu_si128((const __m128i *)left);
+  __m128i acc = _mm_add_epi16(above_px, left_px);
+  uint16_t total;
+  int mean;
+  (void)bd;
+
+  // One horizontal add leaves pair sums in lanes 0..3; adding lane 1 to
+  // lane 0 then gives the sum over the first four columns only.
+  acc = _mm_hadd_epi16(acc, _mm_setzero_si128());
+  acc = _mm_add_epi16(acc, _mm_srli_si128(acc, 2));
+  total = (uint16_t)(_mm_extract_epi16(acc, 0) + 4);
+  mean = total >> 3;
+  *params = _mm_set1_epi32(mean);
+  return mean;
+}
+
+// Rounded mean of the 8 above and 8 left 16-bit reference pixels:
+// (sum + 8) >> 4, accumulated in 16-bit lanes. |bd| is unused. Returns the
+// mean and also broadcasts it to the four int32 lanes of *params.
+static INLINE int HighbdGetMeanValue8x8(const uint16_t *above,
+                                        const uint16_t *left, const int bd,
+                                        __m128i *params) {
+  const __m128i kZero = _mm_setzero_si128();
+  const __m128i above_px = _mm_loadu_si128((const __m128i *)above);
+  const __m128i left_px = _mm_loadu_si128((const __m128i *)left);
+  __m128i acc = _mm_add_epi16(above_px, left_px);
+  uint16_t total;
+  int mean;
+  (void)bd;
+
+  // Two horizontal adds fold the eight 16-bit lanes into lanes 0 and 1.
+  acc = _mm_hadd_epi16(acc, kZero);
+  acc = _mm_hadd_epi16(acc, kZero);
+  // Fold lane 1 into lane 0, which then holds the full sum.
+  acc = _mm_add_epi16(acc, _mm_srli_si128(acc, 2));
+  total = (uint16_t)(_mm_extract_epi16(acc, 0) + 8);
+  mean = total >> 4;
+  *params = _mm_set1_epi32(mean);
+  return mean;
+}
+
+// Note:
+//  Sums 16 above and 16 left pixels into eight 16-bit lanes:
+//  lane i of sum[0] = above[i] + left[i] + above[i + 8] + left[i + 8].
+//  Named for 10-bit input, whose four-pixel lane sums fit in 16 bits.
+static INLINE void AddPixels10bit(const uint16_t *above, const uint16_t *left,
+                                  __m128i *sum) {
+  const __m128i above_lo = _mm_loadu_si128((const __m128i *)above);
+  const __m128i left_lo = _mm_loadu_si128((const __m128i *)left);
+  const __m128i above_hi = _mm_loadu_si128((const __m128i *)(above + 8));
+  const __m128i left_hi = _mm_loadu_si128((const __m128i *)(left + 8));
+  const __m128i low_sum = _mm_add_epi16(above_lo, left_lo);
+  sum[0] = _mm_add_epi16(_mm_add_epi16(low_sum, above_hi), left_hi);
+}
+
+// Note:
+//  Sums 16 above and 16 left pixels into four 32-bit lanes. Every 16-bit
+//  pixel is zero-extended to 32 bits before it is added, so lane i of sum[0]
+//  collects above[j] + left[j] for j = i, i + 4, i + 8 and i + 12.
+//  Named for 12-bit input.
+static INLINE void AddPixels12bit(const uint16_t *above, const uint16_t *left,
+                                  __m128i *sum) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i above_lo = _mm_loadu_si128((const __m128i *)above);
+  const __m128i left_lo = _mm_loadu_si128((const __m128i *)left);
+  const __m128i above_hi = _mm_loadu_si128((const __m128i *)(above + 8));
+  const __m128i left_hi = _mm_loadu_si128((const __m128i *)(left + 8));
+  __m128i acc;
+
+  // Pixels 0..3.
+  acc = _mm_add_epi32(_mm_unpacklo_epi16(above_lo, zero),
+                      _mm_unpacklo_epi16(left_lo, zero));
+
+  // Pixels 4..7.
+  acc = _mm_add_epi32(acc, _mm_unpackhi_epi16(above_lo, zero));
+  acc = _mm_add_epi32(acc, _mm_unpackhi_epi16(left_lo, zero));
+
+  // Pixels 8..11.
+  acc = _mm_add_epi32(acc, _mm_unpacklo_epi16(above_hi, zero));
+  acc = _mm_add_epi32(acc, _mm_unpacklo_epi16(left_hi, zero));
+
+  // Pixels 12..15.
+  acc = _mm_add_epi32(acc, _mm_unpackhi_epi16(above_hi, zero));
+  acc = _mm_add_epi32(acc, _mm_unpackhi_epi16(left_hi, zero));
+
+  sum[0] = acc;
+}
+
+// Rounded mean of the 16 above and 16 left reference pixels, (sum + 16) >> 5;
+// returned and also broadcast to the four int32 lanes of *params.
+// Depths up to 10 bits (sum <= 32 * 1023) use the 16-bit lanes of
+// AddPixels10bit; every deeper bd uses the 32-bit lanes of AddPixels12bit.
+static INLINE int HighbdGetMeanValue16x16(const uint16_t *above,
+                                          const uint16_t *left, const int bd,
+                                          __m128i *params) {
+  const __m128i zero = _mm_setzero_si128();
+  __m128i sum_vector, u;
+  uint32_t sum_value;
+
+  if (bd <= 10) {
+    AddPixels10bit(above, left, &sum_vector);
+    sum_vector = _mm_hadd_epi16(sum_vector, zero);  // still has 4 values
+    sum_vector = _mm_hadd_epi16(sum_vector, zero);  // still has 2 values
+    u = _mm_srli_si128(sum_vector, 2);
+    sum_vector = _mm_add_epi16(sum_vector, u);
+    sum_value = _mm_extract_epi16(sum_vector, 0);
+  } else {
+    AddPixels12bit(above, left, &sum_vector);
+    sum_vector = _mm_hadd_epi32(sum_vector, zero);  // still has 2 values
+    u = _mm_srli_si128(sum_vector, 4);
+    sum_vector = _mm_add_epi32(u, sum_vector);
+    sum_value = _mm_extract_epi32(sum_vector, 0);
+  }
+  sum_value = (sum_value + 16) >> 5;
+  *params = _mm_set1_epi32(sum_value);
+  return sum_value;
+}
+
+// Rounded mean of the 32 above and 32 left reference pixels, (sum + 32) >> 6;
+// returned and also broadcast to the four int32 lanes of *params.
+// Depths up to 10 bits (sum <= 64 * 1023) use the 16-bit lanes of
+// AddPixels10bit; every deeper bd uses the 32-bit lanes of AddPixels12bit.
+static INLINE int HighbdGetMeanValue32x32(const uint16_t *above,
+                                          const uint16_t *left, const int bd,
+                                          __m128i *params) {
+  const __m128i zero = _mm_setzero_si128();
+  __m128i sum_vector[2], u;
+  uint32_t sum_value;
+
+  if (bd <= 10) {
+    AddPixels10bit(above, left, &sum_vector[0]);
+    AddPixels10bit(above + 16, left + 16, &sum_vector[1]);
+    sum_vector[0] = _mm_add_epi16(sum_vector[0], sum_vector[1]);
+    sum_vector[0] = _mm_hadd_epi16(sum_vector[0], zero);  // still has 4 values
+    sum_vector[0] = _mm_hadd_epi16(sum_vector[0], zero);  // still has 2 values
+    u = _mm_srli_si128(sum_vector[0], 2);
+    sum_vector[0] = _mm_add_epi16(sum_vector[0], u);
+    sum_value = _mm_extract_epi16(sum_vector[0], 0);
+  } else {
+    AddPixels12bit(above, left, &sum_vector[0]);
+    AddPixels12bit(above + 16, left + 16, &sum_vector[1]);
+    sum_vector[0] = _mm_add_epi32(sum_vector[0], sum_vector[1]);
+    sum_vector[0] = _mm_hadd_epi32(sum_vector[0], zero);  // still has 2 values
+    u = _mm_srli_si128(sum_vector[0], 4);
+    sum_vector[0] = _mm_add_epi32(u, sum_vector[0]);
+    sum_value = _mm_extract_epi32(sum_vector[0], 0);
+  }
+
+  sum_value = (sum_value + 32) >> 6;
+  *params = _mm_set1_epi32(sum_value);
+  return sum_value;
+}
+
+// Note:
+//  params[4] : mean value, 4 int32_t repetition
+//
+// Forwards to the HighbdGetMeanValue helper for block size |bs| (4, 8, 16 or
+// 32), passing |bd| through, and returns its result. Any other size trips
+// the assert and yields 0.
+//
+static INLINE int HighbdCalcRefPixelsMeanValue(const uint16_t *above,
+                                               const uint16_t *left, int bs,
+                                               const int bd, __m128i *params) {
+  switch (bs) {
+    case 4: return HighbdGetMeanValue4x4(above, left, bd, params);
+    case 8: return HighbdGetMeanValue8x8(above, left, bd, params);
+    case 16: return HighbdGetMeanValue16x16(above, left, bd, params);
+    case 32: return HighbdGetMeanValue32x32(above, left, bd, params);
+    default:
+      assert(0);
+      return 0;
+  }
+}
+
+// Note:
+//  At column index c, the remaining pixels are R = 2 * bs + 1 - r - c
+//  the number of pixels to produce is R - 2 = 2 * bs - r - c - 1
+//
+// Builds the mean-removed prediction in a local int buffer: row 0 holds
+// above[-1 .. 2 * bs - 1], column 0 of rows 1 .. bs holds left[0 .. bs - 1].
+// Rows 1 .. bs are then produced from the row above, and the bs x bs block
+// starting at [1][1] is written to |dst| with meanValue added back and every
+// value passed through clip_pixel_highbd(..., bd).
+static void HighbdGeneratePrediction(const uint16_t *above,
+                                     const uint16_t *left, const int bs,
+                                     const int bd, const __m128i *prm,
+                                     int meanValue, uint16_t *dst,
+                                     ptrdiff_t stride) {
+  int pred[33][65];
+  const uint16_t *const top = above - 1;  // top[c] == above[c - 1]
+  int r, c;
+
+  for (r = 0; r < bs; ++r) {
+    pred[r + 1][0] = (int)left[r] - meanValue;
+  }
+  for (c = 0; c <= 2 * bs; ++c) {
+    pred[0][c] = (int)top[c] - meanValue;
+  }
+
+  // Each produced row ends one column earlier than the row above it.
+  for (r = 0; r < bs; ++r) {
+    const int colBound = (bs << 1) - r;
+    for (c = 0; c < colBound; c += 4) {
+      ProducePixels(&pred[r][c], prm, colBound - c + 1);
+    }
+  }
+
+  // Restore the mean and clip while copying out, one row of |dst| at a time.
+  for (r = 0; r < bs; ++r) {
+    for (c = 0; c < bs; ++c) {
+      dst[c] = clip_pixel_highbd(pred[r + 1][c + 1] + meanValue, bd);
+    }
+    dst += stride;
+  }
+}
+
+// Stores the reference-pixel mean in prm[4], then generates the block.
+static void HighbdFilterPrediction(const uint16_t *above, const uint16_t *left,
+                                   int bs, const int bd, __m128i *prm,
+                                   uint16_t *dst, ptrdiff_t stride) {
+  const int mean = HighbdCalcRefPixelsMeanValue(above, left, bs, bd, &prm[4]);
+  HighbdGeneratePrediction(above, left, bs, bd, prm, mean, dst, stride);
+}
+
+void vp10_highbd_dc_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                            int bs, const uint16_t *above,
+                                            const uint16_t *left, int bd) {
+  __m128i params[5];  // [0..3]: DC_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, DC_PRED, params);
+  HighbdFilterPrediction(above, left, bs, bd, params, dst, stride);
+}
+
+void vp10_highbd_v_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                           int bs, const uint16_t *above,
+                                           const uint16_t *left, int bd) {
+  __m128i params[5];  // [0..3]: V_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, V_PRED, params);
+  HighbdFilterPrediction(above, left, bs, bd, params, dst, stride);
+}
+
+void vp10_highbd_h_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                           int bs, const uint16_t *above,
+                                           const uint16_t *left, int bd) {
+  __m128i params[5];  // [0..3]: H_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, H_PRED, params);
+  HighbdFilterPrediction(above, left, bs, bd, params, dst, stride);
+}
+
+void vp10_highbd_d45_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                             int bs, const uint16_t *above,
+                                             const uint16_t *left, int bd) {
+  __m128i params[5];  // [0..3]: D45_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, D45_PRED, params);
+  HighbdFilterPrediction(above, left, bs, bd, params, dst, stride);
+}
+
+void vp10_highbd_d135_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                              int bs, const uint16_t *above,
+                                              const uint16_t *left, int bd) {
+  __m128i params[5];  // [0..3]: D135_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, D135_PRED, params);
+  HighbdFilterPrediction(above, left, bs, bd, params, dst, stride);
+}
+
+void vp10_highbd_d117_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                              int bs, const uint16_t *above,
+                                              const uint16_t *left, int bd) {
+  __m128i params[5];  // [0..3]: D117_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, D117_PRED, params);
+  HighbdFilterPrediction(above, left, bs, bd, params, dst, stride);
+}
+
+void vp10_highbd_d153_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                              int bs, const uint16_t *above,
+                                              const uint16_t *left, int bd) {
+  __m128i params[5];  // [0..3]: D153_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, D153_PRED, params);
+  HighbdFilterPrediction(above, left, bs, bd, params, dst, stride);
+}
+
+void vp10_highbd_d207_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                              int bs, const uint16_t *above,
+                                              const uint16_t *left, int bd) {
+  __m128i params[5];  // [0..3]: D207_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, D207_PRED, params);
+  HighbdFilterPrediction(above, left, bs, bd, params, dst, stride);
+}
+
+void vp10_highbd_d63_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                             int bs, const uint16_t *above,
+                                             const uint16_t *left, int bd) {
+  __m128i params[5];  // [0..3]: D63_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, D63_PRED, params);
+  HighbdFilterPrediction(above, left, bs, bd, params, dst, stride);
+}
+
+void vp10_highbd_tm_filter_predictor_sse4_1(uint16_t *dst, ptrdiff_t stride,
+                                            int bs, const uint16_t *above,
+                                            const uint16_t *left, int bd) {
+  __m128i params[5];  // [0..3]: TM_PRED filter taps, [4]: reference mean
+  GetIntraFilterParams(bs, TM_PRED, params);
+  HighbdFilterPrediction(above, left, bs, bd, params, dst, stride);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/x86/vp10_convolve_filters_ssse3.c b/av1/common/x86/vp10_convolve_filters_ssse3.c
new file mode 100644
index 0000000..b842589
--- /dev/null
+++ b/av1/common/x86/vp10_convolve_filters_ssse3.c
@@ -0,0 +1,660 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_config.h"
+#include "av1/common/filter.h"
+
+#if CONFIG_EXT_INTERP  // Table: 15 entries, each 2 rows of 16 int8 lanes.
+DECLARE_ALIGNED(16, const int8_t,  // Row 1 repeats row 0 two lanes later.
+ sub_pel_filters_10sharp_signal_dir[15][2][16]) = {
+ {
+ { 0, 0, -1, 3, -6, 127, 8, -4, 2, -1, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, -1, 3, -6, 127, 8, -4, 2, -1, 0, 0, 0, 0 },
+ },
+ {
+ { 0, 1, -2, 5, -12, 124, 18, -7, 3, -2, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, -2, 5, -12, 124, 18, -7, 3, -2, 0, 0, 0, 0 },
+ },
+ {
+ { 0, 1, -3, 7, -17, 119, 28, -11, 5, -2, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, -3, 7, -17, 119, 28, -11, 5, -2, 1, 0, 0, 0 },
+ },
+ {
+ { 0, 1, -4, 8, -20, 114, 38, -14, 7, -3, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, -4, 8, -20, 114, 38, -14, 7, -3, 1, 0, 0, 0 },
+ },
+ {
+ { 0, 1, -4, 9, -22, 107, 49, -17, 8, -4, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, -4, 9, -22, 107, 49, -17, 8, -4, 1, 0, 0, 0 },
+ },
+ {
+ { 0, 2, -5, 10, -24, 99, 59, -20, 9, -4, 2, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 2, -5, 10, -24, 99, 59, -20, 9, -4, 2, 0, 0, 0 },
+ },
+ {
+ { 0, 2, -5, 10, -24, 90, 70, -22, 10, -5, 2, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 2, -5, 10, -24, 90, 70, -22, 10, -5, 2, 0, 0, 0 },
+ },
+ {
+ { 0, 2, -5, 10, -23, 80, 80, -23, 10, -5, 2, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 2, -5, 10, -23, 80, 80, -23, 10, -5, 2, 0, 0, 0 },
+ },
+ {
+ { 0, 2, -5, 10, -22, 70, 90, -24, 10, -5, 2, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 2, -5, 10, -22, 70, 90, -24, 10, -5, 2, 0, 0, 0 },
+ },
+ {
+ { 0, 2, -4, 9, -20, 59, 99, -24, 10, -5, 2, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 2, -4, 9, -20, 59, 99, -24, 10, -5, 2, 0, 0, 0 },
+ },
+ {
+ { 0, 1, -4, 8, -17, 49, 107, -22, 9, -4, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, -4, 8, -17, 49, 107, -22, 9, -4, 1, 0, 0, 0 },
+ },
+ {
+ { 0, 1, -3, 7, -14, 38, 114, -20, 8, -4, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, -3, 7, -14, 38, 114, -20, 8, -4, 1, 0, 0, 0 },
+ },
+ {
+ { 0, 1, -2, 5, -11, 28, 119, -17, 7, -3, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, -2, 5, -11, 28, 119, -17, 7, -3, 1, 0, 0, 0 },
+ },
+ {
+ { 0, 0, -2, 3, -7, 18, 124, -12, 5, -2, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, -2, 3, -7, 18, 124, -12, 5, -2, 1, 0, 0, 0 },
+ },
+ {
+ { 0, 0, -1, 2, -4, 8, 127, -6, 3, -1, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, -1, 2, -4, 8, 127, -6, 3, -1, 0, 0, 0, 0 },
+ },
+};
+#endif
+#if CONFIG_EXT_INTERP  // Table: 15 entries, each 6 rows of 16 int8 lanes.
+DECLARE_ALIGNED(16, const int8_t,  // Each row: one coefficient pair, 8 times.
+ sub_pel_filters_10sharp_ver_signal_dir[15][6][16]) = {
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6,
+ 127 },
+ { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5 },
+ { -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124,
+ -12, 124 },
+ { 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7 },
+ { 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7 },
+ { -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119,
+ -17, 119 },
+ { 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28,
+ -11 },
+ { 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2 },
+ { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+ { -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114,
+ -20, 114 },
+ { 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38,
+ -14 },
+ { 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3 },
+ { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9 },
+ { -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107,
+ -22, 107 },
+ { 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49,
+ -17 },
+ { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 },
+ { -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10 },
+ { -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24,
+ 99 },
+ { 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59,
+ -20 },
+ { 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4 },
+ { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 },
+ { -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10 },
+ { -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24,
+ 90 },
+ { 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70,
+ -22 },
+ { 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5 },
+ { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 },
+ { -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10 },
+ { -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23,
+ 80 },
+ { 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80, -23, 80,
+ -23 },
+ { 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5 },
+ { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 },
+ { -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10 },
+ { -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22, 70, -22,
+ 70 },
+ { 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90, -24, 90,
+ -24 },
+ { 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5 },
+ { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 },
+ { -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9 },
+ { -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20, 59, -20,
+ 59 },
+ { 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99, -24, 99,
+ -24 },
+ { 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5, 10, -5 },
+ { 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+ { -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17, 49, -17,
+ 49 },
+ { 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22, 107, -22,
+ 107, -22 },
+ { 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4, 9, -4 },
+ { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7 },
+ { -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14, 38, -14,
+ 38 },
+ { 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20, 114, -20,
+ 114, -20 },
+ { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5 },
+ { -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11,
+ 28 },
+ { 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17, 119, -17,
+ 119, -17 },
+ { 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3, 7, -3 },
+ { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3 },
+ { -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18, -7, 18 },
+ { 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12,
+ 124, -12 },
+ { 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2, 5, -2 },
+ { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+ { 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127, -6, 127,
+ -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ },
+};
+#endif
+#if CONFIG_EXT_INTERP  // Table: 15 entries, each 2 rows of 16 int8 lanes.
+DECLARE_ALIGNED(16, const int8_t,  // Row 1 repeats row 0 two lanes later.
+ sub_pel_filters_12sharp_signal_dir[15][2][16]) = {
+ {
+ { 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, -2, 3, -7, 127, 8, -4, 2, -1, 1, 0, 0, 0 },
+ },
+ {
+ { -1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 2, -3, 6, -13, 124, 18, -8, 4, -2, 2, -1, 0, 0 },
+ },
+ {
+ { -1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 3, -4, 8, -18, 120, 28, -12, 7, -4, 2, -1, 0, 0 },
+ },
+ {
+ { -1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 3, -6, 10, -21, 115, 38, -15, 8, -5, 3, -1, 0, 0 },
+ },
+ {
+ { -2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2, 0, 0, 0, 0 },
+ { 0, 0, -2, 4, -6, 12, -24, 108, 49, -18, 10, -6, 3, -2, 0, 0 },
+ },
+ {
+ { -2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2, 0, 0, 0, 0 },
+ { 0, 0, -2, 4, -7, 13, -25, 100, 60, -21, 11, -7, 4, -2, 0, 0 },
+ },
+ {
+ { -2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2, 0, 0, 0, 0 },
+ { 0, 0, -2, 4, -7, 13, -26, 91, 71, -24, 13, -7, 4, -2, 0, 0 },
+ },
+ {
+ { -2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2, 0, 0, 0, 0 },
+ { 0, 0, -2, 4, -7, 13, -25, 81, 81, -25, 13, -7, 4, -2, 0, 0 },
+ },
+ {
+ { -2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2, 0, 0, 0, 0 },
+ { 0, 0, -2, 4, -7, 13, -24, 71, 91, -26, 13, -7, 4, -2, 0, 0 },
+ },
+ {
+ { -2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2, 0, 0, 0, 0 },
+ { 0, 0, -2, 4, -7, 11, -21, 60, 100, -25, 13, -7, 4, -2, 0, 0 },
+ },
+ {
+ { -2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2, 0, 0, 0, 0 },
+ { 0, 0, -2, 3, -6, 10, -18, 49, 108, -24, 12, -6, 4, -2, 0, 0 },
+ },
+ {
+ { -1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 3, -5, 8, -15, 38, 115, -21, 10, -6, 3, -1, 0, 0 },
+ },
+ {
+ { -1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 2, -4, 7, -12, 28, 120, -18, 8, -4, 3, -1, 0, 0 },
+ },
+ {
+ { -1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 2, -2, 4, -8, 18, 124, -13, 6, -3, 2, -1, 0, 0 },
+ },
+ {
+ { 0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, -1, 2, -4, 8, 127, -7, 3, -2, 1, 0, 0, 0 },
+ },
+};
+#endif
+#if CONFIG_EXT_INTERP  // Table: 15 entries, each 6 rows of 16 int8 lanes.
+DECLARE_ALIGNED(16, const int8_t,  // Each row: one coefficient pair, 8 times.
+ sub_pel_filters_12sharp_ver_signal_dir[15][6][16]) = {
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3 },
+ { -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7,
+ 127 },
+ { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+ { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6 },
+ { -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124,
+ -13, 124 },
+ { 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8 },
+ { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+ { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+ { -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120,
+ -18, 120 },
+ { 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28,
+ -12 },
+ { 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10 },
+ { -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115,
+ -21, 115 },
+ { 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38,
+ -15 },
+ { 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5 },
+ { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 },
+ { -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108,
+ -24, 108 },
+ { 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49,
+ -18 },
+ { 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6 },
+ { 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2 },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13 },
+ { -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100,
+ -25, 100 },
+ { 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60,
+ -21 },
+ { 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7 },
+ { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13 },
+ { -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26,
+ 91 },
+ { 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71,
+ -24 },
+ { 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7 },
+ { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13 },
+ { -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25,
+ 81 },
+ { 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81, -25, 81,
+ -25 },
+ { 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7 },
+ { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13 },
+ { -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24, 71, -24,
+ 71 },
+ { 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91, -26, 91,
+ -26 },
+ { 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7 },
+ { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11, -7, 11 },
+ { -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21,
+ 60 },
+ { 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25, 100, -25,
+ 100, -25 },
+ { 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7, 13, -7 },
+ { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+ },
+ {
+ { -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3 },
+ { -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10 },
+ { -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18,
+ 49 },
+ { 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24, 108, -24,
+ 108, -24 },
+ { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 },
+ { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8, -5, 8 },
+ { -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15,
+ 38 },
+ { 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21, 115, -21,
+ 115, -21 },
+ { 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6, 10, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7, -4, 7 },
+ { -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12, 28, -12,
+ 28 },
+ { 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18, 120, -18,
+ 120, -18 },
+ { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18 },
+ { 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13, 124, -13,
+ 124, -13 },
+ { 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3 },
+ { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+ { 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127,
+ -7 },
+ { 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2, 3, -2 },
+ { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+};
+#endif
+#if USE_TEMPORALFILTER_12TAP  // Table: 15 entries x 2 rows x 16 int8 lanes.
+DECLARE_ALIGNED(16, const int8_t,  // Row 1 repeats row 0 two lanes later.
+ sub_pel_filters_temporalfilter_12_signal_dir[15][2][16]) = {
+ {
+ { 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, -1, 3, -7, 127, 8, -4, 2, -1, 0, 0, 0, 0 },
+ },
+ {
+ { 0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, -3, 5, -12, 124, 18, -8, 4, -2, 1, 0, 0, 0 },
+ },
+ {
+ { -1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 2, -4, 8, -17, 120, 28, -11, 6, -3, 1, -1, 0, 0 },
+ },
+ {
+ { -1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 2, -4, 10, -21, 114, 38, -15, 8, -4, 2, -1, 0, 0 },
+ },
+ {
+ { -1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 3, -5, 11, -23, 107, 49, -18, 9, -5, 2, -1, 0, 0 },
+ },
+ {
+ { -1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 3, -6, 12, -25, 99, 60, -21, 11, -6, 3, -1, 0, 0 },
+ },
+ {
+ { -1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 3, -6, 12, -25, 90, 70, -23, 12, -6, 3, -1, 0, 0 },
+ },
+ {
+ { -1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 3, -6, 12, -24, 80, 80, -24, 12, -6, 3, -1, 0, 0 },
+ },
+ {
+ { -1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 3, -6, 12, -23, 70, 90, -25, 12, -6, 3, -1, 0, 0 },
+ },
+ {
+ { -1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 3, -6, 11, -21, 60, 99, -25, 12, -6, 3, -1, 0, 0 },
+ },
+ {
+ { -1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 2, -5, 9, -18, 49, 107, -23, 11, -5, 3, -1, 0, 0 },
+ },
+ {
+ { -1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 2, -4, 8, -15, 38, 114, -21, 10, -4, 2, -1, 0, 0 },
+ },
+ {
+ { -1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1, 0, 0, 0, 0 },
+ { 0, 0, -1, 1, -3, 6, -11, 28, 120, -17, 8, -4, 2, -1, 0, 0 },
+ },
+ {
+ { 0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 1, -2, 4, -8, 18, 124, -12, 5, -3, 1, 0, 0, 0 },
+ },
+ {
+ { 0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, -1, 2, -4, 8, 127, -7, 3, -1, 1, 0, 0, 0 },
+ },
+};
+#endif
+#if USE_TEMPORALFILTER_12TAP  // Table: 15 entries x 6 rows x 16 int8 lanes.
+DECLARE_ALIGNED(16, const int8_t,  // Each row: one coefficient pair, 8 times.
+ sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16]) = {
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7,
+ 127 },
+ { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5 },
+ { -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124,
+ -12, 124 },
+ { 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8 },
+ { 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2 },
+ { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+ { -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120,
+ -17, 120 },
+ { 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28,
+ -11 },
+ { 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3 },
+ { 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10 },
+ { -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114,
+ -21, 114 },
+ { 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38,
+ -15 },
+ { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11 },
+ { -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107,
+ -23, 107 },
+ { 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49,
+ -18 },
+ { 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5 },
+ { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 },
+ { -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25,
+ 99 },
+ { 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60,
+ -21 },
+ { 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 },
+ { -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25,
+ 90 },
+ { 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70,
+ -23 },
+ { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 },
+ { -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24,
+ 80 },
+ { 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80, -24, 80,
+ -24 },
+ { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12 },
+ { -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23, 70, -23,
+ 70 },
+ { 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90, -25, 90,
+ -25 },
+ { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11, -6, 11 },
+ { -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21, 60, -21,
+ 60 },
+ { 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99, -25, 99,
+ -25 },
+ { 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6, 12, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9, -5, 9 },
+ { -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18, 49, -18,
+ 49 },
+ { 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23, 107, -23,
+ 107, -23 },
+ { 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5, 11, -5 },
+ { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+ { -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15, 38, -15,
+ 38 },
+ { 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21, 114, -21,
+ 114, -21 },
+ { 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4, 10, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1 },
+ { -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6, -3, 6 },
+ { -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11, 28, -11,
+ 28 },
+ { 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17, 120, -17,
+ 120, -17 },
+ { 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18, -8, 18 },
+ { 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12, 124, -12,
+ 124, -12 },
+ { 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3, 5, -3 },
+ { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8, -4, 8 },
+ { 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127, -7, 127,
+ -7 },
+ { 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1, 3, -1 },
+ { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+};
+#endif
diff --git a/av1/common/x86/vp10_convolve_ssse3.c b/av1/common/x86/vp10_convolve_ssse3.c
new file mode 100644
index 0000000..e891d74
--- /dev/null
+++ b/av1/common/x86/vp10_convolve_ssse3.c
@@ -0,0 +1,878 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+#include <string.h>
+#include <tmmintrin.h>
+
+#include "./vp10_rtcd.h"
+#include "av1/common/filter.h"
+
+#define WIDTH_BOUND (16)
+#define HEIGHT_BOUND (16)
+
+static INLINE void transpose_4x8(const __m128i *in, __m128i *out) {
+ // Transposes four rows of eight 16-bit values: column k of in[0..3] ends up
+ // in the low 64 bits of out[k], for k = 0..5.
+ __m128i rows01, rows23;
+
+ // Columns 0..3 come from the low halves of the inputs.
+ rows01 = _mm_unpacklo_epi16(in[0], in[1]);
+ rows23 = _mm_unpacklo_epi16(in[2], in[3]);
+ out[0] = _mm_unpacklo_epi32(rows01, rows23);
+ out[1] = _mm_srli_si128(out[0], 8);
+ out[2] = _mm_unpackhi_epi32(rows01, rows23);
+ out[3] = _mm_srli_si128(out[2], 8);
+
+ // Columns 4 and 5 come from the high halves; columns 6 and 7 are skipped
+ // (out[6]/out[7] stay unwritten; per the original note they would be zero).
+ rows01 = _mm_unpackhi_epi16(in[0], in[1]);
+ rows23 = _mm_unpackhi_epi16(in[2], in[3]);
+ out[4] = _mm_unpacklo_epi32(rows01, rows23);
+ out[5] = _mm_srli_si128(out[4], 8);
+}
+// Store callback: consumes the 16-bit results in *x and writes pixels to dst.
+typedef void (*store_pixel_t)(const __m128i *x, uint8_t *dst);
+
+static INLINE __m128i accumulate_store(const __m128i *x, uint8_t *src) {
+ // Averages the 16-bit lanes of *x with eight existing pixels read from src,
+ // rounding up: (x + pixel + 1) >> 1. The result is packed to bytes with
+ // unsigned saturation (duplicated in both halves) and returned, not stored.
+ const __m128i prev8 = _mm_loadl_epi64((__m128i const *)src);
+ const __m128i prev16 = _mm_unpacklo_epi8(prev8, _mm_setzero_si128());
+ const __m128i sum = _mm_add_epi16(*x, prev16);
+ const __m128i biased = _mm_add_epi16(sum, _mm_set1_epi16(1));
+ const __m128i avg = _mm_srai_epi16(biased, 1);
+ return _mm_packus_epi16(avg, avg);
+}
+
+static INLINE void store_2_pixel_only(const __m128i *x, uint8_t *dst) {
+ // Packs *x to bytes (unsigned saturation) and writes the two lowest to dst.
+ // memcpy replaces the type-punned, possibly misaligned uint16_t store.
+ const uint16_t px = (uint16_t)_mm_cvtsi128_si32(_mm_packus_epi16(*x, *x));
+ memcpy(dst, &px, sizeof(px));
+}
+
+static INLINE void accumulate_store_2_pixel(const __m128i *x, uint8_t *dst) {
+ // Averages *x with the pixels already at dst (accumulate_store) and writes
+ // back the two lowest bytes; memcpy replaces the type-punned uint16_t store.
+ const uint16_t px = (uint16_t)_mm_cvtsi128_si32(accumulate_store(x, dst));
+ memcpy(dst, &px, sizeof(px));
+}
+
+static const store_pixel_t store2pixelTab[2] = {
+ store_2_pixel_only, accumulate_store_2_pixel };  // [0] overwrite, [1] average
+// Packs *x to bytes (unsigned saturation) and writes the four lowest to dst.
+static INLINE void store_4_pixel_only(const __m128i *x, uint8_t *dst) {
+ const int px = _mm_cvtsi128_si32(_mm_packus_epi16(*x, *x));
+ memcpy(dst, &px, sizeof(px));  // Replaces *(int *)dst: no aliasing UB.
+}
+// Averages *x into the pixels at dst (accumulate_store), writes four bytes.
+static INLINE void accumulate_store_4_pixel(const __m128i *x, uint8_t *dst) {
+ const int px = _mm_cvtsi128_si32(accumulate_store(x, dst));
+ memcpy(dst, &px, sizeof(px));  // Replaces *(int *)dst: no aliasing UB.
+}
+
+static const store_pixel_t store4pixelTab[2] = {
+ store_4_pixel_only, accumulate_store_4_pixel };  // [0] overwrite, [1] average
+
+static void horiz_w4_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
+ store_pixel_t store_func, uint8_t *dst) {
+ __m128i sumPairRow[4];  // Pair sums: one vector per output pixel.
+ __m128i sumPairCol[8];  // Transposed: one vector per pair index (0..5 used).
+ __m128i pixel;
+ const __m128i k_256 = _mm_set1_epi16(1 << 8);  // mulhrs by 256: (v + 64) >> 7
+ const __m128i zero = _mm_setzero_si128();
+ // Horizontal filter for four adjacent output pixels, emitted via store_func.
+ if (10 == tapsNum) {
+ src -= 1;  // NOTE(review): presumably matches a zero lead-in lane in f; confirm.
+ }
+ // Rows 0 and 2: f[0] and f[1] on the 16 bytes at src; row 2 drops one lane.
+ pixel = _mm_loadu_si128((__m128i const *)src);
+ sumPairRow[0] = _mm_maddubs_epi16(pixel, f[0]);
+ sumPairRow[2] = _mm_maddubs_epi16(pixel, f[1]);
+ sumPairRow[2] = _mm_srli_si128(sumPairRow[2], 2);
+ // Rows 1 and 3: the same two filters applied to the 16 bytes at src + 1.
+ pixel = _mm_loadu_si128((__m128i const *)(src + 1));
+ sumPairRow[1] = _mm_maddubs_epi16(pixel, f[0]);
+ sumPairRow[3] = _mm_maddubs_epi16(pixel, f[1]);
+ sumPairRow[3] = _mm_srli_si128(sumPairRow[3], 2);
+ // Regroup so that sumPairCol[k] holds pair sum k of all four rows.
+ transpose_4x8(sumPairRow, sumPairCol);
+ // Outer pair sums first (0 + 1 and 4 + 5), using saturating 16-bit adds.
+ sumPairRow[0] = _mm_adds_epi16(sumPairCol[0], sumPairCol[1]);
+ sumPairRow[1] = _mm_adds_epi16(sumPairCol[4], sumPairCol[5]);
+ // The two middle pair sums are split into their per-lane min and max.
+ sumPairRow[2] = _mm_min_epi16(sumPairCol[2], sumPairCol[3]);
+ sumPairRow[3] = _mm_max_epi16(sumPairCol[2], sumPairCol[3]);
+ // Saturating adds: outer sums, then min, then max (order likely deliberate).
+ sumPairRow[0] = _mm_adds_epi16(sumPairRow[0], sumPairRow[1]);
+ sumPairRow[0] = _mm_adds_epi16(sumPairRow[0], sumPairRow[2]);
+ sumPairRow[0] = _mm_adds_epi16(sumPairRow[0], sumPairRow[3]);
+ // Round and shift right by 7, clamp to 0..255, widen back to 16-bit lanes.
+ sumPairRow[1] = _mm_mulhrs_epi16(sumPairRow[0], k_256);
+ sumPairRow[1] = _mm_packus_epi16(sumPairRow[1], sumPairRow[1]);
+ sumPairRow[1] = _mm_unpacklo_epi8(sumPairRow[1], zero);
+ // store_func receives the four results in the low 16-bit lanes.
+ store_func(&sumPairRow[1], dst);
+}
+
+static void horiz_w8_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
+ store_pixel_t store, uint8_t *buf) {
+ // Eight pixels are produced as two back-to-back 4-pixel runs; the second
+ // run starts 4 bytes further along in both src and buf.
+ horiz_w4_ssse3(src, f, tapsNum, store, buf);
+ horiz_w4_ssse3(src + 4, f, tapsNum, store, buf + 4);
+}
+
+static void horiz_w16_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
+ store_pixel_t store, uint8_t *buf) {
+ // Sixteen pixels are produced as two back-to-back 8-pixel runs; the second
+ // run starts 8 bytes further along in both src and buf.
+ horiz_w8_ssse3(src, f, tapsNum, store, buf);
+ horiz_w8_ssse3(src + 8, f, tapsNum, store, buf + 8);
+}
+
+static void horiz_w32_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
+ store_pixel_t store, uint8_t *buf) {
+ // Thirty-two pixels are produced as two back-to-back 16-pixel runs; the
+ // second run starts 16 bytes further along in both src and buf.
+ horiz_w16_ssse3(src, f, tapsNum, store, buf);
+ horiz_w16_ssse3(src + 16, f, tapsNum, store, buf + 16);
+}
+
+static void horiz_w64_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
+ store_pixel_t store, uint8_t *buf) {
+ // Sixty-four pixels are produced as two back-to-back 32-pixel runs; the
+ // second run starts 32 bytes further along in both src and buf.
+ horiz_w32_ssse3(src, f, tapsNum, store, buf);
+ horiz_w32_ssse3(src + 32, f, tapsNum, store, buf + 32);
+}
+
+static void horiz_w128_ssse3(const uint8_t *src, const __m128i *f, int tapsNum,
+ store_pixel_t store, uint8_t *buf) {
+ // 128 pixels are produced as two back-to-back 64-pixel runs; the second
+ // run starts 64 bytes further along in both src and buf.
+ horiz_w64_ssse3(src, f, tapsNum, store, buf);
+ horiz_w64_ssse3(src + 64, f, tapsNum, store, buf + 64);
+}
+
+// Row kernels ordered by output width: 4, 8, 16, 32, 64, 128. Read-only.
+static void (*const horizTab[6])(const uint8_t *, const __m128i *, int,
+ store_pixel_t, uint8_t *) = {
+ horiz_w4_ssse3, horiz_w8_ssse3, horiz_w16_ssse3, horiz_w32_ssse3,
+ horiz_w64_ssse3, horiz_w128_ssse3 };
+
+static void filter_horiz_ssse3(const uint8_t *src, __m128i *f, int tapsNum,
+ int width, store_pixel_t store, uint8_t *dst) {
+ int kernel;  // horizTab slot for width; that kernel runs once on (src, dst).
+ switch (width) {
+ case 2:  // Shares the 4-pixel kernel; store decides how many are written.
+ case 4: kernel = 0; break;
+ case 8: kernel = 1; break;
+ case 16: kernel = 2; break;
+ case 32: kernel = 3; break;
+ case 64: kernel = 4; break;
+ case 128: kernel = 5; break;
+ default: assert(0); return;
+ }
+ horizTab[kernel](src, f, tapsNum, store, dst);
+}
+
+// Vertical 8-pixel parallel; callback that transposes a uint16_t tile into dst
+typedef void (*transpose_to_dst_t)(const uint16_t *src, int src_stride,
+ uint8_t *dst, int dst_stride);
+
+static INLINE void transpose8x8_direct_to_dst(const uint16_t *src,
+ int src_stride, uint8_t *dst,
+ int dst_stride) {
+ const __m128i k_256 = _mm_set1_epi16(1 << 8);
+ __m128i v0, v1, v2, v3;
+ // Rounds, narrows and transposes an 8x8 tile of 16-bit sums into dst.
+ __m128i u0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride));
+ __m128i u1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride));
+ __m128i u2 = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
+ __m128i u3 = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
+ __m128i u4 = _mm_loadu_si128((__m128i const *)(src + 4 * src_stride));
+ __m128i u5 = _mm_loadu_si128((__m128i const *)(src + 5 * src_stride));
+ __m128i u6 = _mm_loadu_si128((__m128i const *)(src + 6 * src_stride));
+ __m128i u7 = _mm_loadu_si128((__m128i const *)(src + 7 * src_stride));
+ // mulhrs by 256 is a rounded right shift by 7: (x + 64) >> 7.
+ u0 = _mm_mulhrs_epi16(u0, k_256);
+ u1 = _mm_mulhrs_epi16(u1, k_256);
+ u2 = _mm_mulhrs_epi16(u2, k_256);
+ u3 = _mm_mulhrs_epi16(u3, k_256);
+ u4 = _mm_mulhrs_epi16(u4, k_256);
+ u5 = _mm_mulhrs_epi16(u5, k_256);
+ u6 = _mm_mulhrs_epi16(u6, k_256);
+ u7 = _mm_mulhrs_epi16(u7, k_256);
+ // Saturate to 8 bits; each of v0..v3 holds two consecutive source rows.
+ v0 = _mm_packus_epi16(u0, u1);
+ v1 = _mm_packus_epi16(u2, u3);
+ v2 = _mm_packus_epi16(u4, u5);
+ v3 = _mm_packus_epi16(u6, u7);
+ // The byte interleaves below perform the 8x8 transpose.
+ u0 = _mm_unpacklo_epi8(v0, v1);
+ u1 = _mm_unpackhi_epi8(v0, v1);
+ u2 = _mm_unpacklo_epi8(v2, v3);
+ u3 = _mm_unpackhi_epi8(v2, v3);
+ // u4/u6: rows 0-3 of columns 0-3/4-7; u5/u7: the same for rows 4-7.
+ u4 = _mm_unpacklo_epi8(u0, u1);
+ u5 = _mm_unpacklo_epi8(u2, u3);
+ u6 = _mm_unpackhi_epi8(u0, u1);
+ u7 = _mm_unpackhi_epi8(u2, u3);
+ // u0..u3 each end up with two complete source columns (8 bytes apiece).
+ u0 = _mm_unpacklo_epi32(u4, u5);
+ u1 = _mm_unpackhi_epi32(u4, u5);
+ u2 = _mm_unpacklo_epi32(u6, u7);
+ u3 = _mm_unpackhi_epi32(u6, u7);
+ // Bring the upper column of each vector down to the low 8 bytes.
+ u4 = _mm_srli_si128(u0, 8);
+ u5 = _mm_srli_si128(u1, 8);
+ u6 = _mm_srli_si128(u2, 8);
+ u7 = _mm_srli_si128(u3, 8);
+ // dst row k receives source column k (8 bytes per row).
+ _mm_storel_epi64((__m128i *)dst, u0);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 1), u4);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 2), u1);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 3), u5);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 4), u2);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 5), u6);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 6), u3);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 7), u7);
+}
+
+static INLINE void transpose8x8_accumu_to_dst(const uint16_t *src,
+ int src_stride, uint8_t *dst,
+ int dst_stride) {
+ const __m128i k_256 = _mm_set1_epi16(1 << 8);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i one = _mm_set1_epi16(1);
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+ // Rounds, narrows and transposes an 8x8 tile, then averages it into dst.
+ __m128i u0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride));
+ __m128i u1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride));
+ __m128i u2 = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
+ __m128i u3 = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
+ __m128i u4 = _mm_loadu_si128((__m128i const *)(src + 4 * src_stride));
+ __m128i u5 = _mm_loadu_si128((__m128i const *)(src + 5 * src_stride));
+ __m128i u6 = _mm_loadu_si128((__m128i const *)(src + 6 * src_stride));
+ __m128i u7 = _mm_loadu_si128((__m128i const *)(src + 7 * src_stride));
+ // mulhrs by 256 is a rounded right shift by 7: (x + 64) >> 7.
+ u0 = _mm_mulhrs_epi16(u0, k_256);
+ u1 = _mm_mulhrs_epi16(u1, k_256);
+ u2 = _mm_mulhrs_epi16(u2, k_256);
+ u3 = _mm_mulhrs_epi16(u3, k_256);
+ u4 = _mm_mulhrs_epi16(u4, k_256);
+ u5 = _mm_mulhrs_epi16(u5, k_256);
+ u6 = _mm_mulhrs_epi16(u6, k_256);
+ u7 = _mm_mulhrs_epi16(u7, k_256);
+ // Saturate to 8 bits; each of v0..v3 holds two consecutive source rows.
+ v0 = _mm_packus_epi16(u0, u1);
+ v1 = _mm_packus_epi16(u2, u3);
+ v2 = _mm_packus_epi16(u4, u5);
+ v3 = _mm_packus_epi16(u6, u7);
+ // The byte interleaves below perform the 8x8 transpose.
+ u0 = _mm_unpacklo_epi8(v0, v1);
+ u1 = _mm_unpackhi_epi8(v0, v1);
+ u2 = _mm_unpacklo_epi8(v2, v3);
+ u3 = _mm_unpackhi_epi8(v2, v3);
+ // u4/u6: rows 0-3 of columns 0-3/4-7; u5/u7: the same for rows 4-7.
+ u4 = _mm_unpacklo_epi8(u0, u1);
+ u5 = _mm_unpacklo_epi8(u2, u3);
+ u6 = _mm_unpackhi_epi8(u0, u1);
+ u7 = _mm_unpackhi_epi8(u2, u3);
+ // u0..u3 each end up with two complete source columns (8 bytes apiece).
+ u0 = _mm_unpacklo_epi32(u4, u5);
+ u1 = _mm_unpackhi_epi32(u4, u5);
+ u2 = _mm_unpacklo_epi32(u6, u7);
+ u3 = _mm_unpackhi_epi32(u6, u7);
+ // Bring the upper column of each vector down to the low 8 bytes.
+ u4 = _mm_srli_si128(u0, 8);
+ u5 = _mm_srli_si128(u1, 8);
+ u6 = _mm_srli_si128(u2, 8);
+ u7 = _mm_srli_si128(u3, 8);
+ // Load the 8x8 block of pixels currently stored in dst.
+ v0 = _mm_loadl_epi64((__m128i const *)(dst + 0 * dst_stride));
+ v1 = _mm_loadl_epi64((__m128i const *)(dst + 1 * dst_stride));
+ v2 = _mm_loadl_epi64((__m128i const *)(dst + 2 * dst_stride));
+ v3 = _mm_loadl_epi64((__m128i const *)(dst + 3 * dst_stride));
+ v4 = _mm_loadl_epi64((__m128i const *)(dst + 4 * dst_stride));
+ v5 = _mm_loadl_epi64((__m128i const *)(dst + 5 * dst_stride));
+ v6 = _mm_loadl_epi64((__m128i const *)(dst + 6 * dst_stride));
+ v7 = _mm_loadl_epi64((__m128i const *)(dst + 7 * dst_stride));
+ // Widen the transposed columns (low 8 bytes of each u) back to 16 bits.
+ u0 = _mm_unpacklo_epi8(u0, zero);
+ u1 = _mm_unpacklo_epi8(u1, zero);
+ u2 = _mm_unpacklo_epi8(u2, zero);
+ u3 = _mm_unpacklo_epi8(u3, zero);
+ u4 = _mm_unpacklo_epi8(u4, zero);
+ u5 = _mm_unpacklo_epi8(u5, zero);
+ u6 = _mm_unpacklo_epi8(u6, zero);
+ u7 = _mm_unpacklo_epi8(u7, zero);
+ // Widen the existing dst pixels to 16 bits as well.
+ v0 = _mm_unpacklo_epi8(v0, zero);
+ v1 = _mm_unpacklo_epi8(v1, zero);
+ v2 = _mm_unpacklo_epi8(v2, zero);
+ v3 = _mm_unpacklo_epi8(v3, zero);
+ v4 = _mm_unpacklo_epi8(v4, zero);
+ v5 = _mm_unpacklo_epi8(v5, zero);
+ v6 = _mm_unpacklo_epi8(v6, zero);
+ v7 = _mm_unpacklo_epi8(v7, zero);
+ // dst rows 0..7 pair with columns held in u0, u4, u1, u5, u2, u6, u3, u7.
+ v0 = _mm_adds_epi16(u0, v0);
+ v1 = _mm_adds_epi16(u4, v1);
+ v2 = _mm_adds_epi16(u1, v2);
+ v3 = _mm_adds_epi16(u5, v3);
+ v4 = _mm_adds_epi16(u2, v4);
+ v5 = _mm_adds_epi16(u6, v5);
+ v6 = _mm_adds_epi16(u3, v6);
+ v7 = _mm_adds_epi16(u7, v7);
+ // Rounded average of new and existing pixel: (new + old + 1) >> 1.
+ v0 = _mm_adds_epi16(v0, one);
+ v1 = _mm_adds_epi16(v1, one);
+ v2 = _mm_adds_epi16(v2, one);
+ v3 = _mm_adds_epi16(v3, one);
+ v4 = _mm_adds_epi16(v4, one);
+ v5 = _mm_adds_epi16(v5, one);
+ v6 = _mm_adds_epi16(v6, one);
+ v7 = _mm_adds_epi16(v7, one);
+
+ v0 = _mm_srai_epi16(v0, 1);
+ v1 = _mm_srai_epi16(v1, 1);
+ v2 = _mm_srai_epi16(v2, 1);
+ v3 = _mm_srai_epi16(v3, 1);
+ v4 = _mm_srai_epi16(v4, 1);
+ v5 = _mm_srai_epi16(v5, 1);
+ v6 = _mm_srai_epi16(v6, 1);
+ v7 = _mm_srai_epi16(v7, 1);
+ // Narrow back to 8 bits, two output rows per vector.
+ u0 = _mm_packus_epi16(v0, v1);
+ u1 = _mm_packus_epi16(v2, v3);
+ u2 = _mm_packus_epi16(v4, v5);
+ u3 = _mm_packus_epi16(v6, v7);
+ // Bring the odd-numbered rows down to the low 8 bytes.
+ u4 = _mm_srli_si128(u0, 8);
+ u5 = _mm_srli_si128(u1, 8);
+ u6 = _mm_srli_si128(u2, 8);
+ u7 = _mm_srli_si128(u3, 8);
+ // Write the eight averaged rows, 8 bytes each.
+ _mm_storel_epi64((__m128i *)dst, u0);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 1), u4);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 2), u1);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 3), u5);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 4), u2);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 5), u6);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 6), u3);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride * 7), u7);
+}
+
+static transpose_to_dst_t trans8x8Tab[2] = { transpose8x8_direct_to_dst,
+ transpose8x8_accumu_to_dst };  // [0] overwrite, [1] average
+
+static INLINE void transpose_8x16(const __m128i *in, __m128i *out) {
+ __m128i w01, w23, w45, w67, rows0123, rows4567;
+ // Low 8 bytes of every row: interleave the 16-bit lanes of row pairs.
+ w01 = _mm_unpacklo_epi16(in[0], in[1]);
+ w23 = _mm_unpacklo_epi16(in[2], in[3]);
+ w45 = _mm_unpacklo_epi16(in[4], in[5]);
+ w67 = _mm_unpacklo_epi16(in[6], in[7]);
+ // Byte pairs 0 and 1, gathered over rows 0-3 and over rows 4-7.
+ rows0123 = _mm_unpacklo_epi32(w01, w23);
+ rows4567 = _mm_unpacklo_epi32(w45, w67);
+
+ out[0] = _mm_unpacklo_epi64(rows0123, rows4567);
+ out[1] = _mm_unpackhi_epi64(rows0123, rows4567);
+ // Byte pairs 2 and 3.
+ rows0123 = _mm_unpackhi_epi32(w01, w23);
+ rows4567 = _mm_unpackhi_epi32(w45, w67);
+
+ out[2] = _mm_unpacklo_epi64(rows0123, rows4567);
+ out[3] = _mm_unpackhi_epi64(rows0123, rows4567);
+ // High 8 bytes of every row.
+ w01 = _mm_unpackhi_epi16(in[0], in[1]);
+ w23 = _mm_unpackhi_epi16(in[2], in[3]);
+ w45 = _mm_unpackhi_epi16(in[4], in[5]);
+ w67 = _mm_unpackhi_epi16(in[6], in[7]);
+ // Byte pairs 4 and 5.
+ rows0123 = _mm_unpacklo_epi32(w01, w23);
+ rows4567 = _mm_unpacklo_epi32(w45, w67);
+
+ out[4] = _mm_unpacklo_epi64(rows0123, rows4567);
+ out[5] = _mm_unpackhi_epi64(rows0123, rows4567);
+
+ // Byte pairs 6 and 7 (source bytes 12..15) are not produced:
+ // only out[0..5] are written.
+}
+
+static void filter_horiz_v8p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch,
+ __m128i *f, int tapsNum, uint16_t *buf) {
+ // Applies the filter at one horizontal position on 8 consecutive rows and
+ // stores the 8 unrounded 16-bit sums to buf.
+ __m128i rows[8], pairs[6], prod[6];
+ __m128i mid_lo, mid_hi, acc;
+ int k;
+
+ // The 10-tap case reads from one byte earlier than the 12-tap case.
+ if (tapsNum == 10) {
+ src_ptr -= 1;
+ }
+
+ // 16 source bytes from each of the 8 rows.
+ for (k = 0; k < 8; ++k) {
+ rows[k] = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * k));
+ }
+
+ // TRANSPOSE...
+ // Each vector now holds one horizontal byte pair from all 8 rows, rather
+ // than 16 bytes of a single row.
+ transpose_8x16(rows, pairs);
+
+ // Multiply the two bytes of every pair by f[k] and add the two products.
+ for (k = 0; k < 6; ++k) {
+ prod[k] = _mm_maddubs_epi16(pairs[k], f[k]);
+ }
+
+ // Saturating accumulation. The add order is kept exactly as written:
+ // outer products first, then the smaller and finally the larger of the two
+ // middle products, since saturating adds are order-sensitive.
+ mid_lo = _mm_min_epi16(prod[2], prod[3]);
+ mid_hi = _mm_max_epi16(prod[2], prod[3]);
+ acc = _mm_adds_epi16(prod[0], prod[1]);
+ acc = _mm_adds_epi16(acc, prod[5]);
+ acc = _mm_adds_epi16(acc, prod[4]);
+ acc = _mm_adds_epi16(acc, mid_lo);
+ acc = _mm_adds_epi16(acc, mid_hi);
+
+ // Rounding and narrowing are left to the caller's transpose step.
+ _mm_storeu_si128((__m128i *)buf, acc);
+}
+
+// Vertical 4-pixel parallel
+static INLINE void transpose4x4_direct_to_dst(const uint16_t *src,
+ int src_stride, uint8_t *dst,
+ int dst_stride) {
+ const __m128i round_shift7 = _mm_set1_epi16(1 << 8);
+ __m128i r0, r1, r2, r3, r01, r23, c01, c23, px;
+
+ // Rounds, narrows and transposes a 4x4 tile of 16-bit sums into dst.
+ // TODO(luoyi): two loads, 8 elements per load (two bytes per element)
+ r0 = _mm_loadl_epi64((__m128i const *)(src + 0 * src_stride));
+ r1 = _mm_loadl_epi64((__m128i const *)(src + 1 * src_stride));
+ r2 = _mm_loadl_epi64((__m128i const *)(src + 2 * src_stride));
+ r3 = _mm_loadl_epi64((__m128i const *)(src + 3 * src_stride));
+
+ // Interleave rows 0/1 and rows 2/3 lane by lane.
+ r01 = _mm_unpacklo_epi16(r0, r1);
+ r23 = _mm_unpacklo_epi16(r2, r3);
+ // c01 = source columns 0 and 1, c23 = columns 2 and 3 (four values each).
+ c01 = _mm_unpacklo_epi32(r01, r23);
+ c23 = _mm_unpackhi_epi32(r01, r23);
+
+ // mulhrs by 256 is a rounded right shift by 7: (x + 64) >> 7.
+ c01 = _mm_mulhrs_epi16(c01, round_shift7);
+ c23 = _mm_mulhrs_epi16(c23, round_shift7);
+
+ // Saturate to bytes: px holds the four columns, four bytes apiece.
+ px = _mm_packus_epi16(c01, c23);
+ *(int *)(dst) = _mm_cvtsi128_si32(px);
+ *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(_mm_srli_si128(px, 4));
+ *(int *)(dst + dst_stride * 2) = _mm_cvtsi128_si32(_mm_srli_si128(px, 8));
+ *(int *)(dst + dst_stride * 3) = _mm_cvtsi128_si32(_mm_srli_si128(px, 12));
+}
+
+static INLINE void transpose4x4_accumu_to_dst(const uint16_t *src,
+ int src_stride, uint8_t *dst,
+ int dst_stride) {
+ const __m128i k_256 = _mm_set1_epi16(1 << 8);
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i one = _mm_set1_epi16(1);
+ // Rounds, narrows and transposes a 4x4 tile, then averages it into dst.
+ __m128i v0, v1, v2, v3;
+
+ __m128i u0 = _mm_loadl_epi64((__m128i const *)(src));
+ __m128i u1 = _mm_loadl_epi64((__m128i const *)(src + src_stride));
+ __m128i u2 = _mm_loadl_epi64((__m128i const *)(src + 2 * src_stride));
+ __m128i u3 = _mm_loadl_epi64((__m128i const *)(src + 3 * src_stride));
+ // Interleave rows 0/1 and rows 2/3 lane by lane.
+ v0 = _mm_unpacklo_epi16(u0, u1);
+ v1 = _mm_unpacklo_epi16(u2, u3);
+ // v2 = source columns 0 and 1, v3 = columns 2 and 3 (four values each).
+ v2 = _mm_unpacklo_epi32(v0, v1);
+ v3 = _mm_unpackhi_epi32(v0, v1);
+ // mulhrs by 256 is a rounded right shift by 7: (x + 64) >> 7.
+ u0 = _mm_mulhrs_epi16(v2, k_256);
+ u1 = _mm_mulhrs_epi16(v3, k_256);
+ // Saturate to bytes, then widen again: u0 = output rows 0-1, u1 = rows 2-3.
+ u2 = _mm_packus_epi16(u0, u1);
+ u0 = _mm_unpacklo_epi8(u2, zero);
+ u1 = _mm_unpackhi_epi8(u2, zero);
+ // Load exactly the 4 pixels of each dst row that belong to this tile; an
+ // 8-byte load would read 4 bytes past the tile on every row.
+ v0 = _mm_cvtsi32_si128(*(const int *)(dst));
+ v1 = _mm_cvtsi32_si128(*(const int *)(dst + dst_stride));
+ v2 = _mm_cvtsi32_si128(*(const int *)(dst + 2 * dst_stride));
+ v3 = _mm_cvtsi32_si128(*(const int *)(dst + 3 * dst_stride));
+ // Widen the existing pixels to 16 bits.
+ v0 = _mm_unpacklo_epi8(v0, zero);
+ v1 = _mm_unpacklo_epi8(v1, zero);
+ v2 = _mm_unpacklo_epi8(v2, zero);
+ v3 = _mm_unpacklo_epi8(v3, zero);
+ // Pack two rows per vector to line up with u0/u1.
+ v0 = _mm_unpacklo_epi64(v0, v1);
+ v1 = _mm_unpacklo_epi64(v2, v3);
+ // Rounded average of new and existing pixel: (new + old + 1) >> 1.
+ u0 = _mm_adds_epi16(u0, v0);
+ u1 = _mm_adds_epi16(u1, v1);
+
+ u0 = _mm_adds_epi16(u0, one);
+ u1 = _mm_adds_epi16(u1, one);
+
+ u0 = _mm_srai_epi16(u0, 1);
+ u1 = _mm_srai_epi16(u1, 1);
+
+ // saturation and pack to pixels
+ u0 = _mm_packus_epi16(u0, u1);
+ u1 = _mm_srli_si128(u0, 4);
+ u2 = _mm_srli_si128(u0, 8);
+ u3 = _mm_srli_si128(u0, 12);
+ // Write 4 bytes per output row.
+ *(int *)(dst) = _mm_cvtsi128_si32(u0);
+ *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u1);
+ *(int *)(dst + dst_stride * 2) = _mm_cvtsi128_si32(u2);
+ *(int *)(dst + dst_stride * 3) = _mm_cvtsi128_si32(u3);
+}
+
+static transpose_to_dst_t trans4x4Tab[2] = { transpose4x4_direct_to_dst,
+ transpose4x4_accumu_to_dst };  // [0] overwrite, [1] average
+
+static void filter_horiz_v4p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch,
+ __m128i *f, int tapsNum, uint16_t *buf) {
+ __m128i A, B, C, D;
+ __m128i tr0_0, tr0_1, s1s0, s3s2, s5s4, s7s6, s9s8, sbsa;
+ __m128i x0, x1, x2, x3, x4, x5;
+ __m128i min_x2x3, max_x2x3, temp;
+ // Filters one horizontal position on 4 consecutive rows; 4 sums go to buf.
+ if (tapsNum == 10) {
+ src_ptr -= 1;  // the 10-tap case reads from one byte earlier
+ }
+ A = _mm_loadu_si128((const __m128i *)src_ptr);
+ B = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch));
+ C = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2));
+ D = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3));
+
+ // TRANSPOSE...
+ // Vector represents column pixel pairs instead of a row
+ // 00 01 10 11 02 03 12 13 04 05 14 15 06 07 16 17
+ tr0_0 = _mm_unpacklo_epi16(A, B);
+ // 20 21 30 31 22 23 32 33 24 25 34 35 26 27 36 37
+ tr0_1 = _mm_unpacklo_epi16(C, D);
+ // 00 01 10 11 20 21 30 31 02 03 12 13 22 23 32 33
+ s1s0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ // 04 05 14 15 24 25 34 35 06 07 16 17 26 27 36 37
+ s5s4 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ // 02 03 12 13 22 23 32 33
+ s3s2 = _mm_srli_si128(s1s0, 8);
+ // 06 07 16 17 26 27 36 37
+ s7s6 = _mm_srli_si128(s5s4, 8);
+ // Same interleave on the high row halves: byte pairs 8-9 and 10-11.
+ tr0_0 = _mm_unpackhi_epi16(A, B);
+ tr0_1 = _mm_unpackhi_epi16(C, D);
+ s9s8 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ sbsa = _mm_srli_si128(s9s8, 8);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ x0 = _mm_maddubs_epi16(s1s0, f[0]);
+ x1 = _mm_maddubs_epi16(s3s2, f[1]);
+ x2 = _mm_maddubs_epi16(s5s4, f[2]);
+ x3 = _mm_maddubs_epi16(s7s6, f[3]);
+ x4 = _mm_maddubs_epi16(s9s8, f[4]);
+ x5 = _mm_maddubs_epi16(sbsa, f[5]);
+ // add and saturate the results together
+ min_x2x3 = _mm_min_epi16(x2, x3);
+ max_x2x3 = _mm_max_epi16(x2, x3);
+ temp = _mm_adds_epi16(x0, x1);
+ temp = _mm_adds_epi16(temp, x5);
+ temp = _mm_adds_epi16(temp, x4);
+
+ temp = _mm_adds_epi16(temp, min_x2x3);
+ temp = _mm_adds_epi16(temp, max_x2x3);
+ _mm_storel_epi64((__m128i *)buf, temp);  // keep the low 4 lanes (rows 0..3)
+}
+
+// SSSE3 horizontal convolution for 10/12-tap filters with x_step_q4 == 16
+// (one fixed filter per call). A zero sub-pixel offset, any other step, or a
+// missing SIMD coefficient table is handed to vp10_convolve_horiz_c().
+// avg selects entry 0 or 1 of the store/transpose tables; for the transposes
+// entry 0 overwrites dst and entry 1 averages with dst.
+void vp10_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ const InterpFilterParams filter_params,
+ const int subpel_x_q4, int x_step_q4, int avg) {
+ DECLARE_ALIGNED(16, uint16_t, temp[8 * 8]);
+ __m128i verf[6];
+ __m128i horf[2];
+ SubpelFilterCoeffs hCoeffs, vCoeffs;
+ const uint8_t *src_ptr;
+ store_pixel_t store2p = store2pixelTab[avg];
+ store_pixel_t store4p = store4pixelTab[avg];
+ transpose_to_dst_t transpose_4x4 = trans4x4Tab[avg];
+ transpose_to_dst_t transpose_8x8 = trans8x8Tab[avg];
+
+ const int tapsNum = filter_params.taps;
+ int block_height, block_residu;
+ int i, col;
+ int count = 0;  // number of full 8- or 4-row bands completed
+
+ if (0 == subpel_x_q4 || 16 != x_step_q4) {
+ vp10_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
+ subpel_x_q4, x_step_q4, avg);
+ return;
+ }
+
+ hCoeffs = vp10_get_subpel_filter_signal_dir(filter_params, subpel_x_q4 - 1);
+ vCoeffs =
+ vp10_get_subpel_filter_ver_signal_dir(filter_params, subpel_x_q4 - 1);
+
+ if (!hCoeffs || !vCoeffs) {
+ vp10_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
+ subpel_x_q4, x_step_q4, avg);
+ return;
+ }
+
+ verf[0] = *((const __m128i *)(vCoeffs));
+ verf[1] = *((const __m128i *)(vCoeffs + 1));
+ verf[2] = *((const __m128i *)(vCoeffs + 2));
+ verf[3] = *((const __m128i *)(vCoeffs + 3));
+ verf[4] = *((const __m128i *)(vCoeffs + 4));
+ verf[5] = *((const __m128i *)(vCoeffs + 5));
+
+ horf[0] = *((const __m128i *)(hCoeffs));
+ horf[1] = *((const __m128i *)(hCoeffs + 1));
+
+ // verf feeds the multi-row (transposed) kernels, horf the single-row kernel.
+
+ // here tapsNum is filter size
+ src -= (tapsNum >> 1) - 1;
+ src_ptr = src;
+ if (w > WIDTH_BOUND && h > HEIGHT_BOUND) {
+ // 8-pixels parallel
+ block_height = h >> 3;
+ block_residu = h & 7;
+ // Full 8-row bands; pre-tested so that h < 8 never runs a band.
+ while (count < block_height) {
+ for (col = 0; col < w; col += 8) {
+ for (i = 0; i < 8; ++i) {
+ filter_horiz_v8p_ssse3(src_ptr, src_stride, verf, tapsNum,
+ temp + (i * 8));
+ src_ptr += 1;
+ }
+ transpose_8x8(temp, 8, dst + col, dst_stride);
+ }
+ count++;
+ src_ptr = src + count * src_stride * 8;
+ dst += dst_stride * 8;
+ }
+
+ for (i = 0; i < block_residu; ++i) {
+ filter_horiz_ssse3(src_ptr, horf, tapsNum, w, store4p, dst);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ }
+ } else {
+ if (w > 2) {
+ // 4-pixels parallel
+ block_height = h >> 2;
+ block_residu = h & 3;
+ // Full 4-row bands; pre-tested so h < 4 cannot write rows past the block.
+ while (count < block_height) {
+ for (col = 0; col < w; col += 4) {
+ for (i = 0; i < 4; ++i) {
+ filter_horiz_v4p_ssse3(src_ptr, src_stride, verf, tapsNum,
+ temp + (i * 4));
+ src_ptr += 1;
+ }
+ transpose_4x4(temp, 4, dst + col, dst_stride);
+ }
+ count++;
+ src_ptr = src + count * src_stride * 4;
+ dst += dst_stride * 4;
+ }
+
+ for (i = 0; i < block_residu; ++i) {
+ filter_horiz_ssse3(src_ptr, horf, tapsNum, w, store4p, dst);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ }
+ } else {
+ for (i = 0; i < h; i++) {
+ filter_horiz_ssse3(src_ptr, horf, tapsNum, w, store2p, dst);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ }
+ }
+ }
+}
+
+// Vertical convolution filtering
+static INLINE void store_8_pixel_only(const __m128i *x, uint8_t *dst) {
+ // Saturate the eight 16-bit lanes of *x to bytes and write them to dst.
+ _mm_storel_epi64((__m128i *)dst, _mm_packus_epi16(*x, *x));
+}
+
+static INLINE void accumulate_store_8_pixel(const __m128i *x, uint8_t *dst) {
+ // accumulate_store() is defined elsewhere; write its result back as 8 bytes.
+ _mm_storel_epi64((__m128i *)dst, accumulate_store(x, dst));
+}
+
+static store_pixel_t store8pixelTab[2] = { store_8_pixel_only,
+ accumulate_store_8_pixel };  // indexed by avg
+
+static __m128i filter_vert_ssse3(const uint8_t *src, int src_stride,
+ int tapsNum, __m128i *f) {
+ __m128i s[12];
+ const __m128i k_256 = _mm_set1_epi16(1 << 8);
+ const __m128i zero = _mm_setzero_si128();
+ __m128i min_x2x3, max_x2x3, sum;
+ int i = 0;
+ int r = 0;
+ // Vertically filters 8 adjacent columns; returns 8 pixels as 16-bit lanes.
+ if (10 == tapsNum) {
+ i += 1;
+ s[0] = zero;  // 10-tap: slot 0 is fed zeros, source rows start at slot 1
+ }
+ // Only the low 8 bytes of each row are consumed by the unpacklo steps, so
+ // load exactly 8 bytes per row instead of 16.
+ for (; i < 12; ++i, ++r) {
+ s[i] = _mm_loadl_epi64((__m128i const *)(src + r * src_stride));
+ }
+ // Interleave vertically adjacent rows into byte pairs.
+ s[0] = _mm_unpacklo_epi8(s[0], s[1]);
+ s[2] = _mm_unpacklo_epi8(s[2], s[3]);
+ s[4] = _mm_unpacklo_epi8(s[4], s[5]);
+ s[6] = _mm_unpacklo_epi8(s[6], s[7]);
+ s[8] = _mm_unpacklo_epi8(s[8], s[9]);
+ s[10] = _mm_unpacklo_epi8(s[10], s[11]);
+ // Multiply each row pair by its entry of f and add within the pair.
+ s[0] = _mm_maddubs_epi16(s[0], f[0]);
+ s[2] = _mm_maddubs_epi16(s[2], f[1]);
+ s[4] = _mm_maddubs_epi16(s[4], f[2]);
+ s[6] = _mm_maddubs_epi16(s[6], f[3]);
+ s[8] = _mm_maddubs_epi16(s[8], f[4]);
+ s[10] = _mm_maddubs_epi16(s[10], f[5]);
+ // Saturating adds: outer products first, then the two middle ones.
+ min_x2x3 = _mm_min_epi16(s[4], s[6]);
+ max_x2x3 = _mm_max_epi16(s[4], s[6]);
+ sum = _mm_adds_epi16(s[0], s[2]);
+ sum = _mm_adds_epi16(sum, s[10]);
+ sum = _mm_adds_epi16(sum, s[8]);
+
+ sum = _mm_adds_epi16(sum, min_x2x3);
+ sum = _mm_adds_epi16(sum, max_x2x3);
+ // mulhrs by 256 is a rounded >> 7; pack clamps to 0..255, unpack widens.
+ sum = _mm_mulhrs_epi16(sum, k_256);
+ sum = _mm_packus_epi16(sum, sum);
+ sum = _mm_unpacklo_epi8(sum, zero);
+ return sum;
+}
+
+static void filter_vert_horiz_parallel_ssse3(const uint8_t *src, int src_stride,
+ __m128i *f, int tapsNum,
+ store_pixel_t store_func,
+ uint8_t *dst) {
+ __m128i filtered = filter_vert_ssse3(src, src_stride, tapsNum, f);  // 8 px
+ store_func(&filtered, dst);  // store_func decides how many pixels are kept
+}
+
+static void filter_vert_compute_small(const uint8_t *src, int src_stride,
+ __m128i *f, int tapsNum,
+ store_pixel_t store_func, int h,
+ uint8_t *dst, int dst_stride) {
+ // One filter call and one store per row, top to bottom. The loop is
+ // post-tested, so one row is processed even if h < 1.
+ int rows_done = 0;
+ do {
+ filter_vert_horiz_parallel_ssse3(src + rows_done * src_stride, src_stride,
+ f, tapsNum, store_func,
+ dst + rows_done * dst_stride);
+ } while (++rows_done < h);
+}
+
+static void filter_vert_compute_large(const uint8_t *src, int src_stride,
+ __m128i *f, int tapsNum,
+ store_pixel_t store_func, int w, int h,
+ uint8_t *dst, int dst_stride) {
+ // Every row is covered in 8-pixel-wide steps, left to right, and rows are
+ // visited top to bottom. The row loop is post-tested, so one row is
+ // processed even if h < 1.
+ int row = 0;
+ int x;
+
+ do {
+ const uint8_t *const src_row = src + row * src_stride;
+ uint8_t *const dst_row = dst + row * dst_stride;
+
+ for (x = 0; x < w; x += 8) {
+ filter_vert_horiz_parallel_ssse3(src_row + x, src_stride, f, tapsNum,
+ store_func, dst_row + x);
+ }
+ row++;
+ } while (row < h);
+}
+
+void vp10_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ const InterpFilterParams filter_params,
+ const int subpel_y_q4, int y_step_q4, int avg) {
+ // SSSE3 vertical convolution; avg indexes the store tables.
+ __m128i verf[6];
+ SubpelFilterCoeffs vCoeffs;
+ const uint8_t *first_row;
+ store_pixel_t store2p = store2pixelTab[avg];
+ store_pixel_t store4p = store4pixelTab[avg];
+ store_pixel_t store8p = store8pixelTab[avg];
+ const int tapsNum = filter_params.taps;
+ int k;
+
+ // A zero sub-pixel offset or a step other than 16 goes to the C version.
+ if (0 == subpel_y_q4 || 16 != y_step_q4) {
+ vp10_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
+ subpel_y_q4, y_step_q4, avg);
+ return;
+ }
+
+ vCoeffs =
+ vp10_get_subpel_filter_ver_signal_dir(filter_params, subpel_y_q4 - 1);
+ // No coefficient table was returned for this filter: use the C version.
+ if (!vCoeffs) {
+ vp10_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
+ subpel_y_q4, y_step_q4, avg);
+ return;
+ }
+
+ for (k = 0; k < 6; ++k) {
+ verf[k] = *((const __m128i *)(vCoeffs + k));
+ }
+
+ // Back up to the first source row read by the filter.
+ first_row = src - src_stride * ((tapsNum >> 1) - 1);
+
+ // The width picks both the row driver and the store function.
+ if (w > 4) {
+ filter_vert_compute_large(first_row, src_stride, verf, tapsNum, store8p, w,
+ h, dst, dst_stride);
+ } else if (4 == w) {
+ filter_vert_compute_small(first_row, src_stride, verf, tapsNum, store4p, h,
+ dst, dst_stride);
+ } else if (2 == w) {
+ filter_vert_compute_small(first_row, src_stride, verf, tapsNum, store2p, h,
+ dst, dst_stride);
+ } else {
+ assert(0);
+ }
+}
diff --git a/av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h b/av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h
new file mode 100644
index 0000000..e7d63fe
--- /dev/null
+++ b/av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h
@@ -0,0 +1,3201 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h> // SSE2
+
+#include "./vp10_rtcd.h"
+#include "av1/common/vp10_fwd_txfm.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+
+// TODO(jingning) The high bit-depth version needs re-work for performance.
+// The current SSE2 implementation also causes cross reference to the static
+// functions in the C implementation file.
+#if DCT_HIGH_BIT_DEPTH
+#define ADD_EPI16 _mm_adds_epi16
+#define SUB_EPI16 _mm_subs_epi16
+#if FDCT32x32_HIGH_PRECISION
+void vp10_fdct32x32_rows_c(const int16_t *intermediate, tran_low_t *out) {
+ int col, k;
+ for (col = 0; col < 32; ++col) {
+ tran_high_t column[32], coeff[32];  // one input column and its transform
+ for (k = 0; k < 32; ++k) column[k] = intermediate[k * 32 + col];
+ vp10_fdct32(column, coeff, 0);
+ // Stored as row `col` of out after (x + 1 + (x < 0)) >> 2.
+ for (k = 0; k < 32; ++k)
+ out[col * 32 + k] = (tran_low_t)((coeff[k] + 1 + (coeff[k] < 0)) >> 2);
+ }
+}
+#define HIGH_FDCT32x32_2D_C vp10_highbd_fdct32x32_c
+#define HIGH_FDCT32x32_2D_ROWS_C vp10_fdct32x32_rows_c
+#else
+void vp10_fdct32x32_rd_rows_c(const int16_t *intermediate, tran_low_t *out) {
+ int col, k;
+ for (col = 0; col < 32; ++col) {
+ tran_high_t column[32], coeff[32];  // one input column and its transform
+ for (k = 0; k < 32; ++k) column[k] = intermediate[k * 32 + col];
+ vp10_fdct32(column, coeff, 1);
+ for (k = 0; k < 32; ++k) out[col * 32 + k] = (tran_low_t)coeff[k];
+ }
+}
+#define HIGH_FDCT32x32_2D_C vp10_highbd_fdct32x32_rd_c
+#define HIGH_FDCT32x32_2D_ROWS_C vp10_fdct32x32_rd_rows_c
+#endif // FDCT32x32_HIGH_PRECISION
+#else
+#define ADD_EPI16 _mm_add_epi16
+#define SUB_EPI16 _mm_sub_epi16
+#endif // DCT_HIGH_BIT_DEPTH
+
+void FDCT32x32_2D(const int16_t *input, tran_low_t *output_org, int stride) {
+ // Calculate pre-multiplied strides
+ const int str1 = stride;
+ const int str2 = 2 * stride;
+ const int str3 = 2 * stride + str1;
+ // We need an intermediate buffer between passes.
+ DECLARE_ALIGNED(16, int16_t, intermediate[32 * 32]);
+ // Constants
+ // When we use them, in one case, they are all the same. In all others
+ // it's a pair of them that we need to repeat four times. This is done
+ // by constructing the 32 bit constant corresponding to that pair.
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(+cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(+cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_p12_p20 = pair_set_epi16(+cospi_12_64, cospi_20_64);
+ const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p28_p04 = pair_set_epi16(+cospi_28_64, cospi_4_64);
+ const __m128i k__cospi_m28_m04 = pair_set_epi16(-cospi_28_64, -cospi_4_64);
+ const __m128i k__cospi_m12_m20 = pair_set_epi16(-cospi_12_64, -cospi_20_64);
+ const __m128i k__cospi_p30_p02 = pair_set_epi16(+cospi_30_64, cospi_2_64);
+ const __m128i k__cospi_p14_p18 = pair_set_epi16(+cospi_14_64, cospi_18_64);
+ const __m128i k__cospi_p22_p10 = pair_set_epi16(+cospi_22_64, cospi_10_64);
+ const __m128i k__cospi_p06_p26 = pair_set_epi16(+cospi_6_64, cospi_26_64);
+ const __m128i k__cospi_m26_p06 = pair_set_epi16(-cospi_26_64, cospi_6_64);
+ const __m128i k__cospi_m10_p22 = pair_set_epi16(-cospi_10_64, cospi_22_64);
+ const __m128i k__cospi_m18_p14 = pair_set_epi16(-cospi_18_64, cospi_14_64);
+ const __m128i k__cospi_m02_p30 = pair_set_epi16(-cospi_2_64, cospi_30_64);
+ const __m128i k__cospi_p31_p01 = pair_set_epi16(+cospi_31_64, cospi_1_64);
+ const __m128i k__cospi_p15_p17 = pair_set_epi16(+cospi_15_64, cospi_17_64);
+ const __m128i k__cospi_p23_p09 = pair_set_epi16(+cospi_23_64, cospi_9_64);
+ const __m128i k__cospi_p07_p25 = pair_set_epi16(+cospi_7_64, cospi_25_64);
+ const __m128i k__cospi_m25_p07 = pair_set_epi16(-cospi_25_64, cospi_7_64);
+ const __m128i k__cospi_m09_p23 = pair_set_epi16(-cospi_9_64, cospi_23_64);
+ const __m128i k__cospi_m17_p15 = pair_set_epi16(-cospi_17_64, cospi_15_64);
+ const __m128i k__cospi_m01_p31 = pair_set_epi16(-cospi_1_64, cospi_31_64);
+ const __m128i k__cospi_p27_p05 = pair_set_epi16(+cospi_27_64, cospi_5_64);
+ const __m128i k__cospi_p11_p21 = pair_set_epi16(+cospi_11_64, cospi_21_64);
+ const __m128i k__cospi_p19_p13 = pair_set_epi16(+cospi_19_64, cospi_13_64);
+ const __m128i k__cospi_p03_p29 = pair_set_epi16(+cospi_3_64, cospi_29_64);
+ const __m128i k__cospi_m29_p03 = pair_set_epi16(-cospi_29_64, cospi_3_64);
+ const __m128i k__cospi_m13_p19 = pair_set_epi16(-cospi_13_64, cospi_19_64);
+ const __m128i k__cospi_m21_p11 = pair_set_epi16(-cospi_21_64, cospi_11_64);
+ const __m128i k__cospi_m05_p27 = pair_set_epi16(-cospi_5_64, cospi_27_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i kZero = _mm_set1_epi16(0);
+ const __m128i kOne = _mm_set1_epi16(1);
+ // Do the two transform/transpose passes
+ int pass;
+#if DCT_HIGH_BIT_DEPTH
+ int overflow;
+#endif
+ for (pass = 0; pass < 2; ++pass) {
+ // We process eight columns (transposed rows in second pass) at a time.
+ int column_start;
+ for (column_start = 0; column_start < 32; column_start += 8) {
+ __m128i step1[32];
+ __m128i step2[32];
+ __m128i step3[32];
+ __m128i out[32];
+ // Stage 1
+ // Note: even though all the loads below are aligned, using the aligned
+ // intrinsic make the code slightly slower.
+ if (0 == pass) {
+ const int16_t *in = &input[column_start];
+ // step1[i] = (in[ 0 * stride] + in[(32 - 1) * stride]) << 2;
+ // Note: the next four blocks could be in a loop. That would help the
+ // instruction cache but is actually slower.
+ {
+ const int16_t *ina = in + 0 * str1;
+ const int16_t *inb = in + 31 * str1;
+ __m128i *step1a = &step1[0];
+ __m128i *step1b = &step1[31];
+ const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina));
+ const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1));
+ const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2));
+ const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3));
+ const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3));
+ const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2));
+ const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1));
+ const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb));
+ step1a[0] = _mm_add_epi16(ina0, inb0);
+ step1a[1] = _mm_add_epi16(ina1, inb1);
+ step1a[2] = _mm_add_epi16(ina2, inb2);
+ step1a[3] = _mm_add_epi16(ina3, inb3);
+ step1b[-3] = _mm_sub_epi16(ina3, inb3);
+ step1b[-2] = _mm_sub_epi16(ina2, inb2);
+ step1b[-1] = _mm_sub_epi16(ina1, inb1);
+ step1b[-0] = _mm_sub_epi16(ina0, inb0);
+ step1a[0] = _mm_slli_epi16(step1a[0], 2);
+ step1a[1] = _mm_slli_epi16(step1a[1], 2);
+ step1a[2] = _mm_slli_epi16(step1a[2], 2);
+ step1a[3] = _mm_slli_epi16(step1a[3], 2);
+ step1b[-3] = _mm_slli_epi16(step1b[-3], 2);
+ step1b[-2] = _mm_slli_epi16(step1b[-2], 2);
+ step1b[-1] = _mm_slli_epi16(step1b[-1], 2);
+ step1b[-0] = _mm_slli_epi16(step1b[-0], 2);
+ }
+ {
+ const int16_t *ina = in + 4 * str1;
+ const int16_t *inb = in + 27 * str1;
+ __m128i *step1a = &step1[4];
+ __m128i *step1b = &step1[27];
+ const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina));
+ const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1));
+ const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2));
+ const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3));
+ const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3));
+ const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2));
+ const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1));
+ const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb));
+ step1a[0] = _mm_add_epi16(ina0, inb0);
+ step1a[1] = _mm_add_epi16(ina1, inb1);
+ step1a[2] = _mm_add_epi16(ina2, inb2);
+ step1a[3] = _mm_add_epi16(ina3, inb3);
+ step1b[-3] = _mm_sub_epi16(ina3, inb3);
+ step1b[-2] = _mm_sub_epi16(ina2, inb2);
+ step1b[-1] = _mm_sub_epi16(ina1, inb1);
+ step1b[-0] = _mm_sub_epi16(ina0, inb0);
+ step1a[0] = _mm_slli_epi16(step1a[0], 2);
+ step1a[1] = _mm_slli_epi16(step1a[1], 2);
+ step1a[2] = _mm_slli_epi16(step1a[2], 2);
+ step1a[3] = _mm_slli_epi16(step1a[3], 2);
+ step1b[-3] = _mm_slli_epi16(step1b[-3], 2);
+ step1b[-2] = _mm_slli_epi16(step1b[-2], 2);
+ step1b[-1] = _mm_slli_epi16(step1b[-1], 2);
+ step1b[-0] = _mm_slli_epi16(step1b[-0], 2);
+ }
+ {
+ const int16_t *ina = in + 8 * str1;
+ const int16_t *inb = in + 23 * str1;
+ __m128i *step1a = &step1[8];
+ __m128i *step1b = &step1[23];
+ const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina));
+ const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1));
+ const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2));
+ const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3));
+ const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3));
+ const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2));
+ const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1));
+ const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb));
+ step1a[0] = _mm_add_epi16(ina0, inb0);
+ step1a[1] = _mm_add_epi16(ina1, inb1);
+ step1a[2] = _mm_add_epi16(ina2, inb2);
+ step1a[3] = _mm_add_epi16(ina3, inb3);
+ step1b[-3] = _mm_sub_epi16(ina3, inb3);
+ step1b[-2] = _mm_sub_epi16(ina2, inb2);
+ step1b[-1] = _mm_sub_epi16(ina1, inb1);
+ step1b[-0] = _mm_sub_epi16(ina0, inb0);
+ step1a[0] = _mm_slli_epi16(step1a[0], 2);
+ step1a[1] = _mm_slli_epi16(step1a[1], 2);
+ step1a[2] = _mm_slli_epi16(step1a[2], 2);
+ step1a[3] = _mm_slli_epi16(step1a[3], 2);
+ step1b[-3] = _mm_slli_epi16(step1b[-3], 2);
+ step1b[-2] = _mm_slli_epi16(step1b[-2], 2);
+ step1b[-1] = _mm_slli_epi16(step1b[-1], 2);
+ step1b[-0] = _mm_slli_epi16(step1b[-0], 2);
+ }
+ {
+ const int16_t *ina = in + 12 * str1;
+ const int16_t *inb = in + 19 * str1;
+ __m128i *step1a = &step1[12];
+ __m128i *step1b = &step1[19];
+ const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina));
+ const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1));
+ const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2));
+ const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3));
+ const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3));
+ const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2));
+ const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1));
+ const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb));
+ step1a[0] = _mm_add_epi16(ina0, inb0);
+ step1a[1] = _mm_add_epi16(ina1, inb1);
+ step1a[2] = _mm_add_epi16(ina2, inb2);
+ step1a[3] = _mm_add_epi16(ina3, inb3);
+ step1b[-3] = _mm_sub_epi16(ina3, inb3);
+ step1b[-2] = _mm_sub_epi16(ina2, inb2);
+ step1b[-1] = _mm_sub_epi16(ina1, inb1);
+ step1b[-0] = _mm_sub_epi16(ina0, inb0);
+ step1a[0] = _mm_slli_epi16(step1a[0], 2);
+ step1a[1] = _mm_slli_epi16(step1a[1], 2);
+ step1a[2] = _mm_slli_epi16(step1a[2], 2);
+ step1a[3] = _mm_slli_epi16(step1a[3], 2);
+ step1b[-3] = _mm_slli_epi16(step1b[-3], 2);
+ step1b[-2] = _mm_slli_epi16(step1b[-2], 2);
+ step1b[-1] = _mm_slli_epi16(step1b[-1], 2);
+ step1b[-0] = _mm_slli_epi16(step1b[-0], 2);
+ }
+ } else {
+ int16_t *in = &intermediate[column_start];
+ // step1[i] = in[ 0 * 32] + in[(32 - 1) * 32];
+ // Note: using the same approach as above to have common offset is
+ // counter-productive as all offsets can be calculated at compile
+ // time.
+ // Note: the next four blocks could be in a loop. That would help the
+ // instruction cache but is actually slower.
+ {
+ __m128i in00 = _mm_loadu_si128((const __m128i *)(in + 0 * 32));
+ __m128i in01 = _mm_loadu_si128((const __m128i *)(in + 1 * 32));
+ __m128i in02 = _mm_loadu_si128((const __m128i *)(in + 2 * 32));
+ __m128i in03 = _mm_loadu_si128((const __m128i *)(in + 3 * 32));
+ __m128i in28 = _mm_loadu_si128((const __m128i *)(in + 28 * 32));
+ __m128i in29 = _mm_loadu_si128((const __m128i *)(in + 29 * 32));
+ __m128i in30 = _mm_loadu_si128((const __m128i *)(in + 30 * 32));
+ __m128i in31 = _mm_loadu_si128((const __m128i *)(in + 31 * 32));
+ step1[0] = ADD_EPI16(in00, in31);
+ step1[1] = ADD_EPI16(in01, in30);
+ step1[2] = ADD_EPI16(in02, in29);
+ step1[3] = ADD_EPI16(in03, in28);
+ step1[28] = SUB_EPI16(in03, in28);
+ step1[29] = SUB_EPI16(in02, in29);
+ step1[30] = SUB_EPI16(in01, in30);
+ step1[31] = SUB_EPI16(in00, in31);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x8(&step1[0], &step1[1], &step1[2],
+ &step1[3], &step1[28], &step1[29],
+ &step1[30], &step1[31]);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ __m128i in04 = _mm_loadu_si128((const __m128i *)(in + 4 * 32));
+ __m128i in05 = _mm_loadu_si128((const __m128i *)(in + 5 * 32));
+ __m128i in06 = _mm_loadu_si128((const __m128i *)(in + 6 * 32));
+ __m128i in07 = _mm_loadu_si128((const __m128i *)(in + 7 * 32));
+ __m128i in24 = _mm_loadu_si128((const __m128i *)(in + 24 * 32));
+ __m128i in25 = _mm_loadu_si128((const __m128i *)(in + 25 * 32));
+ __m128i in26 = _mm_loadu_si128((const __m128i *)(in + 26 * 32));
+ __m128i in27 = _mm_loadu_si128((const __m128i *)(in + 27 * 32));
+ step1[4] = ADD_EPI16(in04, in27);
+ step1[5] = ADD_EPI16(in05, in26);
+ step1[6] = ADD_EPI16(in06, in25);
+ step1[7] = ADD_EPI16(in07, in24);
+ step1[24] = SUB_EPI16(in07, in24);
+ step1[25] = SUB_EPI16(in06, in25);
+ step1[26] = SUB_EPI16(in05, in26);
+ step1[27] = SUB_EPI16(in04, in27);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x8(&step1[4], &step1[5], &step1[6],
+ &step1[7], &step1[24], &step1[25],
+ &step1[26], &step1[27]);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ __m128i in08 = _mm_loadu_si128((const __m128i *)(in + 8 * 32));
+ __m128i in09 = _mm_loadu_si128((const __m128i *)(in + 9 * 32));
+ __m128i in10 = _mm_loadu_si128((const __m128i *)(in + 10 * 32));
+ __m128i in11 = _mm_loadu_si128((const __m128i *)(in + 11 * 32));
+ __m128i in20 = _mm_loadu_si128((const __m128i *)(in + 20 * 32));
+ __m128i in21 = _mm_loadu_si128((const __m128i *)(in + 21 * 32));
+ __m128i in22 = _mm_loadu_si128((const __m128i *)(in + 22 * 32));
+ __m128i in23 = _mm_loadu_si128((const __m128i *)(in + 23 * 32));
+ step1[8] = ADD_EPI16(in08, in23);
+ step1[9] = ADD_EPI16(in09, in22);
+ step1[10] = ADD_EPI16(in10, in21);
+ step1[11] = ADD_EPI16(in11, in20);
+ step1[20] = SUB_EPI16(in11, in20);
+ step1[21] = SUB_EPI16(in10, in21);
+ step1[22] = SUB_EPI16(in09, in22);
+ step1[23] = SUB_EPI16(in08, in23);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x8(&step1[8], &step1[9], &step1[10],
+ &step1[11], &step1[20], &step1[21],
+ &step1[22], &step1[23]);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ __m128i in12 = _mm_loadu_si128((const __m128i *)(in + 12 * 32));
+ __m128i in13 = _mm_loadu_si128((const __m128i *)(in + 13 * 32));
+ __m128i in14 = _mm_loadu_si128((const __m128i *)(in + 14 * 32));
+ __m128i in15 = _mm_loadu_si128((const __m128i *)(in + 15 * 32));
+ __m128i in16 = _mm_loadu_si128((const __m128i *)(in + 16 * 32));
+ __m128i in17 = _mm_loadu_si128((const __m128i *)(in + 17 * 32));
+ __m128i in18 = _mm_loadu_si128((const __m128i *)(in + 18 * 32));
+ __m128i in19 = _mm_loadu_si128((const __m128i *)(in + 19 * 32));
+ step1[12] = ADD_EPI16(in12, in19);
+ step1[13] = ADD_EPI16(in13, in18);
+ step1[14] = ADD_EPI16(in14, in17);
+ step1[15] = ADD_EPI16(in15, in16);
+ step1[16] = SUB_EPI16(in15, in16);
+ step1[17] = SUB_EPI16(in14, in17);
+ step1[18] = SUB_EPI16(in13, in18);
+ step1[19] = SUB_EPI16(in12, in19);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x8(&step1[12], &step1[13], &step1[14],
+ &step1[15], &step1[16], &step1[17],
+ &step1[18], &step1[19]);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ }
+ // Stage 2
+ {
+ step2[0] = ADD_EPI16(step1[0], step1[15]);
+ step2[1] = ADD_EPI16(step1[1], step1[14]);
+ step2[2] = ADD_EPI16(step1[2], step1[13]);
+ step2[3] = ADD_EPI16(step1[3], step1[12]);
+ step2[4] = ADD_EPI16(step1[4], step1[11]);
+ step2[5] = ADD_EPI16(step1[5], step1[10]);
+ step2[6] = ADD_EPI16(step1[6], step1[9]);
+ step2[7] = ADD_EPI16(step1[7], step1[8]);
+ step2[8] = SUB_EPI16(step1[7], step1[8]);
+ step2[9] = SUB_EPI16(step1[6], step1[9]);
+ step2[10] = SUB_EPI16(step1[5], step1[10]);
+ step2[11] = SUB_EPI16(step1[4], step1[11]);
+ step2[12] = SUB_EPI16(step1[3], step1[12]);
+ step2[13] = SUB_EPI16(step1[2], step1[13]);
+ step2[14] = SUB_EPI16(step1[1], step1[14]);
+ step2[15] = SUB_EPI16(step1[0], step1[15]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x16(
+ &step2[0], &step2[1], &step2[2], &step2[3], &step2[4], &step2[5],
+ &step2[6], &step2[7], &step2[8], &step2[9], &step2[10], &step2[11],
+ &step2[12], &step2[13], &step2[14], &step2[15]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ const __m128i s2_20_0 = _mm_unpacklo_epi16(step1[27], step1[20]);
+ const __m128i s2_20_1 = _mm_unpackhi_epi16(step1[27], step1[20]);
+ const __m128i s2_21_0 = _mm_unpacklo_epi16(step1[26], step1[21]);
+ const __m128i s2_21_1 = _mm_unpackhi_epi16(step1[26], step1[21]);
+ const __m128i s2_22_0 = _mm_unpacklo_epi16(step1[25], step1[22]);
+ const __m128i s2_22_1 = _mm_unpackhi_epi16(step1[25], step1[22]);
+ const __m128i s2_23_0 = _mm_unpacklo_epi16(step1[24], step1[23]);
+ const __m128i s2_23_1 = _mm_unpackhi_epi16(step1[24], step1[23]);
+ const __m128i s2_20_2 = _mm_madd_epi16(s2_20_0, k__cospi_p16_m16);
+ const __m128i s2_20_3 = _mm_madd_epi16(s2_20_1, k__cospi_p16_m16);
+ const __m128i s2_21_2 = _mm_madd_epi16(s2_21_0, k__cospi_p16_m16);
+ const __m128i s2_21_3 = _mm_madd_epi16(s2_21_1, k__cospi_p16_m16);
+ const __m128i s2_22_2 = _mm_madd_epi16(s2_22_0, k__cospi_p16_m16);
+ const __m128i s2_22_3 = _mm_madd_epi16(s2_22_1, k__cospi_p16_m16);
+ const __m128i s2_23_2 = _mm_madd_epi16(s2_23_0, k__cospi_p16_m16);
+ const __m128i s2_23_3 = _mm_madd_epi16(s2_23_1, k__cospi_p16_m16);
+ const __m128i s2_24_2 = _mm_madd_epi16(s2_23_0, k__cospi_p16_p16);
+ const __m128i s2_24_3 = _mm_madd_epi16(s2_23_1, k__cospi_p16_p16);
+ const __m128i s2_25_2 = _mm_madd_epi16(s2_22_0, k__cospi_p16_p16);
+ const __m128i s2_25_3 = _mm_madd_epi16(s2_22_1, k__cospi_p16_p16);
+ const __m128i s2_26_2 = _mm_madd_epi16(s2_21_0, k__cospi_p16_p16);
+ const __m128i s2_26_3 = _mm_madd_epi16(s2_21_1, k__cospi_p16_p16);
+ const __m128i s2_27_2 = _mm_madd_epi16(s2_20_0, k__cospi_p16_p16);
+ const __m128i s2_27_3 = _mm_madd_epi16(s2_20_1, k__cospi_p16_p16);
+ // dct_const_round_shift
+ const __m128i s2_20_4 = _mm_add_epi32(s2_20_2, k__DCT_CONST_ROUNDING);
+ const __m128i s2_20_5 = _mm_add_epi32(s2_20_3, k__DCT_CONST_ROUNDING);
+ const __m128i s2_21_4 = _mm_add_epi32(s2_21_2, k__DCT_CONST_ROUNDING);
+ const __m128i s2_21_5 = _mm_add_epi32(s2_21_3, k__DCT_CONST_ROUNDING);
+ const __m128i s2_22_4 = _mm_add_epi32(s2_22_2, k__DCT_CONST_ROUNDING);
+ const __m128i s2_22_5 = _mm_add_epi32(s2_22_3, k__DCT_CONST_ROUNDING);
+ const __m128i s2_23_4 = _mm_add_epi32(s2_23_2, k__DCT_CONST_ROUNDING);
+ const __m128i s2_23_5 = _mm_add_epi32(s2_23_3, k__DCT_CONST_ROUNDING);
+ const __m128i s2_24_4 = _mm_add_epi32(s2_24_2, k__DCT_CONST_ROUNDING);
+ const __m128i s2_24_5 = _mm_add_epi32(s2_24_3, k__DCT_CONST_ROUNDING);
+ const __m128i s2_25_4 = _mm_add_epi32(s2_25_2, k__DCT_CONST_ROUNDING);
+ const __m128i s2_25_5 = _mm_add_epi32(s2_25_3, k__DCT_CONST_ROUNDING);
+ const __m128i s2_26_4 = _mm_add_epi32(s2_26_2, k__DCT_CONST_ROUNDING);
+ const __m128i s2_26_5 = _mm_add_epi32(s2_26_3, k__DCT_CONST_ROUNDING);
+ const __m128i s2_27_4 = _mm_add_epi32(s2_27_2, k__DCT_CONST_ROUNDING);
+ const __m128i s2_27_5 = _mm_add_epi32(s2_27_3, k__DCT_CONST_ROUNDING);
+ const __m128i s2_20_6 = _mm_srai_epi32(s2_20_4, DCT_CONST_BITS);
+ const __m128i s2_20_7 = _mm_srai_epi32(s2_20_5, DCT_CONST_BITS);
+ const __m128i s2_21_6 = _mm_srai_epi32(s2_21_4, DCT_CONST_BITS);
+ const __m128i s2_21_7 = _mm_srai_epi32(s2_21_5, DCT_CONST_BITS);
+ const __m128i s2_22_6 = _mm_srai_epi32(s2_22_4, DCT_CONST_BITS);
+ const __m128i s2_22_7 = _mm_srai_epi32(s2_22_5, DCT_CONST_BITS);
+ const __m128i s2_23_6 = _mm_srai_epi32(s2_23_4, DCT_CONST_BITS);
+ const __m128i s2_23_7 = _mm_srai_epi32(s2_23_5, DCT_CONST_BITS);
+ const __m128i s2_24_6 = _mm_srai_epi32(s2_24_4, DCT_CONST_BITS);
+ const __m128i s2_24_7 = _mm_srai_epi32(s2_24_5, DCT_CONST_BITS);
+ const __m128i s2_25_6 = _mm_srai_epi32(s2_25_4, DCT_CONST_BITS);
+ const __m128i s2_25_7 = _mm_srai_epi32(s2_25_5, DCT_CONST_BITS);
+ const __m128i s2_26_6 = _mm_srai_epi32(s2_26_4, DCT_CONST_BITS);
+ const __m128i s2_26_7 = _mm_srai_epi32(s2_26_5, DCT_CONST_BITS);
+ const __m128i s2_27_6 = _mm_srai_epi32(s2_27_4, DCT_CONST_BITS);
+ const __m128i s2_27_7 = _mm_srai_epi32(s2_27_5, DCT_CONST_BITS);
+ // Combine
+ step2[20] = _mm_packs_epi32(s2_20_6, s2_20_7);
+ step2[21] = _mm_packs_epi32(s2_21_6, s2_21_7);
+ step2[22] = _mm_packs_epi32(s2_22_6, s2_22_7);
+ step2[23] = _mm_packs_epi32(s2_23_6, s2_23_7);
+ step2[24] = _mm_packs_epi32(s2_24_6, s2_24_7);
+ step2[25] = _mm_packs_epi32(s2_25_6, s2_25_7);
+ step2[26] = _mm_packs_epi32(s2_26_6, s2_26_7);
+ step2[27] = _mm_packs_epi32(s2_27_6, s2_27_7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x8(&step2[20], &step2[21], &step2[22],
+ &step2[23], &step2[24], &step2[25],
+ &step2[26], &step2[27]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+
+#if !FDCT32x32_HIGH_PRECISION
+ // Damp the magnitude with a rounded divide-by-4 ((x + 1 + (x < 0)) >> 2)
+ // so that the intermediate values stay within the range of 16 bits.
+ if (1 == pass) {
+ __m128i s3_00_0 = _mm_cmplt_epi16(step2[0], kZero);
+ __m128i s3_01_0 = _mm_cmplt_epi16(step2[1], kZero);
+ __m128i s3_02_0 = _mm_cmplt_epi16(step2[2], kZero);
+ __m128i s3_03_0 = _mm_cmplt_epi16(step2[3], kZero);
+ __m128i s3_04_0 = _mm_cmplt_epi16(step2[4], kZero);
+ __m128i s3_05_0 = _mm_cmplt_epi16(step2[5], kZero);
+ __m128i s3_06_0 = _mm_cmplt_epi16(step2[6], kZero);
+ __m128i s3_07_0 = _mm_cmplt_epi16(step2[7], kZero);
+ __m128i s2_08_0 = _mm_cmplt_epi16(step2[8], kZero);
+ __m128i s2_09_0 = _mm_cmplt_epi16(step2[9], kZero);
+ __m128i s3_10_0 = _mm_cmplt_epi16(step2[10], kZero);
+ __m128i s3_11_0 = _mm_cmplt_epi16(step2[11], kZero);
+ __m128i s3_12_0 = _mm_cmplt_epi16(step2[12], kZero);
+ __m128i s3_13_0 = _mm_cmplt_epi16(step2[13], kZero);
+ __m128i s2_14_0 = _mm_cmplt_epi16(step2[14], kZero);
+ __m128i s2_15_0 = _mm_cmplt_epi16(step2[15], kZero);
+ __m128i s3_16_0 = _mm_cmplt_epi16(step1[16], kZero);
+ __m128i s3_17_0 = _mm_cmplt_epi16(step1[17], kZero);
+ __m128i s3_18_0 = _mm_cmplt_epi16(step1[18], kZero);
+ __m128i s3_19_0 = _mm_cmplt_epi16(step1[19], kZero);
+ __m128i s3_20_0 = _mm_cmplt_epi16(step2[20], kZero);
+ __m128i s3_21_0 = _mm_cmplt_epi16(step2[21], kZero);
+ __m128i s3_22_0 = _mm_cmplt_epi16(step2[22], kZero);
+ __m128i s3_23_0 = _mm_cmplt_epi16(step2[23], kZero);
+ __m128i s3_24_0 = _mm_cmplt_epi16(step2[24], kZero);
+ __m128i s3_25_0 = _mm_cmplt_epi16(step2[25], kZero);
+ __m128i s3_26_0 = _mm_cmplt_epi16(step2[26], kZero);
+ __m128i s3_27_0 = _mm_cmplt_epi16(step2[27], kZero);
+ __m128i s3_28_0 = _mm_cmplt_epi16(step1[28], kZero);
+ __m128i s3_29_0 = _mm_cmplt_epi16(step1[29], kZero);
+ __m128i s3_30_0 = _mm_cmplt_epi16(step1[30], kZero);
+ __m128i s3_31_0 = _mm_cmplt_epi16(step1[31], kZero);
+
+ step2[0] = SUB_EPI16(step2[0], s3_00_0);
+ step2[1] = SUB_EPI16(step2[1], s3_01_0);
+ step2[2] = SUB_EPI16(step2[2], s3_02_0);
+ step2[3] = SUB_EPI16(step2[3], s3_03_0);
+ step2[4] = SUB_EPI16(step2[4], s3_04_0);
+ step2[5] = SUB_EPI16(step2[5], s3_05_0);
+ step2[6] = SUB_EPI16(step2[6], s3_06_0);
+ step2[7] = SUB_EPI16(step2[7], s3_07_0);
+ step2[8] = SUB_EPI16(step2[8], s2_08_0);
+ step2[9] = SUB_EPI16(step2[9], s2_09_0);
+ step2[10] = SUB_EPI16(step2[10], s3_10_0);
+ step2[11] = SUB_EPI16(step2[11], s3_11_0);
+ step2[12] = SUB_EPI16(step2[12], s3_12_0);
+ step2[13] = SUB_EPI16(step2[13], s3_13_0);
+ step2[14] = SUB_EPI16(step2[14], s2_14_0);
+ step2[15] = SUB_EPI16(step2[15], s2_15_0);
+ step1[16] = SUB_EPI16(step1[16], s3_16_0);
+ step1[17] = SUB_EPI16(step1[17], s3_17_0);
+ step1[18] = SUB_EPI16(step1[18], s3_18_0);
+ step1[19] = SUB_EPI16(step1[19], s3_19_0);
+ step2[20] = SUB_EPI16(step2[20], s3_20_0);
+ step2[21] = SUB_EPI16(step2[21], s3_21_0);
+ step2[22] = SUB_EPI16(step2[22], s3_22_0);
+ step2[23] = SUB_EPI16(step2[23], s3_23_0);
+ step2[24] = SUB_EPI16(step2[24], s3_24_0);
+ step2[25] = SUB_EPI16(step2[25], s3_25_0);
+ step2[26] = SUB_EPI16(step2[26], s3_26_0);
+ step2[27] = SUB_EPI16(step2[27], s3_27_0);
+ step1[28] = SUB_EPI16(step1[28], s3_28_0);
+ step1[29] = SUB_EPI16(step1[29], s3_29_0);
+ step1[30] = SUB_EPI16(step1[30], s3_30_0);
+ step1[31] = SUB_EPI16(step1[31], s3_31_0);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x32(
+ &step2[0], &step2[1], &step2[2], &step2[3], &step2[4], &step2[5],
+ &step2[6], &step2[7], &step2[8], &step2[9], &step2[10], &step2[11],
+ &step2[12], &step2[13], &step2[14], &step2[15], &step1[16],
+ &step1[17], &step1[18], &step1[19], &step2[20], &step2[21],
+ &step2[22], &step2[23], &step2[24], &step2[25], &step2[26],
+ &step2[27], &step1[28], &step1[29], &step1[30], &step1[31]);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ step2[0] = _mm_add_epi16(step2[0], kOne);
+ step2[1] = _mm_add_epi16(step2[1], kOne);
+ step2[2] = _mm_add_epi16(step2[2], kOne);
+ step2[3] = _mm_add_epi16(step2[3], kOne);
+ step2[4] = _mm_add_epi16(step2[4], kOne);
+ step2[5] = _mm_add_epi16(step2[5], kOne);
+ step2[6] = _mm_add_epi16(step2[6], kOne);
+ step2[7] = _mm_add_epi16(step2[7], kOne);
+ step2[8] = _mm_add_epi16(step2[8], kOne);
+ step2[9] = _mm_add_epi16(step2[9], kOne);
+ step2[10] = _mm_add_epi16(step2[10], kOne);
+ step2[11] = _mm_add_epi16(step2[11], kOne);
+ step2[12] = _mm_add_epi16(step2[12], kOne);
+ step2[13] = _mm_add_epi16(step2[13], kOne);
+ step2[14] = _mm_add_epi16(step2[14], kOne);
+ step2[15] = _mm_add_epi16(step2[15], kOne);
+ step1[16] = _mm_add_epi16(step1[16], kOne);
+ step1[17] = _mm_add_epi16(step1[17], kOne);
+ step1[18] = _mm_add_epi16(step1[18], kOne);
+ step1[19] = _mm_add_epi16(step1[19], kOne);
+ step2[20] = _mm_add_epi16(step2[20], kOne);
+ step2[21] = _mm_add_epi16(step2[21], kOne);
+ step2[22] = _mm_add_epi16(step2[22], kOne);
+ step2[23] = _mm_add_epi16(step2[23], kOne);
+ step2[24] = _mm_add_epi16(step2[24], kOne);
+ step2[25] = _mm_add_epi16(step2[25], kOne);
+ step2[26] = _mm_add_epi16(step2[26], kOne);
+ step2[27] = _mm_add_epi16(step2[27], kOne);
+ step1[28] = _mm_add_epi16(step1[28], kOne);
+ step1[29] = _mm_add_epi16(step1[29], kOne);
+ step1[30] = _mm_add_epi16(step1[30], kOne);
+ step1[31] = _mm_add_epi16(step1[31], kOne);
+
+ step2[0] = _mm_srai_epi16(step2[0], 2);
+ step2[1] = _mm_srai_epi16(step2[1], 2);
+ step2[2] = _mm_srai_epi16(step2[2], 2);
+ step2[3] = _mm_srai_epi16(step2[3], 2);
+ step2[4] = _mm_srai_epi16(step2[4], 2);
+ step2[5] = _mm_srai_epi16(step2[5], 2);
+ step2[6] = _mm_srai_epi16(step2[6], 2);
+ step2[7] = _mm_srai_epi16(step2[7], 2);
+ step2[8] = _mm_srai_epi16(step2[8], 2);
+ step2[9] = _mm_srai_epi16(step2[9], 2);
+ step2[10] = _mm_srai_epi16(step2[10], 2);
+ step2[11] = _mm_srai_epi16(step2[11], 2);
+ step2[12] = _mm_srai_epi16(step2[12], 2);
+ step2[13] = _mm_srai_epi16(step2[13], 2);
+ step2[14] = _mm_srai_epi16(step2[14], 2);
+ step2[15] = _mm_srai_epi16(step2[15], 2);
+ step1[16] = _mm_srai_epi16(step1[16], 2);
+ step1[17] = _mm_srai_epi16(step1[17], 2);
+ step1[18] = _mm_srai_epi16(step1[18], 2);
+ step1[19] = _mm_srai_epi16(step1[19], 2);
+ step2[20] = _mm_srai_epi16(step2[20], 2);
+ step2[21] = _mm_srai_epi16(step2[21], 2);
+ step2[22] = _mm_srai_epi16(step2[22], 2);
+ step2[23] = _mm_srai_epi16(step2[23], 2);
+ step2[24] = _mm_srai_epi16(step2[24], 2);
+ step2[25] = _mm_srai_epi16(step2[25], 2);
+ step2[26] = _mm_srai_epi16(step2[26], 2);
+ step2[27] = _mm_srai_epi16(step2[27], 2);
+ step1[28] = _mm_srai_epi16(step1[28], 2);
+ step1[29] = _mm_srai_epi16(step1[29], 2);
+ step1[30] = _mm_srai_epi16(step1[30], 2);
+ step1[31] = _mm_srai_epi16(step1[31], 2);
+ }
+#endif // !FDCT32x32_HIGH_PRECISION
+
+#if FDCT32x32_HIGH_PRECISION
+ if (pass == 0) {
+#endif
+ // Stage 3
+ {
+ step3[0] = ADD_EPI16(step2[(8 - 1)], step2[0]);
+ step3[1] = ADD_EPI16(step2[(8 - 2)], step2[1]);
+ step3[2] = ADD_EPI16(step2[(8 - 3)], step2[2]);
+ step3[3] = ADD_EPI16(step2[(8 - 4)], step2[3]);
+ step3[4] = SUB_EPI16(step2[(8 - 5)], step2[4]);
+ step3[5] = SUB_EPI16(step2[(8 - 6)], step2[5]);
+ step3[6] = SUB_EPI16(step2[(8 - 7)], step2[6]);
+ step3[7] = SUB_EPI16(step2[(8 - 8)], step2[7]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x8(&step3[0], &step3[1], &step3[2],
+ &step3[3], &step3[4], &step3[5],
+ &step3[6], &step3[7]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ const __m128i s3_10_0 = _mm_unpacklo_epi16(step2[13], step2[10]);
+ const __m128i s3_10_1 = _mm_unpackhi_epi16(step2[13], step2[10]);
+ const __m128i s3_11_0 = _mm_unpacklo_epi16(step2[12], step2[11]);
+ const __m128i s3_11_1 = _mm_unpackhi_epi16(step2[12], step2[11]);
+ const __m128i s3_10_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_m16);
+ const __m128i s3_10_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_m16);
+ const __m128i s3_11_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_m16);
+ const __m128i s3_11_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_m16);
+ const __m128i s3_12_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_p16);
+ const __m128i s3_12_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_p16);
+ const __m128i s3_13_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_p16);
+ const __m128i s3_13_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_p16);
+ // dct_const_round_shift
+ const __m128i s3_10_4 = _mm_add_epi32(s3_10_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_10_5 = _mm_add_epi32(s3_10_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_11_4 = _mm_add_epi32(s3_11_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_11_5 = _mm_add_epi32(s3_11_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_12_4 = _mm_add_epi32(s3_12_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_12_5 = _mm_add_epi32(s3_12_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_13_4 = _mm_add_epi32(s3_13_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_13_5 = _mm_add_epi32(s3_13_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_10_6 = _mm_srai_epi32(s3_10_4, DCT_CONST_BITS);
+ const __m128i s3_10_7 = _mm_srai_epi32(s3_10_5, DCT_CONST_BITS);
+ const __m128i s3_11_6 = _mm_srai_epi32(s3_11_4, DCT_CONST_BITS);
+ const __m128i s3_11_7 = _mm_srai_epi32(s3_11_5, DCT_CONST_BITS);
+ const __m128i s3_12_6 = _mm_srai_epi32(s3_12_4, DCT_CONST_BITS);
+ const __m128i s3_12_7 = _mm_srai_epi32(s3_12_5, DCT_CONST_BITS);
+ const __m128i s3_13_6 = _mm_srai_epi32(s3_13_4, DCT_CONST_BITS);
+ const __m128i s3_13_7 = _mm_srai_epi32(s3_13_5, DCT_CONST_BITS);
+ // Combine
+ step3[10] = _mm_packs_epi32(s3_10_6, s3_10_7);
+ step3[11] = _mm_packs_epi32(s3_11_6, s3_11_7);
+ step3[12] = _mm_packs_epi32(s3_12_6, s3_12_7);
+ step3[13] = _mm_packs_epi32(s3_13_6, s3_13_7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x4(&step3[10], &step3[11], &step3[12],
+ &step3[13]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ step3[16] = ADD_EPI16(step2[23], step1[16]);
+ step3[17] = ADD_EPI16(step2[22], step1[17]);
+ step3[18] = ADD_EPI16(step2[21], step1[18]);
+ step3[19] = ADD_EPI16(step2[20], step1[19]);
+ step3[20] = SUB_EPI16(step1[19], step2[20]);
+ step3[21] = SUB_EPI16(step1[18], step2[21]);
+ step3[22] = SUB_EPI16(step1[17], step2[22]);
+ step3[23] = SUB_EPI16(step1[16], step2[23]);
+ step3[24] = SUB_EPI16(step1[31], step2[24]);
+ step3[25] = SUB_EPI16(step1[30], step2[25]);
+ step3[26] = SUB_EPI16(step1[29], step2[26]);
+ step3[27] = SUB_EPI16(step1[28], step2[27]);
+ step3[28] = ADD_EPI16(step2[27], step1[28]);
+ step3[29] = ADD_EPI16(step2[26], step1[29]);
+ step3[30] = ADD_EPI16(step2[25], step1[30]);
+ step3[31] = ADD_EPI16(step2[24], step1[31]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x16(
+ &step3[16], &step3[17], &step3[18], &step3[19], &step3[20],
+ &step3[21], &step3[22], &step3[23], &step3[24], &step3[25],
+ &step3[26], &step3[27], &step3[28], &step3[29], &step3[30],
+ &step3[31]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+
+ // Stage 4
+ {
+ step1[0] = ADD_EPI16(step3[3], step3[0]);
+ step1[1] = ADD_EPI16(step3[2], step3[1]);
+ step1[2] = SUB_EPI16(step3[1], step3[2]);
+ step1[3] = SUB_EPI16(step3[0], step3[3]);
+ step1[8] = ADD_EPI16(step3[11], step2[8]);
+ step1[9] = ADD_EPI16(step3[10], step2[9]);
+ step1[10] = SUB_EPI16(step2[9], step3[10]);
+ step1[11] = SUB_EPI16(step2[8], step3[11]);
+ step1[12] = SUB_EPI16(step2[15], step3[12]);
+ step1[13] = SUB_EPI16(step2[14], step3[13]);
+ step1[14] = ADD_EPI16(step3[13], step2[14]);
+ step1[15] = ADD_EPI16(step3[12], step2[15]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x16(
+ &step1[0], &step1[1], &step1[2], &step1[3], &step1[4], &step1[5],
+ &step1[6], &step1[7], &step1[8], &step1[9], &step1[10],
+ &step1[11], &step1[12], &step1[13], &step1[14], &step1[15]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ const __m128i s1_05_0 = _mm_unpacklo_epi16(step3[6], step3[5]);
+ const __m128i s1_05_1 = _mm_unpackhi_epi16(step3[6], step3[5]);
+ const __m128i s1_05_2 = _mm_madd_epi16(s1_05_0, k__cospi_p16_m16);
+ const __m128i s1_05_3 = _mm_madd_epi16(s1_05_1, k__cospi_p16_m16);
+ const __m128i s1_06_2 = _mm_madd_epi16(s1_05_0, k__cospi_p16_p16);
+ const __m128i s1_06_3 = _mm_madd_epi16(s1_05_1, k__cospi_p16_p16);
+ // dct_const_round_shift
+ const __m128i s1_05_4 = _mm_add_epi32(s1_05_2, k__DCT_CONST_ROUNDING);
+ const __m128i s1_05_5 = _mm_add_epi32(s1_05_3, k__DCT_CONST_ROUNDING);
+ const __m128i s1_06_4 = _mm_add_epi32(s1_06_2, k__DCT_CONST_ROUNDING);
+ const __m128i s1_06_5 = _mm_add_epi32(s1_06_3, k__DCT_CONST_ROUNDING);
+ const __m128i s1_05_6 = _mm_srai_epi32(s1_05_4, DCT_CONST_BITS);
+ const __m128i s1_05_7 = _mm_srai_epi32(s1_05_5, DCT_CONST_BITS);
+ const __m128i s1_06_6 = _mm_srai_epi32(s1_06_4, DCT_CONST_BITS);
+ const __m128i s1_06_7 = _mm_srai_epi32(s1_06_5, DCT_CONST_BITS);
+ // Combine
+ step1[5] = _mm_packs_epi32(s1_05_6, s1_05_7);
+ step1[6] = _mm_packs_epi32(s1_06_6, s1_06_7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x2(&step1[5], &step1[6]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ const __m128i s1_18_0 = _mm_unpacklo_epi16(step3[18], step3[29]);
+ const __m128i s1_18_1 = _mm_unpackhi_epi16(step3[18], step3[29]);
+ const __m128i s1_19_0 = _mm_unpacklo_epi16(step3[19], step3[28]);
+ const __m128i s1_19_1 = _mm_unpackhi_epi16(step3[19], step3[28]);
+ const __m128i s1_20_0 = _mm_unpacklo_epi16(step3[20], step3[27]);
+ const __m128i s1_20_1 = _mm_unpackhi_epi16(step3[20], step3[27]);
+ const __m128i s1_21_0 = _mm_unpacklo_epi16(step3[21], step3[26]);
+ const __m128i s1_21_1 = _mm_unpackhi_epi16(step3[21], step3[26]);
+ const __m128i s1_18_2 = _mm_madd_epi16(s1_18_0, k__cospi_m08_p24);
+ const __m128i s1_18_3 = _mm_madd_epi16(s1_18_1, k__cospi_m08_p24);
+ const __m128i s1_19_2 = _mm_madd_epi16(s1_19_0, k__cospi_m08_p24);
+ const __m128i s1_19_3 = _mm_madd_epi16(s1_19_1, k__cospi_m08_p24);
+ const __m128i s1_20_2 = _mm_madd_epi16(s1_20_0, k__cospi_m24_m08);
+ const __m128i s1_20_3 = _mm_madd_epi16(s1_20_1, k__cospi_m24_m08);
+ const __m128i s1_21_2 = _mm_madd_epi16(s1_21_0, k__cospi_m24_m08);
+ const __m128i s1_21_3 = _mm_madd_epi16(s1_21_1, k__cospi_m24_m08);
+ const __m128i s1_26_2 = _mm_madd_epi16(s1_21_0, k__cospi_m08_p24);
+ const __m128i s1_26_3 = _mm_madd_epi16(s1_21_1, k__cospi_m08_p24);
+ const __m128i s1_27_2 = _mm_madd_epi16(s1_20_0, k__cospi_m08_p24);
+ const __m128i s1_27_3 = _mm_madd_epi16(s1_20_1, k__cospi_m08_p24);
+ const __m128i s1_28_2 = _mm_madd_epi16(s1_19_0, k__cospi_p24_p08);
+ const __m128i s1_28_3 = _mm_madd_epi16(s1_19_1, k__cospi_p24_p08);
+ const __m128i s1_29_2 = _mm_madd_epi16(s1_18_0, k__cospi_p24_p08);
+ const __m128i s1_29_3 = _mm_madd_epi16(s1_18_1, k__cospi_p24_p08);
+ // dct_const_round_shift
+ const __m128i s1_18_4 = _mm_add_epi32(s1_18_2, k__DCT_CONST_ROUNDING);
+ const __m128i s1_18_5 = _mm_add_epi32(s1_18_3, k__DCT_CONST_ROUNDING);
+ const __m128i s1_19_4 = _mm_add_epi32(s1_19_2, k__DCT_CONST_ROUNDING);
+ const __m128i s1_19_5 = _mm_add_epi32(s1_19_3, k__DCT_CONST_ROUNDING);
+ const __m128i s1_20_4 = _mm_add_epi32(s1_20_2, k__DCT_CONST_ROUNDING);
+ const __m128i s1_20_5 = _mm_add_epi32(s1_20_3, k__DCT_CONST_ROUNDING);
+ const __m128i s1_21_4 = _mm_add_epi32(s1_21_2, k__DCT_CONST_ROUNDING);
+ const __m128i s1_21_5 = _mm_add_epi32(s1_21_3, k__DCT_CONST_ROUNDING);
+ const __m128i s1_26_4 = _mm_add_epi32(s1_26_2, k__DCT_CONST_ROUNDING);
+ const __m128i s1_26_5 = _mm_add_epi32(s1_26_3, k__DCT_CONST_ROUNDING);
+ const __m128i s1_27_4 = _mm_add_epi32(s1_27_2, k__DCT_CONST_ROUNDING);
+ const __m128i s1_27_5 = _mm_add_epi32(s1_27_3, k__DCT_CONST_ROUNDING);
+ const __m128i s1_28_4 = _mm_add_epi32(s1_28_2, k__DCT_CONST_ROUNDING);
+ const __m128i s1_28_5 = _mm_add_epi32(s1_28_3, k__DCT_CONST_ROUNDING);
+ const __m128i s1_29_4 = _mm_add_epi32(s1_29_2, k__DCT_CONST_ROUNDING);
+ const __m128i s1_29_5 = _mm_add_epi32(s1_29_3, k__DCT_CONST_ROUNDING);
+ const __m128i s1_18_6 = _mm_srai_epi32(s1_18_4, DCT_CONST_BITS);
+ const __m128i s1_18_7 = _mm_srai_epi32(s1_18_5, DCT_CONST_BITS);
+ const __m128i s1_19_6 = _mm_srai_epi32(s1_19_4, DCT_CONST_BITS);
+ const __m128i s1_19_7 = _mm_srai_epi32(s1_19_5, DCT_CONST_BITS);
+ const __m128i s1_20_6 = _mm_srai_epi32(s1_20_4, DCT_CONST_BITS);
+ const __m128i s1_20_7 = _mm_srai_epi32(s1_20_5, DCT_CONST_BITS);
+ const __m128i s1_21_6 = _mm_srai_epi32(s1_21_4, DCT_CONST_BITS);
+ const __m128i s1_21_7 = _mm_srai_epi32(s1_21_5, DCT_CONST_BITS);
+ const __m128i s1_26_6 = _mm_srai_epi32(s1_26_4, DCT_CONST_BITS);
+ const __m128i s1_26_7 = _mm_srai_epi32(s1_26_5, DCT_CONST_BITS);
+ const __m128i s1_27_6 = _mm_srai_epi32(s1_27_4, DCT_CONST_BITS);
+ const __m128i s1_27_7 = _mm_srai_epi32(s1_27_5, DCT_CONST_BITS);
+ const __m128i s1_28_6 = _mm_srai_epi32(s1_28_4, DCT_CONST_BITS);
+ const __m128i s1_28_7 = _mm_srai_epi32(s1_28_5, DCT_CONST_BITS);
+ const __m128i s1_29_6 = _mm_srai_epi32(s1_29_4, DCT_CONST_BITS);
+ const __m128i s1_29_7 = _mm_srai_epi32(s1_29_5, DCT_CONST_BITS);
+ // Combine
+ step1[18] = _mm_packs_epi32(s1_18_6, s1_18_7);
+ step1[19] = _mm_packs_epi32(s1_19_6, s1_19_7);
+ step1[20] = _mm_packs_epi32(s1_20_6, s1_20_7);
+ step1[21] = _mm_packs_epi32(s1_21_6, s1_21_7);
+ step1[26] = _mm_packs_epi32(s1_26_6, s1_26_7);
+ step1[27] = _mm_packs_epi32(s1_27_6, s1_27_7);
+ step1[28] = _mm_packs_epi32(s1_28_6, s1_28_7);
+ step1[29] = _mm_packs_epi32(s1_29_6, s1_29_7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x8(&step1[18], &step1[19], &step1[20],
+ &step1[21], &step1[26], &step1[27],
+ &step1[28], &step1[29]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ // Stage 5
+ {
+ step2[4] = ADD_EPI16(step1[5], step3[4]);
+ step2[5] = SUB_EPI16(step3[4], step1[5]);
+ step2[6] = SUB_EPI16(step3[7], step1[6]);
+ step2[7] = ADD_EPI16(step1[6], step3[7]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x4(&step2[4], &step2[5], &step2[6],
+ &step2[7]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ const __m128i out_00_0 = _mm_unpacklo_epi16(step1[0], step1[1]);
+ const __m128i out_00_1 = _mm_unpackhi_epi16(step1[0], step1[1]);
+ const __m128i out_08_0 = _mm_unpacklo_epi16(step1[2], step1[3]);
+ const __m128i out_08_1 = _mm_unpackhi_epi16(step1[2], step1[3]);
+ const __m128i out_00_2 = _mm_madd_epi16(out_00_0, k__cospi_p16_p16);
+ const __m128i out_00_3 = _mm_madd_epi16(out_00_1, k__cospi_p16_p16);
+ const __m128i out_16_2 = _mm_madd_epi16(out_00_0, k__cospi_p16_m16);
+ const __m128i out_16_3 = _mm_madd_epi16(out_00_1, k__cospi_p16_m16);
+ const __m128i out_08_2 = _mm_madd_epi16(out_08_0, k__cospi_p24_p08);
+ const __m128i out_08_3 = _mm_madd_epi16(out_08_1, k__cospi_p24_p08);
+ const __m128i out_24_2 = _mm_madd_epi16(out_08_0, k__cospi_m08_p24);
+ const __m128i out_24_3 = _mm_madd_epi16(out_08_1, k__cospi_m08_p24);
+ // dct_const_round_shift
+ const __m128i out_00_4 =
+ _mm_add_epi32(out_00_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_00_5 =
+ _mm_add_epi32(out_00_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_16_4 =
+ _mm_add_epi32(out_16_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_16_5 =
+ _mm_add_epi32(out_16_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_08_4 =
+ _mm_add_epi32(out_08_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_08_5 =
+ _mm_add_epi32(out_08_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_24_4 =
+ _mm_add_epi32(out_24_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_24_5 =
+ _mm_add_epi32(out_24_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_00_6 = _mm_srai_epi32(out_00_4, DCT_CONST_BITS);
+ const __m128i out_00_7 = _mm_srai_epi32(out_00_5, DCT_CONST_BITS);
+ const __m128i out_16_6 = _mm_srai_epi32(out_16_4, DCT_CONST_BITS);
+ const __m128i out_16_7 = _mm_srai_epi32(out_16_5, DCT_CONST_BITS);
+ const __m128i out_08_6 = _mm_srai_epi32(out_08_4, DCT_CONST_BITS);
+ const __m128i out_08_7 = _mm_srai_epi32(out_08_5, DCT_CONST_BITS);
+ const __m128i out_24_6 = _mm_srai_epi32(out_24_4, DCT_CONST_BITS);
+ const __m128i out_24_7 = _mm_srai_epi32(out_24_5, DCT_CONST_BITS);
+ // Combine
+ out[0] = _mm_packs_epi32(out_00_6, out_00_7);
+ out[16] = _mm_packs_epi32(out_16_6, out_16_7);
+ out[8] = _mm_packs_epi32(out_08_6, out_08_7);
+ out[24] = _mm_packs_epi32(out_24_6, out_24_7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x4(&out[0], &out[16], &out[8], &out[24]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ const __m128i s2_09_0 = _mm_unpacklo_epi16(step1[9], step1[14]);
+ const __m128i s2_09_1 = _mm_unpackhi_epi16(step1[9], step1[14]);
+ const __m128i s2_10_0 = _mm_unpacklo_epi16(step1[10], step1[13]);
+ const __m128i s2_10_1 = _mm_unpackhi_epi16(step1[10], step1[13]);
+ const __m128i s2_09_2 = _mm_madd_epi16(s2_09_0, k__cospi_m08_p24);
+ const __m128i s2_09_3 = _mm_madd_epi16(s2_09_1, k__cospi_m08_p24);
+ const __m128i s2_10_2 = _mm_madd_epi16(s2_10_0, k__cospi_m24_m08);
+ const __m128i s2_10_3 = _mm_madd_epi16(s2_10_1, k__cospi_m24_m08);
+ const __m128i s2_13_2 = _mm_madd_epi16(s2_10_0, k__cospi_m08_p24);
+ const __m128i s2_13_3 = _mm_madd_epi16(s2_10_1, k__cospi_m08_p24);
+ const __m128i s2_14_2 = _mm_madd_epi16(s2_09_0, k__cospi_p24_p08);
+ const __m128i s2_14_3 = _mm_madd_epi16(s2_09_1, k__cospi_p24_p08);
+ // dct_const_round_shift
+ const __m128i s2_09_4 = _mm_add_epi32(s2_09_2, k__DCT_CONST_ROUNDING);
+ const __m128i s2_09_5 = _mm_add_epi32(s2_09_3, k__DCT_CONST_ROUNDING);
+ const __m128i s2_10_4 = _mm_add_epi32(s2_10_2, k__DCT_CONST_ROUNDING);
+ const __m128i s2_10_5 = _mm_add_epi32(s2_10_3, k__DCT_CONST_ROUNDING);
+ const __m128i s2_13_4 = _mm_add_epi32(s2_13_2, k__DCT_CONST_ROUNDING);
+ const __m128i s2_13_5 = _mm_add_epi32(s2_13_3, k__DCT_CONST_ROUNDING);
+ const __m128i s2_14_4 = _mm_add_epi32(s2_14_2, k__DCT_CONST_ROUNDING);
+ const __m128i s2_14_5 = _mm_add_epi32(s2_14_3, k__DCT_CONST_ROUNDING);
+ const __m128i s2_09_6 = _mm_srai_epi32(s2_09_4, DCT_CONST_BITS);
+ const __m128i s2_09_7 = _mm_srai_epi32(s2_09_5, DCT_CONST_BITS);
+ const __m128i s2_10_6 = _mm_srai_epi32(s2_10_4, DCT_CONST_BITS);
+ const __m128i s2_10_7 = _mm_srai_epi32(s2_10_5, DCT_CONST_BITS);
+ const __m128i s2_13_6 = _mm_srai_epi32(s2_13_4, DCT_CONST_BITS);
+ const __m128i s2_13_7 = _mm_srai_epi32(s2_13_5, DCT_CONST_BITS);
+ const __m128i s2_14_6 = _mm_srai_epi32(s2_14_4, DCT_CONST_BITS);
+ const __m128i s2_14_7 = _mm_srai_epi32(s2_14_5, DCT_CONST_BITS);
+ // Combine
+ step2[9] = _mm_packs_epi32(s2_09_6, s2_09_7);
+ step2[10] = _mm_packs_epi32(s2_10_6, s2_10_7);
+ step2[13] = _mm_packs_epi32(s2_13_6, s2_13_7);
+ step2[14] = _mm_packs_epi32(s2_14_6, s2_14_7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x4(&step2[9], &step2[10], &step2[13],
+ &step2[14]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ step2[16] = ADD_EPI16(step1[19], step3[16]);
+ step2[17] = ADD_EPI16(step1[18], step3[17]);
+ step2[18] = SUB_EPI16(step3[17], step1[18]);
+ step2[19] = SUB_EPI16(step3[16], step1[19]);
+ step2[20] = SUB_EPI16(step3[23], step1[20]);
+ step2[21] = SUB_EPI16(step3[22], step1[21]);
+ step2[22] = ADD_EPI16(step1[21], step3[22]);
+ step2[23] = ADD_EPI16(step1[20], step3[23]);
+ step2[24] = ADD_EPI16(step1[27], step3[24]);
+ step2[25] = ADD_EPI16(step1[26], step3[25]);
+ step2[26] = SUB_EPI16(step3[25], step1[26]);
+ step2[27] = SUB_EPI16(step3[24], step1[27]);
+ step2[28] = SUB_EPI16(step3[31], step1[28]);
+ step2[29] = SUB_EPI16(step3[30], step1[29]);
+ step2[30] = ADD_EPI16(step1[29], step3[30]);
+ step2[31] = ADD_EPI16(step1[28], step3[31]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x16(
+ &step2[16], &step2[17], &step2[18], &step2[19], &step2[20],
+ &step2[21], &step2[22], &step2[23], &step2[24], &step2[25],
+ &step2[26], &step2[27], &step2[28], &step2[29], &step2[30],
+ &step2[31]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ // Stage 6
+ {
+ const __m128i out_04_0 = _mm_unpacklo_epi16(step2[4], step2[7]);
+ const __m128i out_04_1 = _mm_unpackhi_epi16(step2[4], step2[7]);
+ const __m128i out_20_0 = _mm_unpacklo_epi16(step2[5], step2[6]);
+ const __m128i out_20_1 = _mm_unpackhi_epi16(step2[5], step2[6]);
+ const __m128i out_12_0 = _mm_unpacklo_epi16(step2[5], step2[6]);
+ const __m128i out_12_1 = _mm_unpackhi_epi16(step2[5], step2[6]);
+ const __m128i out_28_0 = _mm_unpacklo_epi16(step2[4], step2[7]);
+ const __m128i out_28_1 = _mm_unpackhi_epi16(step2[4], step2[7]);
+ const __m128i out_04_2 = _mm_madd_epi16(out_04_0, k__cospi_p28_p04);
+ const __m128i out_04_3 = _mm_madd_epi16(out_04_1, k__cospi_p28_p04);
+ const __m128i out_20_2 = _mm_madd_epi16(out_20_0, k__cospi_p12_p20);
+ const __m128i out_20_3 = _mm_madd_epi16(out_20_1, k__cospi_p12_p20);
+ const __m128i out_12_2 = _mm_madd_epi16(out_12_0, k__cospi_m20_p12);
+ const __m128i out_12_3 = _mm_madd_epi16(out_12_1, k__cospi_m20_p12);
+ const __m128i out_28_2 = _mm_madd_epi16(out_28_0, k__cospi_m04_p28);
+ const __m128i out_28_3 = _mm_madd_epi16(out_28_1, k__cospi_m04_p28);
+ // dct_const_round_shift
+ const __m128i out_04_4 =
+ _mm_add_epi32(out_04_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_04_5 =
+ _mm_add_epi32(out_04_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_20_4 =
+ _mm_add_epi32(out_20_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_20_5 =
+ _mm_add_epi32(out_20_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_12_4 =
+ _mm_add_epi32(out_12_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_12_5 =
+ _mm_add_epi32(out_12_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_28_4 =
+ _mm_add_epi32(out_28_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_28_5 =
+ _mm_add_epi32(out_28_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_04_6 = _mm_srai_epi32(out_04_4, DCT_CONST_BITS);
+ const __m128i out_04_7 = _mm_srai_epi32(out_04_5, DCT_CONST_BITS);
+ const __m128i out_20_6 = _mm_srai_epi32(out_20_4, DCT_CONST_BITS);
+ const __m128i out_20_7 = _mm_srai_epi32(out_20_5, DCT_CONST_BITS);
+ const __m128i out_12_6 = _mm_srai_epi32(out_12_4, DCT_CONST_BITS);
+ const __m128i out_12_7 = _mm_srai_epi32(out_12_5, DCT_CONST_BITS);
+ const __m128i out_28_6 = _mm_srai_epi32(out_28_4, DCT_CONST_BITS);
+ const __m128i out_28_7 = _mm_srai_epi32(out_28_5, DCT_CONST_BITS);
+ // Combine
+ out[4] = _mm_packs_epi32(out_04_6, out_04_7);
+ out[20] = _mm_packs_epi32(out_20_6, out_20_7);
+ out[12] = _mm_packs_epi32(out_12_6, out_12_7);
+ out[28] = _mm_packs_epi32(out_28_6, out_28_7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x4(&out[4], &out[20], &out[12], &out[28]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ step3[8] = ADD_EPI16(step2[9], step1[8]);
+ step3[9] = SUB_EPI16(step1[8], step2[9]);
+ step3[10] = SUB_EPI16(step1[11], step2[10]);
+ step3[11] = ADD_EPI16(step2[10], step1[11]);
+ step3[12] = ADD_EPI16(step2[13], step1[12]);
+ step3[13] = SUB_EPI16(step1[12], step2[13]);
+ step3[14] = SUB_EPI16(step1[15], step2[14]);
+ step3[15] = ADD_EPI16(step2[14], step1[15]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x8(&step3[8], &step3[9], &step3[10],
+ &step3[11], &step3[12], &step3[13],
+ &step3[14], &step3[15]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ const __m128i s3_17_0 = _mm_unpacklo_epi16(step2[17], step2[30]);
+ const __m128i s3_17_1 = _mm_unpackhi_epi16(step2[17], step2[30]);
+ const __m128i s3_18_0 = _mm_unpacklo_epi16(step2[18], step2[29]);
+ const __m128i s3_18_1 = _mm_unpackhi_epi16(step2[18], step2[29]);
+ const __m128i s3_21_0 = _mm_unpacklo_epi16(step2[21], step2[26]);
+ const __m128i s3_21_1 = _mm_unpackhi_epi16(step2[21], step2[26]);
+ const __m128i s3_22_0 = _mm_unpacklo_epi16(step2[22], step2[25]);
+ const __m128i s3_22_1 = _mm_unpackhi_epi16(step2[22], step2[25]);
+ const __m128i s3_17_2 = _mm_madd_epi16(s3_17_0, k__cospi_m04_p28);
+ const __m128i s3_17_3 = _mm_madd_epi16(s3_17_1, k__cospi_m04_p28);
+ const __m128i s3_18_2 = _mm_madd_epi16(s3_18_0, k__cospi_m28_m04);
+ const __m128i s3_18_3 = _mm_madd_epi16(s3_18_1, k__cospi_m28_m04);
+ const __m128i s3_21_2 = _mm_madd_epi16(s3_21_0, k__cospi_m20_p12);
+ const __m128i s3_21_3 = _mm_madd_epi16(s3_21_1, k__cospi_m20_p12);
+ const __m128i s3_22_2 = _mm_madd_epi16(s3_22_0, k__cospi_m12_m20);
+ const __m128i s3_22_3 = _mm_madd_epi16(s3_22_1, k__cospi_m12_m20);
+ const __m128i s3_25_2 = _mm_madd_epi16(s3_22_0, k__cospi_m20_p12);
+ const __m128i s3_25_3 = _mm_madd_epi16(s3_22_1, k__cospi_m20_p12);
+ const __m128i s3_26_2 = _mm_madd_epi16(s3_21_0, k__cospi_p12_p20);
+ const __m128i s3_26_3 = _mm_madd_epi16(s3_21_1, k__cospi_p12_p20);
+ const __m128i s3_29_2 = _mm_madd_epi16(s3_18_0, k__cospi_m04_p28);
+ const __m128i s3_29_3 = _mm_madd_epi16(s3_18_1, k__cospi_m04_p28);
+ const __m128i s3_30_2 = _mm_madd_epi16(s3_17_0, k__cospi_p28_p04);
+ const __m128i s3_30_3 = _mm_madd_epi16(s3_17_1, k__cospi_p28_p04);
+ // dct_const_round_shift
+ const __m128i s3_17_4 = _mm_add_epi32(s3_17_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_17_5 = _mm_add_epi32(s3_17_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_18_4 = _mm_add_epi32(s3_18_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_18_5 = _mm_add_epi32(s3_18_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_21_4 = _mm_add_epi32(s3_21_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_21_5 = _mm_add_epi32(s3_21_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_22_4 = _mm_add_epi32(s3_22_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_22_5 = _mm_add_epi32(s3_22_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_17_6 = _mm_srai_epi32(s3_17_4, DCT_CONST_BITS);
+ const __m128i s3_17_7 = _mm_srai_epi32(s3_17_5, DCT_CONST_BITS);
+ const __m128i s3_18_6 = _mm_srai_epi32(s3_18_4, DCT_CONST_BITS);
+ const __m128i s3_18_7 = _mm_srai_epi32(s3_18_5, DCT_CONST_BITS);
+ const __m128i s3_21_6 = _mm_srai_epi32(s3_21_4, DCT_CONST_BITS);
+ const __m128i s3_21_7 = _mm_srai_epi32(s3_21_5, DCT_CONST_BITS);
+ const __m128i s3_22_6 = _mm_srai_epi32(s3_22_4, DCT_CONST_BITS);
+ const __m128i s3_22_7 = _mm_srai_epi32(s3_22_5, DCT_CONST_BITS);
+ const __m128i s3_25_4 = _mm_add_epi32(s3_25_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_25_5 = _mm_add_epi32(s3_25_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_26_4 = _mm_add_epi32(s3_26_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_26_5 = _mm_add_epi32(s3_26_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_29_4 = _mm_add_epi32(s3_29_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_29_5 = _mm_add_epi32(s3_29_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_30_4 = _mm_add_epi32(s3_30_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_30_5 = _mm_add_epi32(s3_30_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_25_6 = _mm_srai_epi32(s3_25_4, DCT_CONST_BITS);
+ const __m128i s3_25_7 = _mm_srai_epi32(s3_25_5, DCT_CONST_BITS);
+ const __m128i s3_26_6 = _mm_srai_epi32(s3_26_4, DCT_CONST_BITS);
+ const __m128i s3_26_7 = _mm_srai_epi32(s3_26_5, DCT_CONST_BITS);
+ const __m128i s3_29_6 = _mm_srai_epi32(s3_29_4, DCT_CONST_BITS);
+ const __m128i s3_29_7 = _mm_srai_epi32(s3_29_5, DCT_CONST_BITS);
+ const __m128i s3_30_6 = _mm_srai_epi32(s3_30_4, DCT_CONST_BITS);
+ const __m128i s3_30_7 = _mm_srai_epi32(s3_30_5, DCT_CONST_BITS);
+ // Combine
+ step3[17] = _mm_packs_epi32(s3_17_6, s3_17_7);
+ step3[18] = _mm_packs_epi32(s3_18_6, s3_18_7);
+ step3[21] = _mm_packs_epi32(s3_21_6, s3_21_7);
+ step3[22] = _mm_packs_epi32(s3_22_6, s3_22_7);
+ // Combine
+ step3[25] = _mm_packs_epi32(s3_25_6, s3_25_7);
+ step3[26] = _mm_packs_epi32(s3_26_6, s3_26_7);
+ step3[29] = _mm_packs_epi32(s3_29_6, s3_29_7);
+ step3[30] = _mm_packs_epi32(s3_30_6, s3_30_7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x8(&step3[17], &step3[18], &step3[21],
+ &step3[22], &step3[25], &step3[26],
+ &step3[29], &step3[30]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ // Stage 7
+ {
+ const __m128i out_02_0 = _mm_unpacklo_epi16(step3[8], step3[15]);
+ const __m128i out_02_1 = _mm_unpackhi_epi16(step3[8], step3[15]);
+ const __m128i out_18_0 = _mm_unpacklo_epi16(step3[9], step3[14]);
+ const __m128i out_18_1 = _mm_unpackhi_epi16(step3[9], step3[14]);
+ const __m128i out_10_0 = _mm_unpacklo_epi16(step3[10], step3[13]);
+ const __m128i out_10_1 = _mm_unpackhi_epi16(step3[10], step3[13]);
+ const __m128i out_26_0 = _mm_unpacklo_epi16(step3[11], step3[12]);
+ const __m128i out_26_1 = _mm_unpackhi_epi16(step3[11], step3[12]);
+ const __m128i out_02_2 = _mm_madd_epi16(out_02_0, k__cospi_p30_p02);
+ const __m128i out_02_3 = _mm_madd_epi16(out_02_1, k__cospi_p30_p02);
+ const __m128i out_18_2 = _mm_madd_epi16(out_18_0, k__cospi_p14_p18);
+ const __m128i out_18_3 = _mm_madd_epi16(out_18_1, k__cospi_p14_p18);
+ const __m128i out_10_2 = _mm_madd_epi16(out_10_0, k__cospi_p22_p10);
+ const __m128i out_10_3 = _mm_madd_epi16(out_10_1, k__cospi_p22_p10);
+ const __m128i out_26_2 = _mm_madd_epi16(out_26_0, k__cospi_p06_p26);
+ const __m128i out_26_3 = _mm_madd_epi16(out_26_1, k__cospi_p06_p26);
+ const __m128i out_06_2 = _mm_madd_epi16(out_26_0, k__cospi_m26_p06);
+ const __m128i out_06_3 = _mm_madd_epi16(out_26_1, k__cospi_m26_p06);
+ const __m128i out_22_2 = _mm_madd_epi16(out_10_0, k__cospi_m10_p22);
+ const __m128i out_22_3 = _mm_madd_epi16(out_10_1, k__cospi_m10_p22);
+ const __m128i out_14_2 = _mm_madd_epi16(out_18_0, k__cospi_m18_p14);
+ const __m128i out_14_3 = _mm_madd_epi16(out_18_1, k__cospi_m18_p14);
+ const __m128i out_30_2 = _mm_madd_epi16(out_02_0, k__cospi_m02_p30);
+ const __m128i out_30_3 = _mm_madd_epi16(out_02_1, k__cospi_m02_p30);
+ // dct_const_round_shift
+ const __m128i out_02_4 =
+ _mm_add_epi32(out_02_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_02_5 =
+ _mm_add_epi32(out_02_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_18_4 =
+ _mm_add_epi32(out_18_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_18_5 =
+ _mm_add_epi32(out_18_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_10_4 =
+ _mm_add_epi32(out_10_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_10_5 =
+ _mm_add_epi32(out_10_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_26_4 =
+ _mm_add_epi32(out_26_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_26_5 =
+ _mm_add_epi32(out_26_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_06_4 =
+ _mm_add_epi32(out_06_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_06_5 =
+ _mm_add_epi32(out_06_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_22_4 =
+ _mm_add_epi32(out_22_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_22_5 =
+ _mm_add_epi32(out_22_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_14_4 =
+ _mm_add_epi32(out_14_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_14_5 =
+ _mm_add_epi32(out_14_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_30_4 =
+ _mm_add_epi32(out_30_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_30_5 =
+ _mm_add_epi32(out_30_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_02_6 = _mm_srai_epi32(out_02_4, DCT_CONST_BITS);
+ const __m128i out_02_7 = _mm_srai_epi32(out_02_5, DCT_CONST_BITS);
+ const __m128i out_18_6 = _mm_srai_epi32(out_18_4, DCT_CONST_BITS);
+ const __m128i out_18_7 = _mm_srai_epi32(out_18_5, DCT_CONST_BITS);
+ const __m128i out_10_6 = _mm_srai_epi32(out_10_4, DCT_CONST_BITS);
+ const __m128i out_10_7 = _mm_srai_epi32(out_10_5, DCT_CONST_BITS);
+ const __m128i out_26_6 = _mm_srai_epi32(out_26_4, DCT_CONST_BITS);
+ const __m128i out_26_7 = _mm_srai_epi32(out_26_5, DCT_CONST_BITS);
+ const __m128i out_06_6 = _mm_srai_epi32(out_06_4, DCT_CONST_BITS);
+ const __m128i out_06_7 = _mm_srai_epi32(out_06_5, DCT_CONST_BITS);
+ const __m128i out_22_6 = _mm_srai_epi32(out_22_4, DCT_CONST_BITS);
+ const __m128i out_22_7 = _mm_srai_epi32(out_22_5, DCT_CONST_BITS);
+ const __m128i out_14_6 = _mm_srai_epi32(out_14_4, DCT_CONST_BITS);
+ const __m128i out_14_7 = _mm_srai_epi32(out_14_5, DCT_CONST_BITS);
+ const __m128i out_30_6 = _mm_srai_epi32(out_30_4, DCT_CONST_BITS);
+ const __m128i out_30_7 = _mm_srai_epi32(out_30_5, DCT_CONST_BITS);
+ // Combine
+ out[2] = _mm_packs_epi32(out_02_6, out_02_7);
+ out[18] = _mm_packs_epi32(out_18_6, out_18_7);
+ out[10] = _mm_packs_epi32(out_10_6, out_10_7);
+ out[26] = _mm_packs_epi32(out_26_6, out_26_7);
+ out[6] = _mm_packs_epi32(out_06_6, out_06_7);
+ out[22] = _mm_packs_epi32(out_22_6, out_22_7);
+ out[14] = _mm_packs_epi32(out_14_6, out_14_7);
+ out[30] = _mm_packs_epi32(out_30_6, out_30_7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x8(&out[2], &out[18], &out[10], &out[26],
+ &out[6], &out[22], &out[14], &out[30]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ step1[16] = ADD_EPI16(step3[17], step2[16]);
+ step1[17] = SUB_EPI16(step2[16], step3[17]);
+ step1[18] = SUB_EPI16(step2[19], step3[18]);
+ step1[19] = ADD_EPI16(step3[18], step2[19]);
+ step1[20] = ADD_EPI16(step3[21], step2[20]);
+ step1[21] = SUB_EPI16(step2[20], step3[21]);
+ step1[22] = SUB_EPI16(step2[23], step3[22]);
+ step1[23] = ADD_EPI16(step3[22], step2[23]);
+ step1[24] = ADD_EPI16(step3[25], step2[24]);
+ step1[25] = SUB_EPI16(step2[24], step3[25]);
+ step1[26] = SUB_EPI16(step2[27], step3[26]);
+ step1[27] = ADD_EPI16(step3[26], step2[27]);
+ step1[28] = ADD_EPI16(step3[29], step2[28]);
+ step1[29] = SUB_EPI16(step2[28], step3[29]);
+ step1[30] = SUB_EPI16(step2[31], step3[30]);
+ step1[31] = ADD_EPI16(step3[30], step2[31]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x16(
+ &step1[16], &step1[17], &step1[18], &step1[19], &step1[20],
+ &step1[21], &step1[22], &step1[23], &step1[24], &step1[25],
+ &step1[26], &step1[27], &step1[28], &step1[29], &step1[30],
+ &step1[31]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ // Final stage --- output indices are bit-reversed.
+ {
+ const __m128i out_01_0 = _mm_unpacklo_epi16(step1[16], step1[31]);
+ const __m128i out_01_1 = _mm_unpackhi_epi16(step1[16], step1[31]);
+ const __m128i out_17_0 = _mm_unpacklo_epi16(step1[17], step1[30]);
+ const __m128i out_17_1 = _mm_unpackhi_epi16(step1[17], step1[30]);
+ const __m128i out_09_0 = _mm_unpacklo_epi16(step1[18], step1[29]);
+ const __m128i out_09_1 = _mm_unpackhi_epi16(step1[18], step1[29]);
+ const __m128i out_25_0 = _mm_unpacklo_epi16(step1[19], step1[28]);
+ const __m128i out_25_1 = _mm_unpackhi_epi16(step1[19], step1[28]);
+ const __m128i out_01_2 = _mm_madd_epi16(out_01_0, k__cospi_p31_p01);
+ const __m128i out_01_3 = _mm_madd_epi16(out_01_1, k__cospi_p31_p01);
+ const __m128i out_17_2 = _mm_madd_epi16(out_17_0, k__cospi_p15_p17);
+ const __m128i out_17_3 = _mm_madd_epi16(out_17_1, k__cospi_p15_p17);
+ const __m128i out_09_2 = _mm_madd_epi16(out_09_0, k__cospi_p23_p09);
+ const __m128i out_09_3 = _mm_madd_epi16(out_09_1, k__cospi_p23_p09);
+ const __m128i out_25_2 = _mm_madd_epi16(out_25_0, k__cospi_p07_p25);
+ const __m128i out_25_3 = _mm_madd_epi16(out_25_1, k__cospi_p07_p25);
+ const __m128i out_07_2 = _mm_madd_epi16(out_25_0, k__cospi_m25_p07);
+ const __m128i out_07_3 = _mm_madd_epi16(out_25_1, k__cospi_m25_p07);
+ const __m128i out_23_2 = _mm_madd_epi16(out_09_0, k__cospi_m09_p23);
+ const __m128i out_23_3 = _mm_madd_epi16(out_09_1, k__cospi_m09_p23);
+ const __m128i out_15_2 = _mm_madd_epi16(out_17_0, k__cospi_m17_p15);
+ const __m128i out_15_3 = _mm_madd_epi16(out_17_1, k__cospi_m17_p15);
+ const __m128i out_31_2 = _mm_madd_epi16(out_01_0, k__cospi_m01_p31);
+ const __m128i out_31_3 = _mm_madd_epi16(out_01_1, k__cospi_m01_p31);
+ // dct_const_round_shift
+ const __m128i out_01_4 =
+ _mm_add_epi32(out_01_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_01_5 =
+ _mm_add_epi32(out_01_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_17_4 =
+ _mm_add_epi32(out_17_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_17_5 =
+ _mm_add_epi32(out_17_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_09_4 =
+ _mm_add_epi32(out_09_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_09_5 =
+ _mm_add_epi32(out_09_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_25_4 =
+ _mm_add_epi32(out_25_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_25_5 =
+ _mm_add_epi32(out_25_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_07_4 =
+ _mm_add_epi32(out_07_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_07_5 =
+ _mm_add_epi32(out_07_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_23_4 =
+ _mm_add_epi32(out_23_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_23_5 =
+ _mm_add_epi32(out_23_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_15_4 =
+ _mm_add_epi32(out_15_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_15_5 =
+ _mm_add_epi32(out_15_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_31_4 =
+ _mm_add_epi32(out_31_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_31_5 =
+ _mm_add_epi32(out_31_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_01_6 = _mm_srai_epi32(out_01_4, DCT_CONST_BITS);
+ const __m128i out_01_7 = _mm_srai_epi32(out_01_5, DCT_CONST_BITS);
+ const __m128i out_17_6 = _mm_srai_epi32(out_17_4, DCT_CONST_BITS);
+ const __m128i out_17_7 = _mm_srai_epi32(out_17_5, DCT_CONST_BITS);
+ const __m128i out_09_6 = _mm_srai_epi32(out_09_4, DCT_CONST_BITS);
+ const __m128i out_09_7 = _mm_srai_epi32(out_09_5, DCT_CONST_BITS);
+ const __m128i out_25_6 = _mm_srai_epi32(out_25_4, DCT_CONST_BITS);
+ const __m128i out_25_7 = _mm_srai_epi32(out_25_5, DCT_CONST_BITS);
+ const __m128i out_07_6 = _mm_srai_epi32(out_07_4, DCT_CONST_BITS);
+ const __m128i out_07_7 = _mm_srai_epi32(out_07_5, DCT_CONST_BITS);
+ const __m128i out_23_6 = _mm_srai_epi32(out_23_4, DCT_CONST_BITS);
+ const __m128i out_23_7 = _mm_srai_epi32(out_23_5, DCT_CONST_BITS);
+ const __m128i out_15_6 = _mm_srai_epi32(out_15_4, DCT_CONST_BITS);
+ const __m128i out_15_7 = _mm_srai_epi32(out_15_5, DCT_CONST_BITS);
+ const __m128i out_31_6 = _mm_srai_epi32(out_31_4, DCT_CONST_BITS);
+ const __m128i out_31_7 = _mm_srai_epi32(out_31_5, DCT_CONST_BITS);
+ // Combine
+ out[1] = _mm_packs_epi32(out_01_6, out_01_7);
+ out[17] = _mm_packs_epi32(out_17_6, out_17_7);
+ out[9] = _mm_packs_epi32(out_09_6, out_09_7);
+ out[25] = _mm_packs_epi32(out_25_6, out_25_7);
+ out[7] = _mm_packs_epi32(out_07_6, out_07_7);
+ out[23] = _mm_packs_epi32(out_23_6, out_23_7);
+ out[15] = _mm_packs_epi32(out_15_6, out_15_7);
+ out[31] = _mm_packs_epi32(out_31_6, out_31_7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x8(&out[1], &out[17], &out[9], &out[25],
+ &out[7], &out[23], &out[15], &out[31]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ const __m128i out_05_0 = _mm_unpacklo_epi16(step1[20], step1[27]);
+ const __m128i out_05_1 = _mm_unpackhi_epi16(step1[20], step1[27]);
+ const __m128i out_21_0 = _mm_unpacklo_epi16(step1[21], step1[26]);
+ const __m128i out_21_1 = _mm_unpackhi_epi16(step1[21], step1[26]);
+ const __m128i out_13_0 = _mm_unpacklo_epi16(step1[22], step1[25]);
+ const __m128i out_13_1 = _mm_unpackhi_epi16(step1[22], step1[25]);
+ const __m128i out_29_0 = _mm_unpacklo_epi16(step1[23], step1[24]);
+ const __m128i out_29_1 = _mm_unpackhi_epi16(step1[23], step1[24]);
+ const __m128i out_05_2 = _mm_madd_epi16(out_05_0, k__cospi_p27_p05);
+ const __m128i out_05_3 = _mm_madd_epi16(out_05_1, k__cospi_p27_p05);
+ const __m128i out_21_2 = _mm_madd_epi16(out_21_0, k__cospi_p11_p21);
+ const __m128i out_21_3 = _mm_madd_epi16(out_21_1, k__cospi_p11_p21);
+ const __m128i out_13_2 = _mm_madd_epi16(out_13_0, k__cospi_p19_p13);
+ const __m128i out_13_3 = _mm_madd_epi16(out_13_1, k__cospi_p19_p13);
+ const __m128i out_29_2 = _mm_madd_epi16(out_29_0, k__cospi_p03_p29);
+ const __m128i out_29_3 = _mm_madd_epi16(out_29_1, k__cospi_p03_p29);
+ const __m128i out_03_2 = _mm_madd_epi16(out_29_0, k__cospi_m29_p03);
+ const __m128i out_03_3 = _mm_madd_epi16(out_29_1, k__cospi_m29_p03);
+ const __m128i out_19_2 = _mm_madd_epi16(out_13_0, k__cospi_m13_p19);
+ const __m128i out_19_3 = _mm_madd_epi16(out_13_1, k__cospi_m13_p19);
+ const __m128i out_11_2 = _mm_madd_epi16(out_21_0, k__cospi_m21_p11);
+ const __m128i out_11_3 = _mm_madd_epi16(out_21_1, k__cospi_m21_p11);
+ const __m128i out_27_2 = _mm_madd_epi16(out_05_0, k__cospi_m05_p27);
+ const __m128i out_27_3 = _mm_madd_epi16(out_05_1, k__cospi_m05_p27);
+ // dct_const_round_shift
+ const __m128i out_05_4 =
+ _mm_add_epi32(out_05_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_05_5 =
+ _mm_add_epi32(out_05_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_21_4 =
+ _mm_add_epi32(out_21_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_21_5 =
+ _mm_add_epi32(out_21_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_13_4 =
+ _mm_add_epi32(out_13_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_13_5 =
+ _mm_add_epi32(out_13_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_29_4 =
+ _mm_add_epi32(out_29_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_29_5 =
+ _mm_add_epi32(out_29_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_03_4 =
+ _mm_add_epi32(out_03_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_03_5 =
+ _mm_add_epi32(out_03_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_19_4 =
+ _mm_add_epi32(out_19_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_19_5 =
+ _mm_add_epi32(out_19_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_11_4 =
+ _mm_add_epi32(out_11_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_11_5 =
+ _mm_add_epi32(out_11_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_27_4 =
+ _mm_add_epi32(out_27_2, k__DCT_CONST_ROUNDING);
+ const __m128i out_27_5 =
+ _mm_add_epi32(out_27_3, k__DCT_CONST_ROUNDING);
+ const __m128i out_05_6 = _mm_srai_epi32(out_05_4, DCT_CONST_BITS);
+ const __m128i out_05_7 = _mm_srai_epi32(out_05_5, DCT_CONST_BITS);
+ const __m128i out_21_6 = _mm_srai_epi32(out_21_4, DCT_CONST_BITS);
+ const __m128i out_21_7 = _mm_srai_epi32(out_21_5, DCT_CONST_BITS);
+ const __m128i out_13_6 = _mm_srai_epi32(out_13_4, DCT_CONST_BITS);
+ const __m128i out_13_7 = _mm_srai_epi32(out_13_5, DCT_CONST_BITS);
+ const __m128i out_29_6 = _mm_srai_epi32(out_29_4, DCT_CONST_BITS);
+ const __m128i out_29_7 = _mm_srai_epi32(out_29_5, DCT_CONST_BITS);
+ const __m128i out_03_6 = _mm_srai_epi32(out_03_4, DCT_CONST_BITS);
+ const __m128i out_03_7 = _mm_srai_epi32(out_03_5, DCT_CONST_BITS);
+ const __m128i out_19_6 = _mm_srai_epi32(out_19_4, DCT_CONST_BITS);
+ const __m128i out_19_7 = _mm_srai_epi32(out_19_5, DCT_CONST_BITS);
+ const __m128i out_11_6 = _mm_srai_epi32(out_11_4, DCT_CONST_BITS);
+ const __m128i out_11_7 = _mm_srai_epi32(out_11_5, DCT_CONST_BITS);
+ const __m128i out_27_6 = _mm_srai_epi32(out_27_4, DCT_CONST_BITS);
+ const __m128i out_27_7 = _mm_srai_epi32(out_27_5, DCT_CONST_BITS);
+ // Combine
+ out[5] = _mm_packs_epi32(out_05_6, out_05_7);
+ out[21] = _mm_packs_epi32(out_21_6, out_21_7);
+ out[13] = _mm_packs_epi32(out_13_6, out_13_7);
+ out[29] = _mm_packs_epi32(out_29_6, out_29_7);
+ out[3] = _mm_packs_epi32(out_03_6, out_03_7);
+ out[19] = _mm_packs_epi32(out_19_6, out_19_7);
+ out[11] = _mm_packs_epi32(out_11_6, out_11_7);
+ out[27] = _mm_packs_epi32(out_27_6, out_27_7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x8(&out[5], &out[21], &out[13], &out[29],
+ &out[3], &out[19], &out[11], &out[27]);
+ if (overflow) {
+ if (pass == 0)
+ HIGH_FDCT32x32_2D_C(input, output_org, stride);
+ else
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+#if FDCT32x32_HIGH_PRECISION
+ } else {
+ __m128i lstep1[64], lstep2[64], lstep3[64];
+ __m128i u[32], v[32], sign[16];
+ const __m128i K32One = _mm_set_epi32(1, 1, 1, 1);
+ // start using 32-bit operations
+ // stage 3
+ {
+ // expanding to 32-bit length prior to addition operations
+ lstep2[0] = _mm_unpacklo_epi16(step2[0], kZero);
+ lstep2[1] = _mm_unpackhi_epi16(step2[0], kZero);
+ lstep2[2] = _mm_unpacklo_epi16(step2[1], kZero);
+ lstep2[3] = _mm_unpackhi_epi16(step2[1], kZero);
+ lstep2[4] = _mm_unpacklo_epi16(step2[2], kZero);
+ lstep2[5] = _mm_unpackhi_epi16(step2[2], kZero);
+ lstep2[6] = _mm_unpacklo_epi16(step2[3], kZero);
+ lstep2[7] = _mm_unpackhi_epi16(step2[3], kZero);
+ lstep2[8] = _mm_unpacklo_epi16(step2[4], kZero);
+ lstep2[9] = _mm_unpackhi_epi16(step2[4], kZero);
+ lstep2[10] = _mm_unpacklo_epi16(step2[5], kZero);
+ lstep2[11] = _mm_unpackhi_epi16(step2[5], kZero);
+ lstep2[12] = _mm_unpacklo_epi16(step2[6], kZero);
+ lstep2[13] = _mm_unpackhi_epi16(step2[6], kZero);
+ lstep2[14] = _mm_unpacklo_epi16(step2[7], kZero);
+ lstep2[15] = _mm_unpackhi_epi16(step2[7], kZero);
+ lstep2[0] = _mm_madd_epi16(lstep2[0], kOne);
+ lstep2[1] = _mm_madd_epi16(lstep2[1], kOne);
+ lstep2[2] = _mm_madd_epi16(lstep2[2], kOne);
+ lstep2[3] = _mm_madd_epi16(lstep2[3], kOne);
+ lstep2[4] = _mm_madd_epi16(lstep2[4], kOne);
+ lstep2[5] = _mm_madd_epi16(lstep2[5], kOne);
+ lstep2[6] = _mm_madd_epi16(lstep2[6], kOne);
+ lstep2[7] = _mm_madd_epi16(lstep2[7], kOne);
+ lstep2[8] = _mm_madd_epi16(lstep2[8], kOne);
+ lstep2[9] = _mm_madd_epi16(lstep2[9], kOne);
+ lstep2[10] = _mm_madd_epi16(lstep2[10], kOne);
+ lstep2[11] = _mm_madd_epi16(lstep2[11], kOne);
+ lstep2[12] = _mm_madd_epi16(lstep2[12], kOne);
+ lstep2[13] = _mm_madd_epi16(lstep2[13], kOne);
+ lstep2[14] = _mm_madd_epi16(lstep2[14], kOne);
+ lstep2[15] = _mm_madd_epi16(lstep2[15], kOne);
+
+ lstep3[0] = _mm_add_epi32(lstep2[14], lstep2[0]);
+ lstep3[1] = _mm_add_epi32(lstep2[15], lstep2[1]);
+ lstep3[2] = _mm_add_epi32(lstep2[12], lstep2[2]);
+ lstep3[3] = _mm_add_epi32(lstep2[13], lstep2[3]);
+ lstep3[4] = _mm_add_epi32(lstep2[10], lstep2[4]);
+ lstep3[5] = _mm_add_epi32(lstep2[11], lstep2[5]);
+ lstep3[6] = _mm_add_epi32(lstep2[8], lstep2[6]);
+ lstep3[7] = _mm_add_epi32(lstep2[9], lstep2[7]);
+ lstep3[8] = _mm_sub_epi32(lstep2[6], lstep2[8]);
+ lstep3[9] = _mm_sub_epi32(lstep2[7], lstep2[9]);
+ lstep3[10] = _mm_sub_epi32(lstep2[4], lstep2[10]);
+ lstep3[11] = _mm_sub_epi32(lstep2[5], lstep2[11]);
+ lstep3[12] = _mm_sub_epi32(lstep2[2], lstep2[12]);
+ lstep3[13] = _mm_sub_epi32(lstep2[3], lstep2[13]);
+ lstep3[14] = _mm_sub_epi32(lstep2[0], lstep2[14]);
+ lstep3[15] = _mm_sub_epi32(lstep2[1], lstep2[15]);
+ }
+ {
+ const __m128i s3_10_0 = _mm_unpacklo_epi16(step2[13], step2[10]);
+ const __m128i s3_10_1 = _mm_unpackhi_epi16(step2[13], step2[10]);
+ const __m128i s3_11_0 = _mm_unpacklo_epi16(step2[12], step2[11]);
+ const __m128i s3_11_1 = _mm_unpackhi_epi16(step2[12], step2[11]);
+ const __m128i s3_10_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_m16);
+ const __m128i s3_10_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_m16);
+ const __m128i s3_11_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_m16);
+ const __m128i s3_11_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_m16);
+ const __m128i s3_12_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_p16);
+ const __m128i s3_12_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_p16);
+ const __m128i s3_13_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_p16);
+ const __m128i s3_13_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_p16);
+ // dct_const_round_shift
+ const __m128i s3_10_4 = _mm_add_epi32(s3_10_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_10_5 = _mm_add_epi32(s3_10_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_11_4 = _mm_add_epi32(s3_11_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_11_5 = _mm_add_epi32(s3_11_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_12_4 = _mm_add_epi32(s3_12_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_12_5 = _mm_add_epi32(s3_12_3, k__DCT_CONST_ROUNDING);
+ const __m128i s3_13_4 = _mm_add_epi32(s3_13_2, k__DCT_CONST_ROUNDING);
+ const __m128i s3_13_5 = _mm_add_epi32(s3_13_3, k__DCT_CONST_ROUNDING);
+ lstep3[20] = _mm_srai_epi32(s3_10_4, DCT_CONST_BITS);
+ lstep3[21] = _mm_srai_epi32(s3_10_5, DCT_CONST_BITS);
+ lstep3[22] = _mm_srai_epi32(s3_11_4, DCT_CONST_BITS);
+ lstep3[23] = _mm_srai_epi32(s3_11_5, DCT_CONST_BITS);
+ lstep3[24] = _mm_srai_epi32(s3_12_4, DCT_CONST_BITS);
+ lstep3[25] = _mm_srai_epi32(s3_12_5, DCT_CONST_BITS);
+ lstep3[26] = _mm_srai_epi32(s3_13_4, DCT_CONST_BITS);
+ lstep3[27] = _mm_srai_epi32(s3_13_5, DCT_CONST_BITS);
+ }
+ {
+ lstep2[40] = _mm_unpacklo_epi16(step2[20], kZero);
+ lstep2[41] = _mm_unpackhi_epi16(step2[20], kZero);
+ lstep2[42] = _mm_unpacklo_epi16(step2[21], kZero);
+ lstep2[43] = _mm_unpackhi_epi16(step2[21], kZero);
+ lstep2[44] = _mm_unpacklo_epi16(step2[22], kZero);
+ lstep2[45] = _mm_unpackhi_epi16(step2[22], kZero);
+ lstep2[46] = _mm_unpacklo_epi16(step2[23], kZero);
+ lstep2[47] = _mm_unpackhi_epi16(step2[23], kZero);
+ lstep2[48] = _mm_unpacklo_epi16(step2[24], kZero);
+ lstep2[49] = _mm_unpackhi_epi16(step2[24], kZero);
+ lstep2[50] = _mm_unpacklo_epi16(step2[25], kZero);
+ lstep2[51] = _mm_unpackhi_epi16(step2[25], kZero);
+ lstep2[52] = _mm_unpacklo_epi16(step2[26], kZero);
+ lstep2[53] = _mm_unpackhi_epi16(step2[26], kZero);
+ lstep2[54] = _mm_unpacklo_epi16(step2[27], kZero);
+ lstep2[55] = _mm_unpackhi_epi16(step2[27], kZero);
+ lstep2[40] = _mm_madd_epi16(lstep2[40], kOne);
+ lstep2[41] = _mm_madd_epi16(lstep2[41], kOne);
+ lstep2[42] = _mm_madd_epi16(lstep2[42], kOne);
+ lstep2[43] = _mm_madd_epi16(lstep2[43], kOne);
+ lstep2[44] = _mm_madd_epi16(lstep2[44], kOne);
+ lstep2[45] = _mm_madd_epi16(lstep2[45], kOne);
+ lstep2[46] = _mm_madd_epi16(lstep2[46], kOne);
+ lstep2[47] = _mm_madd_epi16(lstep2[47], kOne);
+ lstep2[48] = _mm_madd_epi16(lstep2[48], kOne);
+ lstep2[49] = _mm_madd_epi16(lstep2[49], kOne);
+ lstep2[50] = _mm_madd_epi16(lstep2[50], kOne);
+ lstep2[51] = _mm_madd_epi16(lstep2[51], kOne);
+ lstep2[52] = _mm_madd_epi16(lstep2[52], kOne);
+ lstep2[53] = _mm_madd_epi16(lstep2[53], kOne);
+ lstep2[54] = _mm_madd_epi16(lstep2[54], kOne);
+ lstep2[55] = _mm_madd_epi16(lstep2[55], kOne);
+
+ lstep1[32] = _mm_unpacklo_epi16(step1[16], kZero);
+ lstep1[33] = _mm_unpackhi_epi16(step1[16], kZero);
+ lstep1[34] = _mm_unpacklo_epi16(step1[17], kZero);
+ lstep1[35] = _mm_unpackhi_epi16(step1[17], kZero);
+ lstep1[36] = _mm_unpacklo_epi16(step1[18], kZero);
+ lstep1[37] = _mm_unpackhi_epi16(step1[18], kZero);
+ lstep1[38] = _mm_unpacklo_epi16(step1[19], kZero);
+ lstep1[39] = _mm_unpackhi_epi16(step1[19], kZero);
+ lstep1[56] = _mm_unpacklo_epi16(step1[28], kZero);
+ lstep1[57] = _mm_unpackhi_epi16(step1[28], kZero);
+ lstep1[58] = _mm_unpacklo_epi16(step1[29], kZero);
+ lstep1[59] = _mm_unpackhi_epi16(step1[29], kZero);
+ lstep1[60] = _mm_unpacklo_epi16(step1[30], kZero);
+ lstep1[61] = _mm_unpackhi_epi16(step1[30], kZero);
+ lstep1[62] = _mm_unpacklo_epi16(step1[31], kZero);
+ lstep1[63] = _mm_unpackhi_epi16(step1[31], kZero);
+ lstep1[32] = _mm_madd_epi16(lstep1[32], kOne);
+ lstep1[33] = _mm_madd_epi16(lstep1[33], kOne);
+ lstep1[34] = _mm_madd_epi16(lstep1[34], kOne);
+ lstep1[35] = _mm_madd_epi16(lstep1[35], kOne);
+ lstep1[36] = _mm_madd_epi16(lstep1[36], kOne);
+ lstep1[37] = _mm_madd_epi16(lstep1[37], kOne);
+ lstep1[38] = _mm_madd_epi16(lstep1[38], kOne);
+ lstep1[39] = _mm_madd_epi16(lstep1[39], kOne);
+ lstep1[56] = _mm_madd_epi16(lstep1[56], kOne);
+ lstep1[57] = _mm_madd_epi16(lstep1[57], kOne);
+ lstep1[58] = _mm_madd_epi16(lstep1[58], kOne);
+ lstep1[59] = _mm_madd_epi16(lstep1[59], kOne);
+ lstep1[60] = _mm_madd_epi16(lstep1[60], kOne);
+ lstep1[61] = _mm_madd_epi16(lstep1[61], kOne);
+ lstep1[62] = _mm_madd_epi16(lstep1[62], kOne);
+ lstep1[63] = _mm_madd_epi16(lstep1[63], kOne);
+
+ lstep3[32] = _mm_add_epi32(lstep2[46], lstep1[32]);
+ lstep3[33] = _mm_add_epi32(lstep2[47], lstep1[33]);
+
+ lstep3[34] = _mm_add_epi32(lstep2[44], lstep1[34]);
+ lstep3[35] = _mm_add_epi32(lstep2[45], lstep1[35]);
+ lstep3[36] = _mm_add_epi32(lstep2[42], lstep1[36]);
+ lstep3[37] = _mm_add_epi32(lstep2[43], lstep1[37]);
+ lstep3[38] = _mm_add_epi32(lstep2[40], lstep1[38]);
+ lstep3[39] = _mm_add_epi32(lstep2[41], lstep1[39]);
+ lstep3[40] = _mm_sub_epi32(lstep1[38], lstep2[40]);
+ lstep3[41] = _mm_sub_epi32(lstep1[39], lstep2[41]);
+ lstep3[42] = _mm_sub_epi32(lstep1[36], lstep2[42]);
+ lstep3[43] = _mm_sub_epi32(lstep1[37], lstep2[43]);
+ lstep3[44] = _mm_sub_epi32(lstep1[34], lstep2[44]);
+ lstep3[45] = _mm_sub_epi32(lstep1[35], lstep2[45]);
+ lstep3[46] = _mm_sub_epi32(lstep1[32], lstep2[46]);
+ lstep3[47] = _mm_sub_epi32(lstep1[33], lstep2[47]);
+ lstep3[48] = _mm_sub_epi32(lstep1[62], lstep2[48]);
+ lstep3[49] = _mm_sub_epi32(lstep1[63], lstep2[49]);
+ lstep3[50] = _mm_sub_epi32(lstep1[60], lstep2[50]);
+ lstep3[51] = _mm_sub_epi32(lstep1[61], lstep2[51]);
+ lstep3[52] = _mm_sub_epi32(lstep1[58], lstep2[52]);
+ lstep3[53] = _mm_sub_epi32(lstep1[59], lstep2[53]);
+ lstep3[54] = _mm_sub_epi32(lstep1[56], lstep2[54]);
+ lstep3[55] = _mm_sub_epi32(lstep1[57], lstep2[55]);
+ lstep3[56] = _mm_add_epi32(lstep2[54], lstep1[56]);
+ lstep3[57] = _mm_add_epi32(lstep2[55], lstep1[57]);
+ lstep3[58] = _mm_add_epi32(lstep2[52], lstep1[58]);
+ lstep3[59] = _mm_add_epi32(lstep2[53], lstep1[59]);
+ lstep3[60] = _mm_add_epi32(lstep2[50], lstep1[60]);
+ lstep3[61] = _mm_add_epi32(lstep2[51], lstep1[61]);
+ lstep3[62] = _mm_add_epi32(lstep2[48], lstep1[62]);
+ lstep3[63] = _mm_add_epi32(lstep2[49], lstep1[63]);
+ }
+
+ // stage 4
+ {
+ // expanding to 32-bit length prior to addition operations
+ lstep2[16] = _mm_unpacklo_epi16(step2[8], kZero);
+ lstep2[17] = _mm_unpackhi_epi16(step2[8], kZero);
+ lstep2[18] = _mm_unpacklo_epi16(step2[9], kZero);
+ lstep2[19] = _mm_unpackhi_epi16(step2[9], kZero);
+ lstep2[28] = _mm_unpacklo_epi16(step2[14], kZero);
+ lstep2[29] = _mm_unpackhi_epi16(step2[14], kZero);
+ lstep2[30] = _mm_unpacklo_epi16(step2[15], kZero);
+ lstep2[31] = _mm_unpackhi_epi16(step2[15], kZero);
+ lstep2[16] = _mm_madd_epi16(lstep2[16], kOne);
+ lstep2[17] = _mm_madd_epi16(lstep2[17], kOne);
+ lstep2[18] = _mm_madd_epi16(lstep2[18], kOne);
+ lstep2[19] = _mm_madd_epi16(lstep2[19], kOne);
+ lstep2[28] = _mm_madd_epi16(lstep2[28], kOne);
+ lstep2[29] = _mm_madd_epi16(lstep2[29], kOne);
+ lstep2[30] = _mm_madd_epi16(lstep2[30], kOne);
+ lstep2[31] = _mm_madd_epi16(lstep2[31], kOne);
+
+ lstep1[0] = _mm_add_epi32(lstep3[6], lstep3[0]);
+ lstep1[1] = _mm_add_epi32(lstep3[7], lstep3[1]);
+ lstep1[2] = _mm_add_epi32(lstep3[4], lstep3[2]);
+ lstep1[3] = _mm_add_epi32(lstep3[5], lstep3[3]);
+ lstep1[4] = _mm_sub_epi32(lstep3[2], lstep3[4]);
+ lstep1[5] = _mm_sub_epi32(lstep3[3], lstep3[5]);
+ lstep1[6] = _mm_sub_epi32(lstep3[0], lstep3[6]);
+ lstep1[7] = _mm_sub_epi32(lstep3[1], lstep3[7]);
+ lstep1[16] = _mm_add_epi32(lstep3[22], lstep2[16]);
+ lstep1[17] = _mm_add_epi32(lstep3[23], lstep2[17]);
+ lstep1[18] = _mm_add_epi32(lstep3[20], lstep2[18]);
+ lstep1[19] = _mm_add_epi32(lstep3[21], lstep2[19]);
+ lstep1[20] = _mm_sub_epi32(lstep2[18], lstep3[20]);
+ lstep1[21] = _mm_sub_epi32(lstep2[19], lstep3[21]);
+ lstep1[22] = _mm_sub_epi32(lstep2[16], lstep3[22]);
+ lstep1[23] = _mm_sub_epi32(lstep2[17], lstep3[23]);
+ lstep1[24] = _mm_sub_epi32(lstep2[30], lstep3[24]);
+ lstep1[25] = _mm_sub_epi32(lstep2[31], lstep3[25]);
+ lstep1[26] = _mm_sub_epi32(lstep2[28], lstep3[26]);
+ lstep1[27] = _mm_sub_epi32(lstep2[29], lstep3[27]);
+ lstep1[28] = _mm_add_epi32(lstep3[26], lstep2[28]);
+ lstep1[29] = _mm_add_epi32(lstep3[27], lstep2[29]);
+ lstep1[30] = _mm_add_epi32(lstep3[24], lstep2[30]);
+ lstep1[31] = _mm_add_epi32(lstep3[25], lstep2[31]);
+ }
+ {
+ // to be continued...
+ //
+ const __m128i k32_p16_p16 = pair_set_epi32(cospi_16_64, cospi_16_64);
+ const __m128i k32_p16_m16 = pair_set_epi32(cospi_16_64, -cospi_16_64);
+
+ u[0] = _mm_unpacklo_epi32(lstep3[12], lstep3[10]);
+ u[1] = _mm_unpackhi_epi32(lstep3[12], lstep3[10]);
+ u[2] = _mm_unpacklo_epi32(lstep3[13], lstep3[11]);
+ u[3] = _mm_unpackhi_epi32(lstep3[13], lstep3[11]);
+
+ // TODO(jingning): manually inline k_madd_epi32_ to further hide
+ // instruction latency.
+ v[0] = k_madd_epi32(u[0], k32_p16_m16);
+ v[1] = k_madd_epi32(u[1], k32_p16_m16);
+ v[2] = k_madd_epi32(u[2], k32_p16_m16);
+ v[3] = k_madd_epi32(u[3], k32_p16_m16);
+ v[4] = k_madd_epi32(u[0], k32_p16_p16);
+ v[5] = k_madd_epi32(u[1], k32_p16_p16);
+ v[6] = k_madd_epi32(u[2], k32_p16_p16);
+ v[7] = k_madd_epi32(u[3], k32_p16_p16);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = k_check_epi32_overflow_8(&v[0], &v[1], &v[2], &v[3], &v[4],
+ &v[5], &v[6], &v[7], &kZero);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ u[0] = k_packs_epi64(v[0], v[1]);
+ u[1] = k_packs_epi64(v[2], v[3]);
+ u[2] = k_packs_epi64(v[4], v[5]);
+ u[3] = k_packs_epi64(v[6], v[7]);
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+
+ lstep1[10] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ lstep1[11] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ lstep1[12] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ lstep1[13] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ }
+ {
+ const __m128i k32_m08_p24 = pair_set_epi32(-cospi_8_64, cospi_24_64);
+ const __m128i k32_m24_m08 = pair_set_epi32(-cospi_24_64, -cospi_8_64);
+ const __m128i k32_p24_p08 = pair_set_epi32(cospi_24_64, cospi_8_64);
+
+ u[0] = _mm_unpacklo_epi32(lstep3[36], lstep3[58]);
+ u[1] = _mm_unpackhi_epi32(lstep3[36], lstep3[58]);
+ u[2] = _mm_unpacklo_epi32(lstep3[37], lstep3[59]);
+ u[3] = _mm_unpackhi_epi32(lstep3[37], lstep3[59]);
+ u[4] = _mm_unpacklo_epi32(lstep3[38], lstep3[56]);
+ u[5] = _mm_unpackhi_epi32(lstep3[38], lstep3[56]);
+ u[6] = _mm_unpacklo_epi32(lstep3[39], lstep3[57]);
+ u[7] = _mm_unpackhi_epi32(lstep3[39], lstep3[57]);
+ u[8] = _mm_unpacklo_epi32(lstep3[40], lstep3[54]);
+ u[9] = _mm_unpackhi_epi32(lstep3[40], lstep3[54]);
+ u[10] = _mm_unpacklo_epi32(lstep3[41], lstep3[55]);
+ u[11] = _mm_unpackhi_epi32(lstep3[41], lstep3[55]);
+ u[12] = _mm_unpacklo_epi32(lstep3[42], lstep3[52]);
+ u[13] = _mm_unpackhi_epi32(lstep3[42], lstep3[52]);
+ u[14] = _mm_unpacklo_epi32(lstep3[43], lstep3[53]);
+ u[15] = _mm_unpackhi_epi32(lstep3[43], lstep3[53]);
+
+ v[0] = k_madd_epi32(u[0], k32_m08_p24);
+ v[1] = k_madd_epi32(u[1], k32_m08_p24);
+ v[2] = k_madd_epi32(u[2], k32_m08_p24);
+ v[3] = k_madd_epi32(u[3], k32_m08_p24);
+ v[4] = k_madd_epi32(u[4], k32_m08_p24);
+ v[5] = k_madd_epi32(u[5], k32_m08_p24);
+ v[6] = k_madd_epi32(u[6], k32_m08_p24);
+ v[7] = k_madd_epi32(u[7], k32_m08_p24);
+ v[8] = k_madd_epi32(u[8], k32_m24_m08);
+ v[9] = k_madd_epi32(u[9], k32_m24_m08);
+ v[10] = k_madd_epi32(u[10], k32_m24_m08);
+ v[11] = k_madd_epi32(u[11], k32_m24_m08);
+ v[12] = k_madd_epi32(u[12], k32_m24_m08);
+ v[13] = k_madd_epi32(u[13], k32_m24_m08);
+ v[14] = k_madd_epi32(u[14], k32_m24_m08);
+ v[15] = k_madd_epi32(u[15], k32_m24_m08);
+ v[16] = k_madd_epi32(u[12], k32_m08_p24);
+ v[17] = k_madd_epi32(u[13], k32_m08_p24);
+ v[18] = k_madd_epi32(u[14], k32_m08_p24);
+ v[19] = k_madd_epi32(u[15], k32_m08_p24);
+ v[20] = k_madd_epi32(u[8], k32_m08_p24);
+ v[21] = k_madd_epi32(u[9], k32_m08_p24);
+ v[22] = k_madd_epi32(u[10], k32_m08_p24);
+ v[23] = k_madd_epi32(u[11], k32_m08_p24);
+ v[24] = k_madd_epi32(u[4], k32_p24_p08);
+ v[25] = k_madd_epi32(u[5], k32_p24_p08);
+ v[26] = k_madd_epi32(u[6], k32_p24_p08);
+ v[27] = k_madd_epi32(u[7], k32_p24_p08);
+ v[28] = k_madd_epi32(u[0], k32_p24_p08);
+ v[29] = k_madd_epi32(u[1], k32_p24_p08);
+ v[30] = k_madd_epi32(u[2], k32_p24_p08);
+ v[31] = k_madd_epi32(u[3], k32_p24_p08);
+
+#if DCT_HIGH_BIT_DEPTH
+ overflow = k_check_epi32_overflow_32(
+ &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+ &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
+ &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
+ &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ u[0] = k_packs_epi64(v[0], v[1]);
+ u[1] = k_packs_epi64(v[2], v[3]);
+ u[2] = k_packs_epi64(v[4], v[5]);
+ u[3] = k_packs_epi64(v[6], v[7]);
+ u[4] = k_packs_epi64(v[8], v[9]);
+ u[5] = k_packs_epi64(v[10], v[11]);
+ u[6] = k_packs_epi64(v[12], v[13]);
+ u[7] = k_packs_epi64(v[14], v[15]);
+ u[8] = k_packs_epi64(v[16], v[17]);
+ u[9] = k_packs_epi64(v[18], v[19]);
+ u[10] = k_packs_epi64(v[20], v[21]);
+ u[11] = k_packs_epi64(v[22], v[23]);
+ u[12] = k_packs_epi64(v[24], v[25]);
+ u[13] = k_packs_epi64(v[26], v[27]);
+ u[14] = k_packs_epi64(v[28], v[29]);
+ u[15] = k_packs_epi64(v[30], v[31]);
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+ v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+ v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+ v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+ v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+ v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+ v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+ v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+ v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+ lstep1[36] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ lstep1[37] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ lstep1[38] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ lstep1[39] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ lstep1[40] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+ lstep1[41] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+ lstep1[42] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+ lstep1[43] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+ lstep1[52] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+ lstep1[53] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+ lstep1[54] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+ lstep1[55] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+ lstep1[56] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+ lstep1[57] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+ lstep1[58] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+ lstep1[59] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+ }
+ // stage 5
+ {
+ lstep2[8] = _mm_add_epi32(lstep1[10], lstep3[8]);
+ lstep2[9] = _mm_add_epi32(lstep1[11], lstep3[9]);
+ lstep2[10] = _mm_sub_epi32(lstep3[8], lstep1[10]);
+ lstep2[11] = _mm_sub_epi32(lstep3[9], lstep1[11]);
+ lstep2[12] = _mm_sub_epi32(lstep3[14], lstep1[12]);
+ lstep2[13] = _mm_sub_epi32(lstep3[15], lstep1[13]);
+ lstep2[14] = _mm_add_epi32(lstep1[12], lstep3[14]);
+ lstep2[15] = _mm_add_epi32(lstep1[13], lstep3[15]);
+ }
+ {
+ const __m128i k32_p16_p16 = pair_set_epi32(cospi_16_64, cospi_16_64);
+ const __m128i k32_p16_m16 = pair_set_epi32(cospi_16_64, -cospi_16_64);
+ const __m128i k32_p24_p08 = pair_set_epi32(cospi_24_64, cospi_8_64);
+ const __m128i k32_m08_p24 = pair_set_epi32(-cospi_8_64, cospi_24_64);
+
+ u[0] = _mm_unpacklo_epi32(lstep1[0], lstep1[2]);
+ u[1] = _mm_unpackhi_epi32(lstep1[0], lstep1[2]);
+ u[2] = _mm_unpacklo_epi32(lstep1[1], lstep1[3]);
+ u[3] = _mm_unpackhi_epi32(lstep1[1], lstep1[3]);
+ u[4] = _mm_unpacklo_epi32(lstep1[4], lstep1[6]);
+ u[5] = _mm_unpackhi_epi32(lstep1[4], lstep1[6]);
+ u[6] = _mm_unpacklo_epi32(lstep1[5], lstep1[7]);
+ u[7] = _mm_unpackhi_epi32(lstep1[5], lstep1[7]);
+
+ // TODO(jingning): manually inline k_madd_epi32_ to further hide
+ // instruction latency.
+ v[0] = k_madd_epi32(u[0], k32_p16_p16);
+ v[1] = k_madd_epi32(u[1], k32_p16_p16);
+ v[2] = k_madd_epi32(u[2], k32_p16_p16);
+ v[3] = k_madd_epi32(u[3], k32_p16_p16);
+ v[4] = k_madd_epi32(u[0], k32_p16_m16);
+ v[5] = k_madd_epi32(u[1], k32_p16_m16);
+ v[6] = k_madd_epi32(u[2], k32_p16_m16);
+ v[7] = k_madd_epi32(u[3], k32_p16_m16);
+ v[8] = k_madd_epi32(u[4], k32_p24_p08);
+ v[9] = k_madd_epi32(u[5], k32_p24_p08);
+ v[10] = k_madd_epi32(u[6], k32_p24_p08);
+ v[11] = k_madd_epi32(u[7], k32_p24_p08);
+ v[12] = k_madd_epi32(u[4], k32_m08_p24);
+ v[13] = k_madd_epi32(u[5], k32_m08_p24);
+ v[14] = k_madd_epi32(u[6], k32_m08_p24);
+ v[15] = k_madd_epi32(u[7], k32_m08_p24);
+
+#if DCT_HIGH_BIT_DEPTH
+ overflow = k_check_epi32_overflow_16(
+ &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+ &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &kZero);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ u[0] = k_packs_epi64(v[0], v[1]);
+ u[1] = k_packs_epi64(v[2], v[3]);
+ u[2] = k_packs_epi64(v[4], v[5]);
+ u[3] = k_packs_epi64(v[6], v[7]);
+ u[4] = k_packs_epi64(v[8], v[9]);
+ u[5] = k_packs_epi64(v[10], v[11]);
+ u[6] = k_packs_epi64(v[12], v[13]);
+ u[7] = k_packs_epi64(v[14], v[15]);
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+
+ sign[0] = _mm_cmplt_epi32(u[0], kZero);
+ sign[1] = _mm_cmplt_epi32(u[1], kZero);
+ sign[2] = _mm_cmplt_epi32(u[2], kZero);
+ sign[3] = _mm_cmplt_epi32(u[3], kZero);
+ sign[4] = _mm_cmplt_epi32(u[4], kZero);
+ sign[5] = _mm_cmplt_epi32(u[5], kZero);
+ sign[6] = _mm_cmplt_epi32(u[6], kZero);
+ sign[7] = _mm_cmplt_epi32(u[7], kZero);
+
+ u[0] = _mm_sub_epi32(u[0], sign[0]);
+ u[1] = _mm_sub_epi32(u[1], sign[1]);
+ u[2] = _mm_sub_epi32(u[2], sign[2]);
+ u[3] = _mm_sub_epi32(u[3], sign[3]);
+ u[4] = _mm_sub_epi32(u[4], sign[4]);
+ u[5] = _mm_sub_epi32(u[5], sign[5]);
+ u[6] = _mm_sub_epi32(u[6], sign[6]);
+ u[7] = _mm_sub_epi32(u[7], sign[7]);
+
+ u[0] = _mm_add_epi32(u[0], K32One);
+ u[1] = _mm_add_epi32(u[1], K32One);
+ u[2] = _mm_add_epi32(u[2], K32One);
+ u[3] = _mm_add_epi32(u[3], K32One);
+ u[4] = _mm_add_epi32(u[4], K32One);
+ u[5] = _mm_add_epi32(u[5], K32One);
+ u[6] = _mm_add_epi32(u[6], K32One);
+ u[7] = _mm_add_epi32(u[7], K32One);
+
+ u[0] = _mm_srai_epi32(u[0], 2);
+ u[1] = _mm_srai_epi32(u[1], 2);
+ u[2] = _mm_srai_epi32(u[2], 2);
+ u[3] = _mm_srai_epi32(u[3], 2);
+ u[4] = _mm_srai_epi32(u[4], 2);
+ u[5] = _mm_srai_epi32(u[5], 2);
+ u[6] = _mm_srai_epi32(u[6], 2);
+ u[7] = _mm_srai_epi32(u[7], 2);
+
+ // Combine
+ out[0] = _mm_packs_epi32(u[0], u[1]);
+ out[16] = _mm_packs_epi32(u[2], u[3]);
+ out[8] = _mm_packs_epi32(u[4], u[5]);
+ out[24] = _mm_packs_epi32(u[6], u[7]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x4(&out[0], &out[16], &out[8], &out[24]);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ const __m128i k32_m08_p24 = pair_set_epi32(-cospi_8_64, cospi_24_64);
+ const __m128i k32_m24_m08 = pair_set_epi32(-cospi_24_64, -cospi_8_64);
+ const __m128i k32_p24_p08 = pair_set_epi32(cospi_24_64, cospi_8_64);
+
+ u[0] = _mm_unpacklo_epi32(lstep1[18], lstep1[28]);
+ u[1] = _mm_unpackhi_epi32(lstep1[18], lstep1[28]);
+ u[2] = _mm_unpacklo_epi32(lstep1[19], lstep1[29]);
+ u[3] = _mm_unpackhi_epi32(lstep1[19], lstep1[29]);
+ u[4] = _mm_unpacklo_epi32(lstep1[20], lstep1[26]);
+ u[5] = _mm_unpackhi_epi32(lstep1[20], lstep1[26]);
+ u[6] = _mm_unpacklo_epi32(lstep1[21], lstep1[27]);
+ u[7] = _mm_unpackhi_epi32(lstep1[21], lstep1[27]);
+
+ v[0] = k_madd_epi32(u[0], k32_m08_p24);
+ v[1] = k_madd_epi32(u[1], k32_m08_p24);
+ v[2] = k_madd_epi32(u[2], k32_m08_p24);
+ v[3] = k_madd_epi32(u[3], k32_m08_p24);
+ v[4] = k_madd_epi32(u[4], k32_m24_m08);
+ v[5] = k_madd_epi32(u[5], k32_m24_m08);
+ v[6] = k_madd_epi32(u[6], k32_m24_m08);
+ v[7] = k_madd_epi32(u[7], k32_m24_m08);
+ v[8] = k_madd_epi32(u[4], k32_m08_p24);
+ v[9] = k_madd_epi32(u[5], k32_m08_p24);
+ v[10] = k_madd_epi32(u[6], k32_m08_p24);
+ v[11] = k_madd_epi32(u[7], k32_m08_p24);
+ v[12] = k_madd_epi32(u[0], k32_p24_p08);
+ v[13] = k_madd_epi32(u[1], k32_p24_p08);
+ v[14] = k_madd_epi32(u[2], k32_p24_p08);
+ v[15] = k_madd_epi32(u[3], k32_p24_p08);
+
+#if DCT_HIGH_BIT_DEPTH
+ overflow = k_check_epi32_overflow_16(
+ &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+ &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &kZero);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ u[0] = k_packs_epi64(v[0], v[1]);
+ u[1] = k_packs_epi64(v[2], v[3]);
+ u[2] = k_packs_epi64(v[4], v[5]);
+ u[3] = k_packs_epi64(v[6], v[7]);
+ u[4] = k_packs_epi64(v[8], v[9]);
+ u[5] = k_packs_epi64(v[10], v[11]);
+ u[6] = k_packs_epi64(v[12], v[13]);
+ u[7] = k_packs_epi64(v[14], v[15]);
+
+ u[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+
+ lstep2[18] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ lstep2[19] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ lstep2[20] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ lstep2[21] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ lstep2[26] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ lstep2[27] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ lstep2[28] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ lstep2[29] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+ }
+ {
+ lstep2[32] = _mm_add_epi32(lstep1[38], lstep3[32]);
+ lstep2[33] = _mm_add_epi32(lstep1[39], lstep3[33]);
+ lstep2[34] = _mm_add_epi32(lstep1[36], lstep3[34]);
+ lstep2[35] = _mm_add_epi32(lstep1[37], lstep3[35]);
+ lstep2[36] = _mm_sub_epi32(lstep3[34], lstep1[36]);
+ lstep2[37] = _mm_sub_epi32(lstep3[35], lstep1[37]);
+ lstep2[38] = _mm_sub_epi32(lstep3[32], lstep1[38]);
+ lstep2[39] = _mm_sub_epi32(lstep3[33], lstep1[39]);
+ lstep2[40] = _mm_sub_epi32(lstep3[46], lstep1[40]);
+ lstep2[41] = _mm_sub_epi32(lstep3[47], lstep1[41]);
+ lstep2[42] = _mm_sub_epi32(lstep3[44], lstep1[42]);
+ lstep2[43] = _mm_sub_epi32(lstep3[45], lstep1[43]);
+ lstep2[44] = _mm_add_epi32(lstep1[42], lstep3[44]);
+ lstep2[45] = _mm_add_epi32(lstep1[43], lstep3[45]);
+ lstep2[46] = _mm_add_epi32(lstep1[40], lstep3[46]);
+ lstep2[47] = _mm_add_epi32(lstep1[41], lstep3[47]);
+ lstep2[48] = _mm_add_epi32(lstep1[54], lstep3[48]);
+ lstep2[49] = _mm_add_epi32(lstep1[55], lstep3[49]);
+ lstep2[50] = _mm_add_epi32(lstep1[52], lstep3[50]);
+ lstep2[51] = _mm_add_epi32(lstep1[53], lstep3[51]);
+ lstep2[52] = _mm_sub_epi32(lstep3[50], lstep1[52]);
+ lstep2[53] = _mm_sub_epi32(lstep3[51], lstep1[53]);
+ lstep2[54] = _mm_sub_epi32(lstep3[48], lstep1[54]);
+ lstep2[55] = _mm_sub_epi32(lstep3[49], lstep1[55]);
+ lstep2[56] = _mm_sub_epi32(lstep3[62], lstep1[56]);
+ lstep2[57] = _mm_sub_epi32(lstep3[63], lstep1[57]);
+ lstep2[58] = _mm_sub_epi32(lstep3[60], lstep1[58]);
+ lstep2[59] = _mm_sub_epi32(lstep3[61], lstep1[59]);
+ lstep2[60] = _mm_add_epi32(lstep1[58], lstep3[60]);
+ lstep2[61] = _mm_add_epi32(lstep1[59], lstep3[61]);
+ lstep2[62] = _mm_add_epi32(lstep1[56], lstep3[62]);
+ lstep2[63] = _mm_add_epi32(lstep1[57], lstep3[63]);
+ }
+ // stage 6
+ {
+ const __m128i k32_p28_p04 = pair_set_epi32(cospi_28_64, cospi_4_64);
+ const __m128i k32_p12_p20 = pair_set_epi32(cospi_12_64, cospi_20_64);
+ const __m128i k32_m20_p12 = pair_set_epi32(-cospi_20_64, cospi_12_64);
+ const __m128i k32_m04_p28 = pair_set_epi32(-cospi_4_64, cospi_28_64);
+
+ u[0] = _mm_unpacklo_epi32(lstep2[8], lstep2[14]);
+ u[1] = _mm_unpackhi_epi32(lstep2[8], lstep2[14]);
+ u[2] = _mm_unpacklo_epi32(lstep2[9], lstep2[15]);
+ u[3] = _mm_unpackhi_epi32(lstep2[9], lstep2[15]);
+ u[4] = _mm_unpacklo_epi32(lstep2[10], lstep2[12]);
+ u[5] = _mm_unpackhi_epi32(lstep2[10], lstep2[12]);
+ u[6] = _mm_unpacklo_epi32(lstep2[11], lstep2[13]);
+ u[7] = _mm_unpackhi_epi32(lstep2[11], lstep2[13]);
+ u[8] = _mm_unpacklo_epi32(lstep2[10], lstep2[12]);
+ u[9] = _mm_unpackhi_epi32(lstep2[10], lstep2[12]);
+ u[10] = _mm_unpacklo_epi32(lstep2[11], lstep2[13]);
+ u[11] = _mm_unpackhi_epi32(lstep2[11], lstep2[13]);
+ u[12] = _mm_unpacklo_epi32(lstep2[8], lstep2[14]);
+ u[13] = _mm_unpackhi_epi32(lstep2[8], lstep2[14]);
+ u[14] = _mm_unpacklo_epi32(lstep2[9], lstep2[15]);
+ u[15] = _mm_unpackhi_epi32(lstep2[9], lstep2[15]);
+
+ v[0] = k_madd_epi32(u[0], k32_p28_p04);
+ v[1] = k_madd_epi32(u[1], k32_p28_p04);
+ v[2] = k_madd_epi32(u[2], k32_p28_p04);
+ v[3] = k_madd_epi32(u[3], k32_p28_p04);
+ v[4] = k_madd_epi32(u[4], k32_p12_p20);
+ v[5] = k_madd_epi32(u[5], k32_p12_p20);
+ v[6] = k_madd_epi32(u[6], k32_p12_p20);
+ v[7] = k_madd_epi32(u[7], k32_p12_p20);
+ v[8] = k_madd_epi32(u[8], k32_m20_p12);
+ v[9] = k_madd_epi32(u[9], k32_m20_p12);
+ v[10] = k_madd_epi32(u[10], k32_m20_p12);
+ v[11] = k_madd_epi32(u[11], k32_m20_p12);
+ v[12] = k_madd_epi32(u[12], k32_m04_p28);
+ v[13] = k_madd_epi32(u[13], k32_m04_p28);
+ v[14] = k_madd_epi32(u[14], k32_m04_p28);
+ v[15] = k_madd_epi32(u[15], k32_m04_p28);
+
+#if DCT_HIGH_BIT_DEPTH
+ overflow = k_check_epi32_overflow_16(
+ &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+ &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &kZero);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ u[0] = k_packs_epi64(v[0], v[1]);
+ u[1] = k_packs_epi64(v[2], v[3]);
+ u[2] = k_packs_epi64(v[4], v[5]);
+ u[3] = k_packs_epi64(v[6], v[7]);
+ u[4] = k_packs_epi64(v[8], v[9]);
+ u[5] = k_packs_epi64(v[10], v[11]);
+ u[6] = k_packs_epi64(v[12], v[13]);
+ u[7] = k_packs_epi64(v[14], v[15]);
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+
+ sign[0] = _mm_cmplt_epi32(u[0], kZero);
+ sign[1] = _mm_cmplt_epi32(u[1], kZero);
+ sign[2] = _mm_cmplt_epi32(u[2], kZero);
+ sign[3] = _mm_cmplt_epi32(u[3], kZero);
+ sign[4] = _mm_cmplt_epi32(u[4], kZero);
+ sign[5] = _mm_cmplt_epi32(u[5], kZero);
+ sign[6] = _mm_cmplt_epi32(u[6], kZero);
+ sign[7] = _mm_cmplt_epi32(u[7], kZero);
+
+ u[0] = _mm_sub_epi32(u[0], sign[0]);
+ u[1] = _mm_sub_epi32(u[1], sign[1]);
+ u[2] = _mm_sub_epi32(u[2], sign[2]);
+ u[3] = _mm_sub_epi32(u[3], sign[3]);
+ u[4] = _mm_sub_epi32(u[4], sign[4]);
+ u[5] = _mm_sub_epi32(u[5], sign[5]);
+ u[6] = _mm_sub_epi32(u[6], sign[6]);
+ u[7] = _mm_sub_epi32(u[7], sign[7]);
+
+ u[0] = _mm_add_epi32(u[0], K32One);
+ u[1] = _mm_add_epi32(u[1], K32One);
+ u[2] = _mm_add_epi32(u[2], K32One);
+ u[3] = _mm_add_epi32(u[3], K32One);
+ u[4] = _mm_add_epi32(u[4], K32One);
+ u[5] = _mm_add_epi32(u[5], K32One);
+ u[6] = _mm_add_epi32(u[6], K32One);
+ u[7] = _mm_add_epi32(u[7], K32One);
+
+ u[0] = _mm_srai_epi32(u[0], 2);
+ u[1] = _mm_srai_epi32(u[1], 2);
+ u[2] = _mm_srai_epi32(u[2], 2);
+ u[3] = _mm_srai_epi32(u[3], 2);
+ u[4] = _mm_srai_epi32(u[4], 2);
+ u[5] = _mm_srai_epi32(u[5], 2);
+ u[6] = _mm_srai_epi32(u[6], 2);
+ u[7] = _mm_srai_epi32(u[7], 2);
+
+ out[4] = _mm_packs_epi32(u[0], u[1]);
+ out[20] = _mm_packs_epi32(u[2], u[3]);
+ out[12] = _mm_packs_epi32(u[4], u[5]);
+ out[28] = _mm_packs_epi32(u[6], u[7]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x4(&out[4], &out[20], &out[12], &out[28]);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ lstep3[16] = _mm_add_epi32(lstep2[18], lstep1[16]);
+ lstep3[17] = _mm_add_epi32(lstep2[19], lstep1[17]);
+ lstep3[18] = _mm_sub_epi32(lstep1[16], lstep2[18]);
+ lstep3[19] = _mm_sub_epi32(lstep1[17], lstep2[19]);
+ lstep3[20] = _mm_sub_epi32(lstep1[22], lstep2[20]);
+ lstep3[21] = _mm_sub_epi32(lstep1[23], lstep2[21]);
+ lstep3[22] = _mm_add_epi32(lstep2[20], lstep1[22]);
+ lstep3[23] = _mm_add_epi32(lstep2[21], lstep1[23]);
+ lstep3[24] = _mm_add_epi32(lstep2[26], lstep1[24]);
+ lstep3[25] = _mm_add_epi32(lstep2[27], lstep1[25]);
+ lstep3[26] = _mm_sub_epi32(lstep1[24], lstep2[26]);
+ lstep3[27] = _mm_sub_epi32(lstep1[25], lstep2[27]);
+ lstep3[28] = _mm_sub_epi32(lstep1[30], lstep2[28]);
+ lstep3[29] = _mm_sub_epi32(lstep1[31], lstep2[29]);
+ lstep3[30] = _mm_add_epi32(lstep2[28], lstep1[30]);
+ lstep3[31] = _mm_add_epi32(lstep2[29], lstep1[31]);
+ }
+ {
+ const __m128i k32_m04_p28 = pair_set_epi32(-cospi_4_64, cospi_28_64);
+ const __m128i k32_m28_m04 = pair_set_epi32(-cospi_28_64, -cospi_4_64);
+ const __m128i k32_m20_p12 = pair_set_epi32(-cospi_20_64, cospi_12_64);
+ const __m128i k32_m12_m20 =
+ pair_set_epi32(-cospi_12_64, -cospi_20_64);
+ const __m128i k32_p12_p20 = pair_set_epi32(cospi_12_64, cospi_20_64);
+ const __m128i k32_p28_p04 = pair_set_epi32(cospi_28_64, cospi_4_64);
+
+ u[0] = _mm_unpacklo_epi32(lstep2[34], lstep2[60]);
+ u[1] = _mm_unpackhi_epi32(lstep2[34], lstep2[60]);
+ u[2] = _mm_unpacklo_epi32(lstep2[35], lstep2[61]);
+ u[3] = _mm_unpackhi_epi32(lstep2[35], lstep2[61]);
+ u[4] = _mm_unpacklo_epi32(lstep2[36], lstep2[58]);
+ u[5] = _mm_unpackhi_epi32(lstep2[36], lstep2[58]);
+ u[6] = _mm_unpacklo_epi32(lstep2[37], lstep2[59]);
+ u[7] = _mm_unpackhi_epi32(lstep2[37], lstep2[59]);
+ u[8] = _mm_unpacklo_epi32(lstep2[42], lstep2[52]);
+ u[9] = _mm_unpackhi_epi32(lstep2[42], lstep2[52]);
+ u[10] = _mm_unpacklo_epi32(lstep2[43], lstep2[53]);
+ u[11] = _mm_unpackhi_epi32(lstep2[43], lstep2[53]);
+ u[12] = _mm_unpacklo_epi32(lstep2[44], lstep2[50]);
+ u[13] = _mm_unpackhi_epi32(lstep2[44], lstep2[50]);
+ u[14] = _mm_unpacklo_epi32(lstep2[45], lstep2[51]);
+ u[15] = _mm_unpackhi_epi32(lstep2[45], lstep2[51]);
+
+ v[0] = k_madd_epi32(u[0], k32_m04_p28);
+ v[1] = k_madd_epi32(u[1], k32_m04_p28);
+ v[2] = k_madd_epi32(u[2], k32_m04_p28);
+ v[3] = k_madd_epi32(u[3], k32_m04_p28);
+ v[4] = k_madd_epi32(u[4], k32_m28_m04);
+ v[5] = k_madd_epi32(u[5], k32_m28_m04);
+ v[6] = k_madd_epi32(u[6], k32_m28_m04);
+ v[7] = k_madd_epi32(u[7], k32_m28_m04);
+ v[8] = k_madd_epi32(u[8], k32_m20_p12);
+ v[9] = k_madd_epi32(u[9], k32_m20_p12);
+ v[10] = k_madd_epi32(u[10], k32_m20_p12);
+ v[11] = k_madd_epi32(u[11], k32_m20_p12);
+ v[12] = k_madd_epi32(u[12], k32_m12_m20);
+ v[13] = k_madd_epi32(u[13], k32_m12_m20);
+ v[14] = k_madd_epi32(u[14], k32_m12_m20);
+ v[15] = k_madd_epi32(u[15], k32_m12_m20);
+ v[16] = k_madd_epi32(u[12], k32_m20_p12);
+ v[17] = k_madd_epi32(u[13], k32_m20_p12);
+ v[18] = k_madd_epi32(u[14], k32_m20_p12);
+ v[19] = k_madd_epi32(u[15], k32_m20_p12);
+ v[20] = k_madd_epi32(u[8], k32_p12_p20);
+ v[21] = k_madd_epi32(u[9], k32_p12_p20);
+ v[22] = k_madd_epi32(u[10], k32_p12_p20);
+ v[23] = k_madd_epi32(u[11], k32_p12_p20);
+ v[24] = k_madd_epi32(u[4], k32_m04_p28);
+ v[25] = k_madd_epi32(u[5], k32_m04_p28);
+ v[26] = k_madd_epi32(u[6], k32_m04_p28);
+ v[27] = k_madd_epi32(u[7], k32_m04_p28);
+ v[28] = k_madd_epi32(u[0], k32_p28_p04);
+ v[29] = k_madd_epi32(u[1], k32_p28_p04);
+ v[30] = k_madd_epi32(u[2], k32_p28_p04);
+ v[31] = k_madd_epi32(u[3], k32_p28_p04);
+
+#if DCT_HIGH_BIT_DEPTH
+ overflow = k_check_epi32_overflow_32(
+ &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+ &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
+ &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
+ &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ u[0] = k_packs_epi64(v[0], v[1]);
+ u[1] = k_packs_epi64(v[2], v[3]);
+ u[2] = k_packs_epi64(v[4], v[5]);
+ u[3] = k_packs_epi64(v[6], v[7]);
+ u[4] = k_packs_epi64(v[8], v[9]);
+ u[5] = k_packs_epi64(v[10], v[11]);
+ u[6] = k_packs_epi64(v[12], v[13]);
+ u[7] = k_packs_epi64(v[14], v[15]);
+ u[8] = k_packs_epi64(v[16], v[17]);
+ u[9] = k_packs_epi64(v[18], v[19]);
+ u[10] = k_packs_epi64(v[20], v[21]);
+ u[11] = k_packs_epi64(v[22], v[23]);
+ u[12] = k_packs_epi64(v[24], v[25]);
+ u[13] = k_packs_epi64(v[26], v[27]);
+ u[14] = k_packs_epi64(v[28], v[29]);
+ u[15] = k_packs_epi64(v[30], v[31]);
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+ v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+ v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+ v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+ v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+ v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+ v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+ v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+ v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+ lstep3[34] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ lstep3[35] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ lstep3[36] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ lstep3[37] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ lstep3[42] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+ lstep3[43] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+ lstep3[44] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+ lstep3[45] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+ lstep3[50] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+ lstep3[51] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+ lstep3[52] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+ lstep3[53] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+ lstep3[58] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+ lstep3[59] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+ lstep3[60] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+ lstep3[61] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+ }
+ // stage 7
+ {
+ const __m128i k32_p30_p02 = pair_set_epi32(cospi_30_64, cospi_2_64);
+ const __m128i k32_p14_p18 = pair_set_epi32(cospi_14_64, cospi_18_64);
+ const __m128i k32_p22_p10 = pair_set_epi32(cospi_22_64, cospi_10_64);
+ const __m128i k32_p06_p26 = pair_set_epi32(cospi_6_64, cospi_26_64);
+ const __m128i k32_m26_p06 = pair_set_epi32(-cospi_26_64, cospi_6_64);
+ const __m128i k32_m10_p22 = pair_set_epi32(-cospi_10_64, cospi_22_64);
+ const __m128i k32_m18_p14 = pair_set_epi32(-cospi_18_64, cospi_14_64);
+ const __m128i k32_m02_p30 = pair_set_epi32(-cospi_2_64, cospi_30_64);
+
+ u[0] = _mm_unpacklo_epi32(lstep3[16], lstep3[30]);
+ u[1] = _mm_unpackhi_epi32(lstep3[16], lstep3[30]);
+ u[2] = _mm_unpacklo_epi32(lstep3[17], lstep3[31]);
+ u[3] = _mm_unpackhi_epi32(lstep3[17], lstep3[31]);
+ u[4] = _mm_unpacklo_epi32(lstep3[18], lstep3[28]);
+ u[5] = _mm_unpackhi_epi32(lstep3[18], lstep3[28]);
+ u[6] = _mm_unpacklo_epi32(lstep3[19], lstep3[29]);
+ u[7] = _mm_unpackhi_epi32(lstep3[19], lstep3[29]);
+ u[8] = _mm_unpacklo_epi32(lstep3[20], lstep3[26]);
+ u[9] = _mm_unpackhi_epi32(lstep3[20], lstep3[26]);
+ u[10] = _mm_unpacklo_epi32(lstep3[21], lstep3[27]);
+ u[11] = _mm_unpackhi_epi32(lstep3[21], lstep3[27]);
+ u[12] = _mm_unpacklo_epi32(lstep3[22], lstep3[24]);
+ u[13] = _mm_unpackhi_epi32(lstep3[22], lstep3[24]);
+ u[14] = _mm_unpacklo_epi32(lstep3[23], lstep3[25]);
+ u[15] = _mm_unpackhi_epi32(lstep3[23], lstep3[25]);
+
+ v[0] = k_madd_epi32(u[0], k32_p30_p02);
+ v[1] = k_madd_epi32(u[1], k32_p30_p02);
+ v[2] = k_madd_epi32(u[2], k32_p30_p02);
+ v[3] = k_madd_epi32(u[3], k32_p30_p02);
+ v[4] = k_madd_epi32(u[4], k32_p14_p18);
+ v[5] = k_madd_epi32(u[5], k32_p14_p18);
+ v[6] = k_madd_epi32(u[6], k32_p14_p18);
+ v[7] = k_madd_epi32(u[7], k32_p14_p18);
+ v[8] = k_madd_epi32(u[8], k32_p22_p10);
+ v[9] = k_madd_epi32(u[9], k32_p22_p10);
+ v[10] = k_madd_epi32(u[10], k32_p22_p10);
+ v[11] = k_madd_epi32(u[11], k32_p22_p10);
+ v[12] = k_madd_epi32(u[12], k32_p06_p26);
+ v[13] = k_madd_epi32(u[13], k32_p06_p26);
+ v[14] = k_madd_epi32(u[14], k32_p06_p26);
+ v[15] = k_madd_epi32(u[15], k32_p06_p26);
+ v[16] = k_madd_epi32(u[12], k32_m26_p06);
+ v[17] = k_madd_epi32(u[13], k32_m26_p06);
+ v[18] = k_madd_epi32(u[14], k32_m26_p06);
+ v[19] = k_madd_epi32(u[15], k32_m26_p06);
+ v[20] = k_madd_epi32(u[8], k32_m10_p22);
+ v[21] = k_madd_epi32(u[9], k32_m10_p22);
+ v[22] = k_madd_epi32(u[10], k32_m10_p22);
+ v[23] = k_madd_epi32(u[11], k32_m10_p22);
+ v[24] = k_madd_epi32(u[4], k32_m18_p14);
+ v[25] = k_madd_epi32(u[5], k32_m18_p14);
+ v[26] = k_madd_epi32(u[6], k32_m18_p14);
+ v[27] = k_madd_epi32(u[7], k32_m18_p14);
+ v[28] = k_madd_epi32(u[0], k32_m02_p30);
+ v[29] = k_madd_epi32(u[1], k32_m02_p30);
+ v[30] = k_madd_epi32(u[2], k32_m02_p30);
+ v[31] = k_madd_epi32(u[3], k32_m02_p30);
+
+#if DCT_HIGH_BIT_DEPTH
+ overflow = k_check_epi32_overflow_32(
+ &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+ &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
+ &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
+ &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ u[0] = k_packs_epi64(v[0], v[1]);
+ u[1] = k_packs_epi64(v[2], v[3]);
+ u[2] = k_packs_epi64(v[4], v[5]);
+ u[3] = k_packs_epi64(v[6], v[7]);
+ u[4] = k_packs_epi64(v[8], v[9]);
+ u[5] = k_packs_epi64(v[10], v[11]);
+ u[6] = k_packs_epi64(v[12], v[13]);
+ u[7] = k_packs_epi64(v[14], v[15]);
+ u[8] = k_packs_epi64(v[16], v[17]);
+ u[9] = k_packs_epi64(v[18], v[19]);
+ u[10] = k_packs_epi64(v[20], v[21]);
+ u[11] = k_packs_epi64(v[22], v[23]);
+ u[12] = k_packs_epi64(v[24], v[25]);
+ u[13] = k_packs_epi64(v[26], v[27]);
+ u[14] = k_packs_epi64(v[28], v[29]);
+ u[15] = k_packs_epi64(v[30], v[31]);
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+ v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+ v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+ v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+ v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+ v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+ v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+ v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+ v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+ u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+ u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+ u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+ u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+ u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+ u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+ u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+ u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+
+ v[0] = _mm_cmplt_epi32(u[0], kZero);
+ v[1] = _mm_cmplt_epi32(u[1], kZero);
+ v[2] = _mm_cmplt_epi32(u[2], kZero);
+ v[3] = _mm_cmplt_epi32(u[3], kZero);
+ v[4] = _mm_cmplt_epi32(u[4], kZero);
+ v[5] = _mm_cmplt_epi32(u[5], kZero);
+ v[6] = _mm_cmplt_epi32(u[6], kZero);
+ v[7] = _mm_cmplt_epi32(u[7], kZero);
+ v[8] = _mm_cmplt_epi32(u[8], kZero);
+ v[9] = _mm_cmplt_epi32(u[9], kZero);
+ v[10] = _mm_cmplt_epi32(u[10], kZero);
+ v[11] = _mm_cmplt_epi32(u[11], kZero);
+ v[12] = _mm_cmplt_epi32(u[12], kZero);
+ v[13] = _mm_cmplt_epi32(u[13], kZero);
+ v[14] = _mm_cmplt_epi32(u[14], kZero);
+ v[15] = _mm_cmplt_epi32(u[15], kZero);
+
+ u[0] = _mm_sub_epi32(u[0], v[0]);
+ u[1] = _mm_sub_epi32(u[1], v[1]);
+ u[2] = _mm_sub_epi32(u[2], v[2]);
+ u[3] = _mm_sub_epi32(u[3], v[3]);
+ u[4] = _mm_sub_epi32(u[4], v[4]);
+ u[5] = _mm_sub_epi32(u[5], v[5]);
+ u[6] = _mm_sub_epi32(u[6], v[6]);
+ u[7] = _mm_sub_epi32(u[7], v[7]);
+ u[8] = _mm_sub_epi32(u[8], v[8]);
+ u[9] = _mm_sub_epi32(u[9], v[9]);
+ u[10] = _mm_sub_epi32(u[10], v[10]);
+ u[11] = _mm_sub_epi32(u[11], v[11]);
+ u[12] = _mm_sub_epi32(u[12], v[12]);
+ u[13] = _mm_sub_epi32(u[13], v[13]);
+ u[14] = _mm_sub_epi32(u[14], v[14]);
+ u[15] = _mm_sub_epi32(u[15], v[15]);
+
+ v[0] = _mm_add_epi32(u[0], K32One);
+ v[1] = _mm_add_epi32(u[1], K32One);
+ v[2] = _mm_add_epi32(u[2], K32One);
+ v[3] = _mm_add_epi32(u[3], K32One);
+ v[4] = _mm_add_epi32(u[4], K32One);
+ v[5] = _mm_add_epi32(u[5], K32One);
+ v[6] = _mm_add_epi32(u[6], K32One);
+ v[7] = _mm_add_epi32(u[7], K32One);
+ v[8] = _mm_add_epi32(u[8], K32One);
+ v[9] = _mm_add_epi32(u[9], K32One);
+ v[10] = _mm_add_epi32(u[10], K32One);
+ v[11] = _mm_add_epi32(u[11], K32One);
+ v[12] = _mm_add_epi32(u[12], K32One);
+ v[13] = _mm_add_epi32(u[13], K32One);
+ v[14] = _mm_add_epi32(u[14], K32One);
+ v[15] = _mm_add_epi32(u[15], K32One);
+
+ u[0] = _mm_srai_epi32(v[0], 2);
+ u[1] = _mm_srai_epi32(v[1], 2);
+ u[2] = _mm_srai_epi32(v[2], 2);
+ u[3] = _mm_srai_epi32(v[3], 2);
+ u[4] = _mm_srai_epi32(v[4], 2);
+ u[5] = _mm_srai_epi32(v[5], 2);
+ u[6] = _mm_srai_epi32(v[6], 2);
+ u[7] = _mm_srai_epi32(v[7], 2);
+ u[8] = _mm_srai_epi32(v[8], 2);
+ u[9] = _mm_srai_epi32(v[9], 2);
+ u[10] = _mm_srai_epi32(v[10], 2);
+ u[11] = _mm_srai_epi32(v[11], 2);
+ u[12] = _mm_srai_epi32(v[12], 2);
+ u[13] = _mm_srai_epi32(v[13], 2);
+ u[14] = _mm_srai_epi32(v[14], 2);
+ u[15] = _mm_srai_epi32(v[15], 2);
+
+ out[2] = _mm_packs_epi32(u[0], u[1]);
+ out[18] = _mm_packs_epi32(u[2], u[3]);
+ out[10] = _mm_packs_epi32(u[4], u[5]);
+ out[26] = _mm_packs_epi32(u[6], u[7]);
+ out[6] = _mm_packs_epi32(u[8], u[9]);
+ out[22] = _mm_packs_epi32(u[10], u[11]);
+ out[14] = _mm_packs_epi32(u[12], u[13]);
+ out[30] = _mm_packs_epi32(u[14], u[15]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x8(&out[2], &out[18], &out[10], &out[26],
+ &out[6], &out[22], &out[14], &out[30]);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ lstep1[32] = _mm_add_epi32(lstep3[34], lstep2[32]);
+ lstep1[33] = _mm_add_epi32(lstep3[35], lstep2[33]);
+ lstep1[34] = _mm_sub_epi32(lstep2[32], lstep3[34]);
+ lstep1[35] = _mm_sub_epi32(lstep2[33], lstep3[35]);
+ lstep1[36] = _mm_sub_epi32(lstep2[38], lstep3[36]);
+ lstep1[37] = _mm_sub_epi32(lstep2[39], lstep3[37]);
+ lstep1[38] = _mm_add_epi32(lstep3[36], lstep2[38]);
+ lstep1[39] = _mm_add_epi32(lstep3[37], lstep2[39]);
+ lstep1[40] = _mm_add_epi32(lstep3[42], lstep2[40]);
+ lstep1[41] = _mm_add_epi32(lstep3[43], lstep2[41]);
+ lstep1[42] = _mm_sub_epi32(lstep2[40], lstep3[42]);
+ lstep1[43] = _mm_sub_epi32(lstep2[41], lstep3[43]);
+ lstep1[44] = _mm_sub_epi32(lstep2[46], lstep3[44]);
+ lstep1[45] = _mm_sub_epi32(lstep2[47], lstep3[45]);
+ lstep1[46] = _mm_add_epi32(lstep3[44], lstep2[46]);
+ lstep1[47] = _mm_add_epi32(lstep3[45], lstep2[47]);
+ lstep1[48] = _mm_add_epi32(lstep3[50], lstep2[48]);
+ lstep1[49] = _mm_add_epi32(lstep3[51], lstep2[49]);
+ lstep1[50] = _mm_sub_epi32(lstep2[48], lstep3[50]);
+ lstep1[51] = _mm_sub_epi32(lstep2[49], lstep3[51]);
+ lstep1[52] = _mm_sub_epi32(lstep2[54], lstep3[52]);
+ lstep1[53] = _mm_sub_epi32(lstep2[55], lstep3[53]);
+ lstep1[54] = _mm_add_epi32(lstep3[52], lstep2[54]);
+ lstep1[55] = _mm_add_epi32(lstep3[53], lstep2[55]);
+ lstep1[56] = _mm_add_epi32(lstep3[58], lstep2[56]);
+ lstep1[57] = _mm_add_epi32(lstep3[59], lstep2[57]);
+ lstep1[58] = _mm_sub_epi32(lstep2[56], lstep3[58]);
+ lstep1[59] = _mm_sub_epi32(lstep2[57], lstep3[59]);
+ lstep1[60] = _mm_sub_epi32(lstep2[62], lstep3[60]);
+ lstep1[61] = _mm_sub_epi32(lstep2[63], lstep3[61]);
+ lstep1[62] = _mm_add_epi32(lstep3[60], lstep2[62]);
+ lstep1[63] = _mm_add_epi32(lstep3[61], lstep2[63]);
+ }
+ // stage 8
+ {
+ const __m128i k32_p31_p01 = pair_set_epi32(cospi_31_64, cospi_1_64);
+ const __m128i k32_p15_p17 = pair_set_epi32(cospi_15_64, cospi_17_64);
+ const __m128i k32_p23_p09 = pair_set_epi32(cospi_23_64, cospi_9_64);
+ const __m128i k32_p07_p25 = pair_set_epi32(cospi_7_64, cospi_25_64);
+ const __m128i k32_m25_p07 = pair_set_epi32(-cospi_25_64, cospi_7_64);
+ const __m128i k32_m09_p23 = pair_set_epi32(-cospi_9_64, cospi_23_64);
+ const __m128i k32_m17_p15 = pair_set_epi32(-cospi_17_64, cospi_15_64);
+ const __m128i k32_m01_p31 = pair_set_epi32(-cospi_1_64, cospi_31_64);
+
+ u[0] = _mm_unpacklo_epi32(lstep1[32], lstep1[62]);
+ u[1] = _mm_unpackhi_epi32(lstep1[32], lstep1[62]);
+ u[2] = _mm_unpacklo_epi32(lstep1[33], lstep1[63]);
+ u[3] = _mm_unpackhi_epi32(lstep1[33], lstep1[63]);
+ u[4] = _mm_unpacklo_epi32(lstep1[34], lstep1[60]);
+ u[5] = _mm_unpackhi_epi32(lstep1[34], lstep1[60]);
+ u[6] = _mm_unpacklo_epi32(lstep1[35], lstep1[61]);
+ u[7] = _mm_unpackhi_epi32(lstep1[35], lstep1[61]);
+ u[8] = _mm_unpacklo_epi32(lstep1[36], lstep1[58]);
+ u[9] = _mm_unpackhi_epi32(lstep1[36], lstep1[58]);
+ u[10] = _mm_unpacklo_epi32(lstep1[37], lstep1[59]);
+ u[11] = _mm_unpackhi_epi32(lstep1[37], lstep1[59]);
+ u[12] = _mm_unpacklo_epi32(lstep1[38], lstep1[56]);
+ u[13] = _mm_unpackhi_epi32(lstep1[38], lstep1[56]);
+ u[14] = _mm_unpacklo_epi32(lstep1[39], lstep1[57]);
+ u[15] = _mm_unpackhi_epi32(lstep1[39], lstep1[57]);
+
+ v[0] = k_madd_epi32(u[0], k32_p31_p01);
+ v[1] = k_madd_epi32(u[1], k32_p31_p01);
+ v[2] = k_madd_epi32(u[2], k32_p31_p01);
+ v[3] = k_madd_epi32(u[3], k32_p31_p01);
+ v[4] = k_madd_epi32(u[4], k32_p15_p17);
+ v[5] = k_madd_epi32(u[5], k32_p15_p17);
+ v[6] = k_madd_epi32(u[6], k32_p15_p17);
+ v[7] = k_madd_epi32(u[7], k32_p15_p17);
+ v[8] = k_madd_epi32(u[8], k32_p23_p09);
+ v[9] = k_madd_epi32(u[9], k32_p23_p09);
+ v[10] = k_madd_epi32(u[10], k32_p23_p09);
+ v[11] = k_madd_epi32(u[11], k32_p23_p09);
+ v[12] = k_madd_epi32(u[12], k32_p07_p25);
+ v[13] = k_madd_epi32(u[13], k32_p07_p25);
+ v[14] = k_madd_epi32(u[14], k32_p07_p25);
+ v[15] = k_madd_epi32(u[15], k32_p07_p25);
+ v[16] = k_madd_epi32(u[12], k32_m25_p07);
+ v[17] = k_madd_epi32(u[13], k32_m25_p07);
+ v[18] = k_madd_epi32(u[14], k32_m25_p07);
+ v[19] = k_madd_epi32(u[15], k32_m25_p07);
+ v[20] = k_madd_epi32(u[8], k32_m09_p23);
+ v[21] = k_madd_epi32(u[9], k32_m09_p23);
+ v[22] = k_madd_epi32(u[10], k32_m09_p23);
+ v[23] = k_madd_epi32(u[11], k32_m09_p23);
+ v[24] = k_madd_epi32(u[4], k32_m17_p15);
+ v[25] = k_madd_epi32(u[5], k32_m17_p15);
+ v[26] = k_madd_epi32(u[6], k32_m17_p15);
+ v[27] = k_madd_epi32(u[7], k32_m17_p15);
+ v[28] = k_madd_epi32(u[0], k32_m01_p31);
+ v[29] = k_madd_epi32(u[1], k32_m01_p31);
+ v[30] = k_madd_epi32(u[2], k32_m01_p31);
+ v[31] = k_madd_epi32(u[3], k32_m01_p31);
+
+#if DCT_HIGH_BIT_DEPTH
+ overflow = k_check_epi32_overflow_32(
+ &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+ &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
+ &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
+ &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ u[0] = k_packs_epi64(v[0], v[1]);
+ u[1] = k_packs_epi64(v[2], v[3]);
+ u[2] = k_packs_epi64(v[4], v[5]);
+ u[3] = k_packs_epi64(v[6], v[7]);
+ u[4] = k_packs_epi64(v[8], v[9]);
+ u[5] = k_packs_epi64(v[10], v[11]);
+ u[6] = k_packs_epi64(v[12], v[13]);
+ u[7] = k_packs_epi64(v[14], v[15]);
+ u[8] = k_packs_epi64(v[16], v[17]);
+ u[9] = k_packs_epi64(v[18], v[19]);
+ u[10] = k_packs_epi64(v[20], v[21]);
+ u[11] = k_packs_epi64(v[22], v[23]);
+ u[12] = k_packs_epi64(v[24], v[25]);
+ u[13] = k_packs_epi64(v[26], v[27]);
+ u[14] = k_packs_epi64(v[28], v[29]);
+ u[15] = k_packs_epi64(v[30], v[31]);
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+ v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+ v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+ v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+ v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+ v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+ v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+ v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+ v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+ u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+ u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+ u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+ u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+ u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+ u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+ u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+ u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+
+ v[0] = _mm_cmplt_epi32(u[0], kZero);
+ v[1] = _mm_cmplt_epi32(u[1], kZero);
+ v[2] = _mm_cmplt_epi32(u[2], kZero);
+ v[3] = _mm_cmplt_epi32(u[3], kZero);
+ v[4] = _mm_cmplt_epi32(u[4], kZero);
+ v[5] = _mm_cmplt_epi32(u[5], kZero);
+ v[6] = _mm_cmplt_epi32(u[6], kZero);
+ v[7] = _mm_cmplt_epi32(u[7], kZero);
+ v[8] = _mm_cmplt_epi32(u[8], kZero);
+ v[9] = _mm_cmplt_epi32(u[9], kZero);
+ v[10] = _mm_cmplt_epi32(u[10], kZero);
+ v[11] = _mm_cmplt_epi32(u[11], kZero);
+ v[12] = _mm_cmplt_epi32(u[12], kZero);
+ v[13] = _mm_cmplt_epi32(u[13], kZero);
+ v[14] = _mm_cmplt_epi32(u[14], kZero);
+ v[15] = _mm_cmplt_epi32(u[15], kZero);
+
+ u[0] = _mm_sub_epi32(u[0], v[0]);
+ u[1] = _mm_sub_epi32(u[1], v[1]);
+ u[2] = _mm_sub_epi32(u[2], v[2]);
+ u[3] = _mm_sub_epi32(u[3], v[3]);
+ u[4] = _mm_sub_epi32(u[4], v[4]);
+ u[5] = _mm_sub_epi32(u[5], v[5]);
+ u[6] = _mm_sub_epi32(u[6], v[6]);
+ u[7] = _mm_sub_epi32(u[7], v[7]);
+ u[8] = _mm_sub_epi32(u[8], v[8]);
+ u[9] = _mm_sub_epi32(u[9], v[9]);
+ u[10] = _mm_sub_epi32(u[10], v[10]);
+ u[11] = _mm_sub_epi32(u[11], v[11]);
+ u[12] = _mm_sub_epi32(u[12], v[12]);
+ u[13] = _mm_sub_epi32(u[13], v[13]);
+ u[14] = _mm_sub_epi32(u[14], v[14]);
+ u[15] = _mm_sub_epi32(u[15], v[15]);
+
+ v[0] = _mm_add_epi32(u[0], K32One);
+ v[1] = _mm_add_epi32(u[1], K32One);
+ v[2] = _mm_add_epi32(u[2], K32One);
+ v[3] = _mm_add_epi32(u[3], K32One);
+ v[4] = _mm_add_epi32(u[4], K32One);
+ v[5] = _mm_add_epi32(u[5], K32One);
+ v[6] = _mm_add_epi32(u[6], K32One);
+ v[7] = _mm_add_epi32(u[7], K32One);
+ v[8] = _mm_add_epi32(u[8], K32One);
+ v[9] = _mm_add_epi32(u[9], K32One);
+ v[10] = _mm_add_epi32(u[10], K32One);
+ v[11] = _mm_add_epi32(u[11], K32One);
+ v[12] = _mm_add_epi32(u[12], K32One);
+ v[13] = _mm_add_epi32(u[13], K32One);
+ v[14] = _mm_add_epi32(u[14], K32One);
+ v[15] = _mm_add_epi32(u[15], K32One);
+
+ u[0] = _mm_srai_epi32(v[0], 2);
+ u[1] = _mm_srai_epi32(v[1], 2);
+ u[2] = _mm_srai_epi32(v[2], 2);
+ u[3] = _mm_srai_epi32(v[3], 2);
+ u[4] = _mm_srai_epi32(v[4], 2);
+ u[5] = _mm_srai_epi32(v[5], 2);
+ u[6] = _mm_srai_epi32(v[6], 2);
+ u[7] = _mm_srai_epi32(v[7], 2);
+ u[8] = _mm_srai_epi32(v[8], 2);
+ u[9] = _mm_srai_epi32(v[9], 2);
+ u[10] = _mm_srai_epi32(v[10], 2);
+ u[11] = _mm_srai_epi32(v[11], 2);
+ u[12] = _mm_srai_epi32(v[12], 2);
+ u[13] = _mm_srai_epi32(v[13], 2);
+ u[14] = _mm_srai_epi32(v[14], 2);
+ u[15] = _mm_srai_epi32(v[15], 2);
+
+ out[1] = _mm_packs_epi32(u[0], u[1]);
+ out[17] = _mm_packs_epi32(u[2], u[3]);
+ out[9] = _mm_packs_epi32(u[4], u[5]);
+ out[25] = _mm_packs_epi32(u[6], u[7]);
+ out[7] = _mm_packs_epi32(u[8], u[9]);
+ out[23] = _mm_packs_epi32(u[10], u[11]);
+ out[15] = _mm_packs_epi32(u[12], u[13]);
+ out[31] = _mm_packs_epi32(u[14], u[15]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x8(&out[1], &out[17], &out[9], &out[25],
+ &out[7], &out[23], &out[15], &out[31]);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ const __m128i k32_p27_p05 = pair_set_epi32(cospi_27_64, cospi_5_64);
+ const __m128i k32_p11_p21 = pair_set_epi32(cospi_11_64, cospi_21_64);
+ const __m128i k32_p19_p13 = pair_set_epi32(cospi_19_64, cospi_13_64);
+ const __m128i k32_p03_p29 = pair_set_epi32(cospi_3_64, cospi_29_64);
+ const __m128i k32_m29_p03 = pair_set_epi32(-cospi_29_64, cospi_3_64);
+ const __m128i k32_m13_p19 = pair_set_epi32(-cospi_13_64, cospi_19_64);
+ const __m128i k32_m21_p11 = pair_set_epi32(-cospi_21_64, cospi_11_64);
+ const __m128i k32_m05_p27 = pair_set_epi32(-cospi_5_64, cospi_27_64);
+
+ u[0] = _mm_unpacklo_epi32(lstep1[40], lstep1[54]);
+ u[1] = _mm_unpackhi_epi32(lstep1[40], lstep1[54]);
+ u[2] = _mm_unpacklo_epi32(lstep1[41], lstep1[55]);
+ u[3] = _mm_unpackhi_epi32(lstep1[41], lstep1[55]);
+ u[4] = _mm_unpacklo_epi32(lstep1[42], lstep1[52]);
+ u[5] = _mm_unpackhi_epi32(lstep1[42], lstep1[52]);
+ u[6] = _mm_unpacklo_epi32(lstep1[43], lstep1[53]);
+ u[7] = _mm_unpackhi_epi32(lstep1[43], lstep1[53]);
+ u[8] = _mm_unpacklo_epi32(lstep1[44], lstep1[50]);
+ u[9] = _mm_unpackhi_epi32(lstep1[44], lstep1[50]);
+ u[10] = _mm_unpacklo_epi32(lstep1[45], lstep1[51]);
+ u[11] = _mm_unpackhi_epi32(lstep1[45], lstep1[51]);
+ u[12] = _mm_unpacklo_epi32(lstep1[46], lstep1[48]);
+ u[13] = _mm_unpackhi_epi32(lstep1[46], lstep1[48]);
+ u[14] = _mm_unpacklo_epi32(lstep1[47], lstep1[49]);
+ u[15] = _mm_unpackhi_epi32(lstep1[47], lstep1[49]);
+
+ v[0] = k_madd_epi32(u[0], k32_p27_p05);
+ v[1] = k_madd_epi32(u[1], k32_p27_p05);
+ v[2] = k_madd_epi32(u[2], k32_p27_p05);
+ v[3] = k_madd_epi32(u[3], k32_p27_p05);
+ v[4] = k_madd_epi32(u[4], k32_p11_p21);
+ v[5] = k_madd_epi32(u[5], k32_p11_p21);
+ v[6] = k_madd_epi32(u[6], k32_p11_p21);
+ v[7] = k_madd_epi32(u[7], k32_p11_p21);
+ v[8] = k_madd_epi32(u[8], k32_p19_p13);
+ v[9] = k_madd_epi32(u[9], k32_p19_p13);
+ v[10] = k_madd_epi32(u[10], k32_p19_p13);
+ v[11] = k_madd_epi32(u[11], k32_p19_p13);
+ v[12] = k_madd_epi32(u[12], k32_p03_p29);
+ v[13] = k_madd_epi32(u[13], k32_p03_p29);
+ v[14] = k_madd_epi32(u[14], k32_p03_p29);
+ v[15] = k_madd_epi32(u[15], k32_p03_p29);
+ v[16] = k_madd_epi32(u[12], k32_m29_p03);
+ v[17] = k_madd_epi32(u[13], k32_m29_p03);
+ v[18] = k_madd_epi32(u[14], k32_m29_p03);
+ v[19] = k_madd_epi32(u[15], k32_m29_p03);
+ v[20] = k_madd_epi32(u[8], k32_m13_p19);
+ v[21] = k_madd_epi32(u[9], k32_m13_p19);
+ v[22] = k_madd_epi32(u[10], k32_m13_p19);
+ v[23] = k_madd_epi32(u[11], k32_m13_p19);
+ v[24] = k_madd_epi32(u[4], k32_m21_p11);
+ v[25] = k_madd_epi32(u[5], k32_m21_p11);
+ v[26] = k_madd_epi32(u[6], k32_m21_p11);
+ v[27] = k_madd_epi32(u[7], k32_m21_p11);
+ v[28] = k_madd_epi32(u[0], k32_m05_p27);
+ v[29] = k_madd_epi32(u[1], k32_m05_p27);
+ v[30] = k_madd_epi32(u[2], k32_m05_p27);
+ v[31] = k_madd_epi32(u[3], k32_m05_p27);
+
+#if DCT_HIGH_BIT_DEPTH
+ overflow = k_check_epi32_overflow_32(
+ &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
+ &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
+ &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
+ &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ u[0] = k_packs_epi64(v[0], v[1]);
+ u[1] = k_packs_epi64(v[2], v[3]);
+ u[2] = k_packs_epi64(v[4], v[5]);
+ u[3] = k_packs_epi64(v[6], v[7]);
+ u[4] = k_packs_epi64(v[8], v[9]);
+ u[5] = k_packs_epi64(v[10], v[11]);
+ u[6] = k_packs_epi64(v[12], v[13]);
+ u[7] = k_packs_epi64(v[14], v[15]);
+ u[8] = k_packs_epi64(v[16], v[17]);
+ u[9] = k_packs_epi64(v[18], v[19]);
+ u[10] = k_packs_epi64(v[20], v[21]);
+ u[11] = k_packs_epi64(v[22], v[23]);
+ u[12] = k_packs_epi64(v[24], v[25]);
+ u[13] = k_packs_epi64(v[26], v[27]);
+ u[14] = k_packs_epi64(v[28], v[29]);
+ u[15] = k_packs_epi64(v[30], v[31]);
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+ v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+ v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+ v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+ v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+ v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+ v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+ v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+ v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+ u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+ u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+ u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+ u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+ u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+ u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+ u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+ u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+
+ v[0] = _mm_cmplt_epi32(u[0], kZero);
+ v[1] = _mm_cmplt_epi32(u[1], kZero);
+ v[2] = _mm_cmplt_epi32(u[2], kZero);
+ v[3] = _mm_cmplt_epi32(u[3], kZero);
+ v[4] = _mm_cmplt_epi32(u[4], kZero);
+ v[5] = _mm_cmplt_epi32(u[5], kZero);
+ v[6] = _mm_cmplt_epi32(u[6], kZero);
+ v[7] = _mm_cmplt_epi32(u[7], kZero);
+ v[8] = _mm_cmplt_epi32(u[8], kZero);
+ v[9] = _mm_cmplt_epi32(u[9], kZero);
+ v[10] = _mm_cmplt_epi32(u[10], kZero);
+ v[11] = _mm_cmplt_epi32(u[11], kZero);
+ v[12] = _mm_cmplt_epi32(u[12], kZero);
+ v[13] = _mm_cmplt_epi32(u[13], kZero);
+ v[14] = _mm_cmplt_epi32(u[14], kZero);
+ v[15] = _mm_cmplt_epi32(u[15], kZero);
+
+ u[0] = _mm_sub_epi32(u[0], v[0]);
+ u[1] = _mm_sub_epi32(u[1], v[1]);
+ u[2] = _mm_sub_epi32(u[2], v[2]);
+ u[3] = _mm_sub_epi32(u[3], v[3]);
+ u[4] = _mm_sub_epi32(u[4], v[4]);
+ u[5] = _mm_sub_epi32(u[5], v[5]);
+ u[6] = _mm_sub_epi32(u[6], v[6]);
+ u[7] = _mm_sub_epi32(u[7], v[7]);
+ u[8] = _mm_sub_epi32(u[8], v[8]);
+ u[9] = _mm_sub_epi32(u[9], v[9]);
+ u[10] = _mm_sub_epi32(u[10], v[10]);
+ u[11] = _mm_sub_epi32(u[11], v[11]);
+ u[12] = _mm_sub_epi32(u[12], v[12]);
+ u[13] = _mm_sub_epi32(u[13], v[13]);
+ u[14] = _mm_sub_epi32(u[14], v[14]);
+ u[15] = _mm_sub_epi32(u[15], v[15]);
+
+ v[0] = _mm_add_epi32(u[0], K32One);
+ v[1] = _mm_add_epi32(u[1], K32One);
+ v[2] = _mm_add_epi32(u[2], K32One);
+ v[3] = _mm_add_epi32(u[3], K32One);
+ v[4] = _mm_add_epi32(u[4], K32One);
+ v[5] = _mm_add_epi32(u[5], K32One);
+ v[6] = _mm_add_epi32(u[6], K32One);
+ v[7] = _mm_add_epi32(u[7], K32One);
+ v[8] = _mm_add_epi32(u[8], K32One);
+ v[9] = _mm_add_epi32(u[9], K32One);
+ v[10] = _mm_add_epi32(u[10], K32One);
+ v[11] = _mm_add_epi32(u[11], K32One);
+ v[12] = _mm_add_epi32(u[12], K32One);
+ v[13] = _mm_add_epi32(u[13], K32One);
+ v[14] = _mm_add_epi32(u[14], K32One);
+ v[15] = _mm_add_epi32(u[15], K32One);
+
+ u[0] = _mm_srai_epi32(v[0], 2);
+ u[1] = _mm_srai_epi32(v[1], 2);
+ u[2] = _mm_srai_epi32(v[2], 2);
+ u[3] = _mm_srai_epi32(v[3], 2);
+ u[4] = _mm_srai_epi32(v[4], 2);
+ u[5] = _mm_srai_epi32(v[5], 2);
+ u[6] = _mm_srai_epi32(v[6], 2);
+ u[7] = _mm_srai_epi32(v[7], 2);
+ u[8] = _mm_srai_epi32(v[8], 2);
+ u[9] = _mm_srai_epi32(v[9], 2);
+ u[10] = _mm_srai_epi32(v[10], 2);
+ u[11] = _mm_srai_epi32(v[11], 2);
+ u[12] = _mm_srai_epi32(v[12], 2);
+ u[13] = _mm_srai_epi32(v[13], 2);
+ u[14] = _mm_srai_epi32(v[14], 2);
+ u[15] = _mm_srai_epi32(v[15], 2);
+
+ out[5] = _mm_packs_epi32(u[0], u[1]);
+ out[21] = _mm_packs_epi32(u[2], u[3]);
+ out[13] = _mm_packs_epi32(u[4], u[5]);
+ out[29] = _mm_packs_epi32(u[6], u[7]);
+ out[3] = _mm_packs_epi32(u[8], u[9]);
+ out[19] = _mm_packs_epi32(u[10], u[11]);
+ out[11] = _mm_packs_epi32(u[12], u[13]);
+ out[27] = _mm_packs_epi32(u[14], u[15]);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x8(&out[5], &out[21], &out[13], &out[29],
+ &out[3], &out[19], &out[11], &out[27]);
+ if (overflow) {
+ HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ }
+#endif // FDCT32x32_HIGH_PRECISION
+ // Transpose the results, do it as four 8x8 transposes.
+ {
+ int transpose_block;
+ int16_t *output0 = &intermediate[column_start * 32];
+ tran_low_t *output1 = &output_org[column_start * 32];
+ for (transpose_block = 0; transpose_block < 4; ++transpose_block) {
+ __m128i *this_out = &out[8 * transpose_block];
+ // 00 01 02 03 04 05 06 07
+ // 10 11 12 13 14 15 16 17
+ // 20 21 22 23 24 25 26 27
+ // 30 31 32 33 34 35 36 37
+ // 40 41 42 43 44 45 46 47
+ // 50 51 52 53 54 55 56 57
+ // 60 61 62 63 64 65 66 67
+ // 70 71 72 73 74 75 76 77
+ const __m128i tr0_0 = _mm_unpacklo_epi16(this_out[0], this_out[1]);
+ const __m128i tr0_1 = _mm_unpacklo_epi16(this_out[2], this_out[3]);
+ const __m128i tr0_2 = _mm_unpackhi_epi16(this_out[0], this_out[1]);
+ const __m128i tr0_3 = _mm_unpackhi_epi16(this_out[2], this_out[3]);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(this_out[4], this_out[5]);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(this_out[6], this_out[7]);
+ const __m128i tr0_6 = _mm_unpackhi_epi16(this_out[4], this_out[5]);
+ const __m128i tr0_7 = _mm_unpackhi_epi16(this_out[6], this_out[7]);
+ // 00 10 01 11 02 12 03 13
+ // 20 30 21 31 22 32 23 33
+ // 04 14 05 15 06 16 07 17
+ // 24 34 25 35 26 36 27 37
+ // 40 50 41 51 42 52 43 53
+ // 60 70 61 71 62 72 63 73
+ // 44 54 45 55 46 56 47 57
+ // 64 74 65 75 66 76 67 77
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+ // 00 10 20 30 01 11 21 31
+ // 40 50 60 70 41 51 61 71
+ // 02 12 22 32 03 13 23 33
+ // 42 52 62 72 43 53 63 73
+ // 04 14 24 34 05 15 25 35
+ // 44 54 64 74 45 55 65 75
+ // 06 16 26 36 07 17 27 37
+ // 46 56 66 76 47 57 67 77
+ __m128i tr2_0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
+ __m128i tr2_1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
+ __m128i tr2_2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
+ __m128i tr2_3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
+ __m128i tr2_4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
+ __m128i tr2_5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
+ __m128i tr2_6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
+ __m128i tr2_7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
+ // 00 10 20 30 40 50 60 70
+ // 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72
+ // 03 13 23 33 43 53 63 73
+ // 04 14 24 34 44 54 64 74
+ // 05 15 25 35 45 55 65 75
+ // 06 16 26 36 46 56 66 76
+ // 07 17 27 37 47 57 67 77
+ if (0 == pass) {
+ // output[j] = (output[j] + 1 + (output[j] > 0)) >> 2;
+ // TODO(cd): see quality impact of only doing
+ // output[j] = (output[j] + 1) >> 2;
+ // which would remove the code between here ...
+ __m128i tr2_0_0 = _mm_cmpgt_epi16(tr2_0, kZero);
+ __m128i tr2_1_0 = _mm_cmpgt_epi16(tr2_1, kZero);
+ __m128i tr2_2_0 = _mm_cmpgt_epi16(tr2_2, kZero);
+ __m128i tr2_3_0 = _mm_cmpgt_epi16(tr2_3, kZero);
+ __m128i tr2_4_0 = _mm_cmpgt_epi16(tr2_4, kZero);
+ __m128i tr2_5_0 = _mm_cmpgt_epi16(tr2_5, kZero);
+ __m128i tr2_6_0 = _mm_cmpgt_epi16(tr2_6, kZero);
+ __m128i tr2_7_0 = _mm_cmpgt_epi16(tr2_7, kZero);
+ tr2_0 = _mm_sub_epi16(tr2_0, tr2_0_0);
+ tr2_1 = _mm_sub_epi16(tr2_1, tr2_1_0);
+ tr2_2 = _mm_sub_epi16(tr2_2, tr2_2_0);
+ tr2_3 = _mm_sub_epi16(tr2_3, tr2_3_0);
+ tr2_4 = _mm_sub_epi16(tr2_4, tr2_4_0);
+ tr2_5 = _mm_sub_epi16(tr2_5, tr2_5_0);
+ tr2_6 = _mm_sub_epi16(tr2_6, tr2_6_0);
+ tr2_7 = _mm_sub_epi16(tr2_7, tr2_7_0);
+ // ... and here.
+ // PS: also change code in av1/encoder/dct.c
+ tr2_0 = _mm_add_epi16(tr2_0, kOne);
+ tr2_1 = _mm_add_epi16(tr2_1, kOne);
+ tr2_2 = _mm_add_epi16(tr2_2, kOne);
+ tr2_3 = _mm_add_epi16(tr2_3, kOne);
+ tr2_4 = _mm_add_epi16(tr2_4, kOne);
+ tr2_5 = _mm_add_epi16(tr2_5, kOne);
+ tr2_6 = _mm_add_epi16(tr2_6, kOne);
+ tr2_7 = _mm_add_epi16(tr2_7, kOne);
+ tr2_0 = _mm_srai_epi16(tr2_0, 2);
+ tr2_1 = _mm_srai_epi16(tr2_1, 2);
+ tr2_2 = _mm_srai_epi16(tr2_2, 2);
+ tr2_3 = _mm_srai_epi16(tr2_3, 2);
+ tr2_4 = _mm_srai_epi16(tr2_4, 2);
+ tr2_5 = _mm_srai_epi16(tr2_5, 2);
+ tr2_6 = _mm_srai_epi16(tr2_6, 2);
+ tr2_7 = _mm_srai_epi16(tr2_7, 2);
+ }
+ // Note: even though all these stores are aligned, using the aligned
+ // intrinsic makes the code slightly slower.
+ if (pass == 0) {
+ _mm_storeu_si128((__m128i *)(output0 + 0 * 32), tr2_0);
+ _mm_storeu_si128((__m128i *)(output0 + 1 * 32), tr2_1);
+ _mm_storeu_si128((__m128i *)(output0 + 2 * 32), tr2_2);
+ _mm_storeu_si128((__m128i *)(output0 + 3 * 32), tr2_3);
+ _mm_storeu_si128((__m128i *)(output0 + 4 * 32), tr2_4);
+ _mm_storeu_si128((__m128i *)(output0 + 5 * 32), tr2_5);
+ _mm_storeu_si128((__m128i *)(output0 + 6 * 32), tr2_6);
+ _mm_storeu_si128((__m128i *)(output0 + 7 * 32), tr2_7);
+ // Process next 8x8
+ output0 += 8;
+ } else {
+ storeu_output(&tr2_0, (output1 + 0 * 32));
+ storeu_output(&tr2_1, (output1 + 1 * 32));
+ storeu_output(&tr2_2, (output1 + 2 * 32));
+ storeu_output(&tr2_3, (output1 + 3 * 32));
+ storeu_output(&tr2_4, (output1 + 4 * 32));
+ storeu_output(&tr2_5, (output1 + 5 * 32));
+ storeu_output(&tr2_6, (output1 + 6 * 32));
+ storeu_output(&tr2_7, (output1 + 7 * 32));
+ // Process next 8x8
+ output1 += 8;
+ }
+ }
+ }
+ }
+ }
+} // NOLINT
+
+#undef ADD_EPI16
+#undef SUB_EPI16
+#undef HIGH_FDCT32x32_2D_C
+#undef HIGH_FDCT32x32_2D_ROWS_C
diff --git a/av1/common/x86/vp10_fwd_txfm1d_sse4.c b/av1/common/x86/vp10_fwd_txfm1d_sse4.c
new file mode 100644
index 0000000..902c9b2
--- /dev/null
+++ b/av1/common/x86/vp10_fwd_txfm1d_sse4.c
@@ -0,0 +1,1689 @@
+#include "av1/common/x86/vp10_txfm1d_sse4.h"
+
+// Forward 4-point DCT over vectors of four 32-bit lanes.
+//   input, output: rows of (4 / lanes) consecutive __m128i vectors each.
+//   cos_bit:       one entry per stage; selects the cospi_arr row and is
+//                  forwarded to the butterfly macros as their last argument.
+//   stage_range:   accepted but never read.
+void vp10_fdct4_new_sse4_1(const __m128i *input, __m128i *output,
+                           const int8_t *cos_bit, const int8_t *stage_range) {
+  enum { kTxfmSize = 4, kLanesPer128 = 4 };
+  const int groups = kTxfmSize / kLanesPer128;  // vectors per row
+  const int32_t *cospi;
+  __m128i ping[4], pong[4];  // stages alternate between these two buffers
+  int bit;
+  int g;
+  (void)stage_range;
+  for (g = 0; g < groups; ++g) {
+    // stage 0: fetch the four rows of this column group
+    int32_t stage = 0;
+    ping[3] = input[3 * groups + g];
+    ping[2] = input[2 * groups + g];
+    ping[1] = input[1 * groups + g];
+    ping[0] = input[0 * groups + g];
+
+    // stage 1: sums, then differences, of mirrored rows
+    stage += 1;
+    bit = cos_bit[stage];
+    cospi = cospi_arr[bit - cos_bit_min];
+    pong[0] = _mm_add_epi32(ping[0], ping[3]);
+    pong[1] = _mm_add_epi32(ping[1], ping[2]);
+    pong[2] = _mm_sub_epi32(ping[1], ping[2]);
+    pong[3] = _mm_sub_epi32(ping[0], ping[3]);
+
+    // stage 2: cosine-weighted butterflies on slots (2,3) and (0,1)
+    stage += 1;
+    bit = cos_bit[stage];
+    cospi = cospi_arr[bit - cos_bit_min];
+    btf_32_sse4_1_type1(cospi[48], cospi[16], pong[2], pong[3], ping[2],
+                        ping[3], bit);
+    btf_32_sse4_1_type0(cospi[32], cospi[32], pong[0], pong[1], ping[0],
+                        ping[1], bit);
+
+    // stage 3: bit/cospi refreshed as in every stage; only reordering remains
+    stage += 1;
+    bit = cos_bit[stage];
+    cospi = cospi_arr[bit - cos_bit_min];
+
+    // store in bit-reversed order: slots 0, 2, 1, 3 become rows 0..3
+    output[0 * groups + g] = ping[0];
+    output[1 * groups + g] = ping[2];
+    output[2 * groups + g] = ping[1];
+    output[3 * groups + g] = ping[3];
+  }
+}
+
+// Forward 8-point DCT over vectors of four 32-bit lanes.
+//   input, output: rows of (8 / lanes) consecutive __m128i vectors each.
+//   cos_bit:       one entry per stage; selects the cospi_arr row and is
+//                  forwarded to the butterfly macros as their last argument.
+//   stage_range:   accepted but never read.
+void vp10_fdct8_new_sse4_1(const __m128i *input, __m128i *output,
+                           const int8_t *cos_bit, const int8_t *stage_range) {
+  enum { kTxfmSize = 8, kLanesPer128 = 4 };
+  const int groups = kTxfmSize / kLanesPer128;  // vectors per row
+  const int32_t *cospi;
+  __m128i ping[8], pong[8];  // stages alternate between these two buffers
+  int bit;
+  int g;
+  (void)stage_range;
+  for (g = 0; g < groups; ++g) {
+    // stage 0: fetch the eight rows of this column group
+    int32_t stage = 0;
+    ping[7] = input[7 * groups + g];
+    ping[6] = input[6 * groups + g];
+    ping[5] = input[5 * groups + g];
+    ping[4] = input[4 * groups + g];
+    ping[3] = input[3 * groups + g];
+    ping[2] = input[2 * groups + g];
+    ping[1] = input[1 * groups + g];
+    ping[0] = input[0 * groups + g];
+
+    // stage 1: sums, then differences, of mirrored rows
+    stage += 1;
+    bit = cos_bit[stage];
+    cospi = cospi_arr[bit - cos_bit_min];
+    pong[0] = _mm_add_epi32(ping[0], ping[7]);
+    pong[1] = _mm_add_epi32(ping[1], ping[6]);
+    pong[2] = _mm_add_epi32(ping[2], ping[5]);
+    pong[3] = _mm_add_epi32(ping[3], ping[4]);
+    pong[4] = _mm_sub_epi32(ping[3], ping[4]);
+    pong[5] = _mm_sub_epi32(ping[2], ping[5]);
+    pong[6] = _mm_sub_epi32(ping[1], ping[6]);
+    pong[7] = _mm_sub_epi32(ping[0], ping[7]);
+
+    // stage 2: mirrored sums/differences inside slots 0..3, a weighted
+    // butterfly on slots 5 and 6, slots 4 and 7 carried over unchanged
+    stage += 1;
+    bit = cos_bit[stage];
+    cospi = cospi_arr[bit - cos_bit_min];
+    ping[0] = _mm_add_epi32(pong[0], pong[3]);
+    ping[1] = _mm_add_epi32(pong[1], pong[2]);
+    ping[2] = _mm_sub_epi32(pong[1], pong[2]);
+    ping[3] = _mm_sub_epi32(pong[0], pong[3]);
+    btf_32_sse4_1_type0(-cospi[32], cospi[32], pong[5], pong[6], ping[5],
+                        ping[6], bit);
+    ping[4] = pong[4];
+    ping[7] = pong[7];
+
+    // stage 3: weighted butterflies on slots (2,3) and (0,1), plain
+    // sums/differences inside slots 4..7
+    stage += 1;
+    bit = cos_bit[stage];
+    cospi = cospi_arr[bit - cos_bit_min];
+    btf_32_sse4_1_type1(cospi[48], cospi[16], ping[2], ping[3], pong[2],
+                        pong[3], bit);
+    btf_32_sse4_1_type0(cospi[32], cospi[32], ping[0], ping[1], pong[0],
+                        pong[1], bit);
+    pong[4] = _mm_add_epi32(ping[4], ping[5]);
+    pong[5] = _mm_sub_epi32(ping[4], ping[5]);
+    pong[6] = _mm_sub_epi32(ping[7], ping[6]);
+    pong[7] = _mm_add_epi32(ping[7], ping[6]);
+
+    // stage 4: weighted butterflies on slots (5,6) and (4,7); slots 0..3
+    // are carried over unchanged
+    stage += 1;
+    bit = cos_bit[stage];
+    cospi = cospi_arr[bit - cos_bit_min];
+    btf_32_sse4_1_type1(cospi[24], cospi[40], pong[5], pong[6], ping[5],
+                        ping[6], bit);
+    btf_32_sse4_1_type1(cospi[56], cospi[8], pong[4], pong[7], ping[4],
+                        ping[7], bit);
+    ping[0] = pong[0];
+    ping[1] = pong[1];
+    ping[2] = pong[2];
+    ping[3] = pong[3];
+
+    // stage 5: bit and cospi are refreshed as in every stage; only the
+    // reordering below remains
+    stage += 1;
+    bit = cos_bit[stage];
+    cospi = cospi_arr[bit - cos_bit_min];
+
+    // store in bit-reversed order: slots 0, 4, 2, 6, 1, 5, 3, 7 -> rows 0..7
+    output[0 * groups + g] = ping[0];
+    output[1 * groups + g] = ping[4];
+    output[2 * groups + g] = ping[2];
+    output[3 * groups + g] = ping[6];
+    output[4 * groups + g] = ping[1];
+    output[5 * groups + g] = ping[5];
+    output[6 * groups + g] = ping[3];
+    output[7 * groups + g] = ping[7];
+  }
+}
+
+void vp10_fdct16_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int txfm_size = 16;  // rows consumed and produced per column group
+ const int num_per_128 = 4;  // 32-bit lanes per __m128i (all arithmetic below is epi32)
+ const int32_t *cospi;  // row of cospi_arr selected by the current stage's cos_bit entry
+ __m128i buf0[16];  // buf0 and buf1 alternate as source/destination of successive stages
+ __m128i buf1[16];
+ int col_num = txfm_size / num_per_128;  // __m128i vectors per row of input/output
+ int bit;  // cos_bit entry of the current stage; last argument of the btf_* macros
+ int col;
+ (void)stage_range;  // this parameter is never read here
+ for (col = 0; col < col_num; col++) {
+ // stage 0: load the 16 rows of column group col
+ int32_t stage_idx = 0;
+ buf0[0] = input[0 * col_num + col];
+ buf0[1] = input[1 * col_num + col];
+ buf0[2] = input[2 * col_num + col];
+ buf0[3] = input[3 * col_num + col];
+ buf0[4] = input[4 * col_num + col];
+ buf0[5] = input[5 * col_num + col];
+ buf0[6] = input[6 * col_num + col];
+ buf0[7] = input[7 * col_num + col];
+ buf0[8] = input[8 * col_num + col];
+ buf0[9] = input[9 * col_num + col];
+ buf0[10] = input[10 * col_num + col];
+ buf0[11] = input[11 * col_num + col];
+ buf0[12] = input[12 * col_num + col];
+ buf0[13] = input[13 * col_num + col];
+ buf0[14] = input[14 * col_num + col];
+ buf0[15] = input[15 * col_num + col];
+
+ // stage 1: sums and differences of mirrored rows i and 15 - i
+ stage_idx++;
+ bit = cos_bit[stage_idx];  // bit/cospi are refreshed every stage; stage 1 reads neither
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[15]);
+ buf1[15] = _mm_sub_epi32(buf0[0], buf0[15]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[14]);
+ buf1[14] = _mm_sub_epi32(buf0[1], buf0[14]);
+ buf1[2] = _mm_add_epi32(buf0[2], buf0[13]);
+ buf1[13] = _mm_sub_epi32(buf0[2], buf0[13]);
+ buf1[3] = _mm_add_epi32(buf0[3], buf0[12]);
+ buf1[12] = _mm_sub_epi32(buf0[3], buf0[12]);
+ buf1[4] = _mm_add_epi32(buf0[4], buf0[11]);
+ buf1[11] = _mm_sub_epi32(buf0[4], buf0[11]);
+ buf1[5] = _mm_add_epi32(buf0[5], buf0[10]);
+ buf1[10] = _mm_sub_epi32(buf0[5], buf0[10]);
+ buf1[6] = _mm_add_epi32(buf0[6], buf0[9]);
+ buf1[9] = _mm_sub_epi32(buf0[6], buf0[9]);
+ buf1[7] = _mm_add_epi32(buf0[7], buf0[8]);
+ buf1[8] = _mm_sub_epi32(buf0[7], buf0[8]);
+
+ // stage 2: mirrored sums/differences in slots 0..7; btf on (10,13), (11,12); 8, 9, 14, 15 copied
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = _mm_add_epi32(buf1[0], buf1[7]);
+ buf0[7] = _mm_sub_epi32(buf1[0], buf1[7]);
+ buf0[1] = _mm_add_epi32(buf1[1], buf1[6]);
+ buf0[6] = _mm_sub_epi32(buf1[1], buf1[6]);
+ buf0[2] = _mm_add_epi32(buf1[2], buf1[5]);
+ buf0[5] = _mm_sub_epi32(buf1[2], buf1[5]);
+ buf0[3] = _mm_add_epi32(buf1[3], buf1[4]);
+ buf0[4] = _mm_sub_epi32(buf1[3], buf1[4]);
+ buf0[8] = buf1[8];
+ buf0[9] = buf1[9];
+ btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[10], buf1[13], buf0[10],  // btf_* macros are defined outside this file section
+ buf0[13], bit);
+ btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[11], buf1[12], buf0[11],
+ buf0[12], bit);
+ buf0[14] = buf1[14];
+ buf0[15] = buf1[15];
+
+ // stage 3: sums/differences in slots 0..3 and 8..15; btf on (5,6); 4 and 7 copied
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[3]);
+ buf1[3] = _mm_sub_epi32(buf0[0], buf0[3]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[2]);
+ buf1[2] = _mm_sub_epi32(buf0[1], buf0[2]);
+ buf1[4] = buf0[4];
+ btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[5], buf0[6], buf1[5],
+ buf1[6], bit);
+ buf1[7] = buf0[7];
+ buf1[8] = _mm_add_epi32(buf0[8], buf0[11]);
+ buf1[11] = _mm_sub_epi32(buf0[8], buf0[11]);
+ buf1[9] = _mm_add_epi32(buf0[9], buf0[10]);
+ buf1[10] = _mm_sub_epi32(buf0[9], buf0[10]);
+ buf1[12] = _mm_sub_epi32(buf0[15], buf0[12]);
+ buf1[15] = _mm_add_epi32(buf0[15], buf0[12]);
+ buf1[13] = _mm_sub_epi32(buf0[14], buf0[13]);
+ buf1[14] = _mm_add_epi32(buf0[14], buf0[13]);
+
+ // stage 4: btf on (0,1), (2,3), (9,14), (10,13); sums/differences in 4..7; 8, 11, 12, 15 copied
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[0], buf1[1], buf0[0],
+ buf0[1], bit);
+ btf_32_sse4_1_type1(cospi[48], cospi[16], buf1[2], buf1[3], buf0[2],
+ buf0[3], bit);
+ buf0[4] = _mm_add_epi32(buf1[4], buf1[5]);
+ buf0[5] = _mm_sub_epi32(buf1[4], buf1[5]);
+ buf0[6] = _mm_sub_epi32(buf1[7], buf1[6]);
+ buf0[7] = _mm_add_epi32(buf1[7], buf1[6]);
+ buf0[8] = buf1[8];
+ btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[9], buf1[14], buf0[9],
+ buf0[14], bit);
+ btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[10], buf1[13], buf0[10],
+ buf0[13], bit);
+ buf0[11] = buf1[11];
+ buf0[12] = buf1[12];
+ buf0[15] = buf1[15];
+
+ // stage 5: slots 0..3 copied; btf on (4,7), (5,6); sums/differences in 8..15
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf1[0] = buf0[0];
+ buf1[1] = buf0[1];
+ buf1[2] = buf0[2];
+ buf1[3] = buf0[3];
+ btf_32_sse4_1_type1(cospi[56], cospi[8], buf0[4], buf0[7], buf1[4], buf1[7],
+ bit);
+ btf_32_sse4_1_type1(cospi[24], cospi[40], buf0[5], buf0[6], buf1[5],
+ buf1[6], bit);
+ buf1[8] = _mm_add_epi32(buf0[8], buf0[9]);
+ buf1[9] = _mm_sub_epi32(buf0[8], buf0[9]);
+ buf1[10] = _mm_sub_epi32(buf0[11], buf0[10]);
+ buf1[11] = _mm_add_epi32(buf0[11], buf0[10]);
+ buf1[12] = _mm_add_epi32(buf0[12], buf0[13]);
+ buf1[13] = _mm_sub_epi32(buf0[12], buf0[13]);
+ buf1[14] = _mm_sub_epi32(buf0[15], buf0[14]);
+ buf1[15] = _mm_add_epi32(buf0[15], buf0[14]);
+
+ // stage 6: slots 0..7 copied; btf on (8,15), (9,14), (10,13), (11,12)
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ buf0[2] = buf1[2];
+ buf0[3] = buf1[3];
+ buf0[4] = buf1[4];
+ buf0[5] = buf1[5];
+ buf0[6] = buf1[6];
+ buf0[7] = buf1[7];
+ btf_32_sse4_1_type1(cospi[60], cospi[4], buf1[8], buf1[15], buf0[8],
+ buf0[15], bit);
+ btf_32_sse4_1_type1(cospi[28], cospi[36], buf1[9], buf1[14], buf0[9],
+ buf0[14], bit);
+ btf_32_sse4_1_type1(cospi[44], cospi[20], buf1[10], buf1[13], buf0[10],
+ buf0[13], bit);
+ btf_32_sse4_1_type1(cospi[12], cospi[52], buf1[11], buf1[12], buf0[11],
+ buf0[12], bit);
+
+ // stage 7: reorder slots into bit-reversed index order (0, 8, 4, 12, ...)
+ stage_idx++;
+ bit = cos_bit[stage_idx];  // bit/cospi are assigned but not read in this stage
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf1[0] = buf0[0];
+ buf1[1] = buf0[8];
+ buf1[2] = buf0[4];
+ buf1[3] = buf0[12];
+ buf1[4] = buf0[2];
+ buf1[5] = buf0[10];
+ buf1[6] = buf0[6];
+ buf1[7] = buf0[14];
+ buf1[8] = buf0[1];
+ buf1[9] = buf0[9];
+ buf1[10] = buf0[5];
+ buf1[11] = buf0[13];
+ buf1[12] = buf0[3];
+ buf1[13] = buf0[11];
+ buf1[14] = buf0[7];
+ buf1[15] = buf0[15];
+
+ output[0 * col_num + col] = buf1[0];  // write the 16 result rows of this column group
+ output[1 * col_num + col] = buf1[1];
+ output[2 * col_num + col] = buf1[2];
+ output[3 * col_num + col] = buf1[3];
+ output[4 * col_num + col] = buf1[4];
+ output[5 * col_num + col] = buf1[5];
+ output[6 * col_num + col] = buf1[6];
+ output[7 * col_num + col] = buf1[7];
+ output[8 * col_num + col] = buf1[8];
+ output[9 * col_num + col] = buf1[9];
+ output[10 * col_num + col] = buf1[10];
+ output[11 * col_num + col] = buf1[11];
+ output[12 * col_num + col] = buf1[12];
+ output[13 * col_num + col] = buf1[13];
+ output[14 * col_num + col] = buf1[14];
+ output[15 * col_num + col] = buf1[15];
+ }
+}
+
+void vp10_fdct32_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {
+ const int txfm_size = 32;  // rows consumed and produced per column group
+ const int num_per_128 = 4;  // 32-bit lanes per __m128i (all arithmetic below is epi32)
+ const int32_t *cospi;  // row of cospi_arr selected by the current stage's cos_bit entry
+ __m128i buf0[32];  // buf0 and buf1 alternate as source/destination of successive stages
+ __m128i buf1[32];
+ int col_num = txfm_size / num_per_128;  // __m128i vectors per row of input/output
+ int bit;  // cos_bit entry of the current stage; last argument of the btf_* macros
+ int col;
+ (void)stage_range;  // this parameter is never read here
+ for (col = 0; col < col_num; col++) {
+ // stage 0: load the 32 rows of column group col
+ int32_t stage_idx = 0;
+ buf0[0] = input[0 * col_num + col];
+ buf0[1] = input[1 * col_num + col];
+ buf0[2] = input[2 * col_num + col];
+ buf0[3] = input[3 * col_num + col];
+ buf0[4] = input[4 * col_num + col];
+ buf0[5] = input[5 * col_num + col];
+ buf0[6] = input[6 * col_num + col];
+ buf0[7] = input[7 * col_num + col];
+ buf0[8] = input[8 * col_num + col];
+ buf0[9] = input[9 * col_num + col];
+ buf0[10] = input[10 * col_num + col];
+ buf0[11] = input[11 * col_num + col];
+ buf0[12] = input[12 * col_num + col];
+ buf0[13] = input[13 * col_num + col];
+ buf0[14] = input[14 * col_num + col];
+ buf0[15] = input[15 * col_num + col];
+ buf0[16] = input[16 * col_num + col];
+ buf0[17] = input[17 * col_num + col];
+ buf0[18] = input[18 * col_num + col];
+ buf0[19] = input[19 * col_num + col];
+ buf0[20] = input[20 * col_num + col];
+ buf0[21] = input[21 * col_num + col];
+ buf0[22] = input[22 * col_num + col];
+ buf0[23] = input[23 * col_num + col];
+ buf0[24] = input[24 * col_num + col];
+ buf0[25] = input[25 * col_num + col];
+ buf0[26] = input[26 * col_num + col];
+ buf0[27] = input[27 * col_num + col];
+ buf0[28] = input[28 * col_num + col];
+ buf0[29] = input[29 * col_num + col];
+ buf0[30] = input[30 * col_num + col];
+ buf0[31] = input[31 * col_num + col];
+
+ // stage 1: sums and differences of mirrored rows i and 31 - i
+ stage_idx++;
+ bit = cos_bit[stage_idx];  // bit/cospi are refreshed every stage; stage 1 reads neither
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[31]);
+ buf1[31] = _mm_sub_epi32(buf0[0], buf0[31]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[30]);
+ buf1[30] = _mm_sub_epi32(buf0[1], buf0[30]);
+ buf1[2] = _mm_add_epi32(buf0[2], buf0[29]);
+ buf1[29] = _mm_sub_epi32(buf0[2], buf0[29]);
+ buf1[3] = _mm_add_epi32(buf0[3], buf0[28]);
+ buf1[28] = _mm_sub_epi32(buf0[3], buf0[28]);
+ buf1[4] = _mm_add_epi32(buf0[4], buf0[27]);
+ buf1[27] = _mm_sub_epi32(buf0[4], buf0[27]);
+ buf1[5] = _mm_add_epi32(buf0[5], buf0[26]);
+ buf1[26] = _mm_sub_epi32(buf0[5], buf0[26]);
+ buf1[6] = _mm_add_epi32(buf0[6], buf0[25]);
+ buf1[25] = _mm_sub_epi32(buf0[6], buf0[25]);
+ buf1[7] = _mm_add_epi32(buf0[7], buf0[24]);
+ buf1[24] = _mm_sub_epi32(buf0[7], buf0[24]);
+ buf1[8] = _mm_add_epi32(buf0[8], buf0[23]);
+ buf1[23] = _mm_sub_epi32(buf0[8], buf0[23]);
+ buf1[9] = _mm_add_epi32(buf0[9], buf0[22]);
+ buf1[22] = _mm_sub_epi32(buf0[9], buf0[22]);
+ buf1[10] = _mm_add_epi32(buf0[10], buf0[21]);
+ buf1[21] = _mm_sub_epi32(buf0[10], buf0[21]);
+ buf1[11] = _mm_add_epi32(buf0[11], buf0[20]);
+ buf1[20] = _mm_sub_epi32(buf0[11], buf0[20]);
+ buf1[12] = _mm_add_epi32(buf0[12], buf0[19]);
+ buf1[19] = _mm_sub_epi32(buf0[12], buf0[19]);
+ buf1[13] = _mm_add_epi32(buf0[13], buf0[18]);
+ buf1[18] = _mm_sub_epi32(buf0[13], buf0[18]);
+ buf1[14] = _mm_add_epi32(buf0[14], buf0[17]);
+ buf1[17] = _mm_sub_epi32(buf0[14], buf0[17]);
+ buf1[15] = _mm_add_epi32(buf0[15], buf0[16]);
+ buf1[16] = _mm_sub_epi32(buf0[15], buf0[16]);
+
+ // stage 2: mirrored sums/differences in slots 0..15; btf on (20,27)..(23,24); 16..19, 28..31 copied
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = _mm_add_epi32(buf1[0], buf1[15]);
+ buf0[15] = _mm_sub_epi32(buf1[0], buf1[15]);
+ buf0[1] = _mm_add_epi32(buf1[1], buf1[14]);
+ buf0[14] = _mm_sub_epi32(buf1[1], buf1[14]);
+ buf0[2] = _mm_add_epi32(buf1[2], buf1[13]);
+ buf0[13] = _mm_sub_epi32(buf1[2], buf1[13]);
+ buf0[3] = _mm_add_epi32(buf1[3], buf1[12]);
+ buf0[12] = _mm_sub_epi32(buf1[3], buf1[12]);
+ buf0[4] = _mm_add_epi32(buf1[4], buf1[11]);
+ buf0[11] = _mm_sub_epi32(buf1[4], buf1[11]);
+ buf0[5] = _mm_add_epi32(buf1[5], buf1[10]);
+ buf0[10] = _mm_sub_epi32(buf1[5], buf1[10]);
+ buf0[6] = _mm_add_epi32(buf1[6], buf1[9]);
+ buf0[9] = _mm_sub_epi32(buf1[6], buf1[9]);
+ buf0[7] = _mm_add_epi32(buf1[7], buf1[8]);
+ buf0[8] = _mm_sub_epi32(buf1[7], buf1[8]);
+ buf0[16] = buf1[16];
+ buf0[17] = buf1[17];
+ buf0[18] = buf1[18];
+ buf0[19] = buf1[19];
+ btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[20], buf1[27], buf0[20],  // btf_* macros are defined outside this file section
+ buf0[27], bit);
+ btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[21], buf1[26], buf0[21],
+ buf0[26], bit);
+ btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[22], buf1[25], buf0[22],
+ buf0[25], bit);
+ btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[23], buf1[24], buf0[23],
+ buf0[24], bit);
+ buf0[28] = buf1[28];
+ buf0[29] = buf1[29];
+ buf0[30] = buf1[30];
+ buf0[31] = buf1[31];
+
+ // stage 3: sums/differences in slots 0..7 and 16..31; btf on (10,13), (11,12); 8, 9, 14, 15 copied
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[7]);
+ buf1[7] = _mm_sub_epi32(buf0[0], buf0[7]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[6]);
+ buf1[6] = _mm_sub_epi32(buf0[1], buf0[6]);
+ buf1[2] = _mm_add_epi32(buf0[2], buf0[5]);
+ buf1[5] = _mm_sub_epi32(buf0[2], buf0[5]);
+ buf1[3] = _mm_add_epi32(buf0[3], buf0[4]);
+ buf1[4] = _mm_sub_epi32(buf0[3], buf0[4]);
+ buf1[8] = buf0[8];
+ buf1[9] = buf0[9];
+ btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[10], buf0[13], buf1[10],
+ buf1[13], bit);
+ btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[11], buf0[12], buf1[11],
+ buf1[12], bit);
+ buf1[14] = buf0[14];
+ buf1[15] = buf0[15];
+ buf1[16] = _mm_add_epi32(buf0[16], buf0[23]);
+ buf1[23] = _mm_sub_epi32(buf0[16], buf0[23]);
+ buf1[17] = _mm_add_epi32(buf0[17], buf0[22]);
+ buf1[22] = _mm_sub_epi32(buf0[17], buf0[22]);
+ buf1[18] = _mm_add_epi32(buf0[18], buf0[21]);
+ buf1[21] = _mm_sub_epi32(buf0[18], buf0[21]);
+ buf1[19] = _mm_add_epi32(buf0[19], buf0[20]);
+ buf1[20] = _mm_sub_epi32(buf0[19], buf0[20]);
+ buf1[24] = _mm_sub_epi32(buf0[31], buf0[24]);
+ buf1[31] = _mm_add_epi32(buf0[31], buf0[24]);
+ buf1[25] = _mm_sub_epi32(buf0[30], buf0[25]);
+ buf1[30] = _mm_add_epi32(buf0[30], buf0[25]);
+ buf1[26] = _mm_sub_epi32(buf0[29], buf0[26]);
+ buf1[29] = _mm_add_epi32(buf0[29], buf0[26]);
+ buf1[27] = _mm_sub_epi32(buf0[28], buf0[27]);
+ buf1[28] = _mm_add_epi32(buf0[28], buf0[27]);
+
+ // stage 4: sums/differences in 0..3 and 8..15; btf on (5,6), (18,29), (19,28), (20,27), (21,26); rest copied
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = _mm_add_epi32(buf1[0], buf1[3]);
+ buf0[3] = _mm_sub_epi32(buf1[0], buf1[3]);
+ buf0[1] = _mm_add_epi32(buf1[1], buf1[2]);
+ buf0[2] = _mm_sub_epi32(buf1[1], buf1[2]);
+ buf0[4] = buf1[4];
+ btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[5], buf1[6], buf0[5],
+ buf0[6], bit);
+ buf0[7] = buf1[7];
+ buf0[8] = _mm_add_epi32(buf1[8], buf1[11]);
+ buf0[11] = _mm_sub_epi32(buf1[8], buf1[11]);
+ buf0[9] = _mm_add_epi32(buf1[9], buf1[10]);
+ buf0[10] = _mm_sub_epi32(buf1[9], buf1[10]);
+ buf0[12] = _mm_sub_epi32(buf1[15], buf1[12]);
+ buf0[15] = _mm_add_epi32(buf1[15], buf1[12]);
+ buf0[13] = _mm_sub_epi32(buf1[14], buf1[13]);
+ buf0[14] = _mm_add_epi32(buf1[14], buf1[13]);
+ buf0[16] = buf1[16];
+ buf0[17] = buf1[17];
+ btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[18], buf1[29], buf0[18],
+ buf0[29], bit);
+ btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[19], buf1[28], buf0[19],
+ buf0[28], bit);
+ btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[20], buf1[27], buf0[20],
+ buf0[27], bit);
+ btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[21], buf1[26], buf0[21],
+ buf0[26], bit);
+ buf0[22] = buf1[22];
+ buf0[23] = buf1[23];
+ buf0[24] = buf1[24];
+ buf0[25] = buf1[25];
+ buf0[30] = buf1[30];
+ buf0[31] = buf1[31];
+
+ // stage 5: btf on (0,1), (2,3), (9,14), (10,13); sums/differences in 4..7 and 16..31; 8, 11, 12, 15 copied
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf0[0], buf0[1], buf1[0],
+ buf1[1], bit);
+ btf_32_sse4_1_type1(cospi[48], cospi[16], buf0[2], buf0[3], buf1[2],
+ buf1[3], bit);
+ buf1[4] = _mm_add_epi32(buf0[4], buf0[5]);
+ buf1[5] = _mm_sub_epi32(buf0[4], buf0[5]);
+ buf1[6] = _mm_sub_epi32(buf0[7], buf0[6]);
+ buf1[7] = _mm_add_epi32(buf0[7], buf0[6]);
+ buf1[8] = buf0[8];
+ btf_32_sse4_1_type0(-cospi[16], cospi[48], buf0[9], buf0[14], buf1[9],
+ buf1[14], bit);
+ btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf0[10], buf0[13], buf1[10],
+ buf1[13], bit);
+ buf1[11] = buf0[11];
+ buf1[12] = buf0[12];
+ buf1[15] = buf0[15];
+ buf1[16] = _mm_add_epi32(buf0[16], buf0[19]);
+ buf1[19] = _mm_sub_epi32(buf0[16], buf0[19]);
+ buf1[17] = _mm_add_epi32(buf0[17], buf0[18]);
+ buf1[18] = _mm_sub_epi32(buf0[17], buf0[18]);
+ buf1[20] = _mm_sub_epi32(buf0[23], buf0[20]);
+ buf1[23] = _mm_add_epi32(buf0[23], buf0[20]);
+ buf1[21] = _mm_sub_epi32(buf0[22], buf0[21]);
+ buf1[22] = _mm_add_epi32(buf0[22], buf0[21]);
+ buf1[24] = _mm_add_epi32(buf0[24], buf0[27]);
+ buf1[27] = _mm_sub_epi32(buf0[24], buf0[27]);
+ buf1[25] = _mm_add_epi32(buf0[25], buf0[26]);
+ buf1[26] = _mm_sub_epi32(buf0[25], buf0[26]);
+ buf1[28] = _mm_sub_epi32(buf0[31], buf0[28]);
+ buf1[31] = _mm_add_epi32(buf0[31], buf0[28]);
+ buf1[29] = _mm_sub_epi32(buf0[30], buf0[29]);
+ buf1[30] = _mm_add_epi32(buf0[30], buf0[29]);
+
+ // stage 6: 0..3 copied; btf on (4,7), (5,6), (17,30), (18,29), (21,26), (22,25); sums/differences in 8..15
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ buf0[2] = buf1[2];
+ buf0[3] = buf1[3];
+ btf_32_sse4_1_type1(cospi[56], cospi[8], buf1[4], buf1[7], buf0[4], buf0[7],
+ bit);
+ btf_32_sse4_1_type1(cospi[24], cospi[40], buf1[5], buf1[6], buf0[5],
+ buf0[6], bit);
+ buf0[8] = _mm_add_epi32(buf1[8], buf1[9]);
+ buf0[9] = _mm_sub_epi32(buf1[8], buf1[9]);
+ buf0[10] = _mm_sub_epi32(buf1[11], buf1[10]);
+ buf0[11] = _mm_add_epi32(buf1[11], buf1[10]);
+ buf0[12] = _mm_add_epi32(buf1[12], buf1[13]);
+ buf0[13] = _mm_sub_epi32(buf1[12], buf1[13]);
+ buf0[14] = _mm_sub_epi32(buf1[15], buf1[14]);
+ buf0[15] = _mm_add_epi32(buf1[15], buf1[14]);
+ buf0[16] = buf1[16];
+ btf_32_sse4_1_type0(-cospi[8], cospi[56], buf1[17], buf1[30], buf0[17],
+ buf0[30], bit);
+ btf_32_sse4_1_type0(-cospi[56], -cospi[8], buf1[18], buf1[29], buf0[18],
+ buf0[29], bit);
+ buf0[19] = buf1[19];
+ buf0[20] = buf1[20];
+ btf_32_sse4_1_type0(-cospi[40], cospi[24], buf1[21], buf1[26], buf0[21],
+ buf0[26], bit);
+ btf_32_sse4_1_type0(-cospi[24], -cospi[40], buf1[22], buf1[25], buf0[22],
+ buf0[25], bit);
+ buf0[23] = buf1[23];
+ buf0[24] = buf1[24];
+ buf0[27] = buf1[27];
+ buf0[28] = buf1[28];
+ buf0[31] = buf1[31];
+
+ // stage 7: 0..7 copied; btf on (8,15), (9,14), (10,13), (11,12); sums/differences in 16..31
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf1[0] = buf0[0];
+ buf1[1] = buf0[1];
+ buf1[2] = buf0[2];
+ buf1[3] = buf0[3];
+ buf1[4] = buf0[4];
+ buf1[5] = buf0[5];
+ buf1[6] = buf0[6];
+ buf1[7] = buf0[7];
+ btf_32_sse4_1_type1(cospi[60], cospi[4], buf0[8], buf0[15], buf1[8],
+ buf1[15], bit);
+ btf_32_sse4_1_type1(cospi[28], cospi[36], buf0[9], buf0[14], buf1[9],
+ buf1[14], bit);
+ btf_32_sse4_1_type1(cospi[44], cospi[20], buf0[10], buf0[13], buf1[10],
+ buf1[13], bit);
+ btf_32_sse4_1_type1(cospi[12], cospi[52], buf0[11], buf0[12], buf1[11],
+ buf1[12], bit);
+ buf1[16] = _mm_add_epi32(buf0[16], buf0[17]);
+ buf1[17] = _mm_sub_epi32(buf0[16], buf0[17]);
+ buf1[18] = _mm_sub_epi32(buf0[19], buf0[18]);
+ buf1[19] = _mm_add_epi32(buf0[19], buf0[18]);
+ buf1[20] = _mm_add_epi32(buf0[20], buf0[21]);
+ buf1[21] = _mm_sub_epi32(buf0[20], buf0[21]);
+ buf1[22] = _mm_sub_epi32(buf0[23], buf0[22]);
+ buf1[23] = _mm_add_epi32(buf0[23], buf0[22]);
+ buf1[24] = _mm_add_epi32(buf0[24], buf0[25]);
+ buf1[25] = _mm_sub_epi32(buf0[24], buf0[25]);
+ buf1[26] = _mm_sub_epi32(buf0[27], buf0[26]);
+ buf1[27] = _mm_add_epi32(buf0[27], buf0[26]);
+ buf1[28] = _mm_add_epi32(buf0[28], buf0[29]);
+ buf1[29] = _mm_sub_epi32(buf0[28], buf0[29]);
+ buf1[30] = _mm_sub_epi32(buf0[31], buf0[30]);
+ buf1[31] = _mm_add_epi32(buf0[31], buf0[30]);
+
+ // stage 8: slots 0..15 copied; btf on each mirrored pair (16,31), (17,30), ..., (23,24)
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ buf0[2] = buf1[2];
+ buf0[3] = buf1[3];
+ buf0[4] = buf1[4];
+ buf0[5] = buf1[5];
+ buf0[6] = buf1[6];
+ buf0[7] = buf1[7];
+ buf0[8] = buf1[8];
+ buf0[9] = buf1[9];
+ buf0[10] = buf1[10];
+ buf0[11] = buf1[11];
+ buf0[12] = buf1[12];
+ buf0[13] = buf1[13];
+ buf0[14] = buf1[14];
+ buf0[15] = buf1[15];
+ btf_32_sse4_1_type1(cospi[62], cospi[2], buf1[16], buf1[31], buf0[16],
+ buf0[31], bit);
+ btf_32_sse4_1_type1(cospi[30], cospi[34], buf1[17], buf1[30], buf0[17],
+ buf0[30], bit);
+ btf_32_sse4_1_type1(cospi[46], cospi[18], buf1[18], buf1[29], buf0[18],
+ buf0[29], bit);
+ btf_32_sse4_1_type1(cospi[14], cospi[50], buf1[19], buf1[28], buf0[19],
+ buf0[28], bit);
+ btf_32_sse4_1_type1(cospi[54], cospi[10], buf1[20], buf1[27], buf0[20],
+ buf0[27], bit);
+ btf_32_sse4_1_type1(cospi[22], cospi[42], buf1[21], buf1[26], buf0[21],
+ buf0[26], bit);
+ btf_32_sse4_1_type1(cospi[38], cospi[26], buf1[22], buf1[25], buf0[22],
+ buf0[25], bit);
+ btf_32_sse4_1_type1(cospi[6], cospi[58], buf1[23], buf1[24], buf0[23],
+ buf0[24], bit);
+
+ // stage 9: reorder slots into bit-reversed index order (0, 16, 8, 24, ...)
+ stage_idx++;
+ bit = cos_bit[stage_idx];  // bit/cospi are assigned but not read in this stage
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf1[0] = buf0[0];
+ buf1[1] = buf0[16];
+ buf1[2] = buf0[8];
+ buf1[3] = buf0[24];
+ buf1[4] = buf0[4];
+ buf1[5] = buf0[20];
+ buf1[6] = buf0[12];
+ buf1[7] = buf0[28];
+ buf1[8] = buf0[2];
+ buf1[9] = buf0[18];
+ buf1[10] = buf0[10];
+ buf1[11] = buf0[26];
+ buf1[12] = buf0[6];
+ buf1[13] = buf0[22];
+ buf1[14] = buf0[14];
+ buf1[15] = buf0[30];
+ buf1[16] = buf0[1];
+ buf1[17] = buf0[17];
+ buf1[18] = buf0[9];
+ buf1[19] = buf0[25];
+ buf1[20] = buf0[5];
+ buf1[21] = buf0[21];
+ buf1[22] = buf0[13];
+ buf1[23] = buf0[29];
+ buf1[24] = buf0[3];
+ buf1[25] = buf0[19];
+ buf1[26] = buf0[11];
+ buf1[27] = buf0[27];
+ buf1[28] = buf0[7];
+ buf1[29] = buf0[23];
+ buf1[30] = buf0[15];
+ buf1[31] = buf0[31];
+
+ output[0 * col_num + col] = buf1[0];  // write the 32 result rows of this column group
+ output[1 * col_num + col] = buf1[1];
+ output[2 * col_num + col] = buf1[2];
+ output[3 * col_num + col] = buf1[3];
+ output[4 * col_num + col] = buf1[4];
+ output[5 * col_num + col] = buf1[5];
+ output[6 * col_num + col] = buf1[6];
+ output[7 * col_num + col] = buf1[7];
+ output[8 * col_num + col] = buf1[8];
+ output[9 * col_num + col] = buf1[9];
+ output[10 * col_num + col] = buf1[10];
+ output[11 * col_num + col] = buf1[11];
+ output[12 * col_num + col] = buf1[12];
+ output[13 * col_num + col] = buf1[13];
+ output[14 * col_num + col] = buf1[14];
+ output[15 * col_num + col] = buf1[15];
+ output[16 * col_num + col] = buf1[16];
+ output[17 * col_num + col] = buf1[17];
+ output[18 * col_num + col] = buf1[18];
+ output[19 * col_num + col] = buf1[19];
+ output[20 * col_num + col] = buf1[20];
+ output[21 * col_num + col] = buf1[21];
+ output[22 * col_num + col] = buf1[22];
+ output[23 * col_num + col] = buf1[23];
+ output[24 * col_num + col] = buf1[24];
+ output[25 * col_num + col] = buf1[25];
+ output[26 * col_num + col] = buf1[26];
+ output[27 * col_num + col] = buf1[27];
+ output[28 * col_num + col] = buf1[28];
+ output[29 * col_num + col] = buf1[29];
+ output[30 * col_num + col] = buf1[30];
+ output[31 * col_num + col] = buf1[31];
+ }
+}
+
+void vp10_fadst4_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {  // 4-point forward ADST kernel
+ const int txfm_size = 4;  // row vectors per column
+ const int num_per_128 = 4;  // 32-bit lanes per __m128i
+ const int32_t *cospi;  // cosine table selected per stage from cospi_arr
+ __m128i buf0[4];  // buf0/buf1 alternate as stage source and destination
+ __m128i buf1[4];
+ int col_num = txfm_size / num_per_128;  // 4 / 4 = 1 vector column
+ int bit;
+ int col;
+ (void)stage_range;  // not used by this kernel
+ for (col = 0; col < col_num; col++) {
+ // stage 0: load the 4 row vectors of this column
+ int32_t stage_idx = 0;
+ buf0[0] = input[0 * col_num + col];
+ buf0[1] = input[1 * col_num + col];
+ buf0[2] = input[2 * col_num + col];
+ buf0[3] = input[3 * col_num + col];
+
+ // stage 1: buf1[2j] = buf0[3 - 2j], buf1[2j + 1] = buf0[2j]
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = buf0[3];
+ buf1[1] = buf0[0];
+ buf1[2] = buf0[1];
+ buf1[3] = buf0[2];
+
+ // stage 2: btf_32_sse4_1_type0 (defined elsewhere) on pairs (2j, 2j+1)
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[0], buf1[1], buf0[0], buf0[1],
+ bit);
+ btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[2], buf1[3], buf0[2],
+ buf0[3], bit);
+
+ // stage 3: lane-wise sum/difference of elements k and k + 2
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[2]);
+ buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[3]);
+ buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]);
+
+ // stage 4: copy 0..1; cospi[32] butterfly on pair (2, 3)
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2],
+ buf0[3], bit);
+
+ // stage 5: final reorder; odd-indexed outputs are negated (0 - x)
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = buf0[0];
+ buf1[1] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[2]);
+ buf1[2] = buf0[3];
+ buf1[3] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[1]);
+
+ output[0 * col_num + col] = buf1[0];  // stores use the same indexing as the loads
+ output[1 * col_num + col] = buf1[1];
+ output[2 * col_num + col] = buf1[2];
+ output[3 * col_num + col] = buf1[3];
+ }
+}
+
+void vp10_fadst8_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {  // 8-point forward ADST kernel
+ const int txfm_size = 8;  // row vectors per column
+ const int num_per_128 = 4;  // 32-bit lanes per __m128i
+ const int32_t *cospi;  // cosine table selected per stage from cospi_arr
+ __m128i buf0[8];  // buf0/buf1 alternate as stage source and destination
+ __m128i buf1[8];
+ int col_num = txfm_size / num_per_128;  // 8 / 4 = 2 vector columns
+ int bit;
+ int col;
+ (void)stage_range;  // not used by this kernel
+ for (col = 0; col < col_num; col++) {
+ // stage 0: load the 8 row vectors of this column
+ int32_t stage_idx = 0;
+ buf0[0] = input[0 * col_num + col];
+ buf0[1] = input[1 * col_num + col];
+ buf0[2] = input[2 * col_num + col];
+ buf0[3] = input[3 * col_num + col];
+ buf0[4] = input[4 * col_num + col];
+ buf0[5] = input[5 * col_num + col];
+ buf0[6] = input[6 * col_num + col];
+ buf0[7] = input[7 * col_num + col];
+
+ // stage 1: buf1[2j] = buf0[7 - 2j], buf1[2j + 1] = buf0[2j]
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = buf0[7];
+ buf1[1] = buf0[0];
+ buf1[2] = buf0[5];
+ buf1[3] = buf0[2];
+ buf1[4] = buf0[3];
+ buf1[5] = buf0[4];
+ buf1[6] = buf0[1];
+ buf1[7] = buf0[6];
+
+ // stage 2: btf_32_sse4_1_type0 (defined elsewhere) on pairs (2j, 2j+1)
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ btf_32_sse4_1_type0(cospi[4], cospi[60], buf1[0], buf1[1], buf0[0], buf0[1],
+ bit);
+ btf_32_sse4_1_type0(cospi[20], cospi[44], buf1[2], buf1[3], buf0[2],
+ buf0[3], bit);
+ btf_32_sse4_1_type0(cospi[36], cospi[28], buf1[4], buf1[5], buf0[4],
+ buf0[5], bit);
+ btf_32_sse4_1_type0(cospi[52], cospi[12], buf1[6], buf1[7], buf0[6],
+ buf0[7], bit);
+
+ // stage 3: lane-wise sum/difference of elements k and k + 4
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[4]);
+ buf1[4] = _mm_sub_epi32(buf0[0], buf0[4]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[5]);
+ buf1[5] = _mm_sub_epi32(buf0[1], buf0[5]);
+ buf1[2] = _mm_add_epi32(buf0[2], buf0[6]);
+ buf1[6] = _mm_sub_epi32(buf0[2], buf0[6]);
+ buf1[3] = _mm_add_epi32(buf0[3], buf0[7]);
+ buf1[7] = _mm_sub_epi32(buf0[3], buf0[7]);
+
+ // stage 4: copy 0..3; butterflies on pairs (4, 5) and (6, 7)
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ buf0[2] = buf1[2];
+ buf0[3] = buf1[3];
+ btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[4], buf1[5], buf0[4],
+ buf0[5], bit);
+ btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[6], buf1[7], buf0[6],
+ buf0[7], bit);
+
+ // stage 5: sum/difference of elements k and k + 2 within each group of 4
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[2]);
+ buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[3]);
+ buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]);
+ buf1[4] = _mm_add_epi32(buf0[4], buf0[6]);
+ buf1[6] = _mm_sub_epi32(buf0[4], buf0[6]);
+ buf1[5] = _mm_add_epi32(buf0[5], buf0[7]);
+ buf1[7] = _mm_sub_epi32(buf0[5], buf0[7]);
+
+ // stage 6: per group of 4, copy the first two; cospi[32] butterfly on the rest
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2],
+ buf0[3], bit);
+ buf0[4] = buf1[4];
+ buf0[5] = buf1[5];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[6], buf1[7], buf0[6],
+ buf0[7], bit);
+
+ // stage 7: final reorder; odd-indexed outputs are negated (0 - x)
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = buf0[0];
+ buf1[1] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[4]);
+ buf1[2] = buf0[6];
+ buf1[3] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[2]);
+ buf1[4] = buf0[3];
+ buf1[5] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[7]);
+ buf1[6] = buf0[5];
+ buf1[7] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[1]);
+
+ output[0 * col_num + col] = buf1[0];  // stores use the same indexing as the loads
+ output[1 * col_num + col] = buf1[1];
+ output[2 * col_num + col] = buf1[2];
+ output[3 * col_num + col] = buf1[3];
+ output[4 * col_num + col] = buf1[4];
+ output[5 * col_num + col] = buf1[5];
+ output[6 * col_num + col] = buf1[6];
+ output[7 * col_num + col] = buf1[7];
+ }
+}
+
+void vp10_fadst16_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {  // 16-point forward ADST kernel
+ const int txfm_size = 16;  // row vectors per column
+ const int num_per_128 = 4;  // 32-bit lanes per __m128i
+ const int32_t *cospi;  // cosine table selected per stage from cospi_arr
+ __m128i buf0[16];  // buf0/buf1 alternate as stage source and destination
+ __m128i buf1[16];
+ int col_num = txfm_size / num_per_128;  // 16 / 4 = 4 vector columns
+ int bit;
+ int col;
+ (void)stage_range;  // not used by this kernel
+ for (col = 0; col < col_num; col++) {
+ // stage 0: load the 16 row vectors of this column
+ int32_t stage_idx = 0;
+ buf0[0] = input[0 * col_num + col];
+ buf0[1] = input[1 * col_num + col];
+ buf0[2] = input[2 * col_num + col];
+ buf0[3] = input[3 * col_num + col];
+ buf0[4] = input[4 * col_num + col];
+ buf0[5] = input[5 * col_num + col];
+ buf0[6] = input[6 * col_num + col];
+ buf0[7] = input[7 * col_num + col];
+ buf0[8] = input[8 * col_num + col];
+ buf0[9] = input[9 * col_num + col];
+ buf0[10] = input[10 * col_num + col];
+ buf0[11] = input[11 * col_num + col];
+ buf0[12] = input[12 * col_num + col];
+ buf0[13] = input[13 * col_num + col];
+ buf0[14] = input[14 * col_num + col];
+ buf0[15] = input[15 * col_num + col];
+
+ // stage 1: buf1[2j] = buf0[15 - 2j], buf1[2j + 1] = buf0[2j]
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = buf0[15];
+ buf1[1] = buf0[0];
+ buf1[2] = buf0[13];
+ buf1[3] = buf0[2];
+ buf1[4] = buf0[11];
+ buf1[5] = buf0[4];
+ buf1[6] = buf0[9];
+ buf1[7] = buf0[6];
+ buf1[8] = buf0[7];
+ buf1[9] = buf0[8];
+ buf1[10] = buf0[5];
+ buf1[11] = buf0[10];
+ buf1[12] = buf0[3];
+ buf1[13] = buf0[12];
+ buf1[14] = buf0[1];
+ buf1[15] = buf0[14];
+
+ // stage 2: btf_32_sse4_1_type0 (defined elsewhere) on pairs (2j, 2j+1)
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ btf_32_sse4_1_type0(cospi[2], cospi[62], buf1[0], buf1[1], buf0[0], buf0[1],
+ bit);
+ btf_32_sse4_1_type0(cospi[10], cospi[54], buf1[2], buf1[3], buf0[2],
+ buf0[3], bit);
+ btf_32_sse4_1_type0(cospi[18], cospi[46], buf1[4], buf1[5], buf0[4],
+ buf0[5], bit);
+ btf_32_sse4_1_type0(cospi[26], cospi[38], buf1[6], buf1[7], buf0[6],
+ buf0[7], bit);
+ btf_32_sse4_1_type0(cospi[34], cospi[30], buf1[8], buf1[9], buf0[8],
+ buf0[9], bit);
+ btf_32_sse4_1_type0(cospi[42], cospi[22], buf1[10], buf1[11], buf0[10],
+ buf0[11], bit);
+ btf_32_sse4_1_type0(cospi[50], cospi[14], buf1[12], buf1[13], buf0[12],
+ buf0[13], bit);
+ btf_32_sse4_1_type0(cospi[58], cospi[6], buf1[14], buf1[15], buf0[14],
+ buf0[15], bit);
+
+ // stage 3: lane-wise sum/difference of elements k and k + 8
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[8]);
+ buf1[8] = _mm_sub_epi32(buf0[0], buf0[8]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[9]);
+ buf1[9] = _mm_sub_epi32(buf0[1], buf0[9]);
+ buf1[2] = _mm_add_epi32(buf0[2], buf0[10]);
+ buf1[10] = _mm_sub_epi32(buf0[2], buf0[10]);
+ buf1[3] = _mm_add_epi32(buf0[3], buf0[11]);
+ buf1[11] = _mm_sub_epi32(buf0[3], buf0[11]);
+ buf1[4] = _mm_add_epi32(buf0[4], buf0[12]);
+ buf1[12] = _mm_sub_epi32(buf0[4], buf0[12]);
+ buf1[5] = _mm_add_epi32(buf0[5], buf0[13]);
+ buf1[13] = _mm_sub_epi32(buf0[5], buf0[13]);
+ buf1[6] = _mm_add_epi32(buf0[6], buf0[14]);
+ buf1[14] = _mm_sub_epi32(buf0[6], buf0[14]);
+ buf1[7] = _mm_add_epi32(buf0[7], buf0[15]);
+ buf1[15] = _mm_sub_epi32(buf0[7], buf0[15]);
+
+ // stage 4: copy 0..7; butterflies on the pairs in 8..15
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ buf0[2] = buf1[2];
+ buf0[3] = buf1[3];
+ buf0[4] = buf1[4];
+ buf0[5] = buf1[5];
+ buf0[6] = buf1[6];
+ buf0[7] = buf1[7];
+ btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[8], buf1[9], buf0[8], buf0[9],
+ bit);
+ btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[10], buf1[11], buf0[10],
+ buf0[11], bit);
+ btf_32_sse4_1_type0(-cospi[56], cospi[8], buf1[12], buf1[13], buf0[12],
+ buf0[13], bit);
+ btf_32_sse4_1_type0(-cospi[24], cospi[40], buf1[14], buf1[15], buf0[14],
+ buf0[15], bit);
+
+ // stage 5: sum/difference of elements k and k + 4 within each group of 8
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[4]);
+ buf1[4] = _mm_sub_epi32(buf0[0], buf0[4]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[5]);
+ buf1[5] = _mm_sub_epi32(buf0[1], buf0[5]);
+ buf1[2] = _mm_add_epi32(buf0[2], buf0[6]);
+ buf1[6] = _mm_sub_epi32(buf0[2], buf0[6]);
+ buf1[3] = _mm_add_epi32(buf0[3], buf0[7]);
+ buf1[7] = _mm_sub_epi32(buf0[3], buf0[7]);
+ buf1[8] = _mm_add_epi32(buf0[8], buf0[12]);
+ buf1[12] = _mm_sub_epi32(buf0[8], buf0[12]);
+ buf1[9] = _mm_add_epi32(buf0[9], buf0[13]);
+ buf1[13] = _mm_sub_epi32(buf0[9], buf0[13]);
+ buf1[10] = _mm_add_epi32(buf0[10], buf0[14]);
+ buf1[14] = _mm_sub_epi32(buf0[10], buf0[14]);
+ buf1[11] = _mm_add_epi32(buf0[11], buf0[15]);
+ buf1[15] = _mm_sub_epi32(buf0[11], buf0[15]);
+
+ // stage 6: per group of 8, copy the first four; butterflies on the last four
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ buf0[2] = buf1[2];
+ buf0[3] = buf1[3];
+ btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[4], buf1[5], buf0[4],
+ buf0[5], bit);
+ btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[6], buf1[7], buf0[6],
+ buf0[7], bit);
+ buf0[8] = buf1[8];
+ buf0[9] = buf1[9];
+ buf0[10] = buf1[10];
+ buf0[11] = buf1[11];
+ btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[12], buf1[13], buf0[12],
+ buf0[13], bit);
+ btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[14], buf1[15], buf0[14],
+ buf0[15], bit);
+
+ // stage 7: sum/difference of elements k and k + 2 within each group of 4
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[2]);
+ buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[3]);
+ buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]);
+ buf1[4] = _mm_add_epi32(buf0[4], buf0[6]);
+ buf1[6] = _mm_sub_epi32(buf0[4], buf0[6]);
+ buf1[5] = _mm_add_epi32(buf0[5], buf0[7]);
+ buf1[7] = _mm_sub_epi32(buf0[5], buf0[7]);
+ buf1[8] = _mm_add_epi32(buf0[8], buf0[10]);
+ buf1[10] = _mm_sub_epi32(buf0[8], buf0[10]);
+ buf1[9] = _mm_add_epi32(buf0[9], buf0[11]);
+ buf1[11] = _mm_sub_epi32(buf0[9], buf0[11]);
+ buf1[12] = _mm_add_epi32(buf0[12], buf0[14]);
+ buf1[14] = _mm_sub_epi32(buf0[12], buf0[14]);
+ buf1[13] = _mm_add_epi32(buf0[13], buf0[15]);
+ buf1[15] = _mm_sub_epi32(buf0[13], buf0[15]);
+
+ // stage 8: per group of 4, copy the first two; cospi[32] butterfly on the rest
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2],
+ buf0[3], bit);
+ buf0[4] = buf1[4];
+ buf0[5] = buf1[5];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[6], buf1[7], buf0[6],
+ buf0[7], bit);
+ buf0[8] = buf1[8];
+ buf0[9] = buf1[9];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[10], buf1[11], buf0[10],
+ buf0[11], bit);
+ buf0[12] = buf1[12];
+ buf0[13] = buf1[13];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[14], buf1[15], buf0[14],
+ buf0[15], bit);
+
+ // stage 9: final reorder; odd-indexed outputs are negated (0 - x)
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = buf0[0];
+ buf1[1] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[8]);
+ buf1[2] = buf0[12];
+ buf1[3] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[4]);
+ buf1[4] = buf0[6];
+ buf1[5] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[14]);
+ buf1[6] = buf0[10];
+ buf1[7] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[2]);
+ buf1[8] = buf0[3];
+ buf1[9] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[11]);
+ buf1[10] = buf0[15];
+ buf1[11] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[7]);
+ buf1[12] = buf0[5];
+ buf1[13] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[13]);
+ buf1[14] = buf0[9];
+ buf1[15] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[1]);
+
+ output[0 * col_num + col] = buf1[0];  // stores use the same indexing as the loads
+ output[1 * col_num + col] = buf1[1];
+ output[2 * col_num + col] = buf1[2];
+ output[3 * col_num + col] = buf1[3];
+ output[4 * col_num + col] = buf1[4];
+ output[5 * col_num + col] = buf1[5];
+ output[6 * col_num + col] = buf1[6];
+ output[7 * col_num + col] = buf1[7];
+ output[8 * col_num + col] = buf1[8];
+ output[9 * col_num + col] = buf1[9];
+ output[10 * col_num + col] = buf1[10];
+ output[11 * col_num + col] = buf1[11];
+ output[12 * col_num + col] = buf1[12];
+ output[13 * col_num + col] = buf1[13];
+ output[14 * col_num + col] = buf1[14];
+ output[15 * col_num + col] = buf1[15];
+ }
+}
+
+void vp10_fadst32_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range) {  // 32-point forward ADST kernel
+ const int txfm_size = 32;  // row vectors per column
+ const int num_per_128 = 4;  // 32-bit lanes per __m128i
+ const int32_t *cospi;  // cosine table selected per stage from cospi_arr
+ __m128i buf0[32];  // buf0/buf1 alternate as stage source and destination
+ __m128i buf1[32];
+ int col_num = txfm_size / num_per_128;  // 32 / 4 = 8 vector columns
+ int bit;
+ int col;
+ (void)stage_range;  // not used by this kernel
+ for (col = 0; col < col_num; col++) {
+ // stage 0: load the 32 row vectors of this column
+ int32_t stage_idx = 0;
+ buf0[0] = input[0 * col_num + col];
+ buf0[1] = input[1 * col_num + col];
+ buf0[2] = input[2 * col_num + col];
+ buf0[3] = input[3 * col_num + col];
+ buf0[4] = input[4 * col_num + col];
+ buf0[5] = input[5 * col_num + col];
+ buf0[6] = input[6 * col_num + col];
+ buf0[7] = input[7 * col_num + col];
+ buf0[8] = input[8 * col_num + col];
+ buf0[9] = input[9 * col_num + col];
+ buf0[10] = input[10 * col_num + col];
+ buf0[11] = input[11 * col_num + col];
+ buf0[12] = input[12 * col_num + col];
+ buf0[13] = input[13 * col_num + col];
+ buf0[14] = input[14 * col_num + col];
+ buf0[15] = input[15 * col_num + col];
+ buf0[16] = input[16 * col_num + col];
+ buf0[17] = input[17 * col_num + col];
+ buf0[18] = input[18 * col_num + col];
+ buf0[19] = input[19 * col_num + col];
+ buf0[20] = input[20 * col_num + col];
+ buf0[21] = input[21 * col_num + col];
+ buf0[22] = input[22 * col_num + col];
+ buf0[23] = input[23 * col_num + col];
+ buf0[24] = input[24 * col_num + col];
+ buf0[25] = input[25 * col_num + col];
+ buf0[26] = input[26 * col_num + col];
+ buf0[27] = input[27 * col_num + col];
+ buf0[28] = input[28 * col_num + col];
+ buf0[29] = input[29 * col_num + col];
+ buf0[30] = input[30 * col_num + col];
+ buf0[31] = input[31 * col_num + col];
+
+ // stage 1: buf1[2j] = buf0[31 - 2j], buf1[2j + 1] = buf0[2j]
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = buf0[31];
+ buf1[1] = buf0[0];
+ buf1[2] = buf0[29];
+ buf1[3] = buf0[2];
+ buf1[4] = buf0[27];
+ buf1[5] = buf0[4];
+ buf1[6] = buf0[25];
+ buf1[7] = buf0[6];
+ buf1[8] = buf0[23];
+ buf1[9] = buf0[8];
+ buf1[10] = buf0[21];
+ buf1[11] = buf0[10];
+ buf1[12] = buf0[19];
+ buf1[13] = buf0[12];
+ buf1[14] = buf0[17];
+ buf1[15] = buf0[14];
+ buf1[16] = buf0[15];
+ buf1[17] = buf0[16];
+ buf1[18] = buf0[13];
+ buf1[19] = buf0[18];
+ buf1[20] = buf0[11];
+ buf1[21] = buf0[20];
+ buf1[22] = buf0[9];
+ buf1[23] = buf0[22];
+ buf1[24] = buf0[7];
+ buf1[25] = buf0[24];
+ buf1[26] = buf0[5];
+ buf1[27] = buf0[26];
+ buf1[28] = buf0[3];
+ buf1[29] = buf0[28];
+ buf1[30] = buf0[1];
+ buf1[31] = buf0[30];
+
+ // stage 2: btf_32_sse4_1_type0 (defined elsewhere) on pairs (2j, 2j+1)
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ btf_32_sse4_1_type0(cospi[1], cospi[63], buf1[0], buf1[1], buf0[0], buf0[1],
+ bit);
+ btf_32_sse4_1_type0(cospi[5], cospi[59], buf1[2], buf1[3], buf0[2], buf0[3],
+ bit);
+ btf_32_sse4_1_type0(cospi[9], cospi[55], buf1[4], buf1[5], buf0[4], buf0[5],
+ bit);
+ btf_32_sse4_1_type0(cospi[13], cospi[51], buf1[6], buf1[7], buf0[6],
+ buf0[7], bit);
+ btf_32_sse4_1_type0(cospi[17], cospi[47], buf1[8], buf1[9], buf0[8],
+ buf0[9], bit);
+ btf_32_sse4_1_type0(cospi[21], cospi[43], buf1[10], buf1[11], buf0[10],
+ buf0[11], bit);
+ btf_32_sse4_1_type0(cospi[25], cospi[39], buf1[12], buf1[13], buf0[12],
+ buf0[13], bit);
+ btf_32_sse4_1_type0(cospi[29], cospi[35], buf1[14], buf1[15], buf0[14],
+ buf0[15], bit);
+ btf_32_sse4_1_type0(cospi[33], cospi[31], buf1[16], buf1[17], buf0[16],
+ buf0[17], bit);
+ btf_32_sse4_1_type0(cospi[37], cospi[27], buf1[18], buf1[19], buf0[18],
+ buf0[19], bit);
+ btf_32_sse4_1_type0(cospi[41], cospi[23], buf1[20], buf1[21], buf0[20],
+ buf0[21], bit);
+ btf_32_sse4_1_type0(cospi[45], cospi[19], buf1[22], buf1[23], buf0[22],
+ buf0[23], bit);
+ btf_32_sse4_1_type0(cospi[49], cospi[15], buf1[24], buf1[25], buf0[24],
+ buf0[25], bit);
+ btf_32_sse4_1_type0(cospi[53], cospi[11], buf1[26], buf1[27], buf0[26],
+ buf0[27], bit);
+ btf_32_sse4_1_type0(cospi[57], cospi[7], buf1[28], buf1[29], buf0[28],
+ buf0[29], bit);
+ btf_32_sse4_1_type0(cospi[61], cospi[3], buf1[30], buf1[31], buf0[30],
+ buf0[31], bit);
+
+ // stage 3: lane-wise sum/difference of elements k and k + 16
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[16]);
+ buf1[16] = _mm_sub_epi32(buf0[0], buf0[16]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[17]);
+ buf1[17] = _mm_sub_epi32(buf0[1], buf0[17]);
+ buf1[2] = _mm_add_epi32(buf0[2], buf0[18]);
+ buf1[18] = _mm_sub_epi32(buf0[2], buf0[18]);
+ buf1[3] = _mm_add_epi32(buf0[3], buf0[19]);
+ buf1[19] = _mm_sub_epi32(buf0[3], buf0[19]);
+ buf1[4] = _mm_add_epi32(buf0[4], buf0[20]);
+ buf1[20] = _mm_sub_epi32(buf0[4], buf0[20]);
+ buf1[5] = _mm_add_epi32(buf0[5], buf0[21]);
+ buf1[21] = _mm_sub_epi32(buf0[5], buf0[21]);
+ buf1[6] = _mm_add_epi32(buf0[6], buf0[22]);
+ buf1[22] = _mm_sub_epi32(buf0[6], buf0[22]);
+ buf1[7] = _mm_add_epi32(buf0[7], buf0[23]);
+ buf1[23] = _mm_sub_epi32(buf0[7], buf0[23]);
+ buf1[8] = _mm_add_epi32(buf0[8], buf0[24]);
+ buf1[24] = _mm_sub_epi32(buf0[8], buf0[24]);
+ buf1[9] = _mm_add_epi32(buf0[9], buf0[25]);
+ buf1[25] = _mm_sub_epi32(buf0[9], buf0[25]);
+ buf1[10] = _mm_add_epi32(buf0[10], buf0[26]);
+ buf1[26] = _mm_sub_epi32(buf0[10], buf0[26]);
+ buf1[11] = _mm_add_epi32(buf0[11], buf0[27]);
+ buf1[27] = _mm_sub_epi32(buf0[11], buf0[27]);
+ buf1[12] = _mm_add_epi32(buf0[12], buf0[28]);
+ buf1[28] = _mm_sub_epi32(buf0[12], buf0[28]);
+ buf1[13] = _mm_add_epi32(buf0[13], buf0[29]);
+ buf1[29] = _mm_sub_epi32(buf0[13], buf0[29]);
+ buf1[14] = _mm_add_epi32(buf0[14], buf0[30]);
+ buf1[30] = _mm_sub_epi32(buf0[14], buf0[30]);
+ buf1[15] = _mm_add_epi32(buf0[15], buf0[31]);
+ buf1[31] = _mm_sub_epi32(buf0[15], buf0[31]);
+
+ // stage 4: copy 0..15; butterflies on the pairs in 16..31
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ buf0[2] = buf1[2];
+ buf0[3] = buf1[3];
+ buf0[4] = buf1[4];
+ buf0[5] = buf1[5];
+ buf0[6] = buf1[6];
+ buf0[7] = buf1[7];
+ buf0[8] = buf1[8];
+ buf0[9] = buf1[9];
+ buf0[10] = buf1[10];
+ buf0[11] = buf1[11];
+ buf0[12] = buf1[12];
+ buf0[13] = buf1[13];
+ buf0[14] = buf1[14];
+ buf0[15] = buf1[15];
+ btf_32_sse4_1_type0(cospi[4], cospi[60], buf1[16], buf1[17], buf0[16],
+ buf0[17], bit);
+ btf_32_sse4_1_type0(cospi[20], cospi[44], buf1[18], buf1[19], buf0[18],
+ buf0[19], bit);
+ btf_32_sse4_1_type0(cospi[36], cospi[28], buf1[20], buf1[21], buf0[20],
+ buf0[21], bit);
+ btf_32_sse4_1_type0(cospi[52], cospi[12], buf1[22], buf1[23], buf0[22],
+ buf0[23], bit);
+ btf_32_sse4_1_type0(-cospi[60], cospi[4], buf1[24], buf1[25], buf0[24],
+ buf0[25], bit);
+ btf_32_sse4_1_type0(-cospi[44], cospi[20], buf1[26], buf1[27], buf0[26],
+ buf0[27], bit);
+ btf_32_sse4_1_type0(-cospi[28], cospi[36], buf1[28], buf1[29], buf0[28],
+ buf0[29], bit);
+ btf_32_sse4_1_type0(-cospi[12], cospi[52], buf1[30], buf1[31], buf0[30],
+ buf0[31], bit);
+
+ // stage 5: sum/difference of elements k and k + 8 within each group of 16
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[8]);
+ buf1[8] = _mm_sub_epi32(buf0[0], buf0[8]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[9]);
+ buf1[9] = _mm_sub_epi32(buf0[1], buf0[9]);
+ buf1[2] = _mm_add_epi32(buf0[2], buf0[10]);
+ buf1[10] = _mm_sub_epi32(buf0[2], buf0[10]);
+ buf1[3] = _mm_add_epi32(buf0[3], buf0[11]);
+ buf1[11] = _mm_sub_epi32(buf0[3], buf0[11]);
+ buf1[4] = _mm_add_epi32(buf0[4], buf0[12]);
+ buf1[12] = _mm_sub_epi32(buf0[4], buf0[12]);
+ buf1[5] = _mm_add_epi32(buf0[5], buf0[13]);
+ buf1[13] = _mm_sub_epi32(buf0[5], buf0[13]);
+ buf1[6] = _mm_add_epi32(buf0[6], buf0[14]);
+ buf1[14] = _mm_sub_epi32(buf0[6], buf0[14]);
+ buf1[7] = _mm_add_epi32(buf0[7], buf0[15]);
+ buf1[15] = _mm_sub_epi32(buf0[7], buf0[15]);
+ buf1[16] = _mm_add_epi32(buf0[16], buf0[24]);
+ buf1[24] = _mm_sub_epi32(buf0[16], buf0[24]);
+ buf1[17] = _mm_add_epi32(buf0[17], buf0[25]);
+ buf1[25] = _mm_sub_epi32(buf0[17], buf0[25]);
+ buf1[18] = _mm_add_epi32(buf0[18], buf0[26]);
+ buf1[26] = _mm_sub_epi32(buf0[18], buf0[26]);
+ buf1[19] = _mm_add_epi32(buf0[19], buf0[27]);
+ buf1[27] = _mm_sub_epi32(buf0[19], buf0[27]);
+ buf1[20] = _mm_add_epi32(buf0[20], buf0[28]);
+ buf1[28] = _mm_sub_epi32(buf0[20], buf0[28]);
+ buf1[21] = _mm_add_epi32(buf0[21], buf0[29]);
+ buf1[29] = _mm_sub_epi32(buf0[21], buf0[29]);
+ buf1[22] = _mm_add_epi32(buf0[22], buf0[30]);
+ buf1[30] = _mm_sub_epi32(buf0[22], buf0[30]);
+ buf1[23] = _mm_add_epi32(buf0[23], buf0[31]);
+ buf1[31] = _mm_sub_epi32(buf0[23], buf0[31]);
+
+ // stage 6: per group of 16, copy the first eight; butterflies on the last eight
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ buf0[2] = buf1[2];
+ buf0[3] = buf1[3];
+ buf0[4] = buf1[4];
+ buf0[5] = buf1[5];
+ buf0[6] = buf1[6];
+ buf0[7] = buf1[7];
+ btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[8], buf1[9], buf0[8], buf0[9],
+ bit);
+ btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[10], buf1[11], buf0[10],
+ buf0[11], bit);
+ btf_32_sse4_1_type0(-cospi[56], cospi[8], buf1[12], buf1[13], buf0[12],
+ buf0[13], bit);
+ btf_32_sse4_1_type0(-cospi[24], cospi[40], buf1[14], buf1[15], buf0[14],
+ buf0[15], bit);
+ buf0[16] = buf1[16];
+ buf0[17] = buf1[17];
+ buf0[18] = buf1[18];
+ buf0[19] = buf1[19];
+ buf0[20] = buf1[20];
+ buf0[21] = buf1[21];
+ buf0[22] = buf1[22];
+ buf0[23] = buf1[23];
+ btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[24], buf1[25], buf0[24],
+ buf0[25], bit);
+ btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[26], buf1[27], buf0[26],
+ buf0[27], bit);
+ btf_32_sse4_1_type0(-cospi[56], cospi[8], buf1[28], buf1[29], buf0[28],
+ buf0[29], bit);
+ btf_32_sse4_1_type0(-cospi[24], cospi[40], buf1[30], buf1[31], buf0[30],
+ buf0[31], bit);
+
+ // stage 7: sum/difference of elements k and k + 4 within each group of 8
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[4]);
+ buf1[4] = _mm_sub_epi32(buf0[0], buf0[4]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[5]);
+ buf1[5] = _mm_sub_epi32(buf0[1], buf0[5]);
+ buf1[2] = _mm_add_epi32(buf0[2], buf0[6]);
+ buf1[6] = _mm_sub_epi32(buf0[2], buf0[6]);
+ buf1[3] = _mm_add_epi32(buf0[3], buf0[7]);
+ buf1[7] = _mm_sub_epi32(buf0[3], buf0[7]);
+ buf1[8] = _mm_add_epi32(buf0[8], buf0[12]);
+ buf1[12] = _mm_sub_epi32(buf0[8], buf0[12]);
+ buf1[9] = _mm_add_epi32(buf0[9], buf0[13]);
+ buf1[13] = _mm_sub_epi32(buf0[9], buf0[13]);
+ buf1[10] = _mm_add_epi32(buf0[10], buf0[14]);
+ buf1[14] = _mm_sub_epi32(buf0[10], buf0[14]);
+ buf1[11] = _mm_add_epi32(buf0[11], buf0[15]);
+ buf1[15] = _mm_sub_epi32(buf0[11], buf0[15]);
+ buf1[16] = _mm_add_epi32(buf0[16], buf0[20]);
+ buf1[20] = _mm_sub_epi32(buf0[16], buf0[20]);
+ buf1[17] = _mm_add_epi32(buf0[17], buf0[21]);
+ buf1[21] = _mm_sub_epi32(buf0[17], buf0[21]);
+ buf1[18] = _mm_add_epi32(buf0[18], buf0[22]);
+ buf1[22] = _mm_sub_epi32(buf0[18], buf0[22]);
+ buf1[19] = _mm_add_epi32(buf0[19], buf0[23]);
+ buf1[23] = _mm_sub_epi32(buf0[19], buf0[23]);
+ buf1[24] = _mm_add_epi32(buf0[24], buf0[28]);
+ buf1[28] = _mm_sub_epi32(buf0[24], buf0[28]);
+ buf1[25] = _mm_add_epi32(buf0[25], buf0[29]);
+ buf1[29] = _mm_sub_epi32(buf0[25], buf0[29]);
+ buf1[26] = _mm_add_epi32(buf0[26], buf0[30]);
+ buf1[30] = _mm_sub_epi32(buf0[26], buf0[30]);
+ buf1[27] = _mm_add_epi32(buf0[27], buf0[31]);
+ buf1[31] = _mm_sub_epi32(buf0[27], buf0[31]);
+
+ // stage 8: per group of 8, copy the first four; butterflies on the last four
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ buf0[2] = buf1[2];
+ buf0[3] = buf1[3];
+ btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[4], buf1[5], buf0[4],
+ buf0[5], bit);
+ btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[6], buf1[7], buf0[6],
+ buf0[7], bit);
+ buf0[8] = buf1[8];
+ buf0[9] = buf1[9];
+ buf0[10] = buf1[10];
+ buf0[11] = buf1[11];
+ btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[12], buf1[13], buf0[12],
+ buf0[13], bit);
+ btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[14], buf1[15], buf0[14],
+ buf0[15], bit);
+ buf0[16] = buf1[16];
+ buf0[17] = buf1[17];
+ buf0[18] = buf1[18];
+ buf0[19] = buf1[19];
+ btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[20], buf1[21], buf0[20],
+ buf0[21], bit);
+ btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[22], buf1[23], buf0[22],
+ buf0[23], bit);
+ buf0[24] = buf1[24];
+ buf0[25] = buf1[25];
+ buf0[26] = buf1[26];
+ buf0[27] = buf1[27];
+ btf_32_sse4_1_type0(cospi[16], cospi[48], buf1[28], buf1[29], buf0[28],
+ buf0[29], bit);
+ btf_32_sse4_1_type0(-cospi[48], cospi[16], buf1[30], buf1[31], buf0[30],
+ buf0[31], bit);
+
+ // stage 9: sum/difference of elements k and k + 2 within each group of 4
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = _mm_add_epi32(buf0[0], buf0[2]);
+ buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]);
+ buf1[1] = _mm_add_epi32(buf0[1], buf0[3]);
+ buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]);
+ buf1[4] = _mm_add_epi32(buf0[4], buf0[6]);
+ buf1[6] = _mm_sub_epi32(buf0[4], buf0[6]);
+ buf1[5] = _mm_add_epi32(buf0[5], buf0[7]);
+ buf1[7] = _mm_sub_epi32(buf0[5], buf0[7]);
+ buf1[8] = _mm_add_epi32(buf0[8], buf0[10]);
+ buf1[10] = _mm_sub_epi32(buf0[8], buf0[10]);
+ buf1[9] = _mm_add_epi32(buf0[9], buf0[11]);
+ buf1[11] = _mm_sub_epi32(buf0[9], buf0[11]);
+ buf1[12] = _mm_add_epi32(buf0[12], buf0[14]);
+ buf1[14] = _mm_sub_epi32(buf0[12], buf0[14]);
+ buf1[13] = _mm_add_epi32(buf0[13], buf0[15]);
+ buf1[15] = _mm_sub_epi32(buf0[13], buf0[15]);
+ buf1[16] = _mm_add_epi32(buf0[16], buf0[18]);
+ buf1[18] = _mm_sub_epi32(buf0[16], buf0[18]);
+ buf1[17] = _mm_add_epi32(buf0[17], buf0[19]);
+ buf1[19] = _mm_sub_epi32(buf0[17], buf0[19]);
+ buf1[20] = _mm_add_epi32(buf0[20], buf0[22]);
+ buf1[22] = _mm_sub_epi32(buf0[20], buf0[22]);
+ buf1[21] = _mm_add_epi32(buf0[21], buf0[23]);
+ buf1[23] = _mm_sub_epi32(buf0[21], buf0[23]);
+ buf1[24] = _mm_add_epi32(buf0[24], buf0[26]);
+ buf1[26] = _mm_sub_epi32(buf0[24], buf0[26]);
+ buf1[25] = _mm_add_epi32(buf0[25], buf0[27]);
+ buf1[27] = _mm_sub_epi32(buf0[25], buf0[27]);
+ buf1[28] = _mm_add_epi32(buf0[28], buf0[30]);
+ buf1[30] = _mm_sub_epi32(buf0[28], buf0[30]);
+ buf1[29] = _mm_add_epi32(buf0[29], buf0[31]);
+ buf1[31] = _mm_sub_epi32(buf0[29], buf0[31]);
+
+ // stage 10: per group of 4, copy the first two; cospi[32] butterfly on the rest
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];
+ buf0[0] = buf1[0];
+ buf0[1] = buf1[1];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2],
+ buf0[3], bit);
+ buf0[4] = buf1[4];
+ buf0[5] = buf1[5];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[6], buf1[7], buf0[6],
+ buf0[7], bit);
+ buf0[8] = buf1[8];
+ buf0[9] = buf1[9];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[10], buf1[11], buf0[10],
+ buf0[11], bit);
+ buf0[12] = buf1[12];
+ buf0[13] = buf1[13];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[14], buf1[15], buf0[14],
+ buf0[15], bit);
+ buf0[16] = buf1[16];
+ buf0[17] = buf1[17];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[18], buf1[19], buf0[18],
+ buf0[19], bit);
+ buf0[20] = buf1[20];
+ buf0[21] = buf1[21];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[22], buf1[23], buf0[22],
+ buf0[23], bit);
+ buf0[24] = buf1[24];
+ buf0[25] = buf1[25];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[26], buf1[27], buf0[26],
+ buf0[27], bit);
+ buf0[28] = buf1[28];
+ buf0[29] = buf1[29];
+ btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[30], buf1[31], buf0[30],
+ buf0[31], bit);
+
+ // stage 11: final reorder; odd-indexed outputs are negated (0 - x)
+ stage_idx++;
+ bit = cos_bit[stage_idx];
+ cospi = cospi_arr[bit - cos_bit_min];  // not referenced in this stage
+ buf1[0] = buf0[0];
+ buf1[1] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[16]);
+ buf1[2] = buf0[24];
+ buf1[3] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[8]);
+ buf1[4] = buf0[12];
+ buf1[5] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[28]);
+ buf1[6] = buf0[20];
+ buf1[7] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[4]);
+ buf1[8] = buf0[6];
+ buf1[9] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[22]);
+ buf1[10] = buf0[30];
+ buf1[11] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[14]);
+ buf1[12] = buf0[10];
+ buf1[13] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[26]);
+ buf1[14] = buf0[18];
+ buf1[15] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[2]);
+ buf1[16] = buf0[3];
+ buf1[17] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[19]);
+ buf1[18] = buf0[27];
+ buf1[19] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[11]);
+ buf1[20] = buf0[15];
+ buf1[21] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[31]);
+ buf1[22] = buf0[23];
+ buf1[23] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[7]);
+ buf1[24] = buf0[5];
+ buf1[25] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[21]);
+ buf1[26] = buf0[29];
+ buf1[27] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[13]);
+ buf1[28] = buf0[9];
+ buf1[29] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[25]);
+ buf1[30] = buf0[17];
+ buf1[31] = _mm_sub_epi32(_mm_set1_epi32(0), buf0[1]);
+
+ output[0 * col_num + col] = buf1[0];  // stores use the same indexing as the loads
+ output[1 * col_num + col] = buf1[1];
+ output[2 * col_num + col] = buf1[2];
+ output[3 * col_num + col] = buf1[3];
+ output[4 * col_num + col] = buf1[4];
+ output[5 * col_num + col] = buf1[5];
+ output[6 * col_num + col] = buf1[6];
+ output[7 * col_num + col] = buf1[7];
+ output[8 * col_num + col] = buf1[8];
+ output[9 * col_num + col] = buf1[9];
+ output[10 * col_num + col] = buf1[10];
+ output[11 * col_num + col] = buf1[11];
+ output[12 * col_num + col] = buf1[12];
+ output[13 * col_num + col] = buf1[13];
+ output[14 * col_num + col] = buf1[14];
+ output[15 * col_num + col] = buf1[15];
+ output[16 * col_num + col] = buf1[16];
+ output[17 * col_num + col] = buf1[17];
+ output[18 * col_num + col] = buf1[18];
+ output[19 * col_num + col] = buf1[19];
+ output[20 * col_num + col] = buf1[20];
+ output[21 * col_num + col] = buf1[21];
+ output[22 * col_num + col] = buf1[22];
+ output[23 * col_num + col] = buf1[23];
+ output[24 * col_num + col] = buf1[24];
+ output[25 * col_num + col] = buf1[25];
+ output[26 * col_num + col] = buf1[26];
+ output[27 * col_num + col] = buf1[27];
+ output[28 * col_num + col] = buf1[28];
+ output[29 * col_num + col] = buf1[29];
+ output[30 * col_num + col] = buf1[30];
+ output[31 * col_num + col] = buf1[31];
+ }
+}
diff --git a/av1/common/x86/vp10_fwd_txfm2d_sse4.c b/av1/common/x86/vp10_fwd_txfm2d_sse4.c
new file mode 100644
index 0000000..a59a0c8
--- /dev/null
+++ b/av1/common/x86/vp10_fwd_txfm2d_sse4.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "av1/common/enums.h"
+#include "av1/common/vp10_txfm.h"
+#include "av1/common/x86/vp10_txfm1d_sse4.h"
+
+// Widens a strided square int16 block into a densely packed int32 block.
+static INLINE void int16_array_with_stride_to_int32_array_without_stride(
+    const int16_t *input, int stride, int32_t *output, int txfm1d_size) {
+  int row, col;
+  for (row = 0; row < txfm1d_size; ++row) {
+    const int16_t *src_row = input + row * stride;
+    for (col = 0; col < txfm1d_size; ++col) *output++ = (int32_t)src_row[col];
+  }
+}
+
+typedef void (*TxfmFuncSSE2)(const __m128i *input, __m128i *output,
+                             const int8_t *cos_bit, const int8_t *stage_range);
+
+// Selects the SSE4.1 forward 1D kernel for txfm_type; unknown types assert.
+static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) {
+  switch (txfm_type) {
+    case TXFM_TYPE_DCT4: return vp10_fdct4_new_sse4_1;
+    case TXFM_TYPE_DCT8: return vp10_fdct8_new_sse4_1;
+    case TXFM_TYPE_DCT16: return vp10_fdct16_new_sse4_1;
+    case TXFM_TYPE_DCT32: return vp10_fdct32_new_sse4_1;
+    case TXFM_TYPE_ADST4: return vp10_fadst4_new_sse4_1;
+    case TXFM_TYPE_ADST8: return vp10_fadst8_new_sse4_1;
+    case TXFM_TYPE_ADST16: return vp10_fadst16_new_sse4_1;
+    case TXFM_TYPE_ADST32: return vp10_fadst32_new_sse4_1;
+    default: assert(0); return NULL;
+  }
+}
+
+// Forward 2D transform driver: widen input, run the column kernel, transpose,
+// run the row kernel, transpose back; a rounding shift is applied per stage.
+static INLINE void fwd_txfm2d_sse4_1(const int16_t *input, int32_t *output,
+                                     const int stride, const TXFM_2D_CFG *cfg,
+                                     int32_t *txfm_buf) {
+  const int size = cfg->txfm_size;
+  const int8_t *const shifts = cfg->shift;
+  const int8_t *const col_cos_bit = cfg->cos_bit_col;
+  const int8_t *const col_range = cfg->stage_range_col;
+  const int8_t *const row_cos_bit = cfg->cos_bit_row;
+  const int8_t *const row_range = cfg->stage_range_row;
+  const TxfmFuncSSE2 col_txfm = fwd_txfm_type_to_func(cfg->txfm_type_col);
+  const TxfmFuncSSE2 row_txfm = fwd_txfm_type_to_func(cfg->txfm_type_row);
+  __m128i *const work = (__m128i *)txfm_buf;  // scratch, viewed as vectors
+  __m128i *const dst = (__m128i *)output;     // output, viewed as vectors
+  const int lanes = 4;                        // int32 values per __m128i
+  const int num_vecs = size * size / lanes;
+  int16_array_with_stride_to_int32_array_without_stride(input, stride, txfm_buf,
+                                                        size);
+  round_shift_array_32_sse4_1(work, dst, num_vecs, -shifts[0]);
+  col_txfm(dst, work, col_cos_bit, col_range);
+  round_shift_array_32_sse4_1(work, dst, num_vecs, -shifts[1]);
+  transpose_32(size, dst, work);
+  row_txfm(work, dst, row_cos_bit, row_range);
+  round_shift_array_32_sse4_1(dst, work, num_vecs, -shifts[2]);
+  transpose_32(size, work, dst);
+}
+
+void vp10_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
+                                  int stride, int tx_type, int bd) {
+  int32_t scratch[32 * 32];  // NOTE(review): used as __m128i*; check alignment
+  TXFM_2D_FLIP_CFG flip_cfg = vp10_get_fwd_txfm_cfg(tx_type, TX_32X32);
+  (void)bd;  // Bit depth is not consulted by this path.
+  fwd_txfm2d_sse4_1(input, output, stride, flip_cfg.cfg, scratch);
+}
+
+void vp10_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
+                                  int stride, int tx_type, int bd) {
+  int32_t scratch[64 * 64];  // NOTE(review): used as __m128i*; check alignment
+  TXFM_2D_FLIP_CFG flip_cfg = vp10_get_fwd_txfm_64x64_cfg(tx_type);
+  (void)bd;  // Bit depth is not consulted by this path.
+  fwd_txfm2d_sse4_1(input, output, stride, flip_cfg.cfg, scratch);
+}
diff --git a/av1/common/x86/vp10_fwd_txfm_impl_sse2.h b/av1/common/x86/vp10_fwd_txfm_impl_sse2.h
new file mode 100644
index 0000000..9bb8abc
--- /dev/null
+++ b/av1/common/x86/vp10_fwd_txfm_impl_sse2.h
@@ -0,0 +1,1013 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h> // SSE2
+
+#include "./vpx_dsp_rtcd.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_dsp/x86/fwd_txfm_sse2.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+#include "aom_ports/mem.h"
+
+// TODO(jingning) The high bit-depth functions need rework for performance.
+// After we properly fix the high bit-depth function implementations, this
+// file's dependency should be substantially simplified.
+#if DCT_HIGH_BIT_DEPTH
+#define ADD_EPI16 _mm_adds_epi16
+#define SUB_EPI16 _mm_subs_epi16
+
+#else
+#define ADD_EPI16 _mm_add_epi16
+#define SUB_EPI16 _mm_sub_epi16
+#endif
+
+void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) {
+ // This 2D transform implements 4 vertical 1D transforms followed
+ // by 4 horizontal 1D transforms. The multiplies and adds are as given
+ // by Chen, Smith and Fralick ('77). The commands for moving the data
+ // around have been minimized by hand.
+ // For the purposes of the comments, the 16 inputs are referred to as i0
+ // through iF (in raster order), intermediate variables are a0, b0, c0
+ // through f, and correspond to the in-place computations mapped to input
+ // locations. The outputs, o0 through oF are labeled according to the
+ // output locations.
+
+ // Constants
+ // These are the coefficients used for the multiplies.
+ // In the comments, pN means cos(N pi /64) and mN is -cos(N pi /64),
+ // where cospi_N_64 = cos(N pi /64)
+ const __m128i k__cospi_A =
+ octa_set_epi16(cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64,
+ cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_B =
+ octa_set_epi16(cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64,
+ cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64);
+ const __m128i k__cospi_C =
+ octa_set_epi16(cospi_8_64, cospi_24_64, cospi_8_64, cospi_24_64,
+ cospi_24_64, -cospi_8_64, cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_D =
+ octa_set_epi16(cospi_24_64, -cospi_8_64, cospi_24_64, -cospi_8_64,
+ cospi_8_64, cospi_24_64, cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_E =
+ octa_set_epi16(cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64,
+ cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64);
+ const __m128i k__cospi_F =
+ octa_set_epi16(cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64,
+ cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_G =
+ octa_set_epi16(cospi_8_64, cospi_24_64, cospi_8_64, cospi_24_64,
+ -cospi_8_64, -cospi_24_64, -cospi_8_64, -cospi_24_64);
+ const __m128i k__cospi_H =
+ octa_set_epi16(cospi_24_64, -cospi_8_64, cospi_24_64, -cospi_8_64,
+ -cospi_24_64, cospi_8_64, -cospi_24_64, cospi_8_64);
+
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ // This second rounding constant saves doing some extra adds at the end
+ const __m128i k__DCT_CONST_ROUNDING2 =
+ _mm_set1_epi32(DCT_CONST_ROUNDING + (DCT_CONST_ROUNDING << 1));
+ const int DCT_CONST_BITS2 = DCT_CONST_BITS + 2;
+ const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1);
+ const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
+ __m128i in0, in1;
+#if DCT_HIGH_BIT_DEPTH
+ __m128i cmp0, cmp1;
+ int test, overflow;
+#endif
+
+ // Load inputs.
+ in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
+ in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
+ in1 = _mm_unpacklo_epi64(
+ in1, _mm_loadl_epi64((const __m128i *)(input + 2 * stride)));
+ in0 = _mm_unpacklo_epi64(
+ in0, _mm_loadl_epi64((const __m128i *)(input + 3 * stride)));
+// in0 = [i0 i1 i2 i3 iC iD iE iF]
+// in1 = [i4 i5 i6 i7 i8 i9 iA iB]
+#if DCT_HIGH_BIT_DEPTH
+ // Check inputs small enough to use optimised code
+ cmp0 = _mm_xor_si128(_mm_cmpgt_epi16(in0, _mm_set1_epi16(0x3ff)),
+ _mm_cmplt_epi16(in0, _mm_set1_epi16(0xfc00)));
+ cmp1 = _mm_xor_si128(_mm_cmpgt_epi16(in1, _mm_set1_epi16(0x3ff)),
+ _mm_cmplt_epi16(in1, _mm_set1_epi16(0xfc00)));
+ test = _mm_movemask_epi8(_mm_or_si128(cmp0, cmp1));
+ if (test) {
+ vpx_highbd_fdct4x4_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+
+ // multiply by 16 to give some extra precision
+ in0 = _mm_slli_epi16(in0, 4);
+ in1 = _mm_slli_epi16(in1, 4);
+ // if (i == 0 && input[0]) input[0] += 1;
+ // add 1 to the upper left pixel if it is non-zero, which helps reduce
+ // the round-trip error
+ {
+ // The mask will only contain whether the first value is zero, all
+ // other comparisons will fail as something shifted by 4 (above << 4)
+ // can never be equal to one. To increment in the non-zero case, we
+ // add the mask and one for the first element:
+ // - if zero, mask = -1, v = v - 1 + 1 = v
+ // - if non-zero, mask = 0, v = v + 0 + 1 = v + 1
+ __m128i mask = _mm_cmpeq_epi16(in0, k__nonzero_bias_a);
+ in0 = _mm_add_epi16(in0, mask);
+ in0 = _mm_add_epi16(in0, k__nonzero_bias_b);
+ }
+ // There are 4 total stages, alternating between an add/subtract stage
+ // followed by a multiply-and-add stage.
+ {
+ // Stage 1: Add/subtract
+
+ // in0 = [i0 i1 i2 i3 iC iD iE iF]
+ // in1 = [i4 i5 i6 i7 i8 i9 iA iB]
+ const __m128i r0 = _mm_unpacklo_epi16(in0, in1);
+ const __m128i r1 = _mm_unpackhi_epi16(in0, in1);
+ // r0 = [i0 i4 i1 i5 i2 i6 i3 i7]
+ // r1 = [iC i8 iD i9 iE iA iF iB]
+ const __m128i r2 = _mm_shuffle_epi32(r0, 0xB4);
+ const __m128i r3 = _mm_shuffle_epi32(r1, 0xB4);
+ // r2 = [i0 i4 i1 i5 i3 i7 i2 i6]
+ // r3 = [iC i8 iD i9 iF iB iE iA]
+
+ const __m128i t0 = _mm_add_epi16(r2, r3);
+ const __m128i t1 = _mm_sub_epi16(r2, r3);
+ // t0 = [a0 a4 a1 a5 a3 a7 a2 a6]
+ // t1 = [aC a8 aD a9 aF aB aE aA]
+
+ // Stage 2: multiply by constants (which gets us into 32 bits).
+ // The constants needed here are:
+ // k__cospi_A = [p16 p16 p16 p16 p16 m16 p16 m16]
+ // k__cospi_B = [p16 m16 p16 m16 p16 p16 p16 p16]
+ // k__cospi_C = [p08 p24 p08 p24 p24 m08 p24 m08]
+ // k__cospi_D = [p24 m08 p24 m08 p08 p24 p08 p24]
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_A);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_B);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_C);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_D);
+ // Then add and right-shift to get back to 16-bit range
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ // w0 = [b0 b1 b7 b6]
+ // w1 = [b8 b9 bF bE]
+ // w2 = [b4 b5 b3 b2]
+ // w3 = [bC bD bB bA]
+ const __m128i x0 = _mm_packs_epi32(w0, w1);
+ const __m128i x1 = _mm_packs_epi32(w2, w3);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x2(&x0, &x1);
+ if (overflow) {
+ vpx_highbd_fdct4x4_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ // x0 = [b0 b1 b7 b6 b8 b9 bF bE]
+ // x1 = [b4 b5 b3 b2 bC bD bB bA]
+ in0 = _mm_shuffle_epi32(x0, 0xD8);
+ in1 = _mm_shuffle_epi32(x1, 0x8D);
+ // in0 = [b0 b1 b8 b9 b7 b6 bF bE]
+ // in1 = [b3 b2 bB bA b4 b5 bC bD]
+ }
+ {
+ // vertical DCTs finished. Now we do the horizontal DCTs.
+ // Stage 3: Add/subtract
+
+ // t0 = [c0 c1 c8 c9 c4 c5 cC cD]
+ // t1 = [c3 c2 cB cA -c7 -c6 -cF -cE]
+ const __m128i t0 = ADD_EPI16(in0, in1);
+ const __m128i t1 = SUB_EPI16(in0, in1);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x2(&t0, &t1);
+ if (overflow) {
+ vpx_highbd_fdct4x4_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+
+ // Stage 4: multiply by constants (which gets us into 32 bits).
+ {
+ // The constants needed here are:
+ // k__cospi_E = [p16 p16 p16 p16 p16 p16 p16 p16]
+ // k__cospi_F = [p16 m16 p16 m16 p16 m16 p16 m16]
+ // k__cospi_G = [p08 p24 p08 p24 m08 m24 m08 m24]
+ // k__cospi_H = [p24 m08 p24 m08 m24 p08 m24 p08]
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_E);
+ const __m128i u1 = _mm_madd_epi16(t0, k__cospi_F);
+ const __m128i u2 = _mm_madd_epi16(t1, k__cospi_G);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_H);
+ // Then add and right-shift to get back to 16-bit range
+ // but this combines the final right-shift as well to save operations
+ // This unusual rounding operation is to maintain bit-accurate
+ // compatibility with the c version of this function which has two
+ // rounding steps in a row.
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING2);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING2);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING2);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING2);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS2);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS2);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS2);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS2);
+ // w0 = [o0 o4 o8 oC]
+ // w1 = [o2 o6 oA oE]
+ // w2 = [o1 o5 o9 oD]
+ // w3 = [o3 o7 oB oF]
+ // remember the o's are numbered according to the correct output location
+ const __m128i x0 = _mm_packs_epi32(w0, w1);
+ const __m128i x1 = _mm_packs_epi32(w2, w3);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x2(&x0, &x1);
+ if (overflow) {
+ vpx_highbd_fdct4x4_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ {
+ // x0 = [o0 o4 o8 oC o2 o6 oA oE]
+ // x1 = [o1 o5 o9 oD o3 o7 oB oF]
+ const __m128i y0 = _mm_unpacklo_epi16(x0, x1);
+ const __m128i y1 = _mm_unpackhi_epi16(x0, x1);
+ // y0 = [o0 o1 o4 o5 o8 o9 oC oD]
+ // y1 = [o2 o3 o6 o7 oA oB oE oF]
+ in0 = _mm_unpacklo_epi32(y0, y1);
+ // in0 = [o0 o1 o2 o3 o4 o5 o6 o7]
+ in1 = _mm_unpackhi_epi32(y0, y1);
+ // in1 = [o8 o9 oA oB oC oD oE oF]
+ }
+ }
+ }
+ // Post-condition (v + 1) >> 2 is now incorporated into previous
+ // add and right-shift commands. Only 2 store instructions needed
+ // because we are using the fact that 1/3 are stored just after 0/2.
+ storeu_output(&in0, output + 0 * 4);
+ storeu_output(&in1, output + 2 * 4);
+}
+
+void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) {
+ int pass;
+ // Constants
+ // When we use them, in one case, they are all the same. In all others
+ // it's a pair of them that we need to repeat four times. This is done
+ // by constructing the 32 bit constant corresponding to that pair.
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
+ const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+#if DCT_HIGH_BIT_DEPTH
+ int overflow;
+#endif
+ // Load input
+ __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+ __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
+ // Pre-condition input (shift by two)
+ in0 = _mm_slli_epi16(in0, 2);
+ in1 = _mm_slli_epi16(in1, 2);
+ in2 = _mm_slli_epi16(in2, 2);
+ in3 = _mm_slli_epi16(in3, 2);
+ in4 = _mm_slli_epi16(in4, 2);
+ in5 = _mm_slli_epi16(in5, 2);
+ in6 = _mm_slli_epi16(in6, 2);
+ in7 = _mm_slli_epi16(in7, 2);
+
+ // We do two passes, first the columns, then the rows. The results of the
+ // first pass are transposed so that the same column code can be reused. The
+ // results of the second pass are also transposed so that the rows (processed
+ // as columns) are put back in row positions.
+ for (pass = 0; pass < 2; pass++) {
+ // To store results of each pass before the transpose.
+ __m128i res0, res1, res2, res3, res4, res5, res6, res7;
+ // Add/subtract
+ const __m128i q0 = ADD_EPI16(in0, in7);
+ const __m128i q1 = ADD_EPI16(in1, in6);
+ const __m128i q2 = ADD_EPI16(in2, in5);
+ const __m128i q3 = ADD_EPI16(in3, in4);
+ const __m128i q4 = SUB_EPI16(in3, in4);
+ const __m128i q5 = SUB_EPI16(in2, in5);
+ const __m128i q6 = SUB_EPI16(in1, in6);
+ const __m128i q7 = SUB_EPI16(in0, in7);
+#if DCT_HIGH_BIT_DEPTH
+ if (pass == 1) {
+ overflow =
+ check_epi16_overflow_x8(&q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7);
+ if (overflow) {
+ vpx_highbd_fdct8x8_c(input, output, stride);
+ return;
+ }
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ // Work on first four results
+ {
+ // Add/subtract
+ const __m128i r0 = ADD_EPI16(q0, q3);
+ const __m128i r1 = ADD_EPI16(q1, q2);
+ const __m128i r2 = SUB_EPI16(q1, q2);
+ const __m128i r3 = SUB_EPI16(q0, q3);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x4(&r0, &r1, &r2, &r3);
+ if (overflow) {
+ vpx_highbd_fdct8x8_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ // Interleave to do the multiply by constants which gets us into 32bits
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
+ const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
+ const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
+ const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16);
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08);
+ const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24);
+ const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ // Combine
+ res0 = _mm_packs_epi32(w0, w1);
+ res4 = _mm_packs_epi32(w2, w3);
+ res2 = _mm_packs_epi32(w4, w5);
+ res6 = _mm_packs_epi32(w6, w7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x4(&res0, &res4, &res2, &res6);
+ if (overflow) {
+ vpx_highbd_fdct8x8_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ }
+ // Work on next four results
+ {
+ // Interleave to do the multiply by constants which gets us into 32bits
+ const __m128i d0 = _mm_unpacklo_epi16(q6, q5);
+ const __m128i d1 = _mm_unpackhi_epi16(q6, q5);
+ const __m128i e0 = _mm_madd_epi16(d0, k__cospi_p16_m16);
+ const __m128i e1 = _mm_madd_epi16(d1, k__cospi_p16_m16);
+ const __m128i e2 = _mm_madd_epi16(d0, k__cospi_p16_p16);
+ const __m128i e3 = _mm_madd_epi16(d1, k__cospi_p16_p16);
+ // dct_const_round_shift
+ const __m128i f0 = _mm_add_epi32(e0, k__DCT_CONST_ROUNDING);
+ const __m128i f1 = _mm_add_epi32(e1, k__DCT_CONST_ROUNDING);
+ const __m128i f2 = _mm_add_epi32(e2, k__DCT_CONST_ROUNDING);
+ const __m128i f3 = _mm_add_epi32(e3, k__DCT_CONST_ROUNDING);
+ const __m128i s0 = _mm_srai_epi32(f0, DCT_CONST_BITS);
+ const __m128i s1 = _mm_srai_epi32(f1, DCT_CONST_BITS);
+ const __m128i s2 = _mm_srai_epi32(f2, DCT_CONST_BITS);
+ const __m128i s3 = _mm_srai_epi32(f3, DCT_CONST_BITS);
+ // Combine
+ const __m128i r0 = _mm_packs_epi32(s0, s1);
+ const __m128i r1 = _mm_packs_epi32(s2, s3);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x2(&r0, &r1);
+ if (overflow) {
+ vpx_highbd_fdct8x8_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ {
+ // Add/subtract
+ const __m128i x0 = ADD_EPI16(q4, r0);
+ const __m128i x1 = SUB_EPI16(q4, r0);
+ const __m128i x2 = SUB_EPI16(q7, r1);
+ const __m128i x3 = ADD_EPI16(q7, r1);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x4(&x0, &x1, &x2, &x3);
+ if (overflow) {
+ vpx_highbd_fdct8x8_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ // Interleave to do the multiply by constants which gets us into 32bits
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
+ const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
+ const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
+ const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28);
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20);
+ const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12);
+ const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ // Combine
+ res1 = _mm_packs_epi32(w0, w1);
+ res7 = _mm_packs_epi32(w2, w3);
+ res5 = _mm_packs_epi32(w4, w5);
+ res3 = _mm_packs_epi32(w6, w7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x4(&res1, &res7, &res5, &res3);
+ if (overflow) {
+ vpx_highbd_fdct8x8_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ }
+ }
+ // Transpose the 8x8.
+ {
+ // 00 01 02 03 04 05 06 07
+ // 10 11 12 13 14 15 16 17
+ // 20 21 22 23 24 25 26 27
+ // 30 31 32 33 34 35 36 37
+ // 40 41 42 43 44 45 46 47
+ // 50 51 52 53 54 55 56 57
+ // 60 61 62 63 64 65 66 67
+ // 70 71 72 73 74 75 76 77
+ const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);
+ const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3);
+ const __m128i tr0_2 = _mm_unpackhi_epi16(res0, res1);
+ const __m128i tr0_3 = _mm_unpackhi_epi16(res2, res3);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7);
+ const __m128i tr0_6 = _mm_unpackhi_epi16(res4, res5);
+ const __m128i tr0_7 = _mm_unpackhi_epi16(res6, res7);
+ // 00 10 01 11 02 12 03 13
+ // 20 30 21 31 22 32 23 33
+ // 04 14 05 15 06 16 07 17
+ // 24 34 25 35 26 36 27 37
+ // 40 50 41 51 42 52 43 53
+ // 60 70 61 71 62 72 63 73
+ // 44 54 45 55 46 56 47 57
+ // 64 74 65 75 66 76 67 77
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+ // 00 10 20 30 01 11 21 31
+ // 40 50 60 70 41 51 61 71
+ // 02 12 22 32 03 13 23 33
+ // 42 52 62 72 43 53 63 73
+ // 04 14 24 34 05 15 25 35
+ // 44 54 64 74 45 55 65 75
+ // 06 16 26 36 07 17 27 37
+ // 46 56 66 76 47 57 67 77
+ in0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
+ in1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
+ in2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
+ in3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
+ in4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
+ in5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
+ in6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
+ in7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
+ // 00 10 20 30 40 50 60 70
+ // 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72
+ // 03 13 23 33 43 53 63 73
+ // 04 14 24 34 44 54 64 74
+ // 05 15 25 35 45 55 65 75
+ // 06 16 26 36 46 56 66 76
+ // 07 17 27 37 47 57 67 77
+ }
+ }
+ // Post-condition output and store it
+ {
+ // Post-condition (division by two)
+ // division by two of 16-bit signed numbers using shifts
+ // n / 2 = (n - (n >> 15)) >> 1
+ const __m128i sign_in0 = _mm_srai_epi16(in0, 15);
+ const __m128i sign_in1 = _mm_srai_epi16(in1, 15);
+ const __m128i sign_in2 = _mm_srai_epi16(in2, 15);
+ const __m128i sign_in3 = _mm_srai_epi16(in3, 15);
+ const __m128i sign_in4 = _mm_srai_epi16(in4, 15);
+ const __m128i sign_in5 = _mm_srai_epi16(in5, 15);
+ const __m128i sign_in6 = _mm_srai_epi16(in6, 15);
+ const __m128i sign_in7 = _mm_srai_epi16(in7, 15);
+ in0 = _mm_sub_epi16(in0, sign_in0);
+ in1 = _mm_sub_epi16(in1, sign_in1);
+ in2 = _mm_sub_epi16(in2, sign_in2);
+ in3 = _mm_sub_epi16(in3, sign_in3);
+ in4 = _mm_sub_epi16(in4, sign_in4);
+ in5 = _mm_sub_epi16(in5, sign_in5);
+ in6 = _mm_sub_epi16(in6, sign_in6);
+ in7 = _mm_sub_epi16(in7, sign_in7);
+ in0 = _mm_srai_epi16(in0, 1);
+ in1 = _mm_srai_epi16(in1, 1);
+ in2 = _mm_srai_epi16(in2, 1);
+ in3 = _mm_srai_epi16(in3, 1);
+ in4 = _mm_srai_epi16(in4, 1);
+ in5 = _mm_srai_epi16(in5, 1);
+ in6 = _mm_srai_epi16(in6, 1);
+ in7 = _mm_srai_epi16(in7, 1);
+ // store results
+ store_output(&in0, (output + 0 * 8));
+ store_output(&in1, (output + 1 * 8));
+ store_output(&in2, (output + 2 * 8));
+ store_output(&in3, (output + 3 * 8));
+ store_output(&in4, (output + 4 * 8));
+ store_output(&in5, (output + 5 * 8));
+ store_output(&in6, (output + 6 * 8));
+ store_output(&in7, (output + 7 * 8));
+ }
+}
+
+void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
+ // The 2D transform is done with two passes which are actually pretty
+ // similar. In the first one, we transform the columns and transpose
+ // the results. In the second one, we transform the rows. To achieve that,
+ // as the first pass results are transposed, we transpose the columns (that
+ // is the transposed rows) and transpose the results (so that it goes back
+ // in normal/row positions).
+ int pass;
+ // We need an intermediate buffer between passes.
+ DECLARE_ALIGNED(16, int16_t, intermediate[256]);
+ const int16_t *in = input;
+ int16_t *out0 = intermediate;
+ tran_low_t *out1 = output;
+ // Constants
+ // When we use them, in one case, they are all the same. In all others
+ // it's a pair of them that we need to repeat four times. This is done
+ // by constructing the 32 bit constant corresponding to that pair.
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
+ const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i k__cospi_p30_p02 = pair_set_epi16(cospi_30_64, cospi_2_64);
+ const __m128i k__cospi_p14_p18 = pair_set_epi16(cospi_14_64, cospi_18_64);
+ const __m128i k__cospi_m02_p30 = pair_set_epi16(-cospi_2_64, cospi_30_64);
+ const __m128i k__cospi_m18_p14 = pair_set_epi16(-cospi_18_64, cospi_14_64);
+ const __m128i k__cospi_p22_p10 = pair_set_epi16(cospi_22_64, cospi_10_64);
+ const __m128i k__cospi_p06_p26 = pair_set_epi16(cospi_6_64, cospi_26_64);
+ const __m128i k__cospi_m10_p22 = pair_set_epi16(-cospi_10_64, cospi_22_64);
+ const __m128i k__cospi_m26_p06 = pair_set_epi16(-cospi_26_64, cospi_6_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i kOne = _mm_set1_epi16(1);
+ // Do the two transform/transpose passes
+ for (pass = 0; pass < 2; ++pass) {
+ // We process eight columns (transposed rows in second pass) at a time.
+ int column_start;
+#if DCT_HIGH_BIT_DEPTH
+ int overflow;
+#endif
+ for (column_start = 0; column_start < 16; column_start += 8) {
+ __m128i in00, in01, in02, in03, in04, in05, in06, in07;
+ __m128i in08, in09, in10, in11, in12, in13, in14, in15;
+ __m128i input0, input1, input2, input3, input4, input5, input6, input7;
+ __m128i step1_0, step1_1, step1_2, step1_3;
+ __m128i step1_4, step1_5, step1_6, step1_7;
+ __m128i step2_1, step2_2, step2_3, step2_4, step2_5, step2_6;
+ __m128i step3_0, step3_1, step3_2, step3_3;
+ __m128i step3_4, step3_5, step3_6, step3_7;
+ __m128i res00, res01, res02, res03, res04, res05, res06, res07;
+ __m128i res08, res09, res10, res11, res12, res13, res14, res15;
+ // Load and pre-condition input.
+ if (0 == pass) {
+ in00 = _mm_load_si128((const __m128i *)(in + 0 * stride));
+ in01 = _mm_load_si128((const __m128i *)(in + 1 * stride));
+ in02 = _mm_load_si128((const __m128i *)(in + 2 * stride));
+ in03 = _mm_load_si128((const __m128i *)(in + 3 * stride));
+ in04 = _mm_load_si128((const __m128i *)(in + 4 * stride));
+ in05 = _mm_load_si128((const __m128i *)(in + 5 * stride));
+ in06 = _mm_load_si128((const __m128i *)(in + 6 * stride));
+ in07 = _mm_load_si128((const __m128i *)(in + 7 * stride));
+ in08 = _mm_load_si128((const __m128i *)(in + 8 * stride));
+ in09 = _mm_load_si128((const __m128i *)(in + 9 * stride));
+ in10 = _mm_load_si128((const __m128i *)(in + 10 * stride));
+ in11 = _mm_load_si128((const __m128i *)(in + 11 * stride));
+ in12 = _mm_load_si128((const __m128i *)(in + 12 * stride));
+ in13 = _mm_load_si128((const __m128i *)(in + 13 * stride));
+ in14 = _mm_load_si128((const __m128i *)(in + 14 * stride));
+ in15 = _mm_load_si128((const __m128i *)(in + 15 * stride));
+ // x = x << 2
+ in00 = _mm_slli_epi16(in00, 2);
+ in01 = _mm_slli_epi16(in01, 2);
+ in02 = _mm_slli_epi16(in02, 2);
+ in03 = _mm_slli_epi16(in03, 2);
+ in04 = _mm_slli_epi16(in04, 2);
+ in05 = _mm_slli_epi16(in05, 2);
+ in06 = _mm_slli_epi16(in06, 2);
+ in07 = _mm_slli_epi16(in07, 2);
+ in08 = _mm_slli_epi16(in08, 2);
+ in09 = _mm_slli_epi16(in09, 2);
+ in10 = _mm_slli_epi16(in10, 2);
+ in11 = _mm_slli_epi16(in11, 2);
+ in12 = _mm_slli_epi16(in12, 2);
+ in13 = _mm_slli_epi16(in13, 2);
+ in14 = _mm_slli_epi16(in14, 2);
+ in15 = _mm_slli_epi16(in15, 2);
+ } else {
+ in00 = _mm_load_si128((const __m128i *)(in + 0 * 16));
+ in01 = _mm_load_si128((const __m128i *)(in + 1 * 16));
+ in02 = _mm_load_si128((const __m128i *)(in + 2 * 16));
+ in03 = _mm_load_si128((const __m128i *)(in + 3 * 16));
+ in04 = _mm_load_si128((const __m128i *)(in + 4 * 16));
+ in05 = _mm_load_si128((const __m128i *)(in + 5 * 16));
+ in06 = _mm_load_si128((const __m128i *)(in + 6 * 16));
+ in07 = _mm_load_si128((const __m128i *)(in + 7 * 16));
+ in08 = _mm_load_si128((const __m128i *)(in + 8 * 16));
+ in09 = _mm_load_si128((const __m128i *)(in + 9 * 16));
+ in10 = _mm_load_si128((const __m128i *)(in + 10 * 16));
+ in11 = _mm_load_si128((const __m128i *)(in + 11 * 16));
+ in12 = _mm_load_si128((const __m128i *)(in + 12 * 16));
+ in13 = _mm_load_si128((const __m128i *)(in + 13 * 16));
+ in14 = _mm_load_si128((const __m128i *)(in + 14 * 16));
+ in15 = _mm_load_si128((const __m128i *)(in + 15 * 16));
+ // x = (x + 1) >> 2
+ in00 = _mm_add_epi16(in00, kOne);
+ in01 = _mm_add_epi16(in01, kOne);
+ in02 = _mm_add_epi16(in02, kOne);
+ in03 = _mm_add_epi16(in03, kOne);
+ in04 = _mm_add_epi16(in04, kOne);
+ in05 = _mm_add_epi16(in05, kOne);
+ in06 = _mm_add_epi16(in06, kOne);
+ in07 = _mm_add_epi16(in07, kOne);
+ in08 = _mm_add_epi16(in08, kOne);
+ in09 = _mm_add_epi16(in09, kOne);
+ in10 = _mm_add_epi16(in10, kOne);
+ in11 = _mm_add_epi16(in11, kOne);
+ in12 = _mm_add_epi16(in12, kOne);
+ in13 = _mm_add_epi16(in13, kOne);
+ in14 = _mm_add_epi16(in14, kOne);
+ in15 = _mm_add_epi16(in15, kOne);
+ in00 = _mm_srai_epi16(in00, 2);
+ in01 = _mm_srai_epi16(in01, 2);
+ in02 = _mm_srai_epi16(in02, 2);
+ in03 = _mm_srai_epi16(in03, 2);
+ in04 = _mm_srai_epi16(in04, 2);
+ in05 = _mm_srai_epi16(in05, 2);
+ in06 = _mm_srai_epi16(in06, 2);
+ in07 = _mm_srai_epi16(in07, 2);
+ in08 = _mm_srai_epi16(in08, 2);
+ in09 = _mm_srai_epi16(in09, 2);
+ in10 = _mm_srai_epi16(in10, 2);
+ in11 = _mm_srai_epi16(in11, 2);
+ in12 = _mm_srai_epi16(in12, 2);
+ in13 = _mm_srai_epi16(in13, 2);
+ in14 = _mm_srai_epi16(in14, 2);
+ in15 = _mm_srai_epi16(in15, 2);
+ }
+ in += 8;
+ // Calculate input for the first 8 results.
+ {
+ input0 = ADD_EPI16(in00, in15);
+ input1 = ADD_EPI16(in01, in14);
+ input2 = ADD_EPI16(in02, in13);
+ input3 = ADD_EPI16(in03, in12);
+ input4 = ADD_EPI16(in04, in11);
+ input5 = ADD_EPI16(in05, in10);
+ input6 = ADD_EPI16(in06, in09);
+ input7 = ADD_EPI16(in07, in08);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x8(&input0, &input1, &input2, &input3,
+ &input4, &input5, &input6, &input7);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ // Calculate input for the next 8 results.
+ {
+ step1_0 = SUB_EPI16(in07, in08);
+ step1_1 = SUB_EPI16(in06, in09);
+ step1_2 = SUB_EPI16(in05, in10);
+ step1_3 = SUB_EPI16(in04, in11);
+ step1_4 = SUB_EPI16(in03, in12);
+ step1_5 = SUB_EPI16(in02, in13);
+ step1_6 = SUB_EPI16(in01, in14);
+ step1_7 = SUB_EPI16(in00, in15);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x8(&step1_0, &step1_1, &step1_2, &step1_3,
+ &step1_4, &step1_5, &step1_6, &step1_7);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ // Work on the first eight values; fdct8(input, even_results);
+ {
+ // Add/subtract
+ const __m128i q0 = ADD_EPI16(input0, input7);
+ const __m128i q1 = ADD_EPI16(input1, input6);
+ const __m128i q2 = ADD_EPI16(input2, input5);
+ const __m128i q3 = ADD_EPI16(input3, input4);
+ const __m128i q4 = SUB_EPI16(input3, input4);
+ const __m128i q5 = SUB_EPI16(input2, input5);
+ const __m128i q6 = SUB_EPI16(input1, input6);
+ const __m128i q7 = SUB_EPI16(input0, input7);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x8(&q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ // Work on first four results
+ {
+ // Add/subtract
+ const __m128i r0 = ADD_EPI16(q0, q3);
+ const __m128i r1 = ADD_EPI16(q1, q2);
+ const __m128i r2 = SUB_EPI16(q1, q2);
+ const __m128i r3 = SUB_EPI16(q0, q3);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x4(&r0, &r1, &r2, &r3);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ // Interleave to do the multiply by constants which gets us
+ // into 32 bits.
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
+ const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
+ const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
+ const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
+ res00 = mult_round_shift(&t0, &t1, &k__cospi_p16_p16,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ res08 = mult_round_shift(&t0, &t1, &k__cospi_p16_m16,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ res04 = mult_round_shift(&t2, &t3, &k__cospi_p24_p08,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ res12 = mult_round_shift(&t2, &t3, &k__cospi_m08_p24,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x4(&res00, &res08, &res04, &res12);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ }
+ // Work on next four results
+ {
+ // Interleave to do the multiply by constants which gets us
+ // into 32 bits.
+ const __m128i d0 = _mm_unpacklo_epi16(q6, q5);
+ const __m128i d1 = _mm_unpackhi_epi16(q6, q5);
+ const __m128i r0 =
+ mult_round_shift(&d0, &d1, &k__cospi_p16_m16,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ const __m128i r1 =
+ mult_round_shift(&d0, &d1, &k__cospi_p16_p16,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x2(&r0, &r1);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ {
+ // Add/subtract
+ const __m128i x0 = ADD_EPI16(q4, r0);
+ const __m128i x1 = SUB_EPI16(q4, r0);
+ const __m128i x2 = SUB_EPI16(q7, r1);
+ const __m128i x3 = ADD_EPI16(q7, r1);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x4(&x0, &x1, &x2, &x3);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ // Interleave to do the multiply by constants which gets us
+ // into 32 bits.
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
+ const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
+ const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
+ const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
+ res02 = mult_round_shift(&t0, &t1, &k__cospi_p28_p04,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ res14 = mult_round_shift(&t0, &t1, &k__cospi_m04_p28,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ res10 = mult_round_shift(&t2, &t3, &k__cospi_p12_p20,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ res06 = mult_round_shift(&t2, &t3, &k__cospi_m20_p12,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x4(&res02, &res14, &res10, &res06);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ }
+ }
+ }
+ // Work on the next eight values; step1 -> odd_results
+ {
+ // step 2
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(step1_5, step1_2);
+ const __m128i t1 = _mm_unpackhi_epi16(step1_5, step1_2);
+ const __m128i t2 = _mm_unpacklo_epi16(step1_4, step1_3);
+ const __m128i t3 = _mm_unpackhi_epi16(step1_4, step1_3);
+ step2_2 = mult_round_shift(&t0, &t1, &k__cospi_p16_m16,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ step2_3 = mult_round_shift(&t2, &t3, &k__cospi_p16_m16,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ step2_5 = mult_round_shift(&t0, &t1, &k__cospi_p16_p16,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ step2_4 = mult_round_shift(&t2, &t3, &k__cospi_p16_p16,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x4(&step2_2, &step2_3, &step2_5, &step2_4);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ // step 3
+ {
+ step3_0 = ADD_EPI16(step1_0, step2_3);
+ step3_1 = ADD_EPI16(step1_1, step2_2);
+ step3_2 = SUB_EPI16(step1_1, step2_2);
+ step3_3 = SUB_EPI16(step1_0, step2_3);
+ step3_4 = SUB_EPI16(step1_7, step2_4);
+ step3_5 = SUB_EPI16(step1_6, step2_5);
+ step3_6 = ADD_EPI16(step1_6, step2_5);
+ step3_7 = ADD_EPI16(step1_7, step2_4);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x8(&step3_0, &step3_1, &step3_2, &step3_3,
+ &step3_4, &step3_5, &step3_6, &step3_7);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ // step 4
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(step3_1, step3_6);
+ const __m128i t1 = _mm_unpackhi_epi16(step3_1, step3_6);
+ const __m128i t2 = _mm_unpacklo_epi16(step3_2, step3_5);
+ const __m128i t3 = _mm_unpackhi_epi16(step3_2, step3_5);
+ step2_1 = mult_round_shift(&t0, &t1, &k__cospi_m08_p24,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ step2_2 = mult_round_shift(&t2, &t3, &k__cospi_p24_p08,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ step2_6 = mult_round_shift(&t0, &t1, &k__cospi_p24_p08,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ step2_5 = mult_round_shift(&t2, &t3, &k__cospi_p08_m24,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x4(&step2_1, &step2_2, &step2_6, &step2_5);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ // step 5
+ {
+ step1_0 = ADD_EPI16(step3_0, step2_1);
+ step1_1 = SUB_EPI16(step3_0, step2_1);
+ step1_2 = ADD_EPI16(step3_3, step2_2);
+ step1_3 = SUB_EPI16(step3_3, step2_2);
+ step1_4 = SUB_EPI16(step3_4, step2_5);
+ step1_5 = ADD_EPI16(step3_4, step2_5);
+ step1_6 = SUB_EPI16(step3_7, step2_6);
+ step1_7 = ADD_EPI16(step3_7, step2_6);
+#if DCT_HIGH_BIT_DEPTH
+ overflow =
+ check_epi16_overflow_x8(&step1_0, &step1_1, &step1_2, &step1_3,
+ &step1_4, &step1_5, &step1_6, &step1_7);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ // step 6
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(step1_0, step1_7);
+ const __m128i t1 = _mm_unpackhi_epi16(step1_0, step1_7);
+ const __m128i t2 = _mm_unpacklo_epi16(step1_1, step1_6);
+ const __m128i t3 = _mm_unpackhi_epi16(step1_1, step1_6);
+ res01 = mult_round_shift(&t0, &t1, &k__cospi_p30_p02,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ res09 = mult_round_shift(&t2, &t3, &k__cospi_p14_p18,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ res15 = mult_round_shift(&t0, &t1, &k__cospi_m02_p30,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ res07 = mult_round_shift(&t2, &t3, &k__cospi_m18_p14,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x4(&res01, &res09, &res15, &res07);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ {
+ const __m128i t0 = _mm_unpacklo_epi16(step1_2, step1_5);
+ const __m128i t1 = _mm_unpackhi_epi16(step1_2, step1_5);
+ const __m128i t2 = _mm_unpacklo_epi16(step1_3, step1_4);
+ const __m128i t3 = _mm_unpackhi_epi16(step1_3, step1_4);
+ res05 = mult_round_shift(&t0, &t1, &k__cospi_p22_p10,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ res13 = mult_round_shift(&t2, &t3, &k__cospi_p06_p26,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ res11 = mult_round_shift(&t0, &t1, &k__cospi_m10_p22,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+ res03 = mult_round_shift(&t2, &t3, &k__cospi_m26_p06,
+ &k__DCT_CONST_ROUNDING, DCT_CONST_BITS);
+#if DCT_HIGH_BIT_DEPTH
+ overflow = check_epi16_overflow_x4(&res05, &res13, &res11, &res03);
+ if (overflow) {
+ vpx_highbd_fdct16x16_c(input, output, stride);
+ return;
+ }
+#endif // DCT_HIGH_BIT_DEPTH
+ }
+ }
+ // Transpose the results, do it as two 8x8 transposes.
+ transpose_and_output8x8(&res00, &res01, &res02, &res03, &res04, &res05,
+ &res06, &res07, pass, out0, out1);
+ transpose_and_output8x8(&res08, &res09, &res10, &res11, &res12, &res13,
+ &res14, &res15, pass, out0 + 8, out1 + 8);
+ if (pass == 0) {
+ out0 += 8 * 16;
+ } else {
+ out1 += 8 * 16;
+ }
+ }
+ // Setup in/out for next pass.
+ in = intermediate;
+ }
+}
+
+#undef ADD_EPI16
+#undef SUB_EPI16
diff --git a/av1/common/x86/vp10_fwd_txfm_sse2.c b/av1/common/x86/vp10_fwd_txfm_sse2.c
new file mode 100644
index 0000000..05ec539
--- /dev/null
+++ b/av1/common/x86/vp10_fwd_txfm_sse2.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h> // SSE2
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_dsp/x86/fwd_txfm_sse2.h"
+// DC-only forward 4x4 DCT: hands store_output 2 * (sum of the 16 inputs).
+void vp10_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
+ __m128i in0, in1;
+ __m128i tmp;
+ const __m128i zero = _mm_setzero_si128();
+ in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));  // 4 x int16
+ in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
+ in1 = _mm_unpacklo_epi64(
+ in1, _mm_loadl_epi64((const __m128i *)(input + 2 * stride)));  // rows 1|2
+ in0 = _mm_unpacklo_epi64(
+ in0, _mm_loadl_epi64((const __m128i *)(input + 3 * stride)));  // rows 0|3
+ // Add the two row pairs, then sign-extend the eight 16-bit sums to 32 bits.
+ tmp = _mm_add_epi16(in0, in1);
+ in0 = _mm_unpacklo_epi16(zero, tmp);  // value lands in the lane's high half
+ in1 = _mm_unpackhi_epi16(zero, tmp);
+ in0 = _mm_srai_epi32(in0, 16);  // arithmetic shift down = sign extension
+ in1 = _mm_srai_epi32(in1, 16);
+ // Fold 8 values to 4, then spread them over 64-bit slots (odd lanes zero).
+ tmp = _mm_add_epi32(in0, in1);
+ in0 = _mm_unpacklo_epi32(tmp, zero);
+ in1 = _mm_unpackhi_epi32(tmp, zero);
+ // Fold again and bring the upper 64 bits down beside the lower 64.
+ tmp = _mm_add_epi32(in0, in1);
+ in0 = _mm_srli_si128(tmp, 8);
+ // Lane 0 now holds the sum of all 16 inputs; double it before storing.
+ in1 = _mm_add_epi32(tmp, in0);
+ in0 = _mm_slli_epi32(in1, 1);
+ store_output(&in0, output);  // helper not visible here; verify what it writes
+}
+// DC-only forward 8x8 DCT: hands store_output the plain sum of the 64 inputs.
+void vp10_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
+ __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+ __m128i u0, u1, sum;
+ // _mm_load_si128 needs 16-byte-aligned rows; sums stay in 16-bit lanes.
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ in1 = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ in2 = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ in3 = _mm_load_si128((const __m128i *)(input + 7 * stride));
+
+ sum = _mm_add_epi16(u0, u1);  // rows 0-3
+
+ in0 = _mm_add_epi16(in0, in1);
+ in2 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, in0);
+ // NOTE(review): 8 samples per int16 lane; confirm the input range cannot wrap.
+ u0 = _mm_setzero_si128();
+ sum = _mm_add_epi16(sum, in2);  // eight per-column totals
+
+ in0 = _mm_unpacklo_epi16(u0, sum);  // widen: value in high half, then >> 16
+ in1 = _mm_unpackhi_epi16(u0, sum);
+ in0 = _mm_srai_epi32(in0, 16);
+ in1 = _mm_srai_epi32(in1, 16);
+ // Horizontal reduction of the eight 32-bit totals into lane 0.
+ sum = _mm_add_epi32(in0, in1);
+ in0 = _mm_unpacklo_epi32(sum, u0);
+ in1 = _mm_unpackhi_epi32(sum, u0);
+
+ sum = _mm_add_epi32(in0, in1);
+ in0 = _mm_srli_si128(sum, 8);
+
+ in1 = _mm_add_epi32(sum, in0);  // lane 0 = sum of all 64 inputs, unscaled
+ store_output(&in1, output);  // helper not visible here; verify what it writes
+}
+// DC-only forward 16x16 DCT: hands store_output (sum of 256 inputs) >> 1.
+void vp10_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
+ int stride) {
+ __m128i in0, in1, in2, in3;
+ __m128i u0, u1;
+ __m128i sum = _mm_setzero_si128();
+ int i;
+ // Two passes of 8 columns x 16 rows; sum keeps eight 16-bit column totals.
+ for (i = 0; i < 2; ++i) {
+ input += 8 * i;  // pass 0 adds 0 (left half); pass 1 adds 8 (right half)
+ in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ in1 = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ in2 = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ in3 = _mm_load_si128((const __m128i *)(input + 7 * stride));
+
+ sum = _mm_add_epi16(sum, u1);
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 8 * stride));
+ in1 = _mm_load_si128((const __m128i *)(input + 9 * stride));
+ in2 = _mm_load_si128((const __m128i *)(input + 10 * stride));
+ in3 = _mm_load_si128((const __m128i *)(input + 11 * stride));
+
+ sum = _mm_add_epi16(sum, u1);
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 12 * stride));
+ in1 = _mm_load_si128((const __m128i *)(input + 13 * stride));
+ in2 = _mm_load_si128((const __m128i *)(input + 14 * stride));
+ in3 = _mm_load_si128((const __m128i *)(input + 15 * stride));
+
+ sum = _mm_add_epi16(sum, u1);
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ sum = _mm_add_epi16(sum, u1);
+ }
+ // NOTE(review): 32 samples per int16 lane; confirm the input range cannot wrap.
+ u0 = _mm_setzero_si128();
+ in0 = _mm_unpacklo_epi16(u0, sum);  // widen: value in high half, then >> 16
+ in1 = _mm_unpackhi_epi16(u0, sum);
+ in0 = _mm_srai_epi32(in0, 16);
+ in1 = _mm_srai_epi32(in1, 16);
+ // Horizontal reduction of the eight 32-bit totals into lane 0.
+ sum = _mm_add_epi32(in0, in1);
+ in0 = _mm_unpacklo_epi32(sum, u0);
+ in1 = _mm_unpackhi_epi32(sum, u0);
+
+ sum = _mm_add_epi32(in0, in1);
+ in0 = _mm_srli_si128(sum, 8);
+
+ in1 = _mm_add_epi32(sum, in0);  // lane 0 = sum of all 256 inputs
+ in1 = _mm_srai_epi32(in1, 1);  // total >> 1 (arithmetic shift)
+ store_output(&in1, output);  // helper not visible here; verify what it writes
+}
+// DC-only forward 32x32 DCT: hands store_output (sum of 1024 inputs) >> 3.
+void vp10_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output,
+ int stride) {
+ __m128i in0, in1, in2, in3;
+ __m128i u0, u1;
+ __m128i sum = _mm_setzero_si128();
+ int i;
+ // 8 iterations x 4 rows each; every row is four 8-sample vectors (32 cols).
+ for (i = 0; i < 8; ++i) {
+ in0 = _mm_load_si128((const __m128i *)(input + 0));
+ in1 = _mm_load_si128((const __m128i *)(input + 8));
+ in2 = _mm_load_si128((const __m128i *)(input + 16));
+ in3 = _mm_load_si128((const __m128i *)(input + 24));
+
+ input += stride;  // advance to the next row before the adds are issued
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 0));
+ in1 = _mm_load_si128((const __m128i *)(input + 8));
+ in2 = _mm_load_si128((const __m128i *)(input + 16));
+ in3 = _mm_load_si128((const __m128i *)(input + 24));
+
+ input += stride;
+ sum = _mm_add_epi16(sum, u1);
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 0));
+ in1 = _mm_load_si128((const __m128i *)(input + 8));
+ in2 = _mm_load_si128((const __m128i *)(input + 16));
+ in3 = _mm_load_si128((const __m128i *)(input + 24));
+
+ input += stride;
+ sum = _mm_add_epi16(sum, u1);
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 0));
+ in1 = _mm_load_si128((const __m128i *)(input + 8));
+ in2 = _mm_load_si128((const __m128i *)(input + 16));
+ in3 = _mm_load_si128((const __m128i *)(input + 24));
+
+ input += stride;
+ sum = _mm_add_epi16(sum, u1);
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ sum = _mm_add_epi16(sum, u1);
+ }
+ // NOTE(review): 128 samples per int16 lane; confirm the input range cannot wrap.
+ u0 = _mm_setzero_si128();
+ in0 = _mm_unpacklo_epi16(u0, sum);  // widen: value in high half, then >> 16
+ in1 = _mm_unpackhi_epi16(u0, sum);
+ in0 = _mm_srai_epi32(in0, 16);
+ in1 = _mm_srai_epi32(in1, 16);
+ // Horizontal reduction of the eight 32-bit totals into lane 0.
+ sum = _mm_add_epi32(in0, in1);
+ in0 = _mm_unpacklo_epi32(sum, u0);
+ in1 = _mm_unpackhi_epi32(sum, u0);
+
+ sum = _mm_add_epi32(in0, in1);
+ in0 = _mm_srli_si128(sum, 8);
+
+ in1 = _mm_add_epi32(sum, in0);  // lane 0 = sum of all 1024 inputs
+ in1 = _mm_srai_epi32(in1, 3);  // total >> 3 (arithmetic shift)
+ store_output(&in1, output);  // helper not visible here; verify what it writes
+}
+// Template-style includes: FDCT*_2D presumably names each generated function.
+#define DCT_HIGH_BIT_DEPTH 0  // stays defined until the #undef after the 32x32s
+#define FDCT4x4_2D vp10_fdct4x4_sse2
+#define FDCT8x8_2D vp10_fdct8x8_sse2
+#define FDCT16x16_2D vp10_fdct16x16_sse2
+#include "av1/common/x86/vp10_fwd_txfm_impl_sse2.h"
+#undef FDCT4x4_2D
+#undef FDCT8x8_2D
+#undef FDCT16x16_2D
+// 32x32 with FDCT32x32_HIGH_PRECISION 0, under the "_rd" name.
+#define FDCT32x32_2D vp10_fdct32x32_rd_sse2
+#define FDCT32x32_HIGH_PRECISION 0
+#include "av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h"
+#undef FDCT32x32_2D
+#undef FDCT32x32_HIGH_PRECISION
+// Same 32x32 header again, this time with FDCT32x32_HIGH_PRECISION 1.
+#define FDCT32x32_2D vp10_fdct32x32_sse2
+#define FDCT32x32_HIGH_PRECISION 1
+#include "av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h" // NOLINT
+#undef FDCT32x32_2D
+#undef FDCT32x32_HIGH_PRECISION
+#undef DCT_HIGH_BIT_DEPTH
+// Repeat all three expansions with DCT_HIGH_BIT_DEPTH 1 and vp10_highbd_* names.
+#if CONFIG_VP9_HIGHBITDEPTH
+#define DCT_HIGH_BIT_DEPTH 1
+#define FDCT4x4_2D vp10_highbd_fdct4x4_sse2
+#define FDCT8x8_2D vp10_highbd_fdct8x8_sse2
+#define FDCT16x16_2D vp10_highbd_fdct16x16_sse2
+#include "av1/common/x86/vp10_fwd_txfm_impl_sse2.h" // NOLINT
+#undef FDCT4x4_2D
+#undef FDCT8x8_2D
+#undef FDCT16x16_2D
+
+#define FDCT32x32_2D vp10_highbd_fdct32x32_rd_sse2
+#define FDCT32x32_HIGH_PRECISION 0
+#include "av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h" // NOLINT
+#undef FDCT32x32_2D
+#undef FDCT32x32_HIGH_PRECISION
+
+#define FDCT32x32_2D vp10_highbd_fdct32x32_sse2
+#define FDCT32x32_HIGH_PRECISION 1
+#include "av1/common/x86/vp10_fwd_dct32x32_impl_sse2.h" // NOLINT
+#undef FDCT32x32_2D
+#undef FDCT32x32_HIGH_PRECISION
+#undef DCT_HIGH_BIT_DEPTH
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/x86/vp10_highbd_convolve_filters_sse4.c b/av1/common/x86/vp10_highbd_convolve_filters_sse4.c
new file mode 100644
index 0000000..7f3630c
--- /dev/null
+++ b/av1/common/x86/vp10_highbd_convolve_filters_sse4.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_config.h"
+#include "av1/common/filter.h"
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_EXT_INTERP
+DECLARE_ALIGNED(16, const int16_t,
+ sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8]) = {
+ {  // filter 0 of 15: six rows, each one coefficient pair repeated 4 times
+ { 0, 0, 0, 0, 0, 0, 0, 0 },  // the six pairs of every filter add up to 128
+ { -1, 3, -1, 3, -1, 3, -1, 3 },  // presumably pair-per-32-bit-lane; confirm
+ { -6, 127, -6, 127, -6, 127, -6, 127 },
+ { 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1 },
+ { 0, 0, 0, 0, 0, 0, 0, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -2, 5, -2, 5, -2, 5, -2, 5 },
+ { -12, 124, -12, 124, -12, 124, -12, 124 },
+ { 18, -7, 18, -7, 18, -7, 18, -7 },
+ { 3, -2, 3, -2, 3, -2, 3, -2 },
+ { 0, 0, 0, 0, 0, 0, 0, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -3, 7, -3, 7, -3, 7, -3, 7 },
+ { -17, 119, -17, 119, -17, 119, -17, 119 },
+ { 28, -11, 28, -11, 28, -11, 28, -11 },
+ { 5, -2, 5, -2, 5, -2, 5, -2 },
+ { 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -4, 8, -4, 8, -4, 8, -4, 8 },
+ { -20, 114, -20, 114, -20, 114, -20, 114 },
+ { 38, -14, 38, -14, 38, -14, 38, -14 },
+ { 7, -3, 7, -3, 7, -3, 7, -3 },
+ { 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -4, 9, -4, 9, -4, 9, -4, 9 },
+ { -22, 107, -22, 107, -22, 107, -22, 107 },
+ { 49, -17, 49, -17, 49, -17, 49, -17 },
+ { 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2 },
+ { -5, 10, -5, 10, -5, 10, -5, 10 },
+ { -24, 99, -24, 99, -24, 99, -24, 99 },
+ { 59, -20, 59, -20, 59, -20, 59, -20 },
+ { 9, -4, 9, -4, 9, -4, 9, -4 },
+ { 2, 0, 2, 0, 2, 0, 2, 0 },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2 },
+ { -5, 10, -5, 10, -5, 10, -5, 10 },
+ { -24, 90, -24, 90, -24, 90, -24, 90 },
+ { 70, -22, 70, -22, 70, -22, 70, -22 },
+ { 10, -5, 10, -5, 10, -5, 10, -5 },
+ { 2, 0, 2, 0, 2, 0, 2, 0 },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2 },
+ { -5, 10, -5, 10, -5, 10, -5, 10 },
+ { -23, 80, -23, 80, -23, 80, -23, 80 },
+ { 80, -23, 80, -23, 80, -23, 80, -23 },
+ { 10, -5, 10, -5, 10, -5, 10, -5 },
+ { 2, 0, 2, 0, 2, 0, 2, 0 },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2 },
+ { -5, 10, -5, 10, -5, 10, -5, 10 },
+ { -22, 70, -22, 70, -22, 70, -22, 70 },
+ { 90, -24, 90, -24, 90, -24, 90, -24 },
+ { 10, -5, 10, -5, 10, -5, 10, -5 },
+ { 2, 0, 2, 0, 2, 0, 2, 0 },
+ },
+ {
+ { 0, 2, 0, 2, 0, 2, 0, 2 },
+ { -4, 9, -4, 9, -4, 9, -4, 9 },
+ { -20, 59, -20, 59, -20, 59, -20, 59 },
+ { 99, -24, 99, -24, 99, -24, 99, -24 },
+ { 10, -5, 10, -5, 10, -5, 10, -5 },
+ { 2, 0, 2, 0, 2, 0, 2, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -4, 8, -4, 8, -4, 8, -4, 8 },
+ { -17, 49, -17, 49, -17, 49, -17, 49 },
+ { 107, -22, 107, -22, 107, -22, 107, -22 },
+ { 9, -4, 9, -4, 9, -4, 9, -4 },
+ { 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -3, 7, -3, 7, -3, 7, -3, 7 },
+ { -14, 38, -14, 38, -14, 38, -14, 38 },
+ { 114, -20, 114, -20, 114, -20, 114, -20 },
+ { 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -2, 5, -2, 5, -2, 5, -2, 5 },
+ { -11, 28, -11, 28, -11, 28, -11, 28 },
+ { 119, -17, 119, -17, 119, -17, 119, -17 },
+ { 7, -3, 7, -3, 7, -3, 7, -3 },
+ { 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0 },
+ { -2, 3, -2, 3, -2, 3, -2, 3 },
+ { -7, 18, -7, 18, -7, 18, -7, 18 },
+ { 124, -12, 124, -12, 124, -12, 124, -12 },
+ { 5, -2, 5, -2, 5, -2, 5, -2 },
+ { 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0 },
+ { -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 8, -4, 8, -4, 8, -4, 8 },
+ { 127, -6, 127, -6, 127, -6, 127, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1 },
+ { 0, 0, 0, 0, 0, 0, 0, 0 },
+ },
+};
+#endif  // CONFIG_EXT_INTERP
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_EXT_INTERP
+DECLARE_ALIGNED(16, const int16_t,
+ sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8]) = {
+ {  // filter 0 of 15: six rows, each one coefficient pair repeated 4 times
+ { 0, 1, 0, 1, 0, 1, 0, 1 },  // the six pairs of every filter add up to 128
+ { -2, 3, -2, 3, -2, 3, -2, 3 },  // presumably pair-per-32-bit-lane; confirm
+ { -7, 127, -7, 127, -7, 127, -7, 127 },
+ { 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1 },
+ { 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -3, 6, -3, 6, -3, 6, -3, 6 },
+ { -13, 124, -13, 124, -13, 124, -13, 124 },
+ { 18, -8, 18, -8, 18, -8, 18, -8 },
+ { 4, -2, 4, -2, 4, -2, 4, -2 },
+ { 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -4, 8, -4, 8, -4, 8, -4, 8 },
+ { -18, 120, -18, 120, -18, 120, -18, 120 },
+ { 28, -12, 28, -12, 28, -12, 28, -12 },
+ { 7, -4, 7, -4, 7, -4, 7, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 10, -6, 10, -6, 10, -6, 10 },
+ { -21, 115, -21, 115, -21, 115, -21, 115 },
+ { 38, -15, 38, -15, 38, -15, 38, -15 },
+ { 8, -5, 8, -5, 8, -5, 8, -5 },
+ { 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -6, 12, -6, 12, -6, 12, -6, 12 },
+ { -24, 108, -24, 108, -24, 108, -24, 108 },
+ { 49, -18, 49, -18, 49, -18, 49, -18 },
+ { 10, -6, 10, -6, 10, -6, 10, -6 },
+ { 3, -2, 3, -2, 3, -2, 3, -2 },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -7, 13, -7, 13, -7, 13, -7, 13 },
+ { -25, 100, -25, 100, -25, 100, -25, 100 },
+ { 60, -21, 60, -21, 60, -21, 60, -21 },
+ { 11, -7, 11, -7, 11, -7, 11, -7 },
+ { 4, -2, 4, -2, 4, -2, 4, -2 },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -7, 13, -7, 13, -7, 13, -7, 13 },
+ { -26, 91, -26, 91, -26, 91, -26, 91 },
+ { 71, -24, 71, -24, 71, -24, 71, -24 },
+ { 13, -7, 13, -7, 13, -7, 13, -7 },
+ { 4, -2, 4, -2, 4, -2, 4, -2 },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -7, 13, -7, 13, -7, 13, -7, 13 },
+ { -25, 81, -25, 81, -25, 81, -25, 81 },
+ { 81, -25, 81, -25, 81, -25, 81, -25 },
+ { 13, -7, 13, -7, 13, -7, 13, -7 },
+ { 4, -2, 4, -2, 4, -2, 4, -2 },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -7, 13, -7, 13, -7, 13, -7, 13 },
+ { -24, 71, -24, 71, -24, 71, -24, 71 },
+ { 91, -26, 91, -26, 91, -26, 91, -26 },
+ { 13, -7, 13, -7, 13, -7, 13, -7 },
+ { 4, -2, 4, -2, 4, -2, 4, -2 },
+ },
+ {
+ { -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -7, 11, -7, 11, -7, 11, -7, 11 },
+ { -21, 60, -21, 60, -21, 60, -21, 60 },
+ { 100, -25, 100, -25, 100, -25, 100, -25 },
+ { 13, -7, 13, -7, 13, -7, 13, -7 },
+ { 4, -2, 4, -2, 4, -2, 4, -2 },
+ },
+ {
+ { -2, 3, -2, 3, -2, 3, -2, 3 },
+ { -6, 10, -6, 10, -6, 10, -6, 10 },
+ { -18, 49, -18, 49, -18, 49, -18, 49 },
+ { 108, -24, 108, -24, 108, -24, 108, -24 },
+ { 12, -6, 12, -6, 12, -6, 12, -6 },
+ { 4, -2, 4, -2, 4, -2, 4, -2 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -5, 8, -5, 8, -5, 8, -5, 8 },
+ { -15, 38, -15, 38, -15, 38, -15, 38 },
+ { 115, -21, 115, -21, 115, -21, 115, -21 },
+ { 10, -6, 10, -6, 10, -6, 10, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 7, -4, 7, -4, 7, -4, 7 },
+ { -12, 28, -12, 28, -12, 28, -12, 28 },
+ { 120, -18, 120, -18, 120, -18, 120, -18 },
+ { 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -8, 18, -8, 18, -8, 18, -8, 18 },
+ { 124, -13, 124, -13, 124, -13, 124, -13 },
+ { 6, -3, 6, -3, 6, -3, 6, -3 },
+ { 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 8, -4, 8, -4, 8, -4, 8 },
+ { 127, -7, 127, -7, 127, -7, 127, -7 },
+ { 3, -2, 3, -2, 3, -2, 3, -2 },
+ { 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+};
+#endif  // CONFIG_EXT_INTERP
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP9_HIGHBITDEPTH
+#if USE_TEMPORALFILTER_12TAP
+DECLARE_ALIGNED(
+ 16, const int16_t,
+ sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8]) = {
+ {  // filter 0 of 15: six rows, each one coefficient pair repeated 4 times
+ { 0, 1, 0, 1, 0, 1, 0, 1 },  // the six pairs of every filter add up to 128
+ { -1, 3, -1, 3, -1, 3, -1, 3 },  // presumably pair-per-32-bit-lane; confirm
+ { -7, 127, -7, 127, -7, 127, -7, 127 },
+ { 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1 },
+ { 0, 0, 0, 0, 0, 0, 0, 0 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -3, 5, -3, 5, -3, 5, -3, 5 },
+ { -12, 124, -12, 124, -12, 124, -12, 124 },
+ { 18, -8, 18, -8, 18, -8, 18, -8 },
+ { 4, -2, 4, -2, 4, -2, 4, -2 },
+ { 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 8, -4, 8, -4, 8, -4, 8 },
+ { -17, 120, -17, 120, -17, 120, -17, 120 },
+ { 28, -11, 28, -11, 28, -11, 28, -11 },
+ { 6, -3, 6, -3, 6, -3, 6, -3 },
+ { 1, -1, 1, -1, 1, -1, 1, -1 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 10, -4, 10, -4, 10, -4, 10 },
+ { -21, 114, -21, 114, -21, 114, -21, 114 },
+ { 38, -15, 38, -15, 38, -15, 38, -15 },
+ { 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -5, 11, -5, 11, -5, 11, -5, 11 },
+ { -23, 107, -23, 107, -23, 107, -23, 107 },
+ { 49, -18, 49, -18, 49, -18, 49, -18 },
+ { 9, -5, 9, -5, 9, -5, 9, -5 },
+ { 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 12, -6, 12, -6, 12, -6, 12 },
+ { -25, 99, -25, 99, -25, 99, -25, 99 },
+ { 60, -21, 60, -21, 60, -21, 60, -21 },
+ { 11, -6, 11, -6, 11, -6, 11, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 12, -6, 12, -6, 12, -6, 12 },
+ { -25, 90, -25, 90, -25, 90, -25, 90 },
+ { 70, -23, 70, -23, 70, -23, 70, -23 },
+ { 12, -6, 12, -6, 12, -6, 12, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 12, -6, 12, -6, 12, -6, 12 },
+ { -24, 80, -24, 80, -24, 80, -24, 80 },
+ { 80, -24, 80, -24, 80, -24, 80, -24 },
+ { 12, -6, 12, -6, 12, -6, 12, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 12, -6, 12, -6, 12, -6, 12 },
+ { -23, 70, -23, 70, -23, 70, -23, 70 },
+ { 90, -25, 90, -25, 90, -25, 90, -25 },
+ { 12, -6, 12, -6, 12, -6, 12, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 3, -1, 3, -1, 3, -1, 3 },
+ { -6, 11, -6, 11, -6, 11, -6, 11 },
+ { -21, 60, -21, 60, -21, 60, -21, 60 },
+ { 99, -25, 99, -25, 99, -25, 99, -25 },
+ { 12, -6, 12, -6, 12, -6, 12, -6 },
+ { 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -5, 9, -5, 9, -5, 9, -5, 9 },
+ { -18, 49, -18, 49, -18, 49, -18, 49 },
+ { 107, -23, 107, -23, 107, -23, 107, -23 },
+ { 11, -5, 11, -5, 11, -5, 11, -5 },
+ { 3, -1, 3, -1, 3, -1, 3, -1 },
+ },
+ {
+ { -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 8, -4, 8, -4, 8, -4, 8 },
+ { -15, 38, -15, 38, -15, 38, -15, 38 },
+ { 114, -21, 114, -21, 114, -21, 114, -21 },
+ { 10, -4, 10, -4, 10, -4, 10, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { -1, 1, -1, 1, -1, 1, -1, 1 },
+ { -3, 6, -3, 6, -3, 6, -3, 6 },
+ { -11, 28, -11, 28, -11, 28, -11, 28 },
+ { 120, -17, 120, -17, 120, -17, 120, -17 },
+ { 8, -4, 8, -4, 8, -4, 8, -4 },
+ { 2, -1, 2, -1, 2, -1, 2, -1 },
+ },
+ {
+ { 0, 1, 0, 1, 0, 1, 0, 1 },
+ { -2, 4, -2, 4, -2, 4, -2, 4 },
+ { -8, 18, -8, 18, -8, 18, -8, 18 },
+ { 124, -12, 124, -12, 124, -12, 124, -12 },
+ { 5, -3, 5, -3, 5, -3, 5, -3 },
+ { 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+ {
+ { 0, 0, 0, 0, 0, 0, 0, 0 },
+ { -1, 2, -1, 2, -1, 2, -1, 2 },
+ { -4, 8, -4, 8, -4, 8, -4, 8 },
+ { 127, -7, 127, -7, 127, -7, 127, -7 },
+ { 3, -1, 3, -1, 3, -1, 3, -1 },
+ { 1, 0, 1, 0, 1, 0, 1, 0 },
+ },
+};
+#endif  // USE_TEMPORALFILTER_12TAP
+#endif  // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/x86/vp10_highbd_convolve_sse4.c b/av1/common/x86/vp10_highbd_convolve_sse4.c
new file mode 100644
index 0000000..ea78400
--- /dev/null
+++ b/av1/common/x86/vp10_highbd_convolve_sse4.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <smmintrin.h>
+
+#include "./vp10_rtcd.h"
+#include "av1/common/filter.h"
+
+typedef void (*TransposeSave)(const int width, int pixelsNum, uint32_t *src,
+ int src_stride, uint16_t *dst, int dst_stride,
+ int bd);  // Signature of the save routines stored in transSaveTab.
+
+// Writes rows u[0..3] to dst, dst + dst_stride, ...: the low 32 bits of each row when width == 2, otherwise the low 64 bits.
+// pixelsNum 0: all 4 rows are written; 1/2/3: only the first 1/2/3 rows are written.
+static void writePixel(__m128i *u, int width, int pixelsNum, uint16_t *dst,
+ int dst_stride) {
+ if (2 == width) {  // two 16-bit pixels (32 bits) per row
+ if (0 == pixelsNum) {
+ *(int *)dst = _mm_cvtsi128_si32(u[0]);
+ *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u[1]);
+ *(int *)(dst + 2 * dst_stride) = _mm_cvtsi128_si32(u[2]);
+ *(int *)(dst + 3 * dst_stride) = _mm_cvtsi128_si32(u[3]);
+ } else if (1 == pixelsNum) {
+ *(int *)dst = _mm_cvtsi128_si32(u[0]);
+ } else if (2 == pixelsNum) {
+ *(int *)dst = _mm_cvtsi128_si32(u[0]);
+ *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u[1]);
+ } else if (3 == pixelsNum) {
+ *(int *)dst = _mm_cvtsi128_si32(u[0]);
+ *(int *)(dst + dst_stride) = _mm_cvtsi128_si32(u[1]);
+ *(int *)(dst + 2 * dst_stride) = _mm_cvtsi128_si32(u[2]);
+ }
+ } else {  // any other width: four 16-bit pixels (64 bits) per row
+ if (0 == pixelsNum) {
+ _mm_storel_epi64((__m128i *)dst, u[0]);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride), u[1]);
+ _mm_storel_epi64((__m128i *)(dst + 2 * dst_stride), u[2]);
+ _mm_storel_epi64((__m128i *)(dst + 3 * dst_stride), u[3]);
+ } else if (1 == pixelsNum) {
+ _mm_storel_epi64((__m128i *)dst, u[0]);
+ } else if (2 == pixelsNum) {
+ _mm_storel_epi64((__m128i *)dst, u[0]);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride), u[1]);
+ } else if (3 == pixelsNum) {
+ _mm_storel_epi64((__m128i *)dst, u[0]);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride), u[1]);
+ _mm_storel_epi64((__m128i *)(dst + 2 * dst_stride), u[2]);
+ }
+ }
+}
+
+// Clamps, in place, every 16-bit lane of p[0..numVecs-1] to [0, (1 << bd) - 1]; comparisons are signed.
+static void highbd_clip(__m128i *p, int numVecs, int bd) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one);  // (1 << bd) - 1 in each lane
+ __m128i clamped, mask;
+ int i;
+
+ for (i = 0; i < numVecs; i++) {
+ mask = _mm_cmpgt_epi16(p[i], max);  // all-ones in lanes above max
+ clamped = _mm_andnot_si128(mask, p[i]);  // keep the in-range lanes
+ mask = _mm_and_si128(mask, max);  // max in the out-of-range lanes
+ clamped = _mm_or_si128(mask, clamped);  // merge: min(p[i], max)
+ mask = _mm_cmpgt_epi16(clamped, zero);  // all-ones in strictly positive lanes
+ p[i] = _mm_and_si128(clamped, mask);  // non-positive lanes become 0
+ }
+}
+
+static void transClipPixel(uint32_t *src, int src_stride, __m128i *u, int bd) {  // Rounds, clips and transposes a 4x4 block of 32-bit sums into u[0..3].
+ __m128i v0, v1;
+ __m128i rnd = _mm_set1_epi32(1 << (FILTER_BITS - 1));  // rounding offset for the shift below
+
+ u[0] = _mm_loadu_si128((__m128i const *)src);  // four rows of four 32-bit values
+ u[1] = _mm_loadu_si128((__m128i const *)(src + src_stride));
+ u[2] = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
+ u[3] = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
+
+ u[0] = _mm_add_epi32(u[0], rnd);
+ u[1] = _mm_add_epi32(u[1], rnd);
+ u[2] = _mm_add_epi32(u[2], rnd);
+ u[3] = _mm_add_epi32(u[3], rnd);
+
+ u[0] = _mm_srai_epi32(u[0], FILTER_BITS);  // arithmetic shift: values are treated as signed
+ u[1] = _mm_srai_epi32(u[1], FILTER_BITS);
+ u[2] = _mm_srai_epi32(u[2], FILTER_BITS);
+ u[3] = _mm_srai_epi32(u[3], FILTER_BITS);
+
+ u[0] = _mm_packus_epi32(u[0], u[1]);  // rows 0,1 packed to unsigned 16-bit
+ u[1] = _mm_packus_epi32(u[2], u[3]);  // rows 2,3 packed to unsigned 16-bit
+
+ highbd_clip(u, 2, bd);
+
+ v0 = _mm_unpacklo_epi16(u[0], u[1]);  // rows 0 and 2 interleaved
+ v1 = _mm_unpackhi_epi16(u[0], u[1]);  // rows 1 and 3 interleaved
+
+ u[0] = _mm_unpacklo_epi16(v0, v1);  // input columns 0 (low half) and 1 (high half)
+ u[2] = _mm_unpackhi_epi16(v0, v1);  // input columns 2 (low half) and 3 (high half)
+
+ u[1] = _mm_srli_si128(u[0], 8);  // column 1 moved to the low half
+ u[3] = _mm_srli_si128(u[2], 8);  // column 3 moved to the low half
+}
+
+// pixelsNum = 0 : all 4 rows of pixels will be saved.
+// pixelsNum = 1/2/3 : only the first 1/2/3 rows of pixels will be saved.
+void trans_save_4x4(const int width, int pixelsNum, uint32_t *src,
+ int src_stride, uint16_t *dst, int dst_stride, int bd) {
+ __m128i u[4];
+ transClipPixel(src, src_stride, u, bd);  // round, clip to bd and transpose
+ writePixel(u, width, pixelsNum, dst, dst_stride);  // overwrite dst
+}
+
+void trans_accum_save_4x4(const int width, int pixelsNum, uint32_t *src,
+ int src_stride, uint16_t *dst, int dst_stride,
+ int bd) {  // Same as trans_save_4x4, but stores (new + dst + 1) >> 1.
+ __m128i u[4], v[4];
+ const __m128i ones = _mm_set1_epi16(1);
+
+ transClipPixel(src, src_stride, u, bd);
+
+ v[0] = _mm_loadl_epi64((__m128i const *)dst);  // NOTE(review): all four dst rows are read, 8 bytes each,
+ v[1] = _mm_loadl_epi64((__m128i const *)(dst + dst_stride));  // regardless of width and pixelsNum;
+ v[2] = _mm_loadl_epi64((__m128i const *)(dst + 2 * dst_stride));  // only the writes below are limited.
+ v[3] = _mm_loadl_epi64((__m128i const *)(dst + 3 * dst_stride));
+
+ u[0] = _mm_add_epi16(u[0], v[0]);
+ u[1] = _mm_add_epi16(u[1], v[1]);
+ u[2] = _mm_add_epi16(u[2], v[2]);
+ u[3] = _mm_add_epi16(u[3], v[3]);
+
+ u[0] = _mm_add_epi16(u[0], ones);  // +1 so the shift below rounds
+ u[1] = _mm_add_epi16(u[1], ones);
+ u[2] = _mm_add_epi16(u[2], ones);
+ u[3] = _mm_add_epi16(u[3], ones);
+
+ u[0] = _mm_srai_epi16(u[0], 1);
+ u[1] = _mm_srai_epi16(u[1], 1);
+ u[2] = _mm_srai_epi16(u[2], 1);
+ u[3] = _mm_srai_epi16(u[3], 1);
+
+ writePixel(u, width, pixelsNum, dst, dst_stride);
+}
+
+static TransposeSave transSaveTab[2] = { trans_save_4x4, trans_accum_save_4x4 };  // indexed by avg
+
+static INLINE void transpose_pair(__m128i *in, __m128i *out) {  // 32-bit transpose: in[0..3] fully, in[4..7] low halves only.
+ __m128i x0, x1;
+
+ x0 = _mm_unpacklo_epi32(in[0], in[1]);
+ x1 = _mm_unpacklo_epi32(in[2], in[3]);
+
+ out[0] = _mm_unpacklo_epi64(x0, x1);  // 32-bit element 0 of in[0..3]
+ out[1] = _mm_unpackhi_epi64(x0, x1);  // 32-bit element 1 of in[0..3]
+
+ x0 = _mm_unpackhi_epi32(in[0], in[1]);
+ x1 = _mm_unpackhi_epi32(in[2], in[3]);
+
+ out[2] = _mm_unpacklo_epi64(x0, x1);  // 32-bit element 2 of in[0..3]
+ out[3] = _mm_unpackhi_epi64(x0, x1);  // 32-bit element 3 of in[0..3]
+
+ x0 = _mm_unpacklo_epi32(in[4], in[5]);
+ x1 = _mm_unpacklo_epi32(in[6], in[7]);
+
+ out[4] = _mm_unpacklo_epi64(x0, x1);  // 32-bit element 0 of in[4..7]
+ out[5] = _mm_unpackhi_epi64(x0, x1);  // 32-bit element 1 of in[4..7]; elements 2 and 3 are not produced
+}
+
+static void highbd_filter_horiz(const uint16_t *src, int src_stride, __m128i *f,
+ int tapsNum, uint32_t *buf) {  // Filters one position on 4 rows; writes 4 unrounded 32-bit sums to buf.
+ __m128i u[8], v[6];
+
+ if (tapsNum == 10) {
+ src -= 1;  // presumably aligns 10-tap filters with a 12-entry coefficient layout - TODO confirm against the tables
+ }
+
+ u[0] = _mm_loadu_si128((__m128i const *)src);  // pixels 0..7 of rows 0..3
+ u[1] = _mm_loadu_si128((__m128i const *)(src + src_stride));
+ u[2] = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride));
+ u[3] = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride));
+
+ u[4] = _mm_loadu_si128((__m128i const *)(src + 8));  // pixels 8..15 of rows 0..3
+ u[5] = _mm_loadu_si128((__m128i const *)(src + src_stride + 8));
+ u[6] = _mm_loadu_si128((__m128i const *)(src + 2 * src_stride + 8));
+ u[7] = _mm_loadu_si128((__m128i const *)(src + 3 * src_stride + 8));
+
+ transpose_pair(u, v);  // v[k] holds pixel pair (2k, 2k + 1) of each of the four rows
+
+ u[0] = _mm_madd_epi16(v[0], f[0]);  // each 32-bit lane: sum of one pixel pair times one pair of taps
+ u[1] = _mm_madd_epi16(v[1], f[1]);
+ u[2] = _mm_madd_epi16(v[2], f[2]);
+ u[3] = _mm_madd_epi16(v[3], f[3]);
+ u[4] = _mm_madd_epi16(v[4], f[4]);
+ u[5] = _mm_madd_epi16(v[5], f[5]);
+
+ u[6] = _mm_min_epi32(u[2], u[3]);  // min + max equals u[2] + u[3];
+ u[7] = _mm_max_epi32(u[2], u[3]);  // the split only fixes the order in which they are added below
+
+ u[0] = _mm_add_epi32(u[0], u[1]);
+ u[0] = _mm_add_epi32(u[0], u[5]);
+ u[0] = _mm_add_epi32(u[0], u[4]);
+ u[0] = _mm_add_epi32(u[0], u[6]);
+ u[0] = _mm_add_epi32(u[0], u[7]);
+
+ _mm_storeu_si128((__m128i *)buf, u[0]);  // one 32-bit sum per source row
+}
+
+void vp10_highbd_convolve_horiz_sse4_1(const uint16_t *src, int src_stride,
+ uint16_t *dst, int dst_stride, int w,
+ int h,
+ const InterpFilterParams filter_params,
+ const int subpel_x_q4, int x_step_q4,
+ int avg, int bd) {  // Horizontal high-bitdepth convolution; defers to the C version when the SIMD path does not apply.
+ DECLARE_ALIGNED(16, uint32_t, temp[4 * 4]);  // 4x4 block of unrounded sums, 4 values per filtered column
+ __m128i verf[6];
+ HbdSubpelFilterCoeffs vCoeffs;
+ const uint16_t *srcPtr;
+ const int tapsNum = filter_params.taps;
+ int i, col, count, blkResidu, blkHeight;
+ TransposeSave transSave = transSaveTab[avg];  // avg selects store vs. average-with-dst
+ (void)x_step_q4;  // NOTE(review): x_step_q4 is used below, so this cast has no effect
+
+ if (0 == subpel_x_q4 || 16 != x_step_q4) {  // SIMD path requires a non-zero phase and a step of 16
+ vp10_highbd_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h,
+ filter_params, subpel_x_q4, x_step_q4, avg,
+ bd);
+ return;
+ }
+
+ vCoeffs =
+ vp10_hbd_get_subpel_filter_ver_signal_dir(filter_params, subpel_x_q4 - 1);
+ if (!vCoeffs) {  // no coefficient table returned for this filter: use the C version
+ vp10_highbd_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h,
+ filter_params, subpel_x_q4, x_step_q4, avg,
+ bd);
+ return;
+ }
+
+ verf[0] = *((const __m128i *)(vCoeffs));  // six coefficient vectors, one per pair of taps
+ verf[1] = *((const __m128i *)(vCoeffs + 1));
+ verf[2] = *((const __m128i *)(vCoeffs + 2));
+ verf[3] = *((const __m128i *)(vCoeffs + 3));
+ verf[4] = *((const __m128i *)(vCoeffs + 4));
+ verf[5] = *((const __m128i *)(vCoeffs + 5));
+
+ src -= (tapsNum >> 1) - 1;  // step back to the first pixel covered by the filter
+ srcPtr = src;
+
+ count = 0;
+ blkHeight = h >> 2;  // number of complete 4-row blocks
+ blkResidu = h & 3;  // leftover rows (0..3)
+
+ while (blkHeight != 0) {
+ for (col = 0; col < w; col += 4) {
+ for (i = 0; i < 4; ++i) {
+ highbd_filter_horiz(srcPtr, src_stride, verf, tapsNum, temp + (i * 4));  // output column col + i for 4 rows
+ srcPtr += 1;
+ }
+ transSave(w, 0, temp, 4, dst + col, dst_stride, bd);  // pixelsNum 0: write all 4 rows
+ }
+ count++;
+ srcPtr = src + count * src_stride * 4;
+ dst += dst_stride * 4;
+ blkHeight--;
+ }
+
+ if (blkResidu == 0) return;
+
+ for (col = 0; col < w; col += 4) {  // leftover rows: four source rows are still filtered, only blkResidu are written
+ for (i = 0; i < 4; ++i) {
+ highbd_filter_horiz(srcPtr, src_stride, verf, tapsNum, temp + (i * 4));
+ srcPtr += 1;
+ }
+ transSave(w, blkResidu, temp, 4, dst + col, dst_stride, bd);
+ }
+}
+
+// Vertical convolutional filter
+
+typedef void (*WritePixels)(__m128i *u, int bd, uint16_t *dst);  // Signature of the write2pixels*/write4pixels* routines.
+
+static void highbdRndingPacks(__m128i *u) {  // Rounds and shifts u[0] by FILTER_BITS, then packs it to 16 bits in place.
+ __m128i rnd = _mm_set1_epi32(1 << (FILTER_BITS - 1));
+ u[0] = _mm_add_epi32(u[0], rnd);
+ u[0] = _mm_srai_epi32(u[0], FILTER_BITS);
+ u[0] = _mm_packus_epi32(u[0], u[0]);  // unsigned saturation; both 64-bit halves hold the same four values
+}
+
+static void write2pixelsOnly(__m128i *u, int bd, uint16_t *dst) {  // Rounds, clips to bd and overwrites two pixels at dst.
+ highbdRndingPacks(u);
+ highbd_clip(u, 1, bd);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(u[0]);  // low 32 bits = two 16-bit pixels
+}
+
+static void write2pixelsAccum(__m128i *u, int bd, uint16_t *dst) {  // Stores (new + dst + 1) >> 1 for two pixels.
+ __m128i v = _mm_loadl_epi64((__m128i const *)dst);  // NOTE(review): reads 8 bytes although only 4 are written back
+ const __m128i ones = _mm_set1_epi16(1);
+
+ highbdRndingPacks(u);
+ highbd_clip(u, 1, bd);
+
+ v = _mm_add_epi16(v, u[0]);
+ v = _mm_add_epi16(v, ones);  // +1 so the shift below rounds
+ v = _mm_srai_epi16(v, 1);
+ *(uint32_t *)dst = _mm_cvtsi128_si32(v);  // low 32 bits = two 16-bit pixels
+}
+
+WritePixels write2pixelsTab[2] = { write2pixelsOnly, write2pixelsAccum };  // indexed by avg
+
+static void write4pixelsOnly(__m128i *u, int bd, uint16_t *dst) {  // Rounds, clips to bd and overwrites four pixels at dst.
+ highbdRndingPacks(u);
+ highbd_clip(u, 1, bd);
+ _mm_storel_epi64((__m128i *)dst, u[0]);  // low 64 bits = four 16-bit pixels
+}
+
+static void write4pixelsAccum(__m128i *u, int bd, uint16_t *dst) {  // Stores (new + dst + 1) >> 1 for four pixels.
+ __m128i v = _mm_loadl_epi64((__m128i const *)dst);  // current four pixels at dst
+ const __m128i ones = _mm_set1_epi16(1);
+
+ highbdRndingPacks(u);
+ highbd_clip(u, 1, bd);
+
+ v = _mm_add_epi16(v, u[0]);
+ v = _mm_add_epi16(v, ones);  // +1 so the shift below rounds
+ v = _mm_srai_epi16(v, 1);
+ _mm_storel_epi64((__m128i *)dst, v);
+}
+
+WritePixels write4pixelsTab[2] = { write4pixelsOnly, write4pixelsAccum };  // indexed by avg
+
+static void filter_vert_horiz_parallel(const uint16_t *src, int src_stride,
+ const __m128i *f, int taps,
+ uint16_t *dst, WritePixels saveFunc,
+ int bd) {  // Vertical filter for one output row, four adjacent columns at a time; saveFunc writes the result.
+ __m128i s[12];
+ __m128i zero = _mm_setzero_si128();
+ int i = 0;
+ int r = 0;
+
+ // TODO(luoyi) treat s[12] as a circular buffer in width = 2 case
+ if (10 == taps) {
+ i += 1;
+ s[0] = zero;  // 10 taps: slot 0 is zero and source rows fill s[1..11]
+ }
+ while (i < 12) {
+ s[i] = _mm_loadu_si128((__m128i const *)(src + r * src_stride));  // 16 bytes are read; only the low 4 pixels are used below
+ i += 1;
+ r += 1;
+ }
+
+ s[0] = _mm_unpacklo_epi16(s[0], s[1]);  // interleave the low 4 pixels of each pair of rows
+ s[2] = _mm_unpacklo_epi16(s[2], s[3]);
+ s[4] = _mm_unpacklo_epi16(s[4], s[5]);
+ s[6] = _mm_unpacklo_epi16(s[6], s[7]);
+ s[8] = _mm_unpacklo_epi16(s[8], s[9]);
+ s[10] = _mm_unpacklo_epi16(s[10], s[11]);
+
+ s[0] = _mm_madd_epi16(s[0], f[0]);  // each 32-bit lane: two rows times two taps, for one column
+ s[2] = _mm_madd_epi16(s[2], f[1]);
+ s[4] = _mm_madd_epi16(s[4], f[2]);
+ s[6] = _mm_madd_epi16(s[6], f[3]);
+ s[8] = _mm_madd_epi16(s[8], f[4]);
+ s[10] = _mm_madd_epi16(s[10], f[5]);
+
+ s[1] = _mm_min_epi32(s[4], s[6]);  // min + max equals s[4] + s[6];
+ s[3] = _mm_max_epi32(s[4], s[6]);  // the split only fixes the order in which they are added below
+
+ s[0] = _mm_add_epi32(s[0], s[2]);
+ s[0] = _mm_add_epi32(s[0], s[10]);
+ s[0] = _mm_add_epi32(s[0], s[8]);
+ s[0] = _mm_add_epi32(s[0], s[1]);
+ s[0] = _mm_add_epi32(s[0], s[3]);
+
+ saveFunc(s, bd, dst);  // saveFunc consumes s[0] only (see the write*pixels* routines)
+}
+
+static void highbd_filter_vert_compute_large(const uint16_t *src,
+ int src_stride, const __m128i *f,
+ int taps, int w, int h,
+ uint16_t *dst, int dst_stride,
+ int avg, int bd) {  // Vertical filter over a w x h block, four columns per call; used by the caller when w > 2.
+ int col;
+ int rowIndex = 0;
+ const uint16_t *src_ptr = src;
+ uint16_t *dst_ptr = dst;
+ const int step = 4;  // columns produced per filter_vert_horiz_parallel call
+ WritePixels write4pixels = write4pixelsTab[avg];
+
+ do {  // do/while: the body runs at least once, even if h <= 0
+ for (col = 0; col < w; col += step) {
+ filter_vert_horiz_parallel(src_ptr, src_stride, f, taps, dst_ptr,
+ write4pixels, bd);
+ src_ptr += step;
+ dst_ptr += step;
+ }
+ rowIndex++;
+ src_ptr = src + rowIndex * src_stride;  // rewind to the start of the next row
+ dst_ptr = dst + rowIndex * dst_stride;
+ } while (rowIndex < h);
+}
+
+static void highbd_filter_vert_compute_small(const uint16_t *src,
+ int src_stride, const __m128i *f,
+ int taps, int w, int h,
+ uint16_t *dst, int dst_stride,
+ int avg, int bd) {  // Vertical filter writing two pixels per row; used by the caller when w <= 2.
+ int rowIndex = 0;
+ WritePixels write2pixels = write2pixelsTab[avg];
+ (void)w;  // w is not used: each row gets exactly one two-pixel write
+
+ do {  // do/while: the body runs at least once, even if h <= 0
+ filter_vert_horiz_parallel(src, src_stride, f, taps, dst, write2pixels, bd);
+ rowIndex++;
+ src += src_stride;
+ dst += dst_stride;
+ } while (rowIndex < h);
+}
+
+void vp10_highbd_convolve_vert_sse4_1(const uint16_t *src, int src_stride,
+ uint16_t *dst, int dst_stride, int w,
+ int h,
+ const InterpFilterParams filter_params,
+ const int subpel_y_q4, int y_step_q4,
+ int avg, int bd) {  // Vertical high-bitdepth convolution; defers to the C version when the SIMD path does not apply.
+ __m128i verf[6];
+ HbdSubpelFilterCoeffs vCoeffs;
+ const int tapsNum = filter_params.taps;
+
+ if (0 == subpel_y_q4 || 16 != y_step_q4) {  // SIMD path requires a non-zero phase and a step of 16
+ vp10_highbd_convolve_vert_c(src, src_stride, dst, dst_stride, w, h,
+ filter_params, subpel_y_q4, y_step_q4, avg, bd);
+ return;
+ }
+
+ vCoeffs =
+ vp10_hbd_get_subpel_filter_ver_signal_dir(filter_params, subpel_y_q4 - 1);
+ if (!vCoeffs) {  // no coefficient table returned for this filter: use the C version
+ vp10_highbd_convolve_vert_c(src, src_stride, dst, dst_stride, w, h,
+ filter_params, subpel_y_q4, y_step_q4, avg, bd);
+ return;
+ }
+
+ verf[0] = *((const __m128i *)(vCoeffs));  // six coefficient vectors, one per pair of taps
+ verf[1] = *((const __m128i *)(vCoeffs + 1));
+ verf[2] = *((const __m128i *)(vCoeffs + 2));
+ verf[3] = *((const __m128i *)(vCoeffs + 3));
+ verf[4] = *((const __m128i *)(vCoeffs + 4));
+ verf[5] = *((const __m128i *)(vCoeffs + 5));
+
+ src -= src_stride * ((tapsNum >> 1) - 1);  // step back to the first row covered by the filter
+
+ if (w > 2) {  // four columns per call
+ highbd_filter_vert_compute_large(src, src_stride, verf, tapsNum, w, h, dst,
+ dst_stride, avg, bd);
+ } else {  // two pixels per row
+ highbd_filter_vert_compute_small(src, src_stride, verf, tapsNum, w, h, dst,
+ dst_stride, avg, bd);
+ }
+}
diff --git a/av1/common/x86/vp10_inv_txfm_sse2.c b/av1/common/x86/vp10_inv_txfm_sse2.c
new file mode 100644
index 0000000..b09933e
--- /dev/null
+++ b/av1/common/x86/vp10_inv_txfm_sse2.c
@@ -0,0 +1,4033 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "av1/common/x86/vp10_inv_txfm_sse2.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+
+#define RECON_AND_STORE4X4(dest, in_x) /* Adds in_x to the 4 bytes at dest with unsigned saturation; needs `zero` in scope. */ \
+ { \
+ __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); /* load 4 bytes */ \
+ d0 = _mm_unpacklo_epi8(d0, zero); /* widen bytes to 16 bits */ \
+ d0 = _mm_add_epi16(in_x, d0); \
+ d0 = _mm_packus_epi16(d0, d0); /* saturate back to 8 bits */ \
+ *(int *)(dest) = _mm_cvtsi128_si32(d0); \
+ }
+
+void vp10_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {  // 4x4 inverse DCT of input, added into dest with 8-bit saturation.
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i eight = _mm_set1_epi16(8);  // rounding offset for the final >> 4
+ const __m128i cst = _mm_setr_epi16(
+ (int16_t)cospi_16_64, (int16_t)cospi_16_64, (int16_t)cospi_16_64,
+ (int16_t)-cospi_16_64, (int16_t)cospi_24_64, (int16_t)-cospi_8_64,
+ (int16_t)cospi_8_64, (int16_t)cospi_24_64);
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ __m128i input0, input1, input2, input3;
+
+ // Rows
+ input0 = _mm_load_si128((const __m128i *)input);  // aligned loads: input must be 16-byte aligned
+ input2 = _mm_load_si128((const __m128i *)(input + 8));
+
+ // Construct i3, i1, i3, i1, i2, i0, i2, i0
+ input0 = _mm_shufflelo_epi16(input0, 0xd8);
+ input0 = _mm_shufflehi_epi16(input0, 0xd8);
+ input2 = _mm_shufflelo_epi16(input2, 0xd8);
+ input2 = _mm_shufflehi_epi16(input2, 0xd8);
+
+ input1 = _mm_unpackhi_epi32(input0, input0);
+ input0 = _mm_unpacklo_epi32(input0, input0);
+ input3 = _mm_unpackhi_epi32(input2, input2);
+ input2 = _mm_unpacklo_epi32(input2, input2);
+
+ // Stage 1
+ input0 = _mm_madd_epi16(input0, cst);
+ input1 = _mm_madd_epi16(input1, cst);
+ input2 = _mm_madd_epi16(input2, cst);
+ input3 = _mm_madd_epi16(input3, cst);
+
+ input0 = _mm_add_epi32(input0, rounding);
+ input1 = _mm_add_epi32(input1, rounding);
+ input2 = _mm_add_epi32(input2, rounding);
+ input3 = _mm_add_epi32(input3, rounding);
+
+ input0 = _mm_srai_epi32(input0, DCT_CONST_BITS);
+ input1 = _mm_srai_epi32(input1, DCT_CONST_BITS);
+ input2 = _mm_srai_epi32(input2, DCT_CONST_BITS);
+ input3 = _mm_srai_epi32(input3, DCT_CONST_BITS);
+
+ // Stage 2
+ input0 = _mm_packs_epi32(input0, input1);
+ input1 = _mm_packs_epi32(input2, input3);
+
+ // Transpose
+ input2 = _mm_unpacklo_epi16(input0, input1);
+ input3 = _mm_unpackhi_epi16(input0, input1);
+ input0 = _mm_unpacklo_epi32(input2, input3);
+ input1 = _mm_unpackhi_epi32(input2, input3);
+
+ // Switch column2, column 3, and then, we got:
+ // input2: column1, column 0; input3: column2, column 3.
+ input1 = _mm_shuffle_epi32(input1, 0x4e);  // 0x4e swaps the two 64-bit halves
+ input2 = _mm_add_epi16(input0, input1);
+ input3 = _mm_sub_epi16(input0, input1);
+
+ // Columns
+ // Construct i3, i1, i3, i1, i2, i0, i2, i0
+ input0 = _mm_unpacklo_epi32(input2, input2);
+ input1 = _mm_unpackhi_epi32(input2, input2);
+ input2 = _mm_unpackhi_epi32(input3, input3);
+ input3 = _mm_unpacklo_epi32(input3, input3);
+
+ // Stage 1
+ input0 = _mm_madd_epi16(input0, cst);
+ input1 = _mm_madd_epi16(input1, cst);
+ input2 = _mm_madd_epi16(input2, cst);
+ input3 = _mm_madd_epi16(input3, cst);
+
+ input0 = _mm_add_epi32(input0, rounding);
+ input1 = _mm_add_epi32(input1, rounding);
+ input2 = _mm_add_epi32(input2, rounding);
+ input3 = _mm_add_epi32(input3, rounding);
+
+ input0 = _mm_srai_epi32(input0, DCT_CONST_BITS);
+ input1 = _mm_srai_epi32(input1, DCT_CONST_BITS);
+ input2 = _mm_srai_epi32(input2, DCT_CONST_BITS);
+ input3 = _mm_srai_epi32(input3, DCT_CONST_BITS);
+
+ // Stage 2
+ input0 = _mm_packs_epi32(input0, input2);
+ input1 = _mm_packs_epi32(input1, input3);
+
+ // Transpose
+ input2 = _mm_unpacklo_epi16(input0, input1);
+ input3 = _mm_unpackhi_epi16(input0, input1);
+ input0 = _mm_unpacklo_epi32(input2, input3);
+ input1 = _mm_unpackhi_epi32(input2, input3);
+
+ // Switch column2, column 3, and then, we got:
+ // input2: column1, column 0; input3: column2, column 3.
+ input1 = _mm_shuffle_epi32(input1, 0x4e);  // 0x4e swaps the two 64-bit halves
+ input2 = _mm_add_epi16(input0, input1);
+ input3 = _mm_sub_epi16(input0, input1);
+
+ // Final round and shift
+ input2 = _mm_add_epi16(input2, eight);
+ input3 = _mm_add_epi16(input3, eight);
+
+ input2 = _mm_srai_epi16(input2, 4);
+ input3 = _mm_srai_epi16(input3, 4);
+
+ // Reconstruction and Store
+ {
+ __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest));
+ __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2));
+ d0 = _mm_unpacklo_epi32(d0,
+ _mm_cvtsi32_si128(*(const int *)(dest + stride)));  // d0 = dest rows 0, 1
+ d2 = _mm_unpacklo_epi32(
+ _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)), d2);  // d2 = dest rows 3, 2 (in that order)
+ d0 = _mm_unpacklo_epi8(d0, zero);
+ d2 = _mm_unpacklo_epi8(d2, zero);
+ d0 = _mm_add_epi16(d0, input2);
+ d2 = _mm_add_epi16(d2, input3);
+ d0 = _mm_packus_epi16(d0, d2);  // bytes now ordered row 0, row 1, row 3, row 2
+ // store row 0
+ *(int *)dest = _mm_cvtsi128_si32(d0);
+ // store row 1
+ d0 = _mm_srli_si128(d0, 4);
+ *(int *)(dest + stride) = _mm_cvtsi128_si32(d0);
+ // store row 3 (third dword of the packed result)
+ d0 = _mm_srli_si128(d0, 4);
+ *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0);
+ // store row 2 (fourth dword of the packed result)
+ d0 = _mm_srli_si128(d0, 4);
+ *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0);
+ }
+}
+
+void vp10_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {  // DC-only 4x4 case: only input[0] is read.
+ __m128i dc_value;
+ const __m128i zero = _mm_setzero_si128();  // referenced inside RECON_AND_STORE4X4
+ int a;
+
+ a = (int)dct_const_round_shift(input[0] * cospi_16_64);  // cospi_16_64 scaling applied twice,
+ a = (int)dct_const_round_shift(a * cospi_16_64);  // once per call
+ a = ROUND_POWER_OF_TWO(a, 4);
+
+ dc_value = _mm_set1_epi16(a);  // the same value is added to every pixel
+
+ RECON_AND_STORE4X4(dest + 0 * stride, dc_value);
+ RECON_AND_STORE4X4(dest + 1 * stride, dc_value);
+ RECON_AND_STORE4X4(dest + 2 * stride, dc_value);
+ RECON_AND_STORE4X4(dest + 3 * stride, dc_value);
+}
+
+static INLINE void transpose_4x4(__m128i *res) {  // In-place 4x4 transpose of 16-bit values, reading res[0] as rows 0-1 and res[1] as rows 2-3.
+ const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]);  // rows 0 and 2 interleaved
+ const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]);  // rows 1 and 3 interleaved
+
+ res[0] = _mm_unpacklo_epi16(tr0_0, tr0_1);  // input columns 0 and 1
+ res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1);  // input columns 2 and 3
+}
+
+void vp10_idct4_sse2(__m128i *in) {  // Transposes in[0..1], then applies the two stages below; result is left in in[0..1].
+ const __m128i k__cospi_p16_p16 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ __m128i u[8], v[8];  // only u[0..3] and v[0..3] are used
+
+ transpose_4x4(in);
+ // stage 1
+ u[0] = _mm_unpacklo_epi16(in[0], in[1]);  // interleave the low halves of in[0] and in[1]
+ u[1] = _mm_unpackhi_epi16(in[0], in[1]);  // interleave the high halves of in[0] and in[1]
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
+ v[1] = _mm_madd_epi16(u[0], k__cospi_p16_m16);
+ v[2] = _mm_madd_epi16(u[1], k__cospi_p24_m08);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p08_p24);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+
+ v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+
+ u[0] = _mm_packs_epi32(v[0], v[1]);  // signed saturation to 16 bits
+ u[1] = _mm_packs_epi32(v[3], v[2]);  // note the order: v[3] goes in the low half
+
+ // stage 2
+ in[0] = _mm_add_epi16(u[0], u[1]);
+ in[1] = _mm_sub_epi16(u[0], u[1]);
+ in[1] = _mm_shuffle_epi32(in[1], 0x4E);  // 0x4E swaps the two 64-bit halves
+}
+
+void vp10_iadst4_sse2(__m128i *in) {  // Transposes in[0..1], then applies the sinpi_*_9 multiply/round/pack sequence; result is left in in[0..1].
+ const __m128i k__sinpi_p01_p04 = pair_set_epi16(sinpi_1_9, sinpi_4_9);
+ const __m128i k__sinpi_p03_p02 = pair_set_epi16(sinpi_3_9, sinpi_2_9);
+ const __m128i k__sinpi_p02_m01 = pair_set_epi16(sinpi_2_9, -sinpi_1_9);
+ const __m128i k__sinpi_p03_m04 = pair_set_epi16(sinpi_3_9, -sinpi_4_9);
+ const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9);
+ const __m128i kZero = _mm_set1_epi16(0);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ __m128i u[8], v[8], in7;  // u[7] and v[6..7] are never used
+
+ transpose_4x4(in);
+ in7 = _mm_srli_si128(in[1], 8);  // high half of in[1] moved to the low half
+ in7 = _mm_add_epi16(in7, in[0]);
+ in7 = _mm_sub_epi16(in7, in[1]);  // low half: in[0].lo + in[1].hi - in[1].lo
+
+ u[0] = _mm_unpacklo_epi16(in[0], in[1]);
+ u[1] = _mm_unpackhi_epi16(in[0], in[1]);
+ u[2] = _mm_unpacklo_epi16(in7, kZero);  // paired with zero: madd below scales a single term
+ u[3] = _mm_unpackhi_epi16(in[0], kZero);
+
+ v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p04); // s0 + s3
+ v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p02); // s2 + s5
+ v[2] = _mm_madd_epi16(u[2], k__sinpi_p03_p03); // x2
+ v[3] = _mm_madd_epi16(u[0], k__sinpi_p02_m01); // s1 - s4
+ v[4] = _mm_madd_epi16(u[1], k__sinpi_p03_m04); // s2 - s6
+ v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03); // s2
+
+ u[0] = _mm_add_epi32(v[0], v[1]);
+ u[1] = _mm_add_epi32(v[3], v[4]);
+ u[2] = v[2];
+ u[3] = _mm_add_epi32(u[0], u[1]);
+ u[4] = _mm_slli_epi32(v[5], 2);  // 4 * v[5]
+ u[5] = _mm_add_epi32(u[3], v[5]);
+ u[6] = _mm_sub_epi32(u[5], u[4]);  // u[0] + u[1] - 3 * v[5]
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+
+ in[0] = _mm_packs_epi32(u[0], u[1]);  // signed saturation to 16 bits
+ in[1] = _mm_packs_epi32(u[2], u[3]);
+}
+
+#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
+ out2, out3, out4, out5, out6, out7) \
+ { /* 8x8 transpose of 16-bit values; outputs may alias inputs since all reads go through const temporaries */ \
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); /* 16-bit interleave */ \
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
+ const __m128i tr0_2 = _mm_unpackhi_epi16(in0, in1); \
+ const __m128i tr0_3 = _mm_unpackhi_epi16(in2, in3); \
+ const __m128i tr0_4 = _mm_unpacklo_epi16(in4, in5); \
+ const __m128i tr0_5 = _mm_unpacklo_epi16(in6, in7); \
+ const __m128i tr0_6 = _mm_unpackhi_epi16(in4, in5); \
+ const __m128i tr0_7 = _mm_unpackhi_epi16(in6, in7); \
+ \
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* 32-bit interleave */ \
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3); \
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); \
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); \
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); \
+ \
+ out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); /* 64-bit interleave */ \
+ out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \
+ out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \
+ out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \
+ out4 = _mm_unpacklo_epi64(tr1_1, tr1_5); \
+ out5 = _mm_unpackhi_epi64(tr1_1, tr1_5); \
+ out6 = _mm_unpacklo_epi64(tr1_3, tr1_7); \
+ out7 = _mm_unpackhi_epi64(tr1_3, tr1_7); \
+ }
+
+#define TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, out0, out1, out2, out3) \
+ { /* Three unpack stages (16/32/64-bit) turning tmp0..tmp3 into out0..out3. NOTE(review): the mixed hi/lo and swapped operand order imply a specific input layout - confirm at the call site. */ \
+ const __m128i tr0_0 = _mm_unpackhi_epi16(tmp0, tmp1); \
+ const __m128i tr0_1 = _mm_unpacklo_epi16(tmp1, tmp0); \
+ const __m128i tr0_4 = _mm_unpacklo_epi16(tmp2, tmp3); \
+ const __m128i tr0_5 = _mm_unpackhi_epi16(tmp3, tmp2); \
+ \
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \
+ \
+ out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \
+ out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \
+ out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \
+ out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \
+ }
+
+#define TRANSPOSE_8X8_10(in0, in1, in2, in3, out0, out1) \
+ { /* Uses only the low four 16-bit values of in0..in3 and transposes them into out0 and out1. */ \
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
+ out0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* element 0, then element 1, of in0..in3 */ \
+ out1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* element 2, then element 3, of in0..in3 */ \
+ }
+
+// Multiply-adds two lo/hi input pairs by four constant vectors, rounds, shifts by DCT_CONST_BITS and packs to 16 bits. Uses tmp0..tmp7 and `rounding` from the enclosing scope.
+#define MULTIPLICATION_AND_ADD(lo_0, hi_0, lo_1, hi_1, cst0, cst1, cst2, cst3, \
+ res0, res1, res2, res3) \
+ { \
+ tmp0 = _mm_madd_epi16(lo_0, cst0); \
+ tmp1 = _mm_madd_epi16(hi_0, cst0); \
+ tmp2 = _mm_madd_epi16(lo_0, cst1); \
+ tmp3 = _mm_madd_epi16(hi_0, cst1); \
+ tmp4 = _mm_madd_epi16(lo_1, cst2); \
+ tmp5 = _mm_madd_epi16(hi_1, cst2); \
+ tmp6 = _mm_madd_epi16(lo_1, cst3); \
+ tmp7 = _mm_madd_epi16(hi_1, cst3); \
+ \
+ tmp0 = _mm_add_epi32(tmp0, rounding); \
+ tmp1 = _mm_add_epi32(tmp1, rounding); \
+ tmp2 = _mm_add_epi32(tmp2, rounding); \
+ tmp3 = _mm_add_epi32(tmp3, rounding); \
+ tmp4 = _mm_add_epi32(tmp4, rounding); \
+ tmp5 = _mm_add_epi32(tmp5, rounding); \
+ tmp6 = _mm_add_epi32(tmp6, rounding); \
+ tmp7 = _mm_add_epi32(tmp7, rounding); \
+ \
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \
+ tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS); \
+ tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS); \
+ tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS); \
+ tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS); \
+ \
+ res0 = _mm_packs_epi32(tmp0, tmp1); /* lo_0/hi_0 with cst0 */ \
+ res1 = _mm_packs_epi32(tmp2, tmp3); /* lo_0/hi_0 with cst1 */ \
+ res2 = _mm_packs_epi32(tmp4, tmp5); /* lo_1/hi_1 with cst2 */ \
+ res3 = _mm_packs_epi32(tmp6, tmp7); /* lo_1/hi_1 with cst3 */ \
+ }
+
+#define MULTIPLICATION_AND_ADD_2(lo_0, hi_0, cst0, cst1, res0, res1) \
+ { /* Two-constant form of MULTIPLICATION_AND_ADD; uses tmp0..tmp3 and `rounding` from the enclosing scope. */ \
+ tmp0 = _mm_madd_epi16(lo_0, cst0); \
+ tmp1 = _mm_madd_epi16(hi_0, cst0); \
+ tmp2 = _mm_madd_epi16(lo_0, cst1); \
+ tmp3 = _mm_madd_epi16(hi_0, cst1); \
+ \
+ tmp0 = _mm_add_epi32(tmp0, rounding); \
+ tmp1 = _mm_add_epi32(tmp1, rounding); \
+ tmp2 = _mm_add_epi32(tmp2, rounding); \
+ tmp3 = _mm_add_epi32(tmp3, rounding); \
+ \
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \
+ \
+ res0 = _mm_packs_epi32(tmp0, tmp1); /* lo_0/hi_0 with cst0 */ \
+ res1 = _mm_packs_epi32(tmp2, tmp3); /* lo_0/hi_0 with cst1 */ \
+ }
+
+#define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, \
+ out4, out5, out6, out7) \
+ { /* Four-stage pass over in0..in7. Uses stg1_0..3, stg2_0..3, stp1_0..7, stp2_0..7, tmp0..tmp7 and `rounding` from the enclosing scope. */ \
+ /* Stage1 */ \
+ { \
+ const __m128i lo_17 = _mm_unpacklo_epi16(in1, in7); \
+ const __m128i hi_17 = _mm_unpackhi_epi16(in1, in7); \
+ const __m128i lo_35 = _mm_unpacklo_epi16(in3, in5); \
+ const __m128i hi_35 = _mm_unpackhi_epi16(in3, in5); \
+ \
+ MULTIPLICATION_AND_ADD(lo_17, hi_17, lo_35, hi_35, stg1_0, stg1_1, \
+ stg1_2, stg1_3, stp1_4, stp1_7, stp1_5, stp1_6) \
+ } \
+ \
+ /* Stage2 */ \
+ { \
+ const __m128i lo_04 = _mm_unpacklo_epi16(in0, in4); \
+ const __m128i hi_04 = _mm_unpackhi_epi16(in0, in4); \
+ const __m128i lo_26 = _mm_unpacklo_epi16(in2, in6); \
+ const __m128i hi_26 = _mm_unpackhi_epi16(in2, in6); \
+ \
+ MULTIPLICATION_AND_ADD(lo_04, hi_04, lo_26, hi_26, stg2_0, stg2_1, \
+ stg2_2, stg2_3, stp2_0, stp2_1, stp2_2, stp2_3) \
+ \
+ stp2_4 = _mm_adds_epi16(stp1_4, stp1_5); /* saturating 16-bit add/sub */ \
+ stp2_5 = _mm_subs_epi16(stp1_4, stp1_5); \
+ stp2_6 = _mm_subs_epi16(stp1_7, stp1_6); \
+ stp2_7 = _mm_adds_epi16(stp1_7, stp1_6); \
+ } \
+ \
+ /* Stage3 */ \
+ { \
+ const __m128i lo_56 = _mm_unpacklo_epi16(stp2_6, stp2_5); \
+ const __m128i hi_56 = _mm_unpackhi_epi16(stp2_6, stp2_5); \
+ \
+ stp1_0 = _mm_adds_epi16(stp2_0, stp2_3); \
+ stp1_1 = _mm_adds_epi16(stp2_1, stp2_2); \
+ stp1_2 = _mm_subs_epi16(stp2_1, stp2_2); \
+ stp1_3 = _mm_subs_epi16(stp2_0, stp2_3); \
+ \
+ tmp0 = _mm_madd_epi16(lo_56, stg2_1); /* stg2_1 yields stp1_5 */ \
+ tmp1 = _mm_madd_epi16(hi_56, stg2_1); \
+ tmp2 = _mm_madd_epi16(lo_56, stg2_0); /* stg2_0 yields stp1_6 */ \
+ tmp3 = _mm_madd_epi16(hi_56, stg2_0); \
+ \
+ tmp0 = _mm_add_epi32(tmp0, rounding); \
+ tmp1 = _mm_add_epi32(tmp1, rounding); \
+ tmp2 = _mm_add_epi32(tmp2, rounding); \
+ tmp3 = _mm_add_epi32(tmp3, rounding); \
+ \
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \
+ \
+ stp1_5 = _mm_packs_epi32(tmp0, tmp1); \
+ stp1_6 = _mm_packs_epi32(tmp2, tmp3); \
+ } \
+ \
+ /* Stage4 */ \
+ out0 = _mm_adds_epi16(stp1_0, stp2_7); \
+ out1 = _mm_adds_epi16(stp1_1, stp1_6); \
+ out2 = _mm_adds_epi16(stp1_2, stp1_5); \
+ out3 = _mm_adds_epi16(stp1_3, stp2_4); \
+ out4 = _mm_subs_epi16(stp1_3, stp2_4); \
+ out5 = _mm_subs_epi16(stp1_2, stp1_5); \
+ out6 = _mm_subs_epi16(stp1_1, stp1_6); \
+ out7 = _mm_subs_epi16(stp1_0, stp2_7); \
+ }
+
+void vp10_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {  // Full 8x8 inverse DCT of all 64 coefficients, reconstructed into dest.
+ const __m128i zero = _mm_setzero_si128();  // not used directly here; presumably referenced by RECON_AND_STORE (defined outside this view)
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i final_rounding = _mm_set1_epi16(1 << 4);  // rounding offset for the final >> 5
+ const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+
+ __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7;  // scratch used by IDCT8
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7;  // scratch used by IDCT8
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;  // scratch used by MULTIPLICATION_AND_ADD
+ int i;
+
+ // Load input data.
+ in0 = _mm_load_si128((const __m128i *)input);  // aligned loads: input must be 16-byte aligned
+ in1 = _mm_load_si128((const __m128i *)(input + 8 * 1));
+ in2 = _mm_load_si128((const __m128i *)(input + 8 * 2));
+ in3 = _mm_load_si128((const __m128i *)(input + 8 * 3));
+ in4 = _mm_load_si128((const __m128i *)(input + 8 * 4));
+ in5 = _mm_load_si128((const __m128i *)(input + 8 * 5));
+ in6 = _mm_load_si128((const __m128i *)(input + 8 * 6));
+ in7 = _mm_load_si128((const __m128i *)(input + 8 * 7));
+
+ // 2-D
+ for (i = 0; i < 2; i++) {  // two identical transpose + 1-D passes
+ // 8x8 Transpose is copied from vp10_fdct8x8_sse2()
+ TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+
+ // 4-stage 1D vp10_idct8x8
+ IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5,
+ in6, in7);
+ }
+
+ // Final rounding and shift
+ in0 = _mm_adds_epi16(in0, final_rounding);
+ in1 = _mm_adds_epi16(in1, final_rounding);
+ in2 = _mm_adds_epi16(in2, final_rounding);
+ in3 = _mm_adds_epi16(in3, final_rounding);
+ in4 = _mm_adds_epi16(in4, final_rounding);
+ in5 = _mm_adds_epi16(in5, final_rounding);
+ in6 = _mm_adds_epi16(in6, final_rounding);
+ in7 = _mm_adds_epi16(in7, final_rounding);
+
+ in0 = _mm_srai_epi16(in0, 5);
+ in1 = _mm_srai_epi16(in1, 5);
+ in2 = _mm_srai_epi16(in2, 5);
+ in3 = _mm_srai_epi16(in3, 5);
+ in4 = _mm_srai_epi16(in4, 5);
+ in5 = _mm_srai_epi16(in5, 5);
+ in6 = _mm_srai_epi16(in6, 5);
+ in7 = _mm_srai_epi16(in7, 5);
+
+ RECON_AND_STORE(dest + 0 * stride, in0);  // RECON_AND_STORE is defined outside this part of the file
+ RECON_AND_STORE(dest + 1 * stride, in1);
+ RECON_AND_STORE(dest + 2 * stride, in2);
+ RECON_AND_STORE(dest + 3 * stride, in3);
+ RECON_AND_STORE(dest + 4 * stride, in4);
+ RECON_AND_STORE(dest + 5 * stride, in5);
+ RECON_AND_STORE(dest + 6 * stride, in6);
+ RECON_AND_STORE(dest + 7 * stride, in7);
+}
+// DC-only 8x8 inverse transform: reads input[0] only, same value for all rows.
+void vp10_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+ __m128i dc_value;
+ const __m128i zero = _mm_setzero_si128(); // presumably read by RECON_AND_STORE
+ int a;
+ // Scale input[0] by cospi_16_64 twice, then apply ROUND_POWER_OF_TWO(a, 5).
+ a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+ a = (int)dct_const_round_shift(a * cospi_16_64);
+ a = ROUND_POWER_OF_TWO(a, 5);
+
+ dc_value = _mm_set1_epi16(a); // broadcast to all eight 16-bit lanes
+
+ RECON_AND_STORE(dest + 0 * stride, dc_value);
+ RECON_AND_STORE(dest + 1 * stride, dc_value);
+ RECON_AND_STORE(dest + 2 * stride, dc_value);
+ RECON_AND_STORE(dest + 3 * stride, dc_value);
+ RECON_AND_STORE(dest + 4 * stride, dc_value);
+ RECON_AND_STORE(dest + 5 * stride, dc_value);
+ RECON_AND_STORE(dest + 6 * stride, dc_value);
+ RECON_AND_STORE(dest + 7 * stride, dc_value);
+}
+// In-place 1-D pass on in[0..7]: 8x8 transpose, then the IDCT8 macro.
+void vp10_idct8_sse2(__m128i *in) {
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+
+ __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+
+ // 8x8 Transpose is copied from vp10_fdct8x8_sse2(); output goes to in0..in7
+ TRANSPOSE_8X8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], in0,
+ in1, in2, in3, in4, in5, in6, in7);
+
+ // 4-stage 1D vp10_idct8x8; results are written back into in[0..7]
+ IDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in[0], in[1], in[2], in[3],
+ in[4], in[5], in[6], in[7]);
+}
+// In-place 1-D pass on in[0..7]: 8x8 transpose, then a 3-stage 8-point iadst.
+void vp10_iadst8_sse2(__m128i *in) {
+ const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
+ const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+ const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
+ const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+ const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64);
+ const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+ const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64);
+ const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+ const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__const_0 = _mm_set1_epi16(0);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+
+ __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15;
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15;
+ __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
+ __m128i s0, s1, s2, s3, s4, s5, s6, s7;
+ __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+
+ // transpose
+ array_transpose_8x8(in, in);
+
+ // reorder rows into the operand order used by the stage-1 butterflies
+ in0 = in[7];
+ in1 = in[0];
+ in2 = in[5];
+ in3 = in[2];
+ in4 = in[3];
+ in5 = in[4];
+ in6 = in[1];
+ in7 = in[6];
+
+ // column transformation
+ // stage 1
+ // interleave and multiply/add into 32-bit integer
+ s0 = _mm_unpacklo_epi16(in0, in1);
+ s1 = _mm_unpackhi_epi16(in0, in1);
+ s2 = _mm_unpacklo_epi16(in2, in3);
+ s3 = _mm_unpackhi_epi16(in2, in3);
+ s4 = _mm_unpacklo_epi16(in4, in5);
+ s5 = _mm_unpackhi_epi16(in4, in5);
+ s6 = _mm_unpacklo_epi16(in6, in7);
+ s7 = _mm_unpackhi_epi16(in6, in7);
+
+ u0 = _mm_madd_epi16(s0, k__cospi_p02_p30);
+ u1 = _mm_madd_epi16(s1, k__cospi_p02_p30);
+ u2 = _mm_madd_epi16(s0, k__cospi_p30_m02);
+ u3 = _mm_madd_epi16(s1, k__cospi_p30_m02);
+ u4 = _mm_madd_epi16(s2, k__cospi_p10_p22);
+ u5 = _mm_madd_epi16(s3, k__cospi_p10_p22);
+ u6 = _mm_madd_epi16(s2, k__cospi_p22_m10);
+ u7 = _mm_madd_epi16(s3, k__cospi_p22_m10);
+ u8 = _mm_madd_epi16(s4, k__cospi_p18_p14);
+ u9 = _mm_madd_epi16(s5, k__cospi_p18_p14);
+ u10 = _mm_madd_epi16(s4, k__cospi_p14_m18);
+ u11 = _mm_madd_epi16(s5, k__cospi_p14_m18);
+ u12 = _mm_madd_epi16(s6, k__cospi_p26_p06);
+ u13 = _mm_madd_epi16(s7, k__cospi_p26_p06);
+ u14 = _mm_madd_epi16(s6, k__cospi_p06_m26);
+ u15 = _mm_madd_epi16(s7, k__cospi_p06_m26);
+
+ // addition / subtraction of the 32-bit products
+ w0 = _mm_add_epi32(u0, u8);
+ w1 = _mm_add_epi32(u1, u9);
+ w2 = _mm_add_epi32(u2, u10);
+ w3 = _mm_add_epi32(u3, u11);
+ w4 = _mm_add_epi32(u4, u12);
+ w5 = _mm_add_epi32(u5, u13);
+ w6 = _mm_add_epi32(u6, u14);
+ w7 = _mm_add_epi32(u7, u15);
+ w8 = _mm_sub_epi32(u0, u8);
+ w9 = _mm_sub_epi32(u1, u9);
+ w10 = _mm_sub_epi32(u2, u10);
+ w11 = _mm_sub_epi32(u3, u11);
+ w12 = _mm_sub_epi32(u4, u12);
+ w13 = _mm_sub_epi32(u5, u13);
+ w14 = _mm_sub_epi32(u6, u14);
+ w15 = _mm_sub_epi32(u7, u15);
+
+ // rounding: add DCT_CONST_ROUNDING, then arithmetic shift by DCT_CONST_BITS
+ v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING);
+ v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING);
+ v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING);
+ v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING);
+ v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING);
+ v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING);
+ v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING);
+ v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING);
+ v8 = _mm_add_epi32(w8, k__DCT_CONST_ROUNDING);
+ v9 = _mm_add_epi32(w9, k__DCT_CONST_ROUNDING);
+ v10 = _mm_add_epi32(w10, k__DCT_CONST_ROUNDING);
+ v11 = _mm_add_epi32(w11, k__DCT_CONST_ROUNDING);
+ v12 = _mm_add_epi32(w12, k__DCT_CONST_ROUNDING);
+ v13 = _mm_add_epi32(w13, k__DCT_CONST_ROUNDING);
+ v14 = _mm_add_epi32(w14, k__DCT_CONST_ROUNDING);
+ v15 = _mm_add_epi32(w15, k__DCT_CONST_ROUNDING);
+
+ u0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ u1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ u2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ u3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ u4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ u5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ u6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ u7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ u8 = _mm_srai_epi32(v8, DCT_CONST_BITS);
+ u9 = _mm_srai_epi32(v9, DCT_CONST_BITS);
+ u10 = _mm_srai_epi32(v10, DCT_CONST_BITS);
+ u11 = _mm_srai_epi32(v11, DCT_CONST_BITS);
+ u12 = _mm_srai_epi32(v12, DCT_CONST_BITS);
+ u13 = _mm_srai_epi32(v13, DCT_CONST_BITS);
+ u14 = _mm_srai_epi32(v14, DCT_CONST_BITS);
+ u15 = _mm_srai_epi32(v15, DCT_CONST_BITS);
+
+ // back to 16-bit (signed-saturating pack) and pack 8 integers into __m128i
+ in[0] = _mm_packs_epi32(u0, u1);
+ in[1] = _mm_packs_epi32(u2, u3);
+ in[2] = _mm_packs_epi32(u4, u5);
+ in[3] = _mm_packs_epi32(u6, u7);
+ in[4] = _mm_packs_epi32(u8, u9);
+ in[5] = _mm_packs_epi32(u10, u11);
+ in[6] = _mm_packs_epi32(u12, u13);
+ in[7] = _mm_packs_epi32(u14, u15);
+
+ // stage 2 (the 16-bit add/sub below wrap on overflow; they do not saturate)
+ s0 = _mm_add_epi16(in[0], in[2]);
+ s1 = _mm_add_epi16(in[1], in[3]);
+ s2 = _mm_sub_epi16(in[0], in[2]);
+ s3 = _mm_sub_epi16(in[1], in[3]);
+ u0 = _mm_unpacklo_epi16(in[4], in[5]);
+ u1 = _mm_unpackhi_epi16(in[4], in[5]);
+ u2 = _mm_unpacklo_epi16(in[6], in[7]);
+ u3 = _mm_unpackhi_epi16(in[6], in[7]);
+
+ v0 = _mm_madd_epi16(u0, k__cospi_p08_p24);
+ v1 = _mm_madd_epi16(u1, k__cospi_p08_p24);
+ v2 = _mm_madd_epi16(u0, k__cospi_p24_m08);
+ v3 = _mm_madd_epi16(u1, k__cospi_p24_m08);
+ v4 = _mm_madd_epi16(u2, k__cospi_m24_p08);
+ v5 = _mm_madd_epi16(u3, k__cospi_m24_p08);
+ v6 = _mm_madd_epi16(u2, k__cospi_p08_p24);
+ v7 = _mm_madd_epi16(u3, k__cospi_p08_p24);
+
+ w0 = _mm_add_epi32(v0, v4);
+ w1 = _mm_add_epi32(v1, v5);
+ w2 = _mm_add_epi32(v2, v6);
+ w3 = _mm_add_epi32(v3, v7);
+ w4 = _mm_sub_epi32(v0, v4);
+ w5 = _mm_sub_epi32(v1, v5);
+ w6 = _mm_sub_epi32(v2, v6);
+ w7 = _mm_sub_epi32(v3, v7);
+
+ v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING);
+ v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING);
+ v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING);
+ v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING);
+ v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING);
+ v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING);
+ v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING);
+ v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING);
+
+ u0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ u1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ u2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ u3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ u4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ u5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ u6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ u7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+
+ // back to 16-bit integers
+ s4 = _mm_packs_epi32(u0, u1);
+ s5 = _mm_packs_epi32(u2, u3);
+ s6 = _mm_packs_epi32(u4, u5);
+ s7 = _mm_packs_epi32(u6, u7);
+
+ // stage 3
+ u0 = _mm_unpacklo_epi16(s2, s3);
+ u1 = _mm_unpackhi_epi16(s2, s3);
+ u2 = _mm_unpacklo_epi16(s6, s7);
+ u3 = _mm_unpackhi_epi16(s6, s7);
+
+ v0 = _mm_madd_epi16(u0, k__cospi_p16_p16);
+ v1 = _mm_madd_epi16(u1, k__cospi_p16_p16);
+ v2 = _mm_madd_epi16(u0, k__cospi_p16_m16);
+ v3 = _mm_madd_epi16(u1, k__cospi_p16_m16);
+ v4 = _mm_madd_epi16(u2, k__cospi_p16_p16);
+ v5 = _mm_madd_epi16(u3, k__cospi_p16_p16);
+ v6 = _mm_madd_epi16(u2, k__cospi_p16_m16);
+ v7 = _mm_madd_epi16(u3, k__cospi_p16_m16);
+
+ u0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING);
+ u1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING);
+ u2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING);
+ u3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING);
+ u4 = _mm_add_epi32(v4, k__DCT_CONST_ROUNDING);
+ u5 = _mm_add_epi32(v5, k__DCT_CONST_ROUNDING);
+ u6 = _mm_add_epi32(v6, k__DCT_CONST_ROUNDING);
+ u7 = _mm_add_epi32(v7, k__DCT_CONST_ROUNDING);
+
+ v0 = _mm_srai_epi32(u0, DCT_CONST_BITS);
+ v1 = _mm_srai_epi32(u1, DCT_CONST_BITS);
+ v2 = _mm_srai_epi32(u2, DCT_CONST_BITS);
+ v3 = _mm_srai_epi32(u3, DCT_CONST_BITS);
+ v4 = _mm_srai_epi32(u4, DCT_CONST_BITS);
+ v5 = _mm_srai_epi32(u5, DCT_CONST_BITS);
+ v6 = _mm_srai_epi32(u6, DCT_CONST_BITS);
+ v7 = _mm_srai_epi32(u7, DCT_CONST_BITS);
+
+ s2 = _mm_packs_epi32(v0, v1);
+ s3 = _mm_packs_epi32(v2, v3);
+ s6 = _mm_packs_epi32(v4, v5);
+ s7 = _mm_packs_epi32(v6, v7);
+ // store results; outputs 1, 3, 5 and 7 are negated (computed as 0 - x)
+ in[0] = s0;
+ in[1] = _mm_sub_epi16(k__const_0, s4);
+ in[2] = s6;
+ in[3] = _mm_sub_epi16(k__const_0, s2);
+ in[4] = s3;
+ in[5] = _mm_sub_epi16(k__const_0, s7);
+ in[6] = s5;
+ in[7] = _mm_sub_epi16(k__const_0, s1);
+}
+// 8x8 inverse DCT variant that loads only the first 4 rows of |input|.
+void vp10_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i final_rounding = _mm_set1_epi16(1 << 4);
+ const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i stg3_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+ __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ // NOTE(review): "_12" presumably means at most 12 nonzero coeffs - confirm.
+ // Rows. Load 4-row input data.
+ in0 = _mm_load_si128((const __m128i *)input);
+ in1 = _mm_load_si128((const __m128i *)(input + 8 * 1));
+ in2 = _mm_load_si128((const __m128i *)(input + 8 * 2));
+ in3 = _mm_load_si128((const __m128i *)(input + 8 * 3));
+
+ // 8x4 Transpose
+ TRANSPOSE_8X8_10(in0, in1, in2, in3, in0, in1);
+ // Stage1
+ {
+ const __m128i lo_17 = _mm_unpackhi_epi16(in0, zero);
+ const __m128i lo_35 = _mm_unpackhi_epi16(in1, zero);
+
+ tmp0 = _mm_madd_epi16(lo_17, stg1_0);
+ tmp2 = _mm_madd_epi16(lo_17, stg1_1);
+ tmp4 = _mm_madd_epi16(lo_35, stg1_2);
+ tmp6 = _mm_madd_epi16(lo_35, stg1_3);
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp4 = _mm_add_epi32(tmp4, rounding);
+ tmp6 = _mm_add_epi32(tmp6, rounding);
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+ tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);
+
+ stp1_4 = _mm_packs_epi32(tmp0, tmp2);
+ stp1_5 = _mm_packs_epi32(tmp4, tmp6);
+ }
+
+ // Stage2
+ {
+ const __m128i lo_04 = _mm_unpacklo_epi16(in0, zero);
+ const __m128i lo_26 = _mm_unpacklo_epi16(in1, zero);
+
+ tmp0 = _mm_madd_epi16(lo_04, stg2_0);
+ tmp2 = _mm_madd_epi16(lo_04, stg2_1);
+ tmp4 = _mm_madd_epi16(lo_26, stg2_2);
+ tmp6 = _mm_madd_epi16(lo_26, stg2_3);
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp4 = _mm_add_epi32(tmp4, rounding);
+ tmp6 = _mm_add_epi32(tmp6, rounding);
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+ tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);
+
+ stp2_0 = _mm_packs_epi32(tmp0, tmp2);
+ stp2_2 = _mm_packs_epi32(tmp6, tmp4);
+
+ tmp0 = _mm_adds_epi16(stp1_4, stp1_5);
+ tmp1 = _mm_subs_epi16(stp1_4, stp1_5);
+
+ stp2_4 = tmp0;
+ stp2_5 = _mm_unpacklo_epi64(tmp1, zero);
+ stp2_6 = _mm_unpackhi_epi64(tmp1, zero);
+ }
+
+ // Stage3
+ {
+ const __m128i lo_56 = _mm_unpacklo_epi16(stp2_5, stp2_6);
+
+ tmp4 = _mm_adds_epi16(stp2_0, stp2_2);
+ tmp6 = _mm_subs_epi16(stp2_0, stp2_2);
+
+ stp1_2 = _mm_unpackhi_epi64(tmp6, tmp4);
+ stp1_3 = _mm_unpacklo_epi64(tmp6, tmp4);
+
+ tmp0 = _mm_madd_epi16(lo_56, stg3_0);
+ tmp2 = _mm_madd_epi16(lo_56, stg2_0); // stg3_1 = stg2_0
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+
+ stp1_5 = _mm_packs_epi32(tmp0, tmp2);
+ }
+
+ // Stage4
+ tmp0 = _mm_adds_epi16(stp1_3, stp2_4);
+ tmp1 = _mm_adds_epi16(stp1_2, stp1_5);
+ tmp2 = _mm_subs_epi16(stp1_3, stp2_4);
+ tmp3 = _mm_subs_epi16(stp1_2, stp1_5);
+
+ TRANSPOSE_4X8_10(tmp0, tmp1, tmp2, tmp3, in0, in1, in2, in3)
+ // Second pass: the last four IDCT8 inputs are passed as zero.
+ IDCT8(in0, in1, in2, in3, zero, zero, zero, zero, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ // Final rounding and shift: saturating add of 16, then arithmetic >> 5.
+ in0 = _mm_adds_epi16(in0, final_rounding);
+ in1 = _mm_adds_epi16(in1, final_rounding);
+ in2 = _mm_adds_epi16(in2, final_rounding);
+ in3 = _mm_adds_epi16(in3, final_rounding);
+ in4 = _mm_adds_epi16(in4, final_rounding);
+ in5 = _mm_adds_epi16(in5, final_rounding);
+ in6 = _mm_adds_epi16(in6, final_rounding);
+ in7 = _mm_adds_epi16(in7, final_rounding);
+
+ in0 = _mm_srai_epi16(in0, 5);
+ in1 = _mm_srai_epi16(in1, 5);
+ in2 = _mm_srai_epi16(in2, 5);
+ in3 = _mm_srai_epi16(in3, 5);
+ in4 = _mm_srai_epi16(in4, 5);
+ in5 = _mm_srai_epi16(in5, 5);
+ in6 = _mm_srai_epi16(in6, 5);
+ in7 = _mm_srai_epi16(in7, 5);
+
+ RECON_AND_STORE(dest + 0 * stride, in0);
+ RECON_AND_STORE(dest + 1 * stride, in1);
+ RECON_AND_STORE(dest + 2 * stride, in2);
+ RECON_AND_STORE(dest + 3 * stride, in3);
+ RECON_AND_STORE(dest + 4 * stride, in4);
+ RECON_AND_STORE(dest + 5 * stride, in5);
+ RECON_AND_STORE(dest + 6 * stride, in6);
+ RECON_AND_STORE(dest + 7 * stride, in7);
+}
+// Stages 2-6 of a 1-D 16-point IDCT over in[0..15]; uses the expander's locals.
+#define IDCT16 \
+ /* Stage2 */ \
+ { \
+ const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], in[15]); \
+ const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], in[15]); \
+ const __m128i lo_9_7 = _mm_unpacklo_epi16(in[9], in[7]); \
+ const __m128i hi_9_7 = _mm_unpackhi_epi16(in[9], in[7]); \
+ const __m128i lo_5_11 = _mm_unpacklo_epi16(in[5], in[11]); \
+ const __m128i hi_5_11 = _mm_unpackhi_epi16(in[5], in[11]); \
+ const __m128i lo_13_3 = _mm_unpacklo_epi16(in[13], in[3]); \
+ const __m128i hi_13_3 = _mm_unpackhi_epi16(in[13], in[3]); \
+ \
+ MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_9_7, hi_9_7, stg2_0, stg2_1, \
+ stg2_2, stg2_3, stp2_8, stp2_15, stp2_9, stp2_14) \
+ \
+ MULTIPLICATION_AND_ADD(lo_5_11, hi_5_11, lo_13_3, hi_13_3, stg2_4, stg2_5, \
+ stg2_6, stg2_7, stp2_10, stp2_13, stp2_11, stp2_12) \
+ } \
+ \
+ /* Stage3 */ \
+ { \
+ const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], in[14]); \
+ const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], in[14]); \
+ const __m128i lo_10_6 = _mm_unpacklo_epi16(in[10], in[6]); \
+ const __m128i hi_10_6 = _mm_unpackhi_epi16(in[10], in[6]); \
+ \
+ MULTIPLICATION_AND_ADD(lo_2_14, hi_2_14, lo_10_6, hi_10_6, stg3_0, stg3_1, \
+ stg3_2, stg3_3, stp1_4, stp1_7, stp1_5, stp1_6) \
+ \
+ stp1_8_0 = _mm_add_epi16(stp2_8, stp2_9); \
+ stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \
+ stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \
+ stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \
+ \
+ stp1_12_0 = _mm_add_epi16(stp2_12, stp2_13); \
+ stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \
+ stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \
+ stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \
+ } \
+ \
+ /* Stage4 */ \
+ { \
+ const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], in[8]); \
+ const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], in[8]); \
+ const __m128i lo_4_12 = _mm_unpacklo_epi16(in[4], in[12]); \
+ const __m128i hi_4_12 = _mm_unpackhi_epi16(in[4], in[12]); \
+ \
+ const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \
+ const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \
+ \
+ MULTIPLICATION_AND_ADD(lo_0_8, hi_0_8, lo_4_12, hi_4_12, stg4_0, stg4_1, \
+ stg4_2, stg4_3, stp2_0, stp2_1, stp2_2, stp2_3) \
+ \
+ stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \
+ stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \
+ stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \
+ stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \
+ \
+ MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \
+ stg4_5, stg4_6, stg4_7, stp2_9, stp2_14, stp2_10, \
+ stp2_13) \
+ } \
+ \
+ /* Stage5 */ \
+ { \
+ const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \
+ const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \
+ \
+ stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \
+ stp1_1 = _mm_add_epi16(stp2_1, stp2_2); \
+ stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \
+ stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \
+ \
+ tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \
+ tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \
+ tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \
+ tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \
+ \
+ tmp0 = _mm_add_epi32(tmp0, rounding); \
+ tmp1 = _mm_add_epi32(tmp1, rounding); \
+ tmp2 = _mm_add_epi32(tmp2, rounding); \
+ tmp3 = _mm_add_epi32(tmp3, rounding); \
+ \
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \
+ \
+ stp1_5 = _mm_packs_epi32(tmp0, tmp1); \
+ stp1_6 = _mm_packs_epi32(tmp2, tmp3); \
+ /* stp1_11 and stp1_15 are read first, then overwritten: keep this order */ \
+ stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \
+ stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \
+ stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \
+ stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \
+ \
+ stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \
+ stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \
+ stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \
+ stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \
+ } \
+ \
+ /* Stage6 */ \
+ { \
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \
+ const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \
+ const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \
+ /* stp2_7 and stp2_4 are read below before they are overwritten */ \
+ stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \
+ stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \
+ stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \
+ stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \
+ stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \
+ stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \
+ stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \
+ stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \
+ \
+ MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, stg6_0, \
+ stg4_0, stg6_0, stg4_0, stp2_10, stp2_13, stp2_11, \
+ stp2_12) \
+ }
+// IDCT16 variant that reads only in[0..3]; zero stands in for the other inputs.
+#define IDCT16_10 \
+ /* Stage2 */ \
+ { \
+ const __m128i lo_1_15 = _mm_unpacklo_epi16(in[1], zero); \
+ const __m128i hi_1_15 = _mm_unpackhi_epi16(in[1], zero); \
+ const __m128i lo_13_3 = _mm_unpacklo_epi16(zero, in[3]); \
+ const __m128i hi_13_3 = _mm_unpackhi_epi16(zero, in[3]); \
+ \
+ MULTIPLICATION_AND_ADD(lo_1_15, hi_1_15, lo_13_3, hi_13_3, stg2_0, stg2_1, \
+ stg2_6, stg2_7, stp1_8_0, stp1_15, stp1_11, \
+ stp1_12_0) \
+ } \
+ \
+ /* Stage3 */ \
+ { \
+ const __m128i lo_2_14 = _mm_unpacklo_epi16(in[2], zero); \
+ const __m128i hi_2_14 = _mm_unpackhi_epi16(in[2], zero); \
+ \
+ MULTIPLICATION_AND_ADD_2(lo_2_14, hi_2_14, stg3_0, stg3_1, stp2_4, stp2_7) \
+ \
+ stp1_9 = stp1_8_0; \
+ stp1_10 = stp1_11; \
+ \
+ stp1_13 = stp1_12_0; \
+ stp1_14 = stp1_15; \
+ } \
+ \
+ /* Stage4 */ \
+ { \
+ const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero); \
+ const __m128i hi_0_8 = _mm_unpackhi_epi16(in[0], zero); \
+ \
+ const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \
+ const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \
+ \
+ MULTIPLICATION_AND_ADD_2(lo_0_8, hi_0_8, stg4_0, stg4_1, stp1_0, stp1_1) \
+ stp2_5 = stp2_4; \
+ stp2_6 = stp2_7; \
+ \
+ MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \
+ stg4_5, stg4_6, stg4_7, stp2_9, stp2_14, stp2_10, \
+ stp2_13) \
+ } \
+ \
+ /* Stage5 */ \
+ { \
+ const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \
+ const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \
+ \
+ stp1_2 = stp1_1; \
+ stp1_3 = stp1_0; \
+ \
+ tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \
+ tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \
+ tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \
+ tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \
+ \
+ tmp0 = _mm_add_epi32(tmp0, rounding); \
+ tmp1 = _mm_add_epi32(tmp1, rounding); \
+ tmp2 = _mm_add_epi32(tmp2, rounding); \
+ tmp3 = _mm_add_epi32(tmp3, rounding); \
+ \
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \
+ \
+ stp1_5 = _mm_packs_epi32(tmp0, tmp1); \
+ stp1_6 = _mm_packs_epi32(tmp2, tmp3); \
+ /* stp1_11 and stp1_15 are read first, then overwritten: keep this order */ \
+ stp1_8 = _mm_add_epi16(stp1_8_0, stp1_11); \
+ stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \
+ stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \
+ stp1_11 = _mm_sub_epi16(stp1_8_0, stp1_11); \
+ \
+ stp1_12 = _mm_sub_epi16(stp1_15, stp1_12_0); \
+ stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \
+ stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \
+ stp1_15 = _mm_add_epi16(stp1_15, stp1_12_0); \
+ } \
+ \
+ /* Stage6 */ \
+ { \
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \
+ const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \
+ const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \
+ /* stp2_7 and stp2_4 are read below before they are overwritten */ \
+ stp2_0 = _mm_add_epi16(stp1_0, stp2_7); \
+ stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \
+ stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \
+ stp2_3 = _mm_add_epi16(stp1_3, stp2_4); \
+ stp2_4 = _mm_sub_epi16(stp1_3, stp2_4); \
+ stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \
+ stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \
+ stp2_7 = _mm_sub_epi16(stp1_0, stp2_7); \
+ \
+ MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, stg6_0, \
+ stg4_0, stg6_0, stg4_0, stp2_10, stp2_13, stp2_11, \
+ stp2_12) \
+ }
+// Full 16x16 inverse DCT of |input| (256 coefficients); output goes to |dest|.
+void vp10_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
+ int stride) {
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i final_rounding = _mm_set1_epi16(1 << 5);
+ const __m128i zero = _mm_setzero_si128();
+
+ const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64);
+ const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+ const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64);
+ const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+ const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64);
+ const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+ const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64);
+
+ const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+ const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64);
+
+ const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+ const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+
+ const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+ __m128i in[16], l[16], r[16], *curr1;
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7,
+ stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15,
+ stp1_8_0, stp1_12_0;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+ stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int i;
+ // Pass 1: two batches of 8 input rows; results go to l[] first, then r[].
+ curr1 = l;
+ for (i = 0; i < 2; i++) {
+ // 1-D vp10_idct
+
+ // Load 8 rows of 16: left halves into in[0..7], right halves into in[8..15].
+ in[0] = _mm_load_si128((const __m128i *)input);
+ in[8] = _mm_load_si128((const __m128i *)(input + 8 * 1));
+ in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2));
+ in[9] = _mm_load_si128((const __m128i *)(input + 8 * 3));
+ in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4));
+ in[10] = _mm_load_si128((const __m128i *)(input + 8 * 5));
+ in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6));
+ in[11] = _mm_load_si128((const __m128i *)(input + 8 * 7));
+ in[4] = _mm_load_si128((const __m128i *)(input + 8 * 8));
+ in[12] = _mm_load_si128((const __m128i *)(input + 8 * 9));
+ in[5] = _mm_load_si128((const __m128i *)(input + 8 * 10));
+ in[13] = _mm_load_si128((const __m128i *)(input + 8 * 11));
+ in[6] = _mm_load_si128((const __m128i *)(input + 8 * 12));
+ in[14] = _mm_load_si128((const __m128i *)(input + 8 * 13));
+ in[7] = _mm_load_si128((const __m128i *)(input + 8 * 14));
+ in[15] = _mm_load_si128((const __m128i *)(input + 8 * 15));
+
+ array_transpose_8x8(in, in);
+ array_transpose_8x8(in + 8, in + 8);
+
+ IDCT16
+
+ // Stage7
+ curr1[0] = _mm_add_epi16(stp2_0, stp1_15);
+ curr1[1] = _mm_add_epi16(stp2_1, stp1_14);
+ curr1[2] = _mm_add_epi16(stp2_2, stp2_13);
+ curr1[3] = _mm_add_epi16(stp2_3, stp2_12);
+ curr1[4] = _mm_add_epi16(stp2_4, stp2_11);
+ curr1[5] = _mm_add_epi16(stp2_5, stp2_10);
+ curr1[6] = _mm_add_epi16(stp2_6, stp1_9);
+ curr1[7] = _mm_add_epi16(stp2_7, stp1_8);
+ curr1[8] = _mm_sub_epi16(stp2_7, stp1_8);
+ curr1[9] = _mm_sub_epi16(stp2_6, stp1_9);
+ curr1[10] = _mm_sub_epi16(stp2_5, stp2_10);
+ curr1[11] = _mm_sub_epi16(stp2_4, stp2_11);
+ curr1[12] = _mm_sub_epi16(stp2_3, stp2_12);
+ curr1[13] = _mm_sub_epi16(stp2_2, stp2_13);
+ curr1[14] = _mm_sub_epi16(stp2_1, stp1_14);
+ curr1[15] = _mm_sub_epi16(stp2_0, stp1_15);
+
+ curr1 = r;
+ input += 128; // advance to the next 8 rows (8 * 16 coefficients)
+ }
+ for (i = 0; i < 2; i++) {
+ int j;
+ // Pass 2 (1-D vp10_idct): handles 8 output columns per iteration
+ array_transpose_8x8(l + i * 8, in);
+ array_transpose_8x8(r + i * 8, in + 8);
+
+ IDCT16
+
+ // 2-D
+ in[0] = _mm_add_epi16(stp2_0, stp1_15);
+ in[1] = _mm_add_epi16(stp2_1, stp1_14);
+ in[2] = _mm_add_epi16(stp2_2, stp2_13);
+ in[3] = _mm_add_epi16(stp2_3, stp2_12);
+ in[4] = _mm_add_epi16(stp2_4, stp2_11);
+ in[5] = _mm_add_epi16(stp2_5, stp2_10);
+ in[6] = _mm_add_epi16(stp2_6, stp1_9);
+ in[7] = _mm_add_epi16(stp2_7, stp1_8);
+ in[8] = _mm_sub_epi16(stp2_7, stp1_8);
+ in[9] = _mm_sub_epi16(stp2_6, stp1_9);
+ in[10] = _mm_sub_epi16(stp2_5, stp2_10);
+ in[11] = _mm_sub_epi16(stp2_4, stp2_11);
+ in[12] = _mm_sub_epi16(stp2_3, stp2_12);
+ in[13] = _mm_sub_epi16(stp2_2, stp2_13);
+ in[14] = _mm_sub_epi16(stp2_1, stp1_14);
+ in[15] = _mm_sub_epi16(stp2_0, stp1_15);
+
+ for (j = 0; j < 16; ++j) {
+ // Final rounding and shift: saturating add of 32, then arithmetic >> 6
+ in[j] = _mm_adds_epi16(in[j], final_rounding);
+ in[j] = _mm_srai_epi16(in[j], 6);
+ RECON_AND_STORE(dest + j * stride, in[j]);
+ }
+
+ dest += 8;
+ }
+}
+// DC-only 16x16 inverse transform: reads input[0] only, same value everywhere.
+void vp10_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest,
+ int stride) {
+ __m128i dc_value;
+ const __m128i zero = _mm_setzero_si128(); // presumably read by RECON_AND_STORE
+ int a, i;
+ // Scale input[0] by cospi_16_64 twice, then apply ROUND_POWER_OF_TWO(a, 6).
+ a = (int)dct_const_round_shift(input[0] * cospi_16_64);
+ a = (int)dct_const_round_shift(a * cospi_16_64);
+ a = ROUND_POWER_OF_TWO(a, 6);
+
+ dc_value = _mm_set1_epi16(a); // broadcast to all eight 16-bit lanes
+ // Two 8-wide halves of 16 rows each; dest advances by 8 between them.
+ for (i = 0; i < 2; ++i) {
+ RECON_AND_STORE(dest + 0 * stride, dc_value);
+ RECON_AND_STORE(dest + 1 * stride, dc_value);
+ RECON_AND_STORE(dest + 2 * stride, dc_value);
+ RECON_AND_STORE(dest + 3 * stride, dc_value);
+ RECON_AND_STORE(dest + 4 * stride, dc_value);
+ RECON_AND_STORE(dest + 5 * stride, dc_value);
+ RECON_AND_STORE(dest + 6 * stride, dc_value);
+ RECON_AND_STORE(dest + 7 * stride, dc_value);
+ RECON_AND_STORE(dest + 8 * stride, dc_value);
+ RECON_AND_STORE(dest + 9 * stride, dc_value);
+ RECON_AND_STORE(dest + 10 * stride, dc_value);
+ RECON_AND_STORE(dest + 11 * stride, dc_value);
+ RECON_AND_STORE(dest + 12 * stride, dc_value);
+ RECON_AND_STORE(dest + 13 * stride, dc_value);
+ RECON_AND_STORE(dest + 14 * stride, dc_value);
+ RECON_AND_STORE(dest + 15 * stride, dc_value);
+ dest += 8;
+ }
+}
+
+static void vp10_iadst16_8col(__m128i *in) {
+ // perform 16x16 1-D ADST for 8 columns, in place (in[0..15] overwritten)
+ __m128i s[16], x[16], u[32], v[32];  // k__cospi_*: pNN is +cospi_NN_64, mNN is -cospi_NN_64
+ const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
+ const __m128i k__cospi_p31_m01 = pair_set_epi16(cospi_31_64, -cospi_1_64);
+ const __m128i k__cospi_p05_p27 = pair_set_epi16(cospi_5_64, cospi_27_64);
+ const __m128i k__cospi_p27_m05 = pair_set_epi16(cospi_27_64, -cospi_5_64);
+ const __m128i k__cospi_p09_p23 = pair_set_epi16(cospi_9_64, cospi_23_64);
+ const __m128i k__cospi_p23_m09 = pair_set_epi16(cospi_23_64, -cospi_9_64);
+ const __m128i k__cospi_p13_p19 = pair_set_epi16(cospi_13_64, cospi_19_64);
+ const __m128i k__cospi_p19_m13 = pair_set_epi16(cospi_19_64, -cospi_13_64);
+ const __m128i k__cospi_p17_p15 = pair_set_epi16(cospi_17_64, cospi_15_64);
+ const __m128i k__cospi_p15_m17 = pair_set_epi16(cospi_15_64, -cospi_17_64);
+ const __m128i k__cospi_p21_p11 = pair_set_epi16(cospi_21_64, cospi_11_64);
+ const __m128i k__cospi_p11_m21 = pair_set_epi16(cospi_11_64, -cospi_21_64);
+ const __m128i k__cospi_p25_p07 = pair_set_epi16(cospi_25_64, cospi_7_64);
+ const __m128i k__cospi_p07_m25 = pair_set_epi16(cospi_7_64, -cospi_25_64);
+ const __m128i k__cospi_p29_p03 = pair_set_epi16(cospi_29_64, cospi_3_64);
+ const __m128i k__cospi_p03_m29 = pair_set_epi16(cospi_3_64, -cospi_29_64);
+ const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64);
+ const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+ const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64);
+ const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64);
+ const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t)-cospi_16_64);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i kZero = _mm_set1_epi16(0);  // for the 0 - x negations at the end
+ // stage 1: interleave in[15 - 2k] with in[2k] (k = 0..7) for the madds below
+ u[0] = _mm_unpacklo_epi16(in[15], in[0]);
+ u[1] = _mm_unpackhi_epi16(in[15], in[0]);
+ u[2] = _mm_unpacklo_epi16(in[13], in[2]);
+ u[3] = _mm_unpackhi_epi16(in[13], in[2]);
+ u[4] = _mm_unpacklo_epi16(in[11], in[4]);
+ u[5] = _mm_unpackhi_epi16(in[11], in[4]);
+ u[6] = _mm_unpacklo_epi16(in[9], in[6]);
+ u[7] = _mm_unpackhi_epi16(in[9], in[6]);
+ u[8] = _mm_unpacklo_epi16(in[7], in[8]);
+ u[9] = _mm_unpackhi_epi16(in[7], in[8]);
+ u[10] = _mm_unpacklo_epi16(in[5], in[10]);
+ u[11] = _mm_unpackhi_epi16(in[5], in[10]);
+ u[12] = _mm_unpacklo_epi16(in[3], in[12]);
+ u[13] = _mm_unpackhi_epi16(in[3], in[12]);
+ u[14] = _mm_unpacklo_epi16(in[1], in[14]);
+ u[15] = _mm_unpackhi_epi16(in[1], in[14]);
+ // madd: 32-bit a*c0 + b*c1 per 16-bit pair (presumably pair_set_epi16 alternates c0, c1 -- confirm)
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p01_p31);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p01_p31);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p31_m01);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p31_m01);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p05_p27);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p05_p27);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p27_m05);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p27_m05);
+ v[8] = _mm_madd_epi16(u[4], k__cospi_p09_p23);
+ v[9] = _mm_madd_epi16(u[5], k__cospi_p09_p23);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_p23_m09);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_p23_m09);
+ v[12] = _mm_madd_epi16(u[6], k__cospi_p13_p19);
+ v[13] = _mm_madd_epi16(u[7], k__cospi_p13_p19);
+ v[14] = _mm_madd_epi16(u[6], k__cospi_p19_m13);
+ v[15] = _mm_madd_epi16(u[7], k__cospi_p19_m13);
+ v[16] = _mm_madd_epi16(u[8], k__cospi_p17_p15);
+ v[17] = _mm_madd_epi16(u[9], k__cospi_p17_p15);
+ v[18] = _mm_madd_epi16(u[8], k__cospi_p15_m17);
+ v[19] = _mm_madd_epi16(u[9], k__cospi_p15_m17);
+ v[20] = _mm_madd_epi16(u[10], k__cospi_p21_p11);
+ v[21] = _mm_madd_epi16(u[11], k__cospi_p21_p11);
+ v[22] = _mm_madd_epi16(u[10], k__cospi_p11_m21);
+ v[23] = _mm_madd_epi16(u[11], k__cospi_p11_m21);
+ v[24] = _mm_madd_epi16(u[12], k__cospi_p25_p07);
+ v[25] = _mm_madd_epi16(u[13], k__cospi_p25_p07);
+ v[26] = _mm_madd_epi16(u[12], k__cospi_p07_m25);
+ v[27] = _mm_madd_epi16(u[13], k__cospi_p07_m25);
+ v[28] = _mm_madd_epi16(u[14], k__cospi_p29_p03);
+ v[29] = _mm_madd_epi16(u[15], k__cospi_p29_p03);
+ v[30] = _mm_madd_epi16(u[14], k__cospi_p03_m29);
+ v[31] = _mm_madd_epi16(u[15], k__cospi_p03_m29);
+ // butterfly: u[i] = v[i] + v[i + 16], u[i + 16] = v[i] - v[i + 16] (i = 0..15)
+ u[0] = _mm_add_epi32(v[0], v[16]);
+ u[1] = _mm_add_epi32(v[1], v[17]);
+ u[2] = _mm_add_epi32(v[2], v[18]);
+ u[3] = _mm_add_epi32(v[3], v[19]);
+ u[4] = _mm_add_epi32(v[4], v[20]);
+ u[5] = _mm_add_epi32(v[5], v[21]);
+ u[6] = _mm_add_epi32(v[6], v[22]);
+ u[7] = _mm_add_epi32(v[7], v[23]);
+ u[8] = _mm_add_epi32(v[8], v[24]);
+ u[9] = _mm_add_epi32(v[9], v[25]);
+ u[10] = _mm_add_epi32(v[10], v[26]);
+ u[11] = _mm_add_epi32(v[11], v[27]);
+ u[12] = _mm_add_epi32(v[12], v[28]);
+ u[13] = _mm_add_epi32(v[13], v[29]);
+ u[14] = _mm_add_epi32(v[14], v[30]);
+ u[15] = _mm_add_epi32(v[15], v[31]);
+ u[16] = _mm_sub_epi32(v[0], v[16]);
+ u[17] = _mm_sub_epi32(v[1], v[17]);
+ u[18] = _mm_sub_epi32(v[2], v[18]);
+ u[19] = _mm_sub_epi32(v[3], v[19]);
+ u[20] = _mm_sub_epi32(v[4], v[20]);
+ u[21] = _mm_sub_epi32(v[5], v[21]);
+ u[22] = _mm_sub_epi32(v[6], v[22]);
+ u[23] = _mm_sub_epi32(v[7], v[23]);
+ u[24] = _mm_sub_epi32(v[8], v[24]);
+ u[25] = _mm_sub_epi32(v[9], v[25]);
+ u[26] = _mm_sub_epi32(v[10], v[26]);
+ u[27] = _mm_sub_epi32(v[11], v[27]);
+ u[28] = _mm_sub_epi32(v[12], v[28]);
+ u[29] = _mm_sub_epi32(v[13], v[29]);
+ u[30] = _mm_sub_epi32(v[14], v[30]);
+ u[31] = _mm_sub_epi32(v[15], v[31]);
+ // round and scale back: (x + DCT_CONST_ROUNDING) >> DCT_CONST_BITS
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+ v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+ v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+ v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+ v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+ v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+ v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+ v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+ v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+ v[16] = _mm_add_epi32(u[16], k__DCT_CONST_ROUNDING);
+ v[17] = _mm_add_epi32(u[17], k__DCT_CONST_ROUNDING);
+ v[18] = _mm_add_epi32(u[18], k__DCT_CONST_ROUNDING);
+ v[19] = _mm_add_epi32(u[19], k__DCT_CONST_ROUNDING);
+ v[20] = _mm_add_epi32(u[20], k__DCT_CONST_ROUNDING);
+ v[21] = _mm_add_epi32(u[21], k__DCT_CONST_ROUNDING);
+ v[22] = _mm_add_epi32(u[22], k__DCT_CONST_ROUNDING);
+ v[23] = _mm_add_epi32(u[23], k__DCT_CONST_ROUNDING);
+ v[24] = _mm_add_epi32(u[24], k__DCT_CONST_ROUNDING);
+ v[25] = _mm_add_epi32(u[25], k__DCT_CONST_ROUNDING);
+ v[26] = _mm_add_epi32(u[26], k__DCT_CONST_ROUNDING);
+ v[27] = _mm_add_epi32(u[27], k__DCT_CONST_ROUNDING);
+ v[28] = _mm_add_epi32(u[28], k__DCT_CONST_ROUNDING);
+ v[29] = _mm_add_epi32(u[29], k__DCT_CONST_ROUNDING);
+ v[30] = _mm_add_epi32(u[30], k__DCT_CONST_ROUNDING);
+ v[31] = _mm_add_epi32(u[31], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+ u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+ u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+ u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+ u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+ u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+ u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+ u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+ u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+ u[16] = _mm_srai_epi32(v[16], DCT_CONST_BITS);
+ u[17] = _mm_srai_epi32(v[17], DCT_CONST_BITS);
+ u[18] = _mm_srai_epi32(v[18], DCT_CONST_BITS);
+ u[19] = _mm_srai_epi32(v[19], DCT_CONST_BITS);
+ u[20] = _mm_srai_epi32(v[20], DCT_CONST_BITS);
+ u[21] = _mm_srai_epi32(v[21], DCT_CONST_BITS);
+ u[22] = _mm_srai_epi32(v[22], DCT_CONST_BITS);
+ u[23] = _mm_srai_epi32(v[23], DCT_CONST_BITS);
+ u[24] = _mm_srai_epi32(v[24], DCT_CONST_BITS);
+ u[25] = _mm_srai_epi32(v[25], DCT_CONST_BITS);
+ u[26] = _mm_srai_epi32(v[26], DCT_CONST_BITS);
+ u[27] = _mm_srai_epi32(v[27], DCT_CONST_BITS);
+ u[28] = _mm_srai_epi32(v[28], DCT_CONST_BITS);
+ u[29] = _mm_srai_epi32(v[29], DCT_CONST_BITS);
+ u[30] = _mm_srai_epi32(v[30], DCT_CONST_BITS);
+ u[31] = _mm_srai_epi32(v[31], DCT_CONST_BITS);
+ // narrow each pair of 32-bit vectors to one 16-bit vector (signed saturation)
+ s[0] = _mm_packs_epi32(u[0], u[1]);
+ s[1] = _mm_packs_epi32(u[2], u[3]);
+ s[2] = _mm_packs_epi32(u[4], u[5]);
+ s[3] = _mm_packs_epi32(u[6], u[7]);
+ s[4] = _mm_packs_epi32(u[8], u[9]);
+ s[5] = _mm_packs_epi32(u[10], u[11]);
+ s[6] = _mm_packs_epi32(u[12], u[13]);
+ s[7] = _mm_packs_epi32(u[14], u[15]);
+ s[8] = _mm_packs_epi32(u[16], u[17]);
+ s[9] = _mm_packs_epi32(u[18], u[19]);
+ s[10] = _mm_packs_epi32(u[20], u[21]);
+ s[11] = _mm_packs_epi32(u[22], u[23]);
+ s[12] = _mm_packs_epi32(u[24], u[25]);
+ s[13] = _mm_packs_epi32(u[26], u[27]);
+ s[14] = _mm_packs_epi32(u[28], u[29]);
+ s[15] = _mm_packs_epi32(u[30], u[31]);
+
+ // stage 2: only s[8..15] are multiplied; s[0..7] get 16-bit add/sub further down
+ u[0] = _mm_unpacklo_epi16(s[8], s[9]);
+ u[1] = _mm_unpackhi_epi16(s[8], s[9]);
+ u[2] = _mm_unpacklo_epi16(s[10], s[11]);
+ u[3] = _mm_unpackhi_epi16(s[10], s[11]);
+ u[4] = _mm_unpacklo_epi16(s[12], s[13]);
+ u[5] = _mm_unpackhi_epi16(s[12], s[13]);
+ u[6] = _mm_unpacklo_epi16(s[14], s[15]);
+ u[7] = _mm_unpackhi_epi16(s[14], s[15]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p04_p28);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p04_p28);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p28_m04);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p28_m04);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p20_p12);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p20_p12);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p12_m20);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p12_m20);
+ v[8] = _mm_madd_epi16(u[4], k__cospi_m28_p04);
+ v[9] = _mm_madd_epi16(u[5], k__cospi_m28_p04);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_p04_p28);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_p04_p28);
+ v[12] = _mm_madd_epi16(u[6], k__cospi_m12_p20);
+ v[13] = _mm_madd_epi16(u[7], k__cospi_m12_p20);
+ v[14] = _mm_madd_epi16(u[6], k__cospi_p20_p12);
+ v[15] = _mm_madd_epi16(u[7], k__cospi_p20_p12);
+
+ u[0] = _mm_add_epi32(v[0], v[8]);
+ u[1] = _mm_add_epi32(v[1], v[9]);
+ u[2] = _mm_add_epi32(v[2], v[10]);
+ u[3] = _mm_add_epi32(v[3], v[11]);
+ u[4] = _mm_add_epi32(v[4], v[12]);
+ u[5] = _mm_add_epi32(v[5], v[13]);
+ u[6] = _mm_add_epi32(v[6], v[14]);
+ u[7] = _mm_add_epi32(v[7], v[15]);
+ u[8] = _mm_sub_epi32(v[0], v[8]);
+ u[9] = _mm_sub_epi32(v[1], v[9]);
+ u[10] = _mm_sub_epi32(v[2], v[10]);
+ u[11] = _mm_sub_epi32(v[3], v[11]);
+ u[12] = _mm_sub_epi32(v[4], v[12]);
+ u[13] = _mm_sub_epi32(v[5], v[13]);
+ u[14] = _mm_sub_epi32(v[6], v[14]);
+ u[15] = _mm_sub_epi32(v[7], v[15]);
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+ v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+ v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+ v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+ v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+ v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+ v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+ v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+ v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+ u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+ u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+ u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+ u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+ u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+ u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+ u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+ u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+ // x[0..7]: 16-bit add/sub of s[0..7]; x[8..15]: the packed stage-2 products
+ x[0] = _mm_add_epi16(s[0], s[4]);
+ x[1] = _mm_add_epi16(s[1], s[5]);
+ x[2] = _mm_add_epi16(s[2], s[6]);
+ x[3] = _mm_add_epi16(s[3], s[7]);
+ x[4] = _mm_sub_epi16(s[0], s[4]);
+ x[5] = _mm_sub_epi16(s[1], s[5]);
+ x[6] = _mm_sub_epi16(s[2], s[6]);
+ x[7] = _mm_sub_epi16(s[3], s[7]);
+ x[8] = _mm_packs_epi32(u[0], u[1]);
+ x[9] = _mm_packs_epi32(u[2], u[3]);
+ x[10] = _mm_packs_epi32(u[4], u[5]);
+ x[11] = _mm_packs_epi32(u[6], u[7]);
+ x[12] = _mm_packs_epi32(u[8], u[9]);
+ x[13] = _mm_packs_epi32(u[10], u[11]);
+ x[14] = _mm_packs_epi32(u[12], u[13]);
+ x[15] = _mm_packs_epi32(u[14], u[15]);
+
+ // stage 3: x[4..7] and x[12..15] are multiplied; x[0..3], x[8..11] get add/sub
+ u[0] = _mm_unpacklo_epi16(x[4], x[5]);
+ u[1] = _mm_unpackhi_epi16(x[4], x[5]);
+ u[2] = _mm_unpacklo_epi16(x[6], x[7]);
+ u[3] = _mm_unpackhi_epi16(x[6], x[7]);
+ u[4] = _mm_unpacklo_epi16(x[12], x[13]);
+ u[5] = _mm_unpackhi_epi16(x[12], x[13]);
+ u[6] = _mm_unpacklo_epi16(x[14], x[15]);
+ u[7] = _mm_unpackhi_epi16(x[14], x[15]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p08_p24);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p08_p24);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p24_m08);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p24_m08);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_m24_p08);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_m24_p08);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24);
+ v[8] = _mm_madd_epi16(u[4], k__cospi_p08_p24);
+ v[9] = _mm_madd_epi16(u[5], k__cospi_p08_p24);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_p24_m08);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_p24_m08);
+ v[12] = _mm_madd_epi16(u[6], k__cospi_m24_p08);
+ v[13] = _mm_madd_epi16(u[7], k__cospi_m24_p08);
+ v[14] = _mm_madd_epi16(u[6], k__cospi_p08_p24);
+ v[15] = _mm_madd_epi16(u[7], k__cospi_p08_p24);
+
+ u[0] = _mm_add_epi32(v[0], v[4]);
+ u[1] = _mm_add_epi32(v[1], v[5]);
+ u[2] = _mm_add_epi32(v[2], v[6]);
+ u[3] = _mm_add_epi32(v[3], v[7]);
+ u[4] = _mm_sub_epi32(v[0], v[4]);
+ u[5] = _mm_sub_epi32(v[1], v[5]);
+ u[6] = _mm_sub_epi32(v[2], v[6]);
+ u[7] = _mm_sub_epi32(v[3], v[7]);
+ u[8] = _mm_add_epi32(v[8], v[12]);
+ u[9] = _mm_add_epi32(v[9], v[13]);
+ u[10] = _mm_add_epi32(v[10], v[14]);
+ u[11] = _mm_add_epi32(v[11], v[15]);
+ u[12] = _mm_sub_epi32(v[8], v[12]);
+ u[13] = _mm_sub_epi32(v[9], v[13]);
+ u[14] = _mm_sub_epi32(v[10], v[14]);
+ u[15] = _mm_sub_epi32(v[11], v[15]);
+ // unlike stages 1-2, rounding is added in place in u and the shift lands in v
+ u[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+ u[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+ u[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+ u[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+ u[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+ u[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+ u[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+ u[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+ u[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+ v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+ v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+ v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+ v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+ v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+ v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+ v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+ v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+ v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+ // s[0..3], s[8..11]: 16-bit add/sub of x; s[4..7], s[12..15]: packed stage-3 products
+ s[0] = _mm_add_epi16(x[0], x[2]);
+ s[1] = _mm_add_epi16(x[1], x[3]);
+ s[2] = _mm_sub_epi16(x[0], x[2]);
+ s[3] = _mm_sub_epi16(x[1], x[3]);
+ s[4] = _mm_packs_epi32(v[0], v[1]);
+ s[5] = _mm_packs_epi32(v[2], v[3]);
+ s[6] = _mm_packs_epi32(v[4], v[5]);
+ s[7] = _mm_packs_epi32(v[6], v[7]);
+ s[8] = _mm_add_epi16(x[8], x[10]);
+ s[9] = _mm_add_epi16(x[9], x[11]);
+ s[10] = _mm_sub_epi16(x[8], x[10]);
+ s[11] = _mm_sub_epi16(x[9], x[11]);
+ s[12] = _mm_packs_epi32(v[8], v[9]);
+ s[13] = _mm_packs_epi32(v[10], v[11]);
+ s[14] = _mm_packs_epi32(v[12], v[13]);
+ s[15] = _mm_packs_epi32(v[14], v[15]);
+
+ // stage 4: +/-cospi_16_64 products of s[2..3], s[6..7], s[10..11], s[14..15]
+ u[0] = _mm_unpacklo_epi16(s[2], s[3]);
+ u[1] = _mm_unpackhi_epi16(s[2], s[3]);
+ u[2] = _mm_unpacklo_epi16(s[6], s[7]);
+ u[3] = _mm_unpackhi_epi16(s[6], s[7]);
+ u[4] = _mm_unpacklo_epi16(s[10], s[11]);
+ u[5] = _mm_unpackhi_epi16(s[10], s[11]);
+ u[6] = _mm_unpacklo_epi16(s[14], s[15]);
+ u[7] = _mm_unpackhi_epi16(s[14], s[15]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_m16_m16);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_m16_m16);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p16_m16);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p16_m16);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p16_p16);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p16_p16);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_m16_p16);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_m16_p16);
+ v[8] = _mm_madd_epi16(u[4], k__cospi_p16_p16);
+ v[9] = _mm_madd_epi16(u[5], k__cospi_p16_p16);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_m16_p16);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_m16_p16);
+ v[12] = _mm_madd_epi16(u[6], k__cospi_m16_m16);
+ v[13] = _mm_madd_epi16(u[7], k__cospi_m16_m16);
+ v[14] = _mm_madd_epi16(u[6], k__cospi_p16_m16);
+ v[15] = _mm_madd_epi16(u[7], k__cospi_p16_m16);
+ // no add/sub between products in this stage: each one is rounded and shifted directly
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+ u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING);
+ u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING);
+ u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING);
+ u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING);
+ u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING);
+ u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING);
+ u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING);
+ u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING);
+
+ v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+ v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+ v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+ v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+ v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+ v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+ v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+ v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+ v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+ // write results back to in[]; in[1], in[3], in[13], in[15] are negated (0 - s[k])
+ in[0] = s[0];
+ in[1] = _mm_sub_epi16(kZero, s[8]);
+ in[2] = s[12];
+ in[3] = _mm_sub_epi16(kZero, s[4]);
+ in[4] = _mm_packs_epi32(v[4], v[5]);
+ in[5] = _mm_packs_epi32(v[12], v[13]);
+ in[6] = _mm_packs_epi32(v[8], v[9]);
+ in[7] = _mm_packs_epi32(v[0], v[1]);
+ in[8] = _mm_packs_epi32(v[2], v[3]);
+ in[9] = _mm_packs_epi32(v[10], v[11]);
+ in[10] = _mm_packs_epi32(v[14], v[15]);
+ in[11] = _mm_packs_epi32(v[6], v[7]);
+ in[12] = s[5];
+ in[13] = _mm_sub_epi16(kZero, s[13]);
+ in[14] = s[9];
+ in[15] = _mm_sub_epi16(kZero, s[1]);
+}
+
+static void vp10_idct16_8col(__m128i *in) {
+ const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+ const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
+ const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+ const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64);
+ const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+ const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
+ const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+ const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64);
+ const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+ const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ __m128i v[16], u[16], s[16], t[16];
+
+ // stage 1
+ s[0] = in[0];
+ s[1] = in[8];
+ s[2] = in[4];
+ s[3] = in[12];
+ s[4] = in[2];
+ s[5] = in[10];
+ s[6] = in[6];
+ s[7] = in[14];
+ s[8] = in[1];
+ s[9] = in[9];
+ s[10] = in[5];
+ s[11] = in[13];
+ s[12] = in[3];
+ s[13] = in[11];
+ s[14] = in[7];
+ s[15] = in[15];
+
+ // stage 2
+ u[0] = _mm_unpacklo_epi16(s[8], s[15]);
+ u[1] = _mm_unpackhi_epi16(s[8], s[15]);
+ u[2] = _mm_unpacklo_epi16(s[9], s[14]);
+ u[3] = _mm_unpackhi_epi16(s[9], s[14]);
+ u[4] = _mm_unpacklo_epi16(s[10], s[13]);
+ u[5] = _mm_unpackhi_epi16(s[10], s[13]);
+ u[6] = _mm_unpacklo_epi16(s[11], s[12]);
+ u[7] = _mm_unpackhi_epi16(s[11], s[12]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p30_m02);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p30_m02);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p02_p30);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p02_p30);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p14_m18);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p14_m18);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p18_p14);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p18_p14);
+ v[8] = _mm_madd_epi16(u[4], k__cospi_p22_m10);
+ v[9] = _mm_madd_epi16(u[5], k__cospi_p22_m10);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_p10_p22);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_p10_p22);
+ v[12] = _mm_madd_epi16(u[6], k__cospi_p06_m26);
+ v[13] = _mm_madd_epi16(u[7], k__cospi_p06_m26);
+ v[14] = _mm_madd_epi16(u[6], k__cospi_p26_p06);
+ v[15] = _mm_madd_epi16(u[7], k__cospi_p26_p06);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+ u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING);
+ u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING);
+ u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING);
+ u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING);
+ u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING);
+ u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING);
+ u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING);
+ u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+ u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+ u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+ u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+ u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+ u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+ u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+ u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+ u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+
+ s[8] = _mm_packs_epi32(u[0], u[1]);
+ s[15] = _mm_packs_epi32(u[2], u[3]);
+ s[9] = _mm_packs_epi32(u[4], u[5]);
+ s[14] = _mm_packs_epi32(u[6], u[7]);
+ s[10] = _mm_packs_epi32(u[8], u[9]);
+ s[13] = _mm_packs_epi32(u[10], u[11]);
+ s[11] = _mm_packs_epi32(u[12], u[13]);
+ s[12] = _mm_packs_epi32(u[14], u[15]);
+
+ // stage 3
+ t[0] = s[0];
+ t[1] = s[1];
+ t[2] = s[2];
+ t[3] = s[3];
+ u[0] = _mm_unpacklo_epi16(s[4], s[7]);
+ u[1] = _mm_unpackhi_epi16(s[4], s[7]);
+ u[2] = _mm_unpacklo_epi16(s[5], s[6]);
+ u[3] = _mm_unpackhi_epi16(s[5], s[6]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p28_m04);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p28_m04);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p04_p28);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p04_p28);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p12_m20);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p12_m20);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p20_p12);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p20_p12);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+
+ t[4] = _mm_packs_epi32(u[0], u[1]);
+ t[7] = _mm_packs_epi32(u[2], u[3]);
+ t[5] = _mm_packs_epi32(u[4], u[5]);
+ t[6] = _mm_packs_epi32(u[6], u[7]);
+ t[8] = _mm_add_epi16(s[8], s[9]);
+ t[9] = _mm_sub_epi16(s[8], s[9]);
+ t[10] = _mm_sub_epi16(s[11], s[10]);
+ t[11] = _mm_add_epi16(s[10], s[11]);
+ t[12] = _mm_add_epi16(s[12], s[13]);
+ t[13] = _mm_sub_epi16(s[12], s[13]);
+ t[14] = _mm_sub_epi16(s[15], s[14]);
+ t[15] = _mm_add_epi16(s[14], s[15]);
+
+ // stage 4
+ u[0] = _mm_unpacklo_epi16(t[0], t[1]);
+ u[1] = _mm_unpackhi_epi16(t[0], t[1]);
+ u[2] = _mm_unpacklo_epi16(t[2], t[3]);
+ u[3] = _mm_unpackhi_epi16(t[2], t[3]);
+ u[4] = _mm_unpacklo_epi16(t[9], t[14]);
+ u[5] = _mm_unpackhi_epi16(t[9], t[14]);
+ u[6] = _mm_unpacklo_epi16(t[10], t[13]);
+ u[7] = _mm_unpackhi_epi16(t[10], t[13]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p16_p16);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p16_m16);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p16_m16);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p24_m08);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p24_m08);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24);
+ v[8] = _mm_madd_epi16(u[4], k__cospi_m08_p24);
+ v[9] = _mm_madd_epi16(u[5], k__cospi_m08_p24);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_p24_p08);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_p24_p08);
+ v[12] = _mm_madd_epi16(u[6], k__cospi_m24_m08);
+ v[13] = _mm_madd_epi16(u[7], k__cospi_m24_m08);
+ v[14] = _mm_madd_epi16(u[6], k__cospi_m08_p24);
+ v[15] = _mm_madd_epi16(u[7], k__cospi_m08_p24);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+ u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING);
+ u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING);
+ u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING);
+ u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING);
+ u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING);
+ u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING);
+ u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING);
+ u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+ u[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+ u[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+ u[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+ u[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+ u[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+ u[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+ u[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+ u[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+
+ s[0] = _mm_packs_epi32(u[0], u[1]);
+ s[1] = _mm_packs_epi32(u[2], u[3]);
+ s[2] = _mm_packs_epi32(u[4], u[5]);
+ s[3] = _mm_packs_epi32(u[6], u[7]);
+ s[4] = _mm_add_epi16(t[4], t[5]);
+ s[5] = _mm_sub_epi16(t[4], t[5]);
+ s[6] = _mm_sub_epi16(t[7], t[6]);
+ s[7] = _mm_add_epi16(t[6], t[7]);
+ s[8] = t[8];
+ s[15] = t[15];
+ s[9] = _mm_packs_epi32(u[8], u[9]);
+ s[14] = _mm_packs_epi32(u[10], u[11]);
+ s[10] = _mm_packs_epi32(u[12], u[13]);
+ s[13] = _mm_packs_epi32(u[14], u[15]);
+ s[11] = t[11];
+ s[12] = t[12];
+
+ // stage 5
+ t[0] = _mm_add_epi16(s[0], s[3]);
+ t[1] = _mm_add_epi16(s[1], s[2]);
+ t[2] = _mm_sub_epi16(s[1], s[2]);
+ t[3] = _mm_sub_epi16(s[0], s[3]);
+ t[4] = s[4];
+ t[7] = s[7];
+
+ u[0] = _mm_unpacklo_epi16(s[5], s[6]);
+ u[1] = _mm_unpackhi_epi16(s[5], s[6]);
+ v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16);
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ t[5] = _mm_packs_epi32(u[0], u[1]);
+ t[6] = _mm_packs_epi32(u[2], u[3]);
+
+ t[8] = _mm_add_epi16(s[8], s[11]);
+ t[9] = _mm_add_epi16(s[9], s[10]);
+ t[10] = _mm_sub_epi16(s[9], s[10]);
+ t[11] = _mm_sub_epi16(s[8], s[11]);
+ t[12] = _mm_sub_epi16(s[15], s[12]);
+ t[13] = _mm_sub_epi16(s[14], s[13]);
+ t[14] = _mm_add_epi16(s[13], s[14]);
+ t[15] = _mm_add_epi16(s[12], s[15]);
+
+ // stage 6
+ s[0] = _mm_add_epi16(t[0], t[7]);
+ s[1] = _mm_add_epi16(t[1], t[6]);
+ s[2] = _mm_add_epi16(t[2], t[5]);
+ s[3] = _mm_add_epi16(t[3], t[4]);
+ s[4] = _mm_sub_epi16(t[3], t[4]);
+ s[5] = _mm_sub_epi16(t[2], t[5]);
+ s[6] = _mm_sub_epi16(t[1], t[6]);
+ s[7] = _mm_sub_epi16(t[0], t[7]);
+ s[8] = t[8];
+ s[9] = t[9];
+
+ u[0] = _mm_unpacklo_epi16(t[10], t[13]);
+ u[1] = _mm_unpackhi_epi16(t[10], t[13]);
+ u[2] = _mm_unpacklo_epi16(t[11], t[12]);
+ u[3] = _mm_unpackhi_epi16(t[11], t[12]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_m16_p16);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_m16_p16);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p16_p16);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p16_p16);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+
+ s[10] = _mm_packs_epi32(u[0], u[1]);
+ s[13] = _mm_packs_epi32(u[2], u[3]);
+ s[11] = _mm_packs_epi32(u[4], u[5]);
+ s[12] = _mm_packs_epi32(u[6], u[7]);
+ s[14] = t[14];
+ s[15] = t[15];
+
+ // stage 7
+ in[0] = _mm_add_epi16(s[0], s[15]);
+ in[1] = _mm_add_epi16(s[1], s[14]);
+ in[2] = _mm_add_epi16(s[2], s[13]);
+ in[3] = _mm_add_epi16(s[3], s[12]);
+ in[4] = _mm_add_epi16(s[4], s[11]);
+ in[5] = _mm_add_epi16(s[5], s[10]);
+ in[6] = _mm_add_epi16(s[6], s[9]);
+ in[7] = _mm_add_epi16(s[7], s[8]);
+ in[8] = _mm_sub_epi16(s[7], s[8]);
+ in[9] = _mm_sub_epi16(s[6], s[9]);
+ in[10] = _mm_sub_epi16(s[5], s[10]);
+ in[11] = _mm_sub_epi16(s[4], s[11]);
+ in[12] = _mm_sub_epi16(s[3], s[12]);
+ in[13] = _mm_sub_epi16(s[2], s[13]);
+ in[14] = _mm_sub_epi16(s[1], s[14]);
+ in[15] = _mm_sub_epi16(s[0], s[15]);
+}
+
+void vp10_idct16_sse2(__m128i *in0, __m128i *in1) {  // One inverse-DCT pass over a block split across in0 and in1.
+ array_transpose_16x16(in0, in1);  // Transpose first (helper defined elsewhere in this file).
+ vp10_idct16_8col(in0);  // 16-point inverse DCT helper; presumably rewrites in0[0..15] in place - confirm.
+ vp10_idct16_8col(in1);  // Same helper applied to the other half.
+}
+
+void vp10_iadst16_sse2(__m128i *in0, __m128i *in1) {  // Same shape as vp10_idct16_sse2, using the iadst16 column helper.
+ array_transpose_16x16(in0, in1);  // Transpose first (helper defined elsewhere in this file).
+ vp10_iadst16_8col(in0);  // Helper not visible here; presumably a 16-point inverse ADST done in place - confirm.
+ vp10_iadst16_8col(in1);  // Same helper applied to the other half.
+}
+
+void vp10_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
+ int stride) {  // Sparse-input 16x16 inverse DCT: only 8 coefficients from each of 4 rows of input are read (see loads below).
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i final_rounding = _mm_set1_epi16(1 << 5);  // Half of 1 << 6; paired with the final >> 6.
+ const __m128i zero = _mm_setzero_si128();
+ // stgN_* are pairs of 16-bit cosine constants used as _mm_madd_epi16 multipliers; N is the stage that first uses them.
+ const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64);
+ const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+ const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64);
+
+ const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+
+ const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+ const __m128i stg4_7 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+
+ const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+ __m128i in[16], l[16];  // in: working vectors; l: the 16 first-pass output vectors written in Stage7 below.
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_8,
+ stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15, stp1_8_0,
+ stp1_12_0;  // NOTE(review): some of these (e.g. stp1_8_0, stp1_12_0) are unused in the visible code; presumably IDCT16_10 needs them.
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+ stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int i;
+ // First 1-D inverse DCT, specialised for the 8x4 group of coefficients loaded below.
+ // Load input data: 8 coefficients at element offsets 0, 16, 32 and 48. _mm_load_si128 needs 16-byte alignment.
+ in[0] = _mm_load_si128((const __m128i *)input);
+ in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2));
+ in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4));
+ in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6));
+
+ TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1]);  // Macro defined elsewhere; only in[0] and in[1] are used afterwards.
+
+ // Stage2: each input is interleaved with zero, so every madd lane is a single product.
+ {
+ const __m128i lo_1_15 = _mm_unpackhi_epi16(in[0], zero);
+ const __m128i lo_13_3 = _mm_unpackhi_epi16(zero, in[1]);
+
+ tmp0 = _mm_madd_epi16(lo_1_15, stg2_0);
+ tmp2 = _mm_madd_epi16(lo_1_15, stg2_1);
+ tmp5 = _mm_madd_epi16(lo_13_3, stg2_6);
+ tmp7 = _mm_madd_epi16(lo_13_3, stg2_7);
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp5 = _mm_add_epi32(tmp5, rounding);
+ tmp7 = _mm_add_epi32(tmp7, rounding);
+
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS);
+ tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS);
+
+ stp2_8 = _mm_packs_epi32(tmp0, tmp2);  // Two 4-lane results share one register: low half and high half.
+ stp2_11 = _mm_packs_epi32(tmp5, tmp7);
+ }
+
+ // Stage3: same multiply/round/shift/pack pattern on the low half of in[1].
+ {
+ const __m128i lo_2_14 = _mm_unpacklo_epi16(in[1], zero);
+
+ tmp0 = _mm_madd_epi16(lo_2_14, stg3_0);
+ tmp2 = _mm_madd_epi16(lo_2_14, stg3_1);
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+
+ stp1_13 = _mm_unpackhi_epi64(stp2_11, zero);  // High 64 bits of stp2_11 moved to the low half, upper half zeroed.
+ stp1_14 = _mm_unpackhi_epi64(stp2_8, zero);
+
+ stp1_4 = _mm_packs_epi32(tmp0, tmp2);
+ }
+
+ // Stage4
+ {
+ const __m128i lo_0_8 = _mm_unpacklo_epi16(in[0], zero);
+ const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp1_14);
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp1_13);
+
+ tmp0 = _mm_madd_epi16(lo_0_8, stg4_0);
+ tmp2 = _mm_madd_epi16(lo_0_8, stg4_1);
+ tmp1 = _mm_madd_epi16(lo_9_14, stg4_4);
+ tmp3 = _mm_madd_epi16(lo_9_14, stg4_5);
+ tmp5 = _mm_madd_epi16(lo_10_13, stg4_6);
+ tmp7 = _mm_madd_epi16(lo_10_13, stg4_7);
+
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp1 = _mm_add_epi32(tmp1, rounding);
+ tmp3 = _mm_add_epi32(tmp3, rounding);
+ tmp5 = _mm_add_epi32(tmp5, rounding);
+ tmp7 = _mm_add_epi32(tmp7, rounding);
+
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);
+ tmp5 = _mm_srai_epi32(tmp5, DCT_CONST_BITS);
+ tmp7 = _mm_srai_epi32(tmp7, DCT_CONST_BITS);
+
+ stp1_0 = _mm_packs_epi32(tmp0, tmp0);  // Same four values duplicated into both halves.
+ stp1_1 = _mm_packs_epi32(tmp2, tmp2);
+ stp2_9 = _mm_packs_epi32(tmp1, tmp3);
+ stp2_10 = _mm_packs_epi32(tmp5, tmp7);
+
+ stp2_6 = _mm_unpackhi_epi64(stp1_4, zero);
+ }
+
+ // Stage5 and Stage6: add/sub butterflies of stp2_8..stp2_11, then each 64-bit half is moved into its own register.
+ {
+ tmp0 = _mm_add_epi16(stp2_8, stp2_11);
+ tmp1 = _mm_sub_epi16(stp2_8, stp2_11);
+ tmp2 = _mm_add_epi16(stp2_9, stp2_10);
+ tmp3 = _mm_sub_epi16(stp2_9, stp2_10);
+
+ stp1_9 = _mm_unpacklo_epi64(tmp2, zero);
+ stp1_10 = _mm_unpacklo_epi64(tmp3, zero);
+ stp1_8 = _mm_unpacklo_epi64(tmp0, zero);
+ stp1_11 = _mm_unpacklo_epi64(tmp1, zero);
+
+ stp1_13 = _mm_unpackhi_epi64(tmp3, zero);
+ stp1_14 = _mm_unpackhi_epi64(tmp2, zero);
+ stp1_12 = _mm_unpackhi_epi64(tmp1, zero);
+ stp1_15 = _mm_unpackhi_epi64(tmp0, zero);
+ }
+
+ // Stage6 (continued): multiplies by the cospi_16_64 pairs (stg4_0, stg4_1, stg6_0), then butterflies producing stp2_0..stp2_7.
+ {
+ const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp1_4);
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13);
+ const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12);
+
+ tmp1 = _mm_madd_epi16(lo_6_5, stg4_1);
+ tmp3 = _mm_madd_epi16(lo_6_5, stg4_0);
+ tmp0 = _mm_madd_epi16(lo_10_13, stg6_0);
+ tmp2 = _mm_madd_epi16(lo_10_13, stg4_0);
+ tmp4 = _mm_madd_epi16(lo_11_12, stg6_0);
+ tmp6 = _mm_madd_epi16(lo_11_12, stg4_0);
+
+ tmp1 = _mm_add_epi32(tmp1, rounding);
+ tmp3 = _mm_add_epi32(tmp3, rounding);
+ tmp0 = _mm_add_epi32(tmp0, rounding);
+ tmp2 = _mm_add_epi32(tmp2, rounding);
+ tmp4 = _mm_add_epi32(tmp4, rounding);
+ tmp6 = _mm_add_epi32(tmp6, rounding);
+
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS);
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS);
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS);
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS);
+ tmp4 = _mm_srai_epi32(tmp4, DCT_CONST_BITS);
+ tmp6 = _mm_srai_epi32(tmp6, DCT_CONST_BITS);
+
+ stp1_6 = _mm_packs_epi32(tmp3, tmp1);
+
+ stp2_10 = _mm_packs_epi32(tmp0, zero);  // Packing with zero leaves the upper four 16-bit lanes at 0.
+ stp2_13 = _mm_packs_epi32(tmp2, zero);
+ stp2_11 = _mm_packs_epi32(tmp4, zero);
+ stp2_12 = _mm_packs_epi32(tmp6, zero);
+
+ tmp0 = _mm_add_epi16(stp1_0, stp1_4);
+ tmp1 = _mm_sub_epi16(stp1_0, stp1_4);
+ tmp2 = _mm_add_epi16(stp1_1, stp1_6);
+ tmp3 = _mm_sub_epi16(stp1_1, stp1_6);
+
+ stp2_0 = _mm_unpackhi_epi64(tmp0, zero);
+ stp2_1 = _mm_unpacklo_epi64(tmp2, zero);
+ stp2_2 = _mm_unpackhi_epi64(tmp2, zero);
+ stp2_3 = _mm_unpacklo_epi64(tmp0, zero);
+ stp2_4 = _mm_unpacklo_epi64(tmp1, zero);
+ stp2_5 = _mm_unpackhi_epi64(tmp3, zero);
+ stp2_6 = _mm_unpacklo_epi64(tmp3, zero);
+ stp2_7 = _mm_unpackhi_epi64(tmp1, zero);
+ }
+
+ // Stage7. Left 8x16 only.
+ l[0] = _mm_add_epi16(stp2_0, stp1_15);
+ l[1] = _mm_add_epi16(stp2_1, stp1_14);
+ l[2] = _mm_add_epi16(stp2_2, stp2_13);
+ l[3] = _mm_add_epi16(stp2_3, stp2_12);
+ l[4] = _mm_add_epi16(stp2_4, stp2_11);
+ l[5] = _mm_add_epi16(stp2_5, stp2_10);
+ l[6] = _mm_add_epi16(stp2_6, stp1_9);
+ l[7] = _mm_add_epi16(stp2_7, stp1_8);
+ l[8] = _mm_sub_epi16(stp2_7, stp1_8);
+ l[9] = _mm_sub_epi16(stp2_6, stp1_9);
+ l[10] = _mm_sub_epi16(stp2_5, stp2_10);
+ l[11] = _mm_sub_epi16(stp2_4, stp2_11);
+ l[12] = _mm_sub_epi16(stp2_3, stp2_12);
+ l[13] = _mm_sub_epi16(stp2_2, stp2_13);
+ l[14] = _mm_sub_epi16(stp2_1, stp1_14);
+ l[15] = _mm_sub_epi16(stp2_0, stp1_15);
+
+ // Second 1-D inverse transform, performed per 8x16 block
+ for (i = 0; i < 2; i++) {
+ int j;
+ array_transpose_4X8(l + 8 * i, in);  // Takes l[8*i] onward from the first pass (helper defined elsewhere).
+
+ IDCT16_10  // Macro defined elsewhere; presumably recomputes stp1_*/stp2_* from in[] using this function's locals - confirm.
+
+ // Stage7
+ in[0] = _mm_add_epi16(stp2_0, stp1_15);
+ in[1] = _mm_add_epi16(stp2_1, stp1_14);
+ in[2] = _mm_add_epi16(stp2_2, stp2_13);
+ in[3] = _mm_add_epi16(stp2_3, stp2_12);
+ in[4] = _mm_add_epi16(stp2_4, stp2_11);
+ in[5] = _mm_add_epi16(stp2_5, stp2_10);
+ in[6] = _mm_add_epi16(stp2_6, stp1_9);
+ in[7] = _mm_add_epi16(stp2_7, stp1_8);
+ in[8] = _mm_sub_epi16(stp2_7, stp1_8);
+ in[9] = _mm_sub_epi16(stp2_6, stp1_9);
+ in[10] = _mm_sub_epi16(stp2_5, stp2_10);
+ in[11] = _mm_sub_epi16(stp2_4, stp2_11);
+ in[12] = _mm_sub_epi16(stp2_3, stp2_12);
+ in[13] = _mm_sub_epi16(stp2_2, stp2_13);
+ in[14] = _mm_sub_epi16(stp2_1, stp1_14);
+ in[15] = _mm_sub_epi16(stp2_0, stp1_15);
+
+ for (j = 0; j < 16; ++j) {
+ // Final rounding and shift: saturating add of 32, then arithmetic shift right by 6.
+ in[j] = _mm_adds_epi16(in[j], final_rounding);
+ in[j] = _mm_srai_epi16(in[j], 6);
+ RECON_AND_STORE(dest + j * stride, in[j]);  // Macro defined elsewhere; presumably adds in[j] to 8 pixels of row j - confirm.
+ }
+
+ dest += 8;  // The second iteration writes the next 8 columns.
+ }
+}
+
+#define LOAD_DQCOEFF(reg, input) /* reg = one 128-bit aligned load from input; input then advances by 8 elements */ \
+ { \
+ (reg) = _mm_load_si128((const __m128i *)(input)); /* _mm_load_si128 requires a 16-byte aligned address */ \
+ (input) += 8; /* side effect: input must be a modifiable pointer lvalue */ \
+ }
+
+#define IDCT32_34 /* 32-point inverse DCT body that reads only in[0]..in[7]; results are left in stp1_0..stp1_31 */ \
+ /* Stage1: in[1], in[3], in[5], in[7] are each paired with zero instead of a second input */ \
+ { \
+ const __m128i zero = _mm_setzero_si128(); \
+ const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero); \
+ const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], zero); \
+ \
+ const __m128i lo_25_7 = _mm_unpacklo_epi16(zero, in[7]); \
+ const __m128i hi_25_7 = _mm_unpackhi_epi16(zero, in[7]); \
+ \
+ const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], zero); \
+ const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], zero); \
+ \
+ const __m128i lo_29_3 = _mm_unpacklo_epi16(zero, in[3]); \
+ const __m128i hi_29_3 = _mm_unpackhi_epi16(zero, in[3]); \
+ \
+ MULTIPLICATION_AND_ADD_2(lo_1_31, hi_1_31, stg1_0, stg1_1, stp1_16, \
+ stp1_31); \
+ MULTIPLICATION_AND_ADD_2(lo_25_7, hi_25_7, stg1_6, stg1_7, stp1_19, \
+ stp1_28); \
+ MULTIPLICATION_AND_ADD_2(lo_5_27, hi_5_27, stg1_8, stg1_9, stp1_20, \
+ stp1_27); \
+ MULTIPLICATION_AND_ADD_2(lo_29_3, hi_29_3, stg1_14, stg1_15, stp1_23, \
+ stp1_24); \
+ } \
+ /* The enclosing function must declare in[], rounding, tmp0..tmp3, the stgN_* constants and all stp1_ and stp2_ variables */ \
+ /* Stage2 */ \
+ { \
+ const __m128i zero = _mm_setzero_si128(); \
+ const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero); \
+ const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero); \
+ \
+ const __m128i lo_26_6 = _mm_unpacklo_epi16(zero, in[6]); \
+ const __m128i hi_26_6 = _mm_unpackhi_epi16(zero, in[6]); \
+ \
+ MULTIPLICATION_AND_ADD_2(lo_2_30, hi_2_30, stg2_0, stg2_1, stp2_8, \
+ stp2_15); \
+ MULTIPLICATION_AND_ADD_2(lo_26_6, hi_26_6, stg2_6, stg2_7, stp2_11, \
+ stp2_12); \
+ /* IDCT32 forms sums and differences of stp1_16..stp1_31 here; this variant only copies the terms computed in Stage1 */ \
+ stp2_16 = stp1_16; \
+ stp2_19 = stp1_19; \
+ \
+ stp2_20 = stp1_20; \
+ stp2_23 = stp1_23; \
+ \
+ stp2_24 = stp1_24; \
+ stp2_27 = stp1_27; \
+ \
+ stp2_28 = stp1_28; \
+ stp2_31 = stp1_31; \
+ } \
+ \
+ /* Stage3 */ \
+ { \
+ const __m128i zero = _mm_setzero_si128(); \
+ const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero); \
+ const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero); \
+ \
+ const __m128i lo_17_30 = _mm_unpacklo_epi16(stp1_16, stp1_31); \
+ const __m128i hi_17_30 = _mm_unpackhi_epi16(stp1_16, stp1_31); \
+ const __m128i lo_18_29 = _mm_unpacklo_epi16(stp1_19, stp1_28); \
+ const __m128i hi_18_29 = _mm_unpackhi_epi16(stp1_19, stp1_28); \
+ \
+ const __m128i lo_21_26 = _mm_unpacklo_epi16(stp1_20, stp1_27); \
+ const __m128i hi_21_26 = _mm_unpackhi_epi16(stp1_20, stp1_27); \
+ const __m128i lo_22_25 = _mm_unpacklo_epi16(stp1_23, stp1_24); \
+ const __m128i hi_22_25 = _mm_unpackhi_epi16(stp1_23, stp1_24); \
+ \
+ MULTIPLICATION_AND_ADD_2(lo_4_28, hi_4_28, stg3_0, stg3_1, stp1_4, \
+ stp1_7); \
+ \
+ stp1_8 = stp2_8; \
+ stp1_11 = stp2_11; \
+ stp1_12 = stp2_12; \
+ stp1_15 = stp2_15; \
+ \
+ MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, \
+ stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, stp1_18, \
+ stp1_29) \
+ MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, \
+ stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, stp1_22, \
+ stp1_25) \
+ \
+ stp1_16 = stp2_16; \
+ stp1_31 = stp2_31; \
+ stp1_19 = stp2_19; \
+ stp1_20 = stp2_20; \
+ stp1_23 = stp2_23; \
+ stp1_24 = stp2_24; \
+ stp1_27 = stp2_27; \
+ stp1_28 = stp2_28; \
+ } \
+ \
+ /* Stage4 */ \
+ { \
+ const __m128i zero = _mm_setzero_si128(); \
+ const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero); \
+ const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero); \
+ \
+ const __m128i lo_9_14 = _mm_unpacklo_epi16(stp2_8, stp2_15); \
+ const __m128i hi_9_14 = _mm_unpackhi_epi16(stp2_8, stp2_15); \
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp2_11, stp2_12); \
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp2_11, stp2_12); \
+ \
+ MULTIPLICATION_AND_ADD_2(lo_0_16, hi_0_16, stg4_0, stg4_1, stp2_0, \
+ stp2_1); \
+ /* IDCT32 computes stp1_4 +/- stp1_5 and stp1_7 +/- stp1_6 here; this variant copies stp1_4 and stp1_7 instead */ \
+ stp2_4 = stp1_4; \
+ stp2_5 = stp1_4; \
+ stp2_6 = stp1_7; \
+ stp2_7 = stp1_7; \
+ \
+ MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \
+ stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, stp2_10, \
+ stp2_13) \
+ \
+ stp2_8 = stp1_8; \
+ stp2_15 = stp1_15; \
+ stp2_11 = stp1_11; \
+ stp2_12 = stp1_12; \
+ \
+ stp2_16 = _mm_add_epi16(stp1_16, stp1_19); \
+ stp2_17 = _mm_add_epi16(stp1_17, stp1_18); \
+ stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); \
+ stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); \
+ stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); \
+ stp2_21 = _mm_sub_epi16(stp1_22, stp1_21); \
+ stp2_22 = _mm_add_epi16(stp1_22, stp1_21); \
+ stp2_23 = _mm_add_epi16(stp1_23, stp1_20); \
+ \
+ stp2_24 = _mm_add_epi16(stp1_24, stp1_27); \
+ stp2_25 = _mm_add_epi16(stp1_25, stp1_26); \
+ stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); \
+ stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); \
+ stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); \
+ stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); \
+ stp2_30 = _mm_add_epi16(stp1_29, stp1_30); \
+ stp2_31 = _mm_add_epi16(stp1_28, stp1_31); \
+ } \
+ \
+ /* Stage5 */ \
+ { \
+ const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \
+ const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \
+ const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \
+ const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \
+ \
+ const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); \
+ const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); \
+ const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \
+ const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \
+ \
+ const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \
+ const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \
+ /* IDCT32 computes stp2_0 +/- stp2_3 and stp2_1 +/- stp2_2 here; this variant copies stp2_0 and stp2_1 instead */ \
+ stp1_0 = stp2_0; \
+ stp1_1 = stp2_1; \
+ stp1_2 = stp2_1; \
+ stp1_3 = stp2_0; \
+ \
+ tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \
+ tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \
+ tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \
+ tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \
+ \
+ tmp0 = _mm_add_epi32(tmp0, rounding); \
+ tmp1 = _mm_add_epi32(tmp1, rounding); \
+ tmp2 = _mm_add_epi32(tmp2, rounding); \
+ tmp3 = _mm_add_epi32(tmp3, rounding); \
+ \
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \
+ \
+ stp1_5 = _mm_packs_epi32(tmp0, tmp1); \
+ stp1_6 = _mm_packs_epi32(tmp2, tmp3); \
+ \
+ stp1_4 = stp2_4; \
+ stp1_7 = stp2_7; \
+ \
+ stp1_8 = _mm_add_epi16(stp2_8, stp2_11); \
+ stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \
+ stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \
+ stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); \
+ stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); \
+ stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \
+ stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \
+ stp1_15 = _mm_add_epi16(stp2_15, stp2_12); \
+ \
+ stp1_16 = stp2_16; \
+ stp1_17 = stp2_17; \
+ \
+ MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, \
+ stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, stp1_19, \
+ stp1_28) \
+ MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, \
+ stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, stp1_21, \
+ stp1_26) \
+ \
+ stp1_22 = stp2_22; \
+ stp1_23 = stp2_23; \
+ stp1_24 = stp2_24; \
+ stp1_25 = stp2_25; \
+ stp1_30 = stp2_30; \
+ stp1_31 = stp2_31; \
+ } \
+ \
+ /* Stage6 */ \
+ { \
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \
+ const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \
+ const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \
+ \
+ stp2_0 = _mm_add_epi16(stp1_0, stp1_7); \
+ stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \
+ stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \
+ stp2_3 = _mm_add_epi16(stp1_3, stp1_4); \
+ stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); \
+ stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \
+ stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \
+ stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); \
+ \
+ stp2_8 = stp1_8; \
+ stp2_9 = stp1_9; \
+ stp2_14 = stp1_14; \
+ stp2_15 = stp1_15; \
+ \
+ MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, stg6_0, \
+ stg4_0, stg6_0, stg4_0, stp2_10, stp2_13, stp2_11, \
+ stp2_12) \
+ \
+ stp2_16 = _mm_add_epi16(stp1_16, stp1_23); \
+ stp2_17 = _mm_add_epi16(stp1_17, stp1_22); \
+ stp2_18 = _mm_add_epi16(stp1_18, stp1_21); \
+ stp2_19 = _mm_add_epi16(stp1_19, stp1_20); \
+ stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); \
+ stp2_21 = _mm_sub_epi16(stp1_18, stp1_21); \
+ stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); \
+ stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); \
+ \
+ stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); \
+ stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); \
+ stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); \
+ stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); \
+ stp2_28 = _mm_add_epi16(stp1_27, stp1_28); \
+ stp2_29 = _mm_add_epi16(stp1_26, stp1_29); \
+ stp2_30 = _mm_add_epi16(stp1_25, stp1_30); \
+ stp2_31 = _mm_add_epi16(stp1_24, stp1_31); \
+ } \
+ \
+ /* Stage7: final butterflies for outputs 0-15 and the cospi_16_64 rotations for outputs 20-27 */ \
+ { \
+ const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \
+ const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \
+ const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \
+ const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \
+ \
+ const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \
+ const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \
+ const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); \
+ const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); \
+ \
+ stp1_0 = _mm_add_epi16(stp2_0, stp2_15); \
+ stp1_1 = _mm_add_epi16(stp2_1, stp2_14); \
+ stp1_2 = _mm_add_epi16(stp2_2, stp2_13); \
+ stp1_3 = _mm_add_epi16(stp2_3, stp2_12); \
+ stp1_4 = _mm_add_epi16(stp2_4, stp2_11); \
+ stp1_5 = _mm_add_epi16(stp2_5, stp2_10); \
+ stp1_6 = _mm_add_epi16(stp2_6, stp2_9); \
+ stp1_7 = _mm_add_epi16(stp2_7, stp2_8); \
+ stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); \
+ stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); \
+ stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); \
+ stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); \
+ stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); \
+ stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); \
+ stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); \
+ stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); \
+ \
+ stp1_16 = stp2_16; \
+ stp1_17 = stp2_17; \
+ stp1_18 = stp2_18; \
+ stp1_19 = stp2_19; \
+ \
+ MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, \
+ stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, stp1_21, \
+ stp1_26) \
+ MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, \
+ stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, stp1_23, \
+ stp1_24) \
+ \
+ stp1_28 = stp2_28; \
+ stp1_29 = stp2_29; \
+ stp1_30 = stp2_30; \
+ stp1_31 = stp2_31; \
+ }
+
+#define IDCT32 \
+ /* Stage1 */ \
+ { \
+ const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], in[31]); \
+ const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], in[31]); \
+ const __m128i lo_17_15 = _mm_unpacklo_epi16(in[17], in[15]); \
+ const __m128i hi_17_15 = _mm_unpackhi_epi16(in[17], in[15]); \
+ \
+ const __m128i lo_9_23 = _mm_unpacklo_epi16(in[9], in[23]); \
+ const __m128i hi_9_23 = _mm_unpackhi_epi16(in[9], in[23]); \
+ const __m128i lo_25_7 = _mm_unpacklo_epi16(in[25], in[7]); \
+ const __m128i hi_25_7 = _mm_unpackhi_epi16(in[25], in[7]); \
+ \
+ const __m128i lo_5_27 = _mm_unpacklo_epi16(in[5], in[27]); \
+ const __m128i hi_5_27 = _mm_unpackhi_epi16(in[5], in[27]); \
+ const __m128i lo_21_11 = _mm_unpacklo_epi16(in[21], in[11]); \
+ const __m128i hi_21_11 = _mm_unpackhi_epi16(in[21], in[11]); \
+ \
+ const __m128i lo_13_19 = _mm_unpacklo_epi16(in[13], in[19]); \
+ const __m128i hi_13_19 = _mm_unpackhi_epi16(in[13], in[19]); \
+ const __m128i lo_29_3 = _mm_unpacklo_epi16(in[29], in[3]); \
+ const __m128i hi_29_3 = _mm_unpackhi_epi16(in[29], in[3]); \
+ \
+ MULTIPLICATION_AND_ADD(lo_1_31, hi_1_31, lo_17_15, hi_17_15, stg1_0, \
+ stg1_1, stg1_2, stg1_3, stp1_16, stp1_31, stp1_17, \
+ stp1_30) \
+ MULTIPLICATION_AND_ADD(lo_9_23, hi_9_23, lo_25_7, hi_25_7, stg1_4, stg1_5, \
+ stg1_6, stg1_7, stp1_18, stp1_29, stp1_19, stp1_28) \
+ MULTIPLICATION_AND_ADD(lo_5_27, hi_5_27, lo_21_11, hi_21_11, stg1_8, \
+ stg1_9, stg1_10, stg1_11, stp1_20, stp1_27, \
+ stp1_21, stp1_26) \
+ MULTIPLICATION_AND_ADD(lo_13_19, hi_13_19, lo_29_3, hi_29_3, stg1_12, \
+ stg1_13, stg1_14, stg1_15, stp1_22, stp1_25, \
+ stp1_23, stp1_24) \
+ } \
+ \
+ /* Stage2 */ \
+ { \
+ const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], in[30]); \
+ const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], in[30]); \
+ const __m128i lo_18_14 = _mm_unpacklo_epi16(in[18], in[14]); \
+ const __m128i hi_18_14 = _mm_unpackhi_epi16(in[18], in[14]); \
+ \
+ const __m128i lo_10_22 = _mm_unpacklo_epi16(in[10], in[22]); \
+ const __m128i hi_10_22 = _mm_unpackhi_epi16(in[10], in[22]); \
+ const __m128i lo_26_6 = _mm_unpacklo_epi16(in[26], in[6]); \
+ const __m128i hi_26_6 = _mm_unpackhi_epi16(in[26], in[6]); \
+ \
+ MULTIPLICATION_AND_ADD(lo_2_30, hi_2_30, lo_18_14, hi_18_14, stg2_0, \
+ stg2_1, stg2_2, stg2_3, stp2_8, stp2_15, stp2_9, \
+ stp2_14) \
+ MULTIPLICATION_AND_ADD(lo_10_22, hi_10_22, lo_26_6, hi_26_6, stg2_4, \
+ stg2_5, stg2_6, stg2_7, stp2_10, stp2_13, stp2_11, \
+ stp2_12) \
+ \
+ stp2_16 = _mm_add_epi16(stp1_16, stp1_17); \
+ stp2_17 = _mm_sub_epi16(stp1_16, stp1_17); \
+ stp2_18 = _mm_sub_epi16(stp1_19, stp1_18); \
+ stp2_19 = _mm_add_epi16(stp1_19, stp1_18); \
+ \
+ stp2_20 = _mm_add_epi16(stp1_20, stp1_21); \
+ stp2_21 = _mm_sub_epi16(stp1_20, stp1_21); \
+ stp2_22 = _mm_sub_epi16(stp1_23, stp1_22); \
+ stp2_23 = _mm_add_epi16(stp1_23, stp1_22); \
+ \
+ stp2_24 = _mm_add_epi16(stp1_24, stp1_25); \
+ stp2_25 = _mm_sub_epi16(stp1_24, stp1_25); \
+ stp2_26 = _mm_sub_epi16(stp1_27, stp1_26); \
+ stp2_27 = _mm_add_epi16(stp1_27, stp1_26); \
+ \
+ stp2_28 = _mm_add_epi16(stp1_28, stp1_29); \
+ stp2_29 = _mm_sub_epi16(stp1_28, stp1_29); \
+ stp2_30 = _mm_sub_epi16(stp1_31, stp1_30); \
+ stp2_31 = _mm_add_epi16(stp1_31, stp1_30); \
+ } \
+ \
+ /* Stage3 */ \
+ { \
+ const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], in[28]); \
+ const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], in[28]); \
+ const __m128i lo_20_12 = _mm_unpacklo_epi16(in[20], in[12]); \
+ const __m128i hi_20_12 = _mm_unpackhi_epi16(in[20], in[12]); \
+ \
+ const __m128i lo_17_30 = _mm_unpacklo_epi16(stp2_17, stp2_30); \
+ const __m128i hi_17_30 = _mm_unpackhi_epi16(stp2_17, stp2_30); \
+ const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \
+ const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \
+ \
+ const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \
+ const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \
+ const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \
+ const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \
+ \
+ MULTIPLICATION_AND_ADD(lo_4_28, hi_4_28, lo_20_12, hi_20_12, stg3_0, \
+ stg3_1, stg3_2, stg3_3, stp1_4, stp1_7, stp1_5, \
+ stp1_6) \
+ \
+ stp1_8 = _mm_add_epi16(stp2_8, stp2_9); \
+ stp1_9 = _mm_sub_epi16(stp2_8, stp2_9); \
+ stp1_10 = _mm_sub_epi16(stp2_11, stp2_10); \
+ stp1_11 = _mm_add_epi16(stp2_11, stp2_10); \
+ stp1_12 = _mm_add_epi16(stp2_12, stp2_13); \
+ stp1_13 = _mm_sub_epi16(stp2_12, stp2_13); \
+ stp1_14 = _mm_sub_epi16(stp2_15, stp2_14); \
+ stp1_15 = _mm_add_epi16(stp2_15, stp2_14); \
+ \
+ MULTIPLICATION_AND_ADD(lo_17_30, hi_17_30, lo_18_29, hi_18_29, stg3_4, \
+ stg3_5, stg3_6, stg3_4, stp1_17, stp1_30, stp1_18, \
+ stp1_29) \
+ MULTIPLICATION_AND_ADD(lo_21_26, hi_21_26, lo_22_25, hi_22_25, stg3_8, \
+ stg3_9, stg3_10, stg3_8, stp1_21, stp1_26, stp1_22, \
+ stp1_25) \
+ \
+ stp1_16 = stp2_16; \
+ stp1_31 = stp2_31; \
+ stp1_19 = stp2_19; \
+ stp1_20 = stp2_20; \
+ stp1_23 = stp2_23; \
+ stp1_24 = stp2_24; \
+ stp1_27 = stp2_27; \
+ stp1_28 = stp2_28; \
+ } \
+ \
+ /* Stage4 */ \
+ { \
+ const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], in[16]); \
+ const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], in[16]); \
+ const __m128i lo_8_24 = _mm_unpacklo_epi16(in[8], in[24]); \
+ const __m128i hi_8_24 = _mm_unpackhi_epi16(in[8], in[24]); \
+ \
+ const __m128i lo_9_14 = _mm_unpacklo_epi16(stp1_9, stp1_14); \
+ const __m128i hi_9_14 = _mm_unpackhi_epi16(stp1_9, stp1_14); \
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \
+ \
+ MULTIPLICATION_AND_ADD(lo_0_16, hi_0_16, lo_8_24, hi_8_24, stg4_0, stg4_1, \
+ stg4_2, stg4_3, stp2_0, stp2_1, stp2_2, stp2_3) \
+ \
+ stp2_4 = _mm_add_epi16(stp1_4, stp1_5); \
+ stp2_5 = _mm_sub_epi16(stp1_4, stp1_5); \
+ stp2_6 = _mm_sub_epi16(stp1_7, stp1_6); \
+ stp2_7 = _mm_add_epi16(stp1_7, stp1_6); \
+ \
+ MULTIPLICATION_AND_ADD(lo_9_14, hi_9_14, lo_10_13, hi_10_13, stg4_4, \
+ stg4_5, stg4_6, stg4_4, stp2_9, stp2_14, stp2_10, \
+ stp2_13) \
+ \
+ stp2_8 = stp1_8; \
+ stp2_15 = stp1_15; \
+ stp2_11 = stp1_11; \
+ stp2_12 = stp1_12; \
+ \
+ stp2_16 = _mm_add_epi16(stp1_16, stp1_19); \
+ stp2_17 = _mm_add_epi16(stp1_17, stp1_18); \
+ stp2_18 = _mm_sub_epi16(stp1_17, stp1_18); \
+ stp2_19 = _mm_sub_epi16(stp1_16, stp1_19); \
+ stp2_20 = _mm_sub_epi16(stp1_23, stp1_20); \
+ stp2_21 = _mm_sub_epi16(stp1_22, stp1_21); \
+ stp2_22 = _mm_add_epi16(stp1_22, stp1_21); \
+ stp2_23 = _mm_add_epi16(stp1_23, stp1_20); \
+ \
+ stp2_24 = _mm_add_epi16(stp1_24, stp1_27); \
+ stp2_25 = _mm_add_epi16(stp1_25, stp1_26); \
+ stp2_26 = _mm_sub_epi16(stp1_25, stp1_26); \
+ stp2_27 = _mm_sub_epi16(stp1_24, stp1_27); \
+ stp2_28 = _mm_sub_epi16(stp1_31, stp1_28); \
+ stp2_29 = _mm_sub_epi16(stp1_30, stp1_29); \
+ stp2_30 = _mm_add_epi16(stp1_29, stp1_30); \
+ stp2_31 = _mm_add_epi16(stp1_28, stp1_31); \
+ } \
+ \
+ /* Stage5 */ \
+ { \
+ const __m128i lo_6_5 = _mm_unpacklo_epi16(stp2_6, stp2_5); \
+ const __m128i hi_6_5 = _mm_unpackhi_epi16(stp2_6, stp2_5); \
+ const __m128i lo_18_29 = _mm_unpacklo_epi16(stp2_18, stp2_29); \
+ const __m128i hi_18_29 = _mm_unpackhi_epi16(stp2_18, stp2_29); \
+ \
+ const __m128i lo_19_28 = _mm_unpacklo_epi16(stp2_19, stp2_28); \
+ const __m128i hi_19_28 = _mm_unpackhi_epi16(stp2_19, stp2_28); \
+ const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \
+ const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \
+ \
+ const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \
+ const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \
+ \
+ stp1_0 = _mm_add_epi16(stp2_0, stp2_3); \
+ stp1_1 = _mm_add_epi16(stp2_1, stp2_2); \
+ stp1_2 = _mm_sub_epi16(stp2_1, stp2_2); \
+ stp1_3 = _mm_sub_epi16(stp2_0, stp2_3); \
+ \
+ tmp0 = _mm_madd_epi16(lo_6_5, stg4_1); \
+ tmp1 = _mm_madd_epi16(hi_6_5, stg4_1); \
+ tmp2 = _mm_madd_epi16(lo_6_5, stg4_0); \
+ tmp3 = _mm_madd_epi16(hi_6_5, stg4_0); \
+ \
+ tmp0 = _mm_add_epi32(tmp0, rounding); \
+ tmp1 = _mm_add_epi32(tmp1, rounding); \
+ tmp2 = _mm_add_epi32(tmp2, rounding); \
+ tmp3 = _mm_add_epi32(tmp3, rounding); \
+ \
+ tmp0 = _mm_srai_epi32(tmp0, DCT_CONST_BITS); \
+ tmp1 = _mm_srai_epi32(tmp1, DCT_CONST_BITS); \
+ tmp2 = _mm_srai_epi32(tmp2, DCT_CONST_BITS); \
+ tmp3 = _mm_srai_epi32(tmp3, DCT_CONST_BITS); \
+ \
+ stp1_5 = _mm_packs_epi32(tmp0, tmp1); \
+ stp1_6 = _mm_packs_epi32(tmp2, tmp3); \
+ \
+ stp1_4 = stp2_4; \
+ stp1_7 = stp2_7; \
+ \
+ stp1_8 = _mm_add_epi16(stp2_8, stp2_11); \
+ stp1_9 = _mm_add_epi16(stp2_9, stp2_10); \
+ stp1_10 = _mm_sub_epi16(stp2_9, stp2_10); \
+ stp1_11 = _mm_sub_epi16(stp2_8, stp2_11); \
+ stp1_12 = _mm_sub_epi16(stp2_15, stp2_12); \
+ stp1_13 = _mm_sub_epi16(stp2_14, stp2_13); \
+ stp1_14 = _mm_add_epi16(stp2_14, stp2_13); \
+ stp1_15 = _mm_add_epi16(stp2_15, stp2_12); \
+ \
+ stp1_16 = stp2_16; \
+ stp1_17 = stp2_17; \
+ \
+ MULTIPLICATION_AND_ADD(lo_18_29, hi_18_29, lo_19_28, hi_19_28, stg4_4, \
+ stg4_5, stg4_4, stg4_5, stp1_18, stp1_29, stp1_19, \
+ stp1_28) \
+ MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg4_6, \
+ stg4_4, stg4_6, stg4_4, stp1_20, stp1_27, stp1_21, \
+ stp1_26) \
+ \
+ stp1_22 = stp2_22; \
+ stp1_23 = stp2_23; \
+ stp1_24 = stp2_24; \
+ stp1_25 = stp2_25; \
+ stp1_30 = stp2_30; \
+ stp1_31 = stp2_31; \
+ } \
+ \
+ /* Stage6 */ \
+ { \
+ const __m128i lo_10_13 = _mm_unpacklo_epi16(stp1_10, stp1_13); \
+ const __m128i hi_10_13 = _mm_unpackhi_epi16(stp1_10, stp1_13); \
+ const __m128i lo_11_12 = _mm_unpacklo_epi16(stp1_11, stp1_12); \
+ const __m128i hi_11_12 = _mm_unpackhi_epi16(stp1_11, stp1_12); \
+ \
+ stp2_0 = _mm_add_epi16(stp1_0, stp1_7); \
+ stp2_1 = _mm_add_epi16(stp1_1, stp1_6); \
+ stp2_2 = _mm_add_epi16(stp1_2, stp1_5); \
+ stp2_3 = _mm_add_epi16(stp1_3, stp1_4); \
+ stp2_4 = _mm_sub_epi16(stp1_3, stp1_4); \
+ stp2_5 = _mm_sub_epi16(stp1_2, stp1_5); \
+ stp2_6 = _mm_sub_epi16(stp1_1, stp1_6); \
+ stp2_7 = _mm_sub_epi16(stp1_0, stp1_7); \
+ \
+ stp2_8 = stp1_8; \
+ stp2_9 = stp1_9; \
+ stp2_14 = stp1_14; \
+ stp2_15 = stp1_15; \
+ \
+ MULTIPLICATION_AND_ADD(lo_10_13, hi_10_13, lo_11_12, hi_11_12, stg6_0, \
+ stg4_0, stg6_0, stg4_0, stp2_10, stp2_13, stp2_11, \
+ stp2_12) \
+ \
+ stp2_16 = _mm_add_epi16(stp1_16, stp1_23); \
+ stp2_17 = _mm_add_epi16(stp1_17, stp1_22); \
+ stp2_18 = _mm_add_epi16(stp1_18, stp1_21); \
+ stp2_19 = _mm_add_epi16(stp1_19, stp1_20); \
+ stp2_20 = _mm_sub_epi16(stp1_19, stp1_20); \
+ stp2_21 = _mm_sub_epi16(stp1_18, stp1_21); \
+ stp2_22 = _mm_sub_epi16(stp1_17, stp1_22); \
+ stp2_23 = _mm_sub_epi16(stp1_16, stp1_23); \
+ \
+ stp2_24 = _mm_sub_epi16(stp1_31, stp1_24); \
+ stp2_25 = _mm_sub_epi16(stp1_30, stp1_25); \
+ stp2_26 = _mm_sub_epi16(stp1_29, stp1_26); \
+ stp2_27 = _mm_sub_epi16(stp1_28, stp1_27); \
+ stp2_28 = _mm_add_epi16(stp1_27, stp1_28); \
+ stp2_29 = _mm_add_epi16(stp1_26, stp1_29); \
+ stp2_30 = _mm_add_epi16(stp1_25, stp1_30); \
+ stp2_31 = _mm_add_epi16(stp1_24, stp1_31); \
+ } \
+ \
+ /* Stage7 */ \
+ { \
+ const __m128i lo_20_27 = _mm_unpacklo_epi16(stp2_20, stp2_27); \
+ const __m128i hi_20_27 = _mm_unpackhi_epi16(stp2_20, stp2_27); \
+ const __m128i lo_21_26 = _mm_unpacklo_epi16(stp2_21, stp2_26); \
+ const __m128i hi_21_26 = _mm_unpackhi_epi16(stp2_21, stp2_26); \
+ \
+ const __m128i lo_22_25 = _mm_unpacklo_epi16(stp2_22, stp2_25); \
+ const __m128i hi_22_25 = _mm_unpackhi_epi16(stp2_22, stp2_25); \
+ const __m128i lo_23_24 = _mm_unpacklo_epi16(stp2_23, stp2_24); \
+ const __m128i hi_23_24 = _mm_unpackhi_epi16(stp2_23, stp2_24); \
+ \
+ stp1_0 = _mm_add_epi16(stp2_0, stp2_15); \
+ stp1_1 = _mm_add_epi16(stp2_1, stp2_14); \
+ stp1_2 = _mm_add_epi16(stp2_2, stp2_13); \
+ stp1_3 = _mm_add_epi16(stp2_3, stp2_12); \
+ stp1_4 = _mm_add_epi16(stp2_4, stp2_11); \
+ stp1_5 = _mm_add_epi16(stp2_5, stp2_10); \
+ stp1_6 = _mm_add_epi16(stp2_6, stp2_9); \
+ stp1_7 = _mm_add_epi16(stp2_7, stp2_8); \
+ stp1_8 = _mm_sub_epi16(stp2_7, stp2_8); \
+ stp1_9 = _mm_sub_epi16(stp2_6, stp2_9); \
+ stp1_10 = _mm_sub_epi16(stp2_5, stp2_10); \
+ stp1_11 = _mm_sub_epi16(stp2_4, stp2_11); \
+ stp1_12 = _mm_sub_epi16(stp2_3, stp2_12); \
+ stp1_13 = _mm_sub_epi16(stp2_2, stp2_13); \
+ stp1_14 = _mm_sub_epi16(stp2_1, stp2_14); \
+ stp1_15 = _mm_sub_epi16(stp2_0, stp2_15); \
+ \
+ stp1_16 = stp2_16; \
+ stp1_17 = stp2_17; \
+ stp1_18 = stp2_18; \
+ stp1_19 = stp2_19; \
+ \
+ MULTIPLICATION_AND_ADD(lo_20_27, hi_20_27, lo_21_26, hi_21_26, stg6_0, \
+ stg4_0, stg6_0, stg4_0, stp1_20, stp1_27, stp1_21, \
+ stp1_26) \
+ MULTIPLICATION_AND_ADD(lo_22_25, hi_22_25, lo_23_24, hi_23_24, stg6_0, \
+ stg4_0, stg6_0, stg4_0, stp1_22, stp1_25, stp1_23, \
+ stp1_24) \
+ \
+ stp1_28 = stp2_28; \
+ stp1_29 = stp2_29; \
+ stp1_30 = stp2_30; \
+ stp1_31 = stp2_31; \
+ }
+
+// 32x32 inverse DCT + add; assumes only the upper-left 8x8 coeffs are non-zero.
+void vp10_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
+ int stride) {  // rows of dest are `stride` bytes apart
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i final_rounding = _mm_set1_epi16(1 << 5);  // half of the final >> 6
+
+ // vp10_idct constants for each stage
+ const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64);
+ const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64);
+ const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64);
+ const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64);
+ const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64);
+ const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64);
+ const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64);
+ const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64);
+
+ const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64);
+ const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+ const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64);
+
+ const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64);
+ const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64);
+ const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64);
+
+ const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+
+ const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+ __m128i in[32], col[32];  // col keeps the 32 first-pass result vectors
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7,
+ stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15,
+ stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, stp1_23,
+ stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, stp1_30, stp1_31;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+ stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15,
+ stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, stp2_23,
+ stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, stp2_30, stp2_31;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int i;
+
+ // Load input data. Only the top-left 8x8 block is read (8 rows, 32 apart).
+ in[0] = _mm_load_si128((const __m128i *)input);
+ in[1] = _mm_load_si128((const __m128i *)(input + 32));
+ in[2] = _mm_load_si128((const __m128i *)(input + 64));
+ in[3] = _mm_load_si128((const __m128i *)(input + 96));
+ in[4] = _mm_load_si128((const __m128i *)(input + 128));
+ in[5] = _mm_load_si128((const __m128i *)(input + 160));
+ in[6] = _mm_load_si128((const __m128i *)(input + 192));
+ in[7] = _mm_load_si128((const __m128i *)(input + 224));
+
+ for (i = 8; i < 32; ++i) {  // the remaining 24 input vectors are zero
+ in[i] = _mm_setzero_si128();
+ }
+
+ array_transpose_8x8(in, in);
+ // TODO(hkuang): The following transposes are unnecessary, but removing them
+ // leads to a performance drop on some devices.
+ array_transpose_8x8(in + 8, in + 8);
+ array_transpose_8x8(in + 16, in + 16);
+ array_transpose_8x8(in + 24, in + 24);
+
+ IDCT32_34  // first 1-D pass; its outputs are read from stp1_0..stp1_31 below
+
+ // 1_D: Store 32 intermediate results for each 8x32 block.
+ col[0] = _mm_add_epi16(stp1_0, stp1_31);
+ col[1] = _mm_add_epi16(stp1_1, stp1_30);
+ col[2] = _mm_add_epi16(stp1_2, stp1_29);
+ col[3] = _mm_add_epi16(stp1_3, stp1_28);
+ col[4] = _mm_add_epi16(stp1_4, stp1_27);
+ col[5] = _mm_add_epi16(stp1_5, stp1_26);
+ col[6] = _mm_add_epi16(stp1_6, stp1_25);
+ col[7] = _mm_add_epi16(stp1_7, stp1_24);
+ col[8] = _mm_add_epi16(stp1_8, stp1_23);
+ col[9] = _mm_add_epi16(stp1_9, stp1_22);
+ col[10] = _mm_add_epi16(stp1_10, stp1_21);
+ col[11] = _mm_add_epi16(stp1_11, stp1_20);
+ col[12] = _mm_add_epi16(stp1_12, stp1_19);
+ col[13] = _mm_add_epi16(stp1_13, stp1_18);
+ col[14] = _mm_add_epi16(stp1_14, stp1_17);
+ col[15] = _mm_add_epi16(stp1_15, stp1_16);
+ col[16] = _mm_sub_epi16(stp1_15, stp1_16);
+ col[17] = _mm_sub_epi16(stp1_14, stp1_17);
+ col[18] = _mm_sub_epi16(stp1_13, stp1_18);
+ col[19] = _mm_sub_epi16(stp1_12, stp1_19);
+ col[20] = _mm_sub_epi16(stp1_11, stp1_20);
+ col[21] = _mm_sub_epi16(stp1_10, stp1_21);
+ col[22] = _mm_sub_epi16(stp1_9, stp1_22);
+ col[23] = _mm_sub_epi16(stp1_8, stp1_23);
+ col[24] = _mm_sub_epi16(stp1_7, stp1_24);
+ col[25] = _mm_sub_epi16(stp1_6, stp1_25);
+ col[26] = _mm_sub_epi16(stp1_5, stp1_26);
+ col[27] = _mm_sub_epi16(stp1_4, stp1_27);
+ col[28] = _mm_sub_epi16(stp1_3, stp1_28);
+ col[29] = _mm_sub_epi16(stp1_2, stp1_29);
+ col[30] = _mm_sub_epi16(stp1_1, stp1_30);
+ col[31] = _mm_sub_epi16(stp1_0, stp1_31);
+ for (i = 0; i < 4; i++) {  // second pass: 8 vectors of col per iteration
+ int j;
+ const __m128i zero = _mm_setzero_si128();  // NOTE(review): presumably used by RECON_AND_STORE - confirm
+ // Transpose 32x8 block to 8x32 block
+ array_transpose_8x8(col + i * 8, in);
+ IDCT32_34  // second 1-D pass on the transposed vectors
+
+ // 2_D: Calculate the results and store them to destination.
+ in[0] = _mm_add_epi16(stp1_0, stp1_31);
+ in[1] = _mm_add_epi16(stp1_1, stp1_30);
+ in[2] = _mm_add_epi16(stp1_2, stp1_29);
+ in[3] = _mm_add_epi16(stp1_3, stp1_28);
+ in[4] = _mm_add_epi16(stp1_4, stp1_27);
+ in[5] = _mm_add_epi16(stp1_5, stp1_26);
+ in[6] = _mm_add_epi16(stp1_6, stp1_25);
+ in[7] = _mm_add_epi16(stp1_7, stp1_24);
+ in[8] = _mm_add_epi16(stp1_8, stp1_23);
+ in[9] = _mm_add_epi16(stp1_9, stp1_22);
+ in[10] = _mm_add_epi16(stp1_10, stp1_21);
+ in[11] = _mm_add_epi16(stp1_11, stp1_20);
+ in[12] = _mm_add_epi16(stp1_12, stp1_19);
+ in[13] = _mm_add_epi16(stp1_13, stp1_18);
+ in[14] = _mm_add_epi16(stp1_14, stp1_17);
+ in[15] = _mm_add_epi16(stp1_15, stp1_16);
+ in[16] = _mm_sub_epi16(stp1_15, stp1_16);
+ in[17] = _mm_sub_epi16(stp1_14, stp1_17);
+ in[18] = _mm_sub_epi16(stp1_13, stp1_18);
+ in[19] = _mm_sub_epi16(stp1_12, stp1_19);
+ in[20] = _mm_sub_epi16(stp1_11, stp1_20);
+ in[21] = _mm_sub_epi16(stp1_10, stp1_21);
+ in[22] = _mm_sub_epi16(stp1_9, stp1_22);
+ in[23] = _mm_sub_epi16(stp1_8, stp1_23);
+ in[24] = _mm_sub_epi16(stp1_7, stp1_24);
+ in[25] = _mm_sub_epi16(stp1_6, stp1_25);
+ in[26] = _mm_sub_epi16(stp1_5, stp1_26);
+ in[27] = _mm_sub_epi16(stp1_4, stp1_27);
+ in[28] = _mm_sub_epi16(stp1_3, stp1_28);
+ in[29] = _mm_sub_epi16(stp1_2, stp1_29);
+ in[30] = _mm_sub_epi16(stp1_1, stp1_30);
+ in[31] = _mm_sub_epi16(stp1_0, stp1_31);
+
+ for (j = 0; j < 32; ++j) {
+ // Final rounding and shift: saturating add of 32, then arithmetic >> 6
+ in[j] = _mm_adds_epi16(in[j], final_rounding);
+ in[j] = _mm_srai_epi16(in[j], 6);
+ RECON_AND_STORE(dest + j * stride, in[j]);
+ }
+
+ dest += 8;  // step to the next group of 8 destination columns
+ }
+}
+
+void vp10_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
+ int stride) {  // full 32x32 inverse DCT of input, result added into dest
+ const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i final_rounding = _mm_set1_epi16(1 << 5);  // half of the final >> 6
+ const __m128i zero = _mm_setzero_si128();
+
+ // vp10_idct constants for each stage
+ const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64);
+ const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64);
+ const __m128i stg1_2 = pair_set_epi16(cospi_15_64, -cospi_17_64);
+ const __m128i stg1_3 = pair_set_epi16(cospi_17_64, cospi_15_64);
+ const __m128i stg1_4 = pair_set_epi16(cospi_23_64, -cospi_9_64);
+ const __m128i stg1_5 = pair_set_epi16(cospi_9_64, cospi_23_64);
+ const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64);
+ const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64);
+ const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64);
+ const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64);
+ const __m128i stg1_10 = pair_set_epi16(cospi_11_64, -cospi_21_64);
+ const __m128i stg1_11 = pair_set_epi16(cospi_21_64, cospi_11_64);
+ const __m128i stg1_12 = pair_set_epi16(cospi_19_64, -cospi_13_64);
+ const __m128i stg1_13 = pair_set_epi16(cospi_13_64, cospi_19_64);
+ const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64);
+ const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64);
+
+ const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+ const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64);
+ const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+ const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64);
+ const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+ const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64);
+ const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+ const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64);
+
+ const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+ const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64);
+ const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64);
+ const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64);
+ const __m128i stg3_8 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i stg3_9 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i stg3_10 = pair_set_epi16(-cospi_12_64, -cospi_20_64);
+
+ const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
+ const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+
+ const __m128i stg6_0 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+
+ __m128i in[32], col[128], zero_idx[16];  // col: 4 first-pass groups x 32 vectors
+ __m128i stp1_0, stp1_1, stp1_2, stp1_3, stp1_4, stp1_5, stp1_6, stp1_7,
+ stp1_8, stp1_9, stp1_10, stp1_11, stp1_12, stp1_13, stp1_14, stp1_15,
+ stp1_16, stp1_17, stp1_18, stp1_19, stp1_20, stp1_21, stp1_22, stp1_23,
+ stp1_24, stp1_25, stp1_26, stp1_27, stp1_28, stp1_29, stp1_30, stp1_31;
+ __m128i stp2_0, stp2_1, stp2_2, stp2_3, stp2_4, stp2_5, stp2_6, stp2_7,
+ stp2_8, stp2_9, stp2_10, stp2_11, stp2_12, stp2_13, stp2_14, stp2_15,
+ stp2_16, stp2_17, stp2_18, stp2_19, stp2_20, stp2_21, stp2_22, stp2_23,
+ stp2_24, stp2_25, stp2_26, stp2_27, stp2_28, stp2_29, stp2_30, stp2_31;
+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int i, j, i32;
+
+ for (i = 0; i < 4; i++) {  // first pass: four groups, 32 loads each
+ i32 = (i << 5);  // base index of this group's 32 vectors in col
+ // First 1-D vp10_idct
+ // Load input data. NOTE(review): LOAD_DQCOEFF presumably advances input - confirm.
+ LOAD_DQCOEFF(in[0], input);
+ LOAD_DQCOEFF(in[8], input);
+ LOAD_DQCOEFF(in[16], input);
+ LOAD_DQCOEFF(in[24], input);
+ LOAD_DQCOEFF(in[1], input);
+ LOAD_DQCOEFF(in[9], input);
+ LOAD_DQCOEFF(in[17], input);
+ LOAD_DQCOEFF(in[25], input);
+ LOAD_DQCOEFF(in[2], input);
+ LOAD_DQCOEFF(in[10], input);
+ LOAD_DQCOEFF(in[18], input);
+ LOAD_DQCOEFF(in[26], input);
+ LOAD_DQCOEFF(in[3], input);
+ LOAD_DQCOEFF(in[11], input);
+ LOAD_DQCOEFF(in[19], input);
+ LOAD_DQCOEFF(in[27], input);
+
+ LOAD_DQCOEFF(in[4], input);
+ LOAD_DQCOEFF(in[12], input);
+ LOAD_DQCOEFF(in[20], input);
+ LOAD_DQCOEFF(in[28], input);
+ LOAD_DQCOEFF(in[5], input);
+ LOAD_DQCOEFF(in[13], input);
+ LOAD_DQCOEFF(in[21], input);
+ LOAD_DQCOEFF(in[29], input);
+ LOAD_DQCOEFF(in[6], input);
+ LOAD_DQCOEFF(in[14], input);
+ LOAD_DQCOEFF(in[22], input);
+ LOAD_DQCOEFF(in[30], input);
+ LOAD_DQCOEFF(in[7], input);
+ LOAD_DQCOEFF(in[15], input);
+ LOAD_DQCOEFF(in[23], input);
+ LOAD_DQCOEFF(in[31], input);
+
+ // Check whether all entries are zero: OR-reduce in[0..31] into zero_idx[14].
+ zero_idx[0] = _mm_or_si128(in[0], in[1]);
+ zero_idx[1] = _mm_or_si128(in[2], in[3]);
+ zero_idx[2] = _mm_or_si128(in[4], in[5]);
+ zero_idx[3] = _mm_or_si128(in[6], in[7]);
+ zero_idx[4] = _mm_or_si128(in[8], in[9]);
+ zero_idx[5] = _mm_or_si128(in[10], in[11]);
+ zero_idx[6] = _mm_or_si128(in[12], in[13]);
+ zero_idx[7] = _mm_or_si128(in[14], in[15]);
+ zero_idx[8] = _mm_or_si128(in[16], in[17]);
+ zero_idx[9] = _mm_or_si128(in[18], in[19]);
+ zero_idx[10] = _mm_or_si128(in[20], in[21]);
+ zero_idx[11] = _mm_or_si128(in[22], in[23]);
+ zero_idx[12] = _mm_or_si128(in[24], in[25]);
+ zero_idx[13] = _mm_or_si128(in[26], in[27]);
+ zero_idx[14] = _mm_or_si128(in[28], in[29]);
+ zero_idx[15] = _mm_or_si128(in[30], in[31]);
+
+ zero_idx[0] = _mm_or_si128(zero_idx[0], zero_idx[1]);
+ zero_idx[1] = _mm_or_si128(zero_idx[2], zero_idx[3]);
+ zero_idx[2] = _mm_or_si128(zero_idx[4], zero_idx[5]);
+ zero_idx[3] = _mm_or_si128(zero_idx[6], zero_idx[7]);
+ zero_idx[4] = _mm_or_si128(zero_idx[8], zero_idx[9]);
+ zero_idx[5] = _mm_or_si128(zero_idx[10], zero_idx[11]);
+ zero_idx[6] = _mm_or_si128(zero_idx[12], zero_idx[13]);
+ zero_idx[7] = _mm_or_si128(zero_idx[14], zero_idx[15]);
+
+ zero_idx[8] = _mm_or_si128(zero_idx[0], zero_idx[1]);
+ zero_idx[9] = _mm_or_si128(zero_idx[2], zero_idx[3]);
+ zero_idx[10] = _mm_or_si128(zero_idx[4], zero_idx[5]);
+ zero_idx[11] = _mm_or_si128(zero_idx[6], zero_idx[7]);
+ zero_idx[12] = _mm_or_si128(zero_idx[8], zero_idx[9]);
+ zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]);
+ zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]);
+
+ if (_mm_movemask_epi8(_mm_cmpeq_epi32(zero_idx[14], zero)) == 0xFFFF) {  // all loaded bits are zero
+ col[i32 + 0] = _mm_setzero_si128();
+ col[i32 + 1] = _mm_setzero_si128();
+ col[i32 + 2] = _mm_setzero_si128();
+ col[i32 + 3] = _mm_setzero_si128();
+ col[i32 + 4] = _mm_setzero_si128();
+ col[i32 + 5] = _mm_setzero_si128();
+ col[i32 + 6] = _mm_setzero_si128();
+ col[i32 + 7] = _mm_setzero_si128();
+ col[i32 + 8] = _mm_setzero_si128();
+ col[i32 + 9] = _mm_setzero_si128();
+ col[i32 + 10] = _mm_setzero_si128();
+ col[i32 + 11] = _mm_setzero_si128();
+ col[i32 + 12] = _mm_setzero_si128();
+ col[i32 + 13] = _mm_setzero_si128();
+ col[i32 + 14] = _mm_setzero_si128();
+ col[i32 + 15] = _mm_setzero_si128();
+ col[i32 + 16] = _mm_setzero_si128();
+ col[i32 + 17] = _mm_setzero_si128();
+ col[i32 + 18] = _mm_setzero_si128();
+ col[i32 + 19] = _mm_setzero_si128();
+ col[i32 + 20] = _mm_setzero_si128();
+ col[i32 + 21] = _mm_setzero_si128();
+ col[i32 + 22] = _mm_setzero_si128();
+ col[i32 + 23] = _mm_setzero_si128();
+ col[i32 + 24] = _mm_setzero_si128();
+ col[i32 + 25] = _mm_setzero_si128();
+ col[i32 + 26] = _mm_setzero_si128();
+ col[i32 + 27] = _mm_setzero_si128();
+ col[i32 + 28] = _mm_setzero_si128();
+ col[i32 + 29] = _mm_setzero_si128();
+ col[i32 + 30] = _mm_setzero_si128();
+ col[i32 + 31] = _mm_setzero_si128();
+ continue;  // skip the transform for this group; its col entries are already zero
+ }
+
+ // Transpose 32x8 block to 8x32 block
+ array_transpose_8x8(in, in);
+ array_transpose_8x8(in + 8, in + 8);
+ array_transpose_8x8(in + 16, in + 16);
+ array_transpose_8x8(in + 24, in + 24);
+
+ IDCT32  // first 1-D pass; its outputs are read from stp1_0..stp1_31 below
+
+ // 1_D: Store 32 intermediate results for each 8x32 block.
+ col[i32 + 0] = _mm_add_epi16(stp1_0, stp1_31);
+ col[i32 + 1] = _mm_add_epi16(stp1_1, stp1_30);
+ col[i32 + 2] = _mm_add_epi16(stp1_2, stp1_29);
+ col[i32 + 3] = _mm_add_epi16(stp1_3, stp1_28);
+ col[i32 + 4] = _mm_add_epi16(stp1_4, stp1_27);
+ col[i32 + 5] = _mm_add_epi16(stp1_5, stp1_26);
+ col[i32 + 6] = _mm_add_epi16(stp1_6, stp1_25);
+ col[i32 + 7] = _mm_add_epi16(stp1_7, stp1_24);
+ col[i32 + 8] = _mm_add_epi16(stp1_8, stp1_23);
+ col[i32 + 9] = _mm_add_epi16(stp1_9, stp1_22);
+ col[i32 + 10] = _mm_add_epi16(stp1_10, stp1_21);
+ col[i32 + 11] = _mm_add_epi16(stp1_11, stp1_20);
+ col[i32 + 12] = _mm_add_epi16(stp1_12, stp1_19);
+ col[i32 + 13] = _mm_add_epi16(stp1_13, stp1_18);
+ col[i32 + 14] = _mm_add_epi16(stp1_14, stp1_17);
+ col[i32 + 15] = _mm_add_epi16(stp1_15, stp1_16);
+ col[i32 + 16] = _mm_sub_epi16(stp1_15, stp1_16);
+ col[i32 + 17] = _mm_sub_epi16(stp1_14, stp1_17);
+ col[i32 + 18] = _mm_sub_epi16(stp1_13, stp1_18);
+ col[i32 + 19] = _mm_sub_epi16(stp1_12, stp1_19);
+ col[i32 + 20] = _mm_sub_epi16(stp1_11, stp1_20);
+ col[i32 + 21] = _mm_sub_epi16(stp1_10, stp1_21);
+ col[i32 + 22] = _mm_sub_epi16(stp1_9, stp1_22);
+ col[i32 + 23] = _mm_sub_epi16(stp1_8, stp1_23);
+ col[i32 + 24] = _mm_sub_epi16(stp1_7, stp1_24);
+ col[i32 + 25] = _mm_sub_epi16(stp1_6, stp1_25);
+ col[i32 + 26] = _mm_sub_epi16(stp1_5, stp1_26);
+ col[i32 + 27] = _mm_sub_epi16(stp1_4, stp1_27);
+ col[i32 + 28] = _mm_sub_epi16(stp1_3, stp1_28);
+ col[i32 + 29] = _mm_sub_epi16(stp1_2, stp1_29);
+ col[i32 + 30] = _mm_sub_epi16(stp1_1, stp1_30);
+ col[i32 + 31] = _mm_sub_epi16(stp1_0, stp1_31);
+ }
+ for (i = 0; i < 4; i++) {  // second pass: one group of 8 dest columns per iteration
+ // Second 1-D vp10_idct
+ j = i << 3;  // offset of this iteration's 8 vectors within each group of col
+
+ // Transpose 32x8 block to 8x32 block
+ array_transpose_8x8(col + j, in);
+ array_transpose_8x8(col + j + 32, in + 8);
+ array_transpose_8x8(col + j + 64, in + 16);
+ array_transpose_8x8(col + j + 96, in + 24);
+
+ IDCT32  // second 1-D pass on the transposed vectors
+
+ // 2_D: Calculate the results and store them to destination.
+ in[0] = _mm_add_epi16(stp1_0, stp1_31);
+ in[1] = _mm_add_epi16(stp1_1, stp1_30);
+ in[2] = _mm_add_epi16(stp1_2, stp1_29);
+ in[3] = _mm_add_epi16(stp1_3, stp1_28);
+ in[4] = _mm_add_epi16(stp1_4, stp1_27);
+ in[5] = _mm_add_epi16(stp1_5, stp1_26);
+ in[6] = _mm_add_epi16(stp1_6, stp1_25);
+ in[7] = _mm_add_epi16(stp1_7, stp1_24);
+ in[8] = _mm_add_epi16(stp1_8, stp1_23);
+ in[9] = _mm_add_epi16(stp1_9, stp1_22);
+ in[10] = _mm_add_epi16(stp1_10, stp1_21);
+ in[11] = _mm_add_epi16(stp1_11, stp1_20);
+ in[12] = _mm_add_epi16(stp1_12, stp1_19);
+ in[13] = _mm_add_epi16(stp1_13, stp1_18);
+ in[14] = _mm_add_epi16(stp1_14, stp1_17);
+ in[15] = _mm_add_epi16(stp1_15, stp1_16);
+ in[16] = _mm_sub_epi16(stp1_15, stp1_16);
+ in[17] = _mm_sub_epi16(stp1_14, stp1_17);
+ in[18] = _mm_sub_epi16(stp1_13, stp1_18);
+ in[19] = _mm_sub_epi16(stp1_12, stp1_19);
+ in[20] = _mm_sub_epi16(stp1_11, stp1_20);
+ in[21] = _mm_sub_epi16(stp1_10, stp1_21);
+ in[22] = _mm_sub_epi16(stp1_9, stp1_22);
+ in[23] = _mm_sub_epi16(stp1_8, stp1_23);
+ in[24] = _mm_sub_epi16(stp1_7, stp1_24);
+ in[25] = _mm_sub_epi16(stp1_6, stp1_25);
+ in[26] = _mm_sub_epi16(stp1_5, stp1_26);
+ in[27] = _mm_sub_epi16(stp1_4, stp1_27);
+ in[28] = _mm_sub_epi16(stp1_3, stp1_28);
+ in[29] = _mm_sub_epi16(stp1_2, stp1_29);
+ in[30] = _mm_sub_epi16(stp1_1, stp1_30);
+ in[31] = _mm_sub_epi16(stp1_0, stp1_31);
+
+ for (j = 0; j < 32; ++j) {
+ // Final rounding and shift: saturating add of 32, then arithmetic >> 6
+ in[j] = _mm_adds_epi16(in[j], final_rounding);
+ in[j] = _mm_srai_epi16(in[j], 6);
+ RECON_AND_STORE(dest + j * stride, in[j]);
+ }
+
+ dest += 8;  // step to the next group of 8 destination columns
+ }
+}
+
+void vp10_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest,
+                               int stride) {
+  // NOTE(review): `zero` is not named below; RECON_AND_STORE presumably expands
+  // to code that references it, so it must stay in scope -- confirm.
+  const __m128i zero = _mm_setzero_si128();
+  __m128i dc_value;
+  int strip, row;
+  // DC-only inverse transform: input[0] is multiplied by cospi_16_64 twice,
+  // each product going through dct_const_round_shift, then rounded >> 6.
+  int dc = (int)dct_const_round_shift(input[0] * cospi_16_64);
+  dc = (int)dct_const_round_shift(dc * cospi_16_64);
+  dc = ROUND_POWER_OF_TWO(dc, 6);
+  dc_value = _mm_set1_epi16(dc);
+  // Apply the broadcast DC to 32 rows of each of four strips, 8 apart in dest.
+  for (strip = 0; strip < 4; ++strip, dest += 8) {
+    for (row = 0; row < 32; ++row) {
+      RECON_AND_STORE(dest + row * stride, dc_value);
+    }
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Clamp every signed 16-bit lane of |value| to the range [0, (1 << bd) - 1].
+// Lanes above the maximum become the maximum; negative lanes become 0.
+static INLINE __m128i clamp_high_sse2(__m128i value, int bd) {
+  const __m128i lane_one = _mm_set1_epi16(1);
+  const __m128i floor_val = _mm_set1_epi16(0);
+  // Largest value allowed for bit depth bd, broadcast to all lanes.
+  const __m128i ceil_val =
+      _mm_subs_epi16(_mm_slli_epi16(lane_one, bd), lane_one);
+  // min() applies the upper bound, max() then zeroes the negative lanes.
+  const __m128i capped = _mm_min_epi16(value, ceil_val);
+  return _mm_max_epi16(capped, floor_val);
+}
+
+void vp10_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
+ int stride, int bd) {  // 4x4 inverse DCT added into dest8, clamped for bit depth bd
+ tran_low_t out[4 * 4];  // row-pass results, read only by the C column fallback
+ tran_low_t *outptr = out;
+ int i, j;
+ __m128i inptr[4];
+ __m128i sign_bits[2];
+ __m128i temp_mm, min_input, max_input;
+ int test;
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // destination is accessed as 16-bit pixels
+ int optimised_cols = 0;  // set to 1 when the SSE2 column pass can be used
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i eight = _mm_set1_epi16(8);  // rounding term for the final >> 4
+ const __m128i max = _mm_set1_epi16(12043);  // values above this take the C path
+ const __m128i min = _mm_set1_epi16(-12043);  // values below this take the C path
+ // Load input into __m128i (four tran_low_t per load; assumes 32-bit tran_low_t)
+ inptr[0] = _mm_loadu_si128((const __m128i *)input);
+ inptr[1] = _mm_loadu_si128((const __m128i *)(input + 4));
+ inptr[2] = _mm_loadu_si128((const __m128i *)(input + 8));
+ inptr[3] = _mm_loadu_si128((const __m128i *)(input + 12));
+
+ // Pack to 16 bits (signed saturation); inptr[0..1] then hold all 16 coeffs
+ inptr[0] = _mm_packs_epi32(inptr[0], inptr[1]);
+ inptr[1] = _mm_packs_epi32(inptr[2], inptr[3]);
+
+ max_input = _mm_max_epi16(inptr[0], inptr[1]);
+ min_input = _mm_min_epi16(inptr[0], inptr[1]);
+ max_input = _mm_cmpgt_epi16(max_input, max);
+ min_input = _mm_cmplt_epi16(min_input, min);
+ temp_mm = _mm_or_si128(max_input, min_input);
+ test = _mm_movemask_epi8(temp_mm);  // non-zero if any lane is outside [min, max]
+
+ if (!test) {
+ // Do the row transform
+ vp10_idct4_sse2(inptr);
+
+ // Check the min & max values
+ max_input = _mm_max_epi16(inptr[0], inptr[1]);
+ min_input = _mm_min_epi16(inptr[0], inptr[1]);
+ max_input = _mm_cmpgt_epi16(max_input, max);
+ min_input = _mm_cmplt_epi16(min_input, min);
+ temp_mm = _mm_or_si128(max_input, min_input);
+ test = _mm_movemask_epi8(temp_mm);
+
+ if (test) {  // row output out of range: widen into out[] for the C column pass
+ transpose_4x4(inptr);
+ sign_bits[0] = _mm_cmplt_epi16(inptr[0], zero);  // all-ones in negative lanes
+ sign_bits[1] = _mm_cmplt_epi16(inptr[1], zero);
+ inptr[3] = _mm_unpackhi_epi16(inptr[1], sign_bits[1]);  // interleave = sign-extend to 32 bits
+ inptr[2] = _mm_unpacklo_epi16(inptr[1], sign_bits[1]);
+ inptr[1] = _mm_unpackhi_epi16(inptr[0], sign_bits[0]);
+ inptr[0] = _mm_unpacklo_epi16(inptr[0], sign_bits[0]);
+ _mm_storeu_si128((__m128i *)outptr, inptr[0]);
+ _mm_storeu_si128((__m128i *)(outptr + 4), inptr[1]);
+ _mm_storeu_si128((__m128i *)(outptr + 8), inptr[2]);
+ _mm_storeu_si128((__m128i *)(outptr + 12), inptr[3]);
+ } else {
+ // Set to use the optimised transform for the column
+ optimised_cols = 1;
+ }
+ } else {
+ // Run the un-optimised row transform
+ for (i = 0; i < 4; ++i) {
+ vp10_highbd_idct4_c(input, outptr, bd);
+ input += 4;
+ outptr += 4;
+ }
+ }
+
+ if (optimised_cols) {
+ vp10_idct4_sse2(inptr);
+
+ // Final round and shift: (x + 8) >> 4
+ inptr[0] = _mm_add_epi16(inptr[0], eight);
+ inptr[1] = _mm_add_epi16(inptr[1], eight);
+
+ inptr[0] = _mm_srai_epi16(inptr[0], 4);
+ inptr[1] = _mm_srai_epi16(inptr[1], 4);
+
+ // Reconstruction and Store
+ {
+ __m128i d0 = _mm_loadl_epi64((const __m128i *)dest);
+ __m128i d2 = _mm_loadl_epi64((const __m128i *)(dest + stride * 2));
+ d0 = _mm_unpacklo_epi64(
+ d0, _mm_loadl_epi64((const __m128i *)(dest + stride)));  // d0 = dest rows 0 and 1
+ d2 = _mm_unpacklo_epi64(
+ d2, _mm_loadl_epi64((const __m128i *)(dest + stride * 3)));  // d2 = dest rows 2 and 3
+ d0 = clamp_high_sse2(_mm_adds_epi16(d0, inptr[0]), bd);
+ d2 = clamp_high_sse2(_mm_adds_epi16(d2, inptr[1]), bd);
+ // store input0
+ _mm_storel_epi64((__m128i *)dest, d0);
+ // store input1
+ d0 = _mm_srli_si128(d0, 8);
+ _mm_storel_epi64((__m128i *)(dest + stride), d0);
+ // store input2
+ _mm_storel_epi64((__m128i *)(dest + stride * 2), d2);
+ // store input3
+ d2 = _mm_srli_si128(d2, 8);
+ _mm_storel_epi64((__m128i *)(dest + stride * 3), d2);
+ }
+ } else {
+ // Run the un-optimised column transform
+ tran_low_t temp_in[4], temp_out[4];
+ // Columns
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];  // gather column i of out
+ vp10_highbd_idct4_c(temp_in, temp_out, bd);
+ for (j = 0; j < 4; ++j) {
+ dest[j * stride + i] = highbd_clip_pixel_add(
+ dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
+ }
+ }
+ }
+}
+
+void vp10_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,
+ int stride, int bd) {  // 8x8 inverse DCT added into dest8, clamped for bit depth bd
+ tran_low_t out[8 * 8];  // row-pass results, read only by the C column fallback
+ tran_low_t *outptr = out;
+ int i, j, test;
+ __m128i inptr[8];
+ __m128i min_input, max_input, temp1, temp2, sign_bits;
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // destination is accessed as 16-bit pixels
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i sixteen = _mm_set1_epi16(16);  // rounding term for the final >> 5
+ const __m128i max = _mm_set1_epi16(6201);  // values above this take the C path
+ const __m128i min = _mm_set1_epi16(-6201);  // values below this take the C path
+ int optimised_cols = 0;  // set to 1 when the SSE2 column pass can be used
+
+ // Load input into __m128i & pack to 16 bits (signed saturation), one row per vector
+ for (i = 0; i < 8; i++) {
+ temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i));
+ temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4));
+ inptr[i] = _mm_packs_epi32(temp1, temp2);
+ }
+
+ // Find the min & max for the row transform
+ max_input = _mm_max_epi16(inptr[0], inptr[1]);
+ min_input = _mm_min_epi16(inptr[0], inptr[1]);
+ for (i = 2; i < 8; i++) {
+ max_input = _mm_max_epi16(max_input, inptr[i]);
+ min_input = _mm_min_epi16(min_input, inptr[i]);
+ }
+ max_input = _mm_cmpgt_epi16(max_input, max);
+ min_input = _mm_cmplt_epi16(min_input, min);
+ temp1 = _mm_or_si128(max_input, min_input);
+ test = _mm_movemask_epi8(temp1);  // non-zero if any lane is outside [min, max]
+
+ if (!test) {
+ // Do the row transform
+ vp10_idct8_sse2(inptr);
+
+ // Find the min & max for the column transform
+ max_input = _mm_max_epi16(inptr[0], inptr[1]);
+ min_input = _mm_min_epi16(inptr[0], inptr[1]);
+ for (i = 2; i < 8; i++) {
+ max_input = _mm_max_epi16(max_input, inptr[i]);
+ min_input = _mm_min_epi16(min_input, inptr[i]);
+ }
+ max_input = _mm_cmpgt_epi16(max_input, max);
+ min_input = _mm_cmplt_epi16(min_input, min);
+ temp1 = _mm_or_si128(max_input, min_input);
+ test = _mm_movemask_epi8(temp1);
+
+ if (test) {  // row output out of range: widen into out[] for the C column pass
+ array_transpose_8x8(inptr, inptr);
+ for (i = 0; i < 8; i++) {
+ sign_bits = _mm_cmplt_epi16(inptr[i], zero);  // all-ones in negative lanes
+ temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits);  // upper 4 lanes sign-extended to 32 bits
+ temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits);  // lower 4 lanes sign-extended to 32 bits
+ _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1);
+ _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2);
+ }
+ } else {
+ // Set to use the optimised transform for the column
+ optimised_cols = 1;
+ }
+ } else {
+ // Run the un-optimised row transform
+ for (i = 0; i < 8; ++i) {
+ vp10_highbd_idct8_c(input, outptr, bd);
+ input += 8;
+ outptr += 8;
+ }
+ }
+
+ if (optimised_cols) {
+ vp10_idct8_sse2(inptr);
+
+ // Final round & shift ((x + 16) >> 5) and Reconstruction and Store
+ {
+ __m128i d[8];
+ for (i = 0; i < 8; i++) {
+ inptr[i] = _mm_add_epi16(inptr[i], sixteen);
+ d[i] = _mm_loadu_si128((const __m128i *)(dest + stride * i));
+ inptr[i] = _mm_srai_epi16(inptr[i], 5);
+ d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd);
+ // Store
+ _mm_storeu_si128((__m128i *)(dest + stride * i), d[i]);
+ }
+ }
+ } else {
+ // Run the un-optimised column transform
+ tran_low_t temp_in[8], temp_out[8];
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];  // gather column i of out
+ vp10_highbd_idct8_c(temp_in, temp_out, bd);
+ for (j = 0; j < 8; ++j) {
+ dest[j * stride + i] = highbd_clip_pixel_add(
+ dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
+ }
+ }
+ }
+}
+
+void vp10_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
+ int stride, int bd) {  // 8x8 inverse DCT variant that processes only the first 4 input rows
+ tran_low_t out[8 * 8] = { 0 };  // zeroed: the fallback row passes fill only rows 0..3
+ tran_low_t *outptr = out;
+ int i, j, test;
+ __m128i inptr[8];
+ __m128i min_input, max_input, temp1, temp2, sign_bits;
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);  // destination is accessed as 16-bit pixels
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i sixteen = _mm_set1_epi16(16);  // rounding term for the final >> 5
+ const __m128i max = _mm_set1_epi16(6201);  // values above this take the C path
+ const __m128i min = _mm_set1_epi16(-6201);  // values below this take the C path
+ int optimised_cols = 0;  // set to 1 when the SSE2 column pass can be used
+
+ // Load input into __m128i & pack to 16 bits (signed saturation), one row per vector
+ for (i = 0; i < 8; i++) {
+ temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i));
+ temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4));
+ inptr[i] = _mm_packs_epi32(temp1, temp2);
+ }
+
+ // Find the min & max for the row transform.
+ // Only the first 4 rows have non-zero coeffs, so only those are checked.
+ max_input = _mm_max_epi16(inptr[0], inptr[1]);
+ min_input = _mm_min_epi16(inptr[0], inptr[1]);
+ for (i = 2; i < 4; i++) {
+ max_input = _mm_max_epi16(max_input, inptr[i]);
+ min_input = _mm_min_epi16(min_input, inptr[i]);
+ }
+ max_input = _mm_cmpgt_epi16(max_input, max);
+ min_input = _mm_cmplt_epi16(min_input, min);
+ temp1 = _mm_or_si128(max_input, min_input);
+ test = _mm_movemask_epi8(temp1);  // non-zero if any checked lane is outside [min, max]
+
+ if (!test) {
+ // Do the row transform
+ vp10_idct8_sse2(inptr);
+
+ // Find the min & max for the column transform
+ // N.B. Only first 4 cols contain non-zero coeffs
+ max_input = _mm_max_epi16(inptr[0], inptr[1]);
+ min_input = _mm_min_epi16(inptr[0], inptr[1]);
+ for (i = 2; i < 8; i++) {
+ max_input = _mm_max_epi16(max_input, inptr[i]);
+ min_input = _mm_min_epi16(min_input, inptr[i]);
+ }
+ max_input = _mm_cmpgt_epi16(max_input, max);
+ min_input = _mm_cmplt_epi16(min_input, min);
+ temp1 = _mm_or_si128(max_input, min_input);
+ test = _mm_movemask_epi8(temp1);
+
+ if (test) {  // row output out of range: widen into out[] for the C column pass
+ // Use the fact that only the first 4 rows contain non-zero coeffs
+ array_transpose_4X8(inptr, inptr);
+ for (i = 0; i < 4; i++) {
+ sign_bits = _mm_cmplt_epi16(inptr[i], zero);  // all-ones in negative lanes
+ temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits);  // upper 4 lanes sign-extended to 32 bits
+ temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits);  // lower 4 lanes sign-extended to 32 bits
+ _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1);
+ _mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2);
+ }
+ } else {
+ // Set to use the optimised transform for the column
+ optimised_cols = 1;
+ }
+ } else {
+ // Run the un-optimised row transform (first 4 rows only)
+ for (i = 0; i < 4; ++i) {
+ vp10_highbd_idct8_c(input, outptr, bd);
+ input += 8;
+ outptr += 8;
+ }
+ }
+
+ if (optimised_cols) {
+ vp10_idct8_sse2(inptr);
+
+ // Final round & shift ((x + 16) >> 5) and Reconstruction and Store
+ {
+ __m128i d[8];
+ for (i = 0; i < 8; i++) {
+ inptr[i] = _mm_add_epi16(inptr[i], sixteen);
+ d[i] = _mm_loadu_si128((const __m128i *)(dest + stride * i));
+ inptr[i] = _mm_srai_epi16(inptr[i], 5);
+ d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd);
+ // Store
+ _mm_storeu_si128((__m128i *)(dest + stride * i), d[i]);
+ }
+ }
+ } else {
+ // Run the un-optimised column transform
+ tran_low_t temp_in[8], temp_out[8];
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];  // gather column i of out
+ vp10_highbd_idct8_c(temp_in, temp_out, bd);
+ for (j = 0; j < 8; ++j) {
+ dest[j * stride + i] = highbd_clip_pixel_add(
+ dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
+ }
+ }
+ }
+}
+
+void vp10_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,
+ int stride, int bd) {
+ tran_low_t out[16 * 16];  // 32-bit row results; read only by the C column pass
+ tran_low_t *outptr = out;
+ int i, j, test;
+ __m128i inptr[32];  // [i]: elements 0-7 of row i, [i + 16]: elements 8-15
+ __m128i min_input, max_input, temp1, temp2, sign_bits;
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i rounding = _mm_set1_epi16(32);  // bias added before the >> 6
+ const __m128i max = _mm_set1_epi16(3155);  // beyond +/-3155 the C path is used
+ const __m128i min = _mm_set1_epi16(-3155);
+ int optimised_cols = 0;
+
+ // Load input into __m128i & pack to 16 bits (signed saturation)
+ for (i = 0; i < 16; i++) {
+ temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i));
+ temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4));
+ inptr[i] = _mm_packs_epi32(temp1, temp2);
+ temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8));
+ temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12));
+ inptr[i + 16] = _mm_packs_epi32(temp1, temp2);
+ }
+
+ // Find the min & max for the row transform
+ max_input = _mm_max_epi16(inptr[0], inptr[1]);
+ min_input = _mm_min_epi16(inptr[0], inptr[1]);
+ for (i = 2; i < 32; i++) {
+ max_input = _mm_max_epi16(max_input, inptr[i]);
+ min_input = _mm_min_epi16(min_input, inptr[i]);
+ }
+ max_input = _mm_cmpgt_epi16(max_input, max);
+ min_input = _mm_cmplt_epi16(min_input, min);
+ temp1 = _mm_or_si128(max_input, min_input);
+ test = _mm_movemask_epi8(temp1);  // non-zero if any lane is outside [min, max]
+
+ if (!test) {
+ // Do the row transform
+ vp10_idct16_sse2(inptr, inptr + 16);
+
+ // Find the min & max for the column transform
+ max_input = _mm_max_epi16(inptr[0], inptr[1]);
+ min_input = _mm_min_epi16(inptr[0], inptr[1]);
+ for (i = 2; i < 32; i++) {
+ max_input = _mm_max_epi16(max_input, inptr[i]);
+ min_input = _mm_min_epi16(min_input, inptr[i]);
+ }
+ max_input = _mm_cmpgt_epi16(max_input, max);
+ min_input = _mm_cmplt_epi16(min_input, min);
+ temp1 = _mm_or_si128(max_input, min_input);
+ test = _mm_movemask_epi8(temp1);
+
+ if (test) {  // row output out of range: widen to 32 bits for the C column pass
+ array_transpose_16x16(inptr, inptr + 16);
+ for (i = 0; i < 16; i++) {
+ sign_bits = _mm_cmplt_epi16(inptr[i], zero);  // 0xFFFF where negative
+ temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits);  // sign-extend lanes 0-3
+ temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits);  // sign-extend lanes 4-7
+ _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1);
+ _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2);
+ sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero);
+ temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits);
+ temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits);
+ _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1);
+ _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2);
+ }
+ } else {
+ // Set to use the optimised transform for the column
+ optimised_cols = 1;
+ }
+ } else {
+ // Run the un-optimised row transform
+ for (i = 0; i < 16; ++i) {
+ vp10_highbd_idct16_c(input, outptr, bd);
+ input += 16;
+ outptr += 16;
+ }
+ }
+
+ if (optimised_cols) {
+ vp10_idct16_sse2(inptr, inptr + 16);
+
+ // Final round & shift and Reconstruction and Store
+ {
+ __m128i d[2];  // the two 8-pixel halves of one destination row
+ for (i = 0; i < 16; i++) {
+ inptr[i] = _mm_add_epi16(inptr[i], rounding);
+ inptr[i + 16] = _mm_add_epi16(inptr[i + 16], rounding);
+ d[0] = _mm_loadu_si128((const __m128i *)(dest + stride * i));
+ d[1] = _mm_loadu_si128((const __m128i *)(dest + stride * i + 8));
+ inptr[i] = _mm_srai_epi16(inptr[i], 6);
+ inptr[i + 16] = _mm_srai_epi16(inptr[i + 16], 6);
+ d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i]), bd);
+ d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i + 16]), bd);
+ // Store
+ _mm_storeu_si128((__m128i *)(dest + stride * i), d[0]);
+ _mm_storeu_si128((__m128i *)(dest + stride * i + 8), d[1]);
+ }
+ }
+ } else {
+ // Run the un-optimised column transform
+ tran_low_t temp_in[16], temp_out[16];
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];  // gather column i
+ vp10_highbd_idct16_c(temp_in, temp_out, bd);
+ for (j = 0; j < 16; ++j) {
+ dest[j * stride + i] = highbd_clip_pixel_add(
+ dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+ }
+ }
+ }
+}
+
+void vp10_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
+ int stride, int bd) {
+ tran_low_t out[16 * 16] = { 0 };  // zeroed: only rows 0-3 are written below
+ tran_low_t *outptr = out;
+ int i, j, test;
+ __m128i inptr[32];  // [i]: elements 0-7 of row i, [i + 16]: elements 8-15
+ __m128i min_input, max_input, temp1, temp2, sign_bits;
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+ const __m128i zero = _mm_set1_epi16(0);
+ const __m128i rounding = _mm_set1_epi16(32);  // bias added before the >> 6
+ const __m128i max = _mm_set1_epi16(3155);  // beyond +/-3155 the C path is used
+ const __m128i min = _mm_set1_epi16(-3155);
+ int optimised_cols = 0;
+
+ // Load input into __m128i & pack to 16 bits (signed saturation)
+ for (i = 0; i < 16; i++) {
+ temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i));
+ temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4));
+ inptr[i] = _mm_packs_epi32(temp1, temp2);
+ temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8));
+ temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12));
+ inptr[i + 16] = _mm_packs_epi32(temp1, temp2);
+ }
+
+ // Find the min & max for the row transform
+ // Since all non-zero dct coefficients are in upper-left 4x4 area,
+ // we only need to consider first 4 rows here.
+ max_input = _mm_max_epi16(inptr[0], inptr[1]);
+ min_input = _mm_min_epi16(inptr[0], inptr[1]);
+ for (i = 2; i < 4; i++) {
+ max_input = _mm_max_epi16(max_input, inptr[i]);
+ min_input = _mm_min_epi16(min_input, inptr[i]);
+ }
+ max_input = _mm_cmpgt_epi16(max_input, max);
+ min_input = _mm_cmplt_epi16(min_input, min);
+ temp1 = _mm_or_si128(max_input, min_input);
+ test = _mm_movemask_epi8(temp1);  // non-zero if any lane is outside [min, max]
+
+ if (!test) {
+ // Do the row transform (N.B. This transposes inptr)
+ vp10_idct16_sse2(inptr, inptr + 16);
+
+ // Find the min & max for the column transform
+ // N.B. Only first 4 cols contain non-zero coeffs
+ max_input = _mm_max_epi16(inptr[0], inptr[1]);
+ min_input = _mm_min_epi16(inptr[0], inptr[1]);
+ for (i = 2; i < 16; i++) {
+ max_input = _mm_max_epi16(max_input, inptr[i]);
+ min_input = _mm_min_epi16(min_input, inptr[i]);
+ }
+ max_input = _mm_cmpgt_epi16(max_input, max);
+ min_input = _mm_cmplt_epi16(min_input, min);
+ temp1 = _mm_or_si128(max_input, min_input);
+ test = _mm_movemask_epi8(temp1);
+
+ if (test) {  // row output out of range: widen to 32 bits for the C column pass
+ // Use fact only first 4 rows contain non-zero coeffs
+ array_transpose_8x8(inptr, inptr);
+ array_transpose_8x8(inptr + 8, inptr + 16);
+ for (i = 0; i < 4; i++) {
+ sign_bits = _mm_cmplt_epi16(inptr[i], zero);  // 0xFFFF where negative
+ temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits);  // sign-extend lanes 0-3
+ temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits);  // sign-extend lanes 4-7
+ _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1);
+ _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2);
+ sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero);
+ temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits);
+ temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits);
+ _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1);
+ _mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2);
+ }
+ } else {
+ // Set to use the optimised transform for the column
+ optimised_cols = 1;
+ }
+ } else {
+ // Run the un-optimised row transform
+ for (i = 0; i < 4; ++i) {  // rows 4-15 of out keep their zero initialiser
+ vp10_highbd_idct16_c(input, outptr, bd);
+ input += 16;
+ outptr += 16;
+ }
+ }
+
+ if (optimised_cols) {
+ vp10_idct16_sse2(inptr, inptr + 16);
+
+ // Final round & shift and Reconstruction and Store
+ {
+ __m128i d[2];  // the two 8-pixel halves of one destination row
+ for (i = 0; i < 16; i++) {
+ inptr[i] = _mm_add_epi16(inptr[i], rounding);
+ inptr[i + 16] = _mm_add_epi16(inptr[i + 16], rounding);
+ d[0] = _mm_loadu_si128((const __m128i *)(dest + stride * i));
+ d[1] = _mm_loadu_si128((const __m128i *)(dest + stride * i + 8));
+ inptr[i] = _mm_srai_epi16(inptr[i], 6);
+ inptr[i + 16] = _mm_srai_epi16(inptr[i + 16], 6);
+ d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i]), bd);
+ d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i + 16]), bd);
+ // Store
+ _mm_storeu_si128((__m128i *)(dest + stride * i), d[0]);
+ _mm_storeu_si128((__m128i *)(dest + stride * i + 8), d[1]);
+ }
+ }
+ } else {
+ // Run the un-optimised column transform
+ tran_low_t temp_in[16], temp_out[16];
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];  // gather column i
+ vp10_highbd_idct16_c(temp_in, temp_out, bd);
+ for (j = 0; j < 16; ++j) {
+ dest[j * stride + i] = highbd_clip_pixel_add(
+ dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
+ }
+ }
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/common/x86/vp10_inv_txfm_sse2.h b/av1/common/x86/vp10_inv_txfm_sse2.h
new file mode 100644
index 0000000..0839ab9
--- /dev/null
+++ b/av1/common/x86/vp10_inv_txfm_sse2.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_X86_INV_TXFM_SSE2_H_
+#define VPX_DSP_X86_INV_TXFM_SSE2_H_
+
+#include <emmintrin.h> // SSE2
+#include "./vpx_config.h"
+#include "aom/vpx_integer.h"
+#include "av1/common/vp10_inv_txfm.h"
+
+// Transpose the 8x8 grid of 16-bit lanes in in[0..7] into res[0..7]; res may alias in.
+static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);  // stage 1: 16-bit interleave
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
+ const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]);
+ const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
+ const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]);
+ const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]);
+
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);  // stage 2: 32-bit interleave
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+
+ res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1);  // stage 3: 64-bit interleave
+ res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1);
+ res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3);
+ res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3);
+ res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5);
+ res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5);
+ res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7);
+ res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
+}
+
+#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \
+ { /* NOTE(review): out0/out1 are never referenced; in0/in1 are overwritten */ \
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
+ \
+ in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \
+ in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \
+ }
+
+static INLINE void array_transpose_4X8(__m128i *in, __m128i *out) {
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);  // only lanes 0-3 of in[] are read
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
+
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+
+ out[0] = _mm_unpacklo_epi64(tr1_0, tr1_4);  // out[k] = lane k of in[0..7]
+ out[1] = _mm_unpackhi_epi64(tr1_0, tr1_4);
+ out[2] = _mm_unpacklo_epi64(tr1_2, tr1_6);
+ out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6);  // out[4..7] are left untouched
+}
+
+static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
+ __m128i tbuf[8];  // holds the transposed res1[0..7] while that slot is overwritten
+ array_transpose_8x8(res0, res0);  // res0[0..7]: transposed in place
+ array_transpose_8x8(res1, tbuf);
+ array_transpose_8x8(res0 + 8, res1);  // res0[8..15] transposed into res1[0..7]
+ array_transpose_8x8(res1 + 8, res1 + 8);  // res1[8..15]: transposed in place
+
+ res0[8] = tbuf[0];  // transposed res1[0..7] lands in res0[8..15]
+ res0[9] = tbuf[1];
+ res0[10] = tbuf[2];
+ res0[11] = tbuf[3];
+ res0[12] = tbuf[4];
+ res0[13] = tbuf[5];
+ res0[14] = tbuf[6];
+ res0[15] = tbuf[7];
+}
+
+static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
+ in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16));  // aligned load: input must be 16-byte aligned
+ in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16));  // each in[k] is 8 values at input + 16 * k
+ in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16));
+ in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16));
+ in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16));
+ in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16));
+ in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16));
+ in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16));
+
+ in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16));
+ in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16));
+ in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16));
+ in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16));
+ in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16));
+ in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16));
+ in[14] = _mm_load_si128((const __m128i *)(input + 14 * 16));
+ in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16));
+}
+
+#define RECON_AND_STORE(dest, in_x) \
+ { /* needs an all-zero __m128i named `zero` in scope at the expansion site */ \
+ __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); /* load 8 bytes */ \
+ d0 = _mm_unpacklo_epi8(d0, zero); /* widen to 16-bit lanes */ \
+ d0 = _mm_add_epi16(in_x, d0); \
+ d0 = _mm_packus_epi16(d0, d0); /* saturate back to unsigned 8 bits */ \
+ _mm_storel_epi64((__m128i *)(dest), d0); \
+ }
+
+static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) {
+ const __m128i final_rounding = _mm_set1_epi16(1 << 5);
+ const __m128i zero = _mm_setzero_si128();  // referenced by RECON_AND_STORE
+ // Final rounding and shift: (x + 32) >> 6, using a saturating add; in[] is modified
+ in[0] = _mm_adds_epi16(in[0], final_rounding);
+ in[1] = _mm_adds_epi16(in[1], final_rounding);
+ in[2] = _mm_adds_epi16(in[2], final_rounding);
+ in[3] = _mm_adds_epi16(in[3], final_rounding);
+ in[4] = _mm_adds_epi16(in[4], final_rounding);
+ in[5] = _mm_adds_epi16(in[5], final_rounding);
+ in[6] = _mm_adds_epi16(in[6], final_rounding);
+ in[7] = _mm_adds_epi16(in[7], final_rounding);
+ in[8] = _mm_adds_epi16(in[8], final_rounding);
+ in[9] = _mm_adds_epi16(in[9], final_rounding);
+ in[10] = _mm_adds_epi16(in[10], final_rounding);
+ in[11] = _mm_adds_epi16(in[11], final_rounding);
+ in[12] = _mm_adds_epi16(in[12], final_rounding);
+ in[13] = _mm_adds_epi16(in[13], final_rounding);
+ in[14] = _mm_adds_epi16(in[14], final_rounding);
+ in[15] = _mm_adds_epi16(in[15], final_rounding);
+
+ in[0] = _mm_srai_epi16(in[0], 6);
+ in[1] = _mm_srai_epi16(in[1], 6);
+ in[2] = _mm_srai_epi16(in[2], 6);
+ in[3] = _mm_srai_epi16(in[3], 6);
+ in[4] = _mm_srai_epi16(in[4], 6);
+ in[5] = _mm_srai_epi16(in[5], 6);
+ in[6] = _mm_srai_epi16(in[6], 6);
+ in[7] = _mm_srai_epi16(in[7], 6);
+ in[8] = _mm_srai_epi16(in[8], 6);
+ in[9] = _mm_srai_epi16(in[9], 6);
+ in[10] = _mm_srai_epi16(in[10], 6);
+ in[11] = _mm_srai_epi16(in[11], 6);
+ in[12] = _mm_srai_epi16(in[12], 6);
+ in[13] = _mm_srai_epi16(in[13], 6);
+ in[14] = _mm_srai_epi16(in[14], 6);
+ in[15] = _mm_srai_epi16(in[15], 6);
+
+ RECON_AND_STORE(dest + 0 * stride, in[0]);  // add in[k] to 8 pixels at dest + k * stride
+ RECON_AND_STORE(dest + 1 * stride, in[1]);
+ RECON_AND_STORE(dest + 2 * stride, in[2]);
+ RECON_AND_STORE(dest + 3 * stride, in[3]);
+ RECON_AND_STORE(dest + 4 * stride, in[4]);
+ RECON_AND_STORE(dest + 5 * stride, in[5]);
+ RECON_AND_STORE(dest + 6 * stride, in[6]);
+ RECON_AND_STORE(dest + 7 * stride, in[7]);
+ RECON_AND_STORE(dest + 8 * stride, in[8]);
+ RECON_AND_STORE(dest + 9 * stride, in[9]);
+ RECON_AND_STORE(dest + 10 * stride, in[10]);
+ RECON_AND_STORE(dest + 11 * stride, in[11]);
+ RECON_AND_STORE(dest + 12 * stride, in[12]);
+ RECON_AND_STORE(dest + 13 * stride, in[13]);
+ RECON_AND_STORE(dest + 14 * stride, in[14]);
+ RECON_AND_STORE(dest + 15 * stride, in[15]);
+}
+
+void idct4_sse2(__m128i *in);
+void idct8_sse2(__m128i *in);
+void idct16_sse2(__m128i *in0, __m128i *in1);
+void iadst4_sse2(__m128i *in);
+void iadst8_sse2(__m128i *in);
+void iadst16_sse2(__m128i *in0, __m128i *in1);
+
+#endif // VPX_DSP_X86_INV_TXFM_SSE2_H_
diff --git a/av1/common/x86/vp10_txfm1d_sse4.h b/av1/common/x86/vp10_txfm1d_sse4.h
new file mode 100644
index 0000000..f05a54c
--- /dev/null
+++ b/av1/common/x86/vp10_txfm1d_sse4.h
@@ -0,0 +1,144 @@
+#ifndef VP10_TXMF1D_SSE2_H_
+#define VP10_TXMF1D_SSE2_H_
+
+#include <smmintrin.h>
+#include "av1/common/vp10_txfm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_fdct4_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct8_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct16_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct32_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fdct64_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+void vp10_fadst4_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst8_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst16_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_fadst32_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+void vp10_idct4_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct8_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct16_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct32_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_idct64_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+void vp10_iadst4_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst8_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst16_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+void vp10_iadst32_new_sse4_1(const __m128i *input, __m128i *output,
+ const int8_t *cos_bit, const int8_t *stage_range);
+
+static INLINE void transpose_32_4x4(int stride, const __m128i *input,
+ __m128i *output) {  // 4x4 transpose of 32-bit lanes; rows are `stride` vectors apart
+ __m128i temp0 = _mm_unpacklo_epi32(input[0 * stride], input[2 * stride]);
+ __m128i temp1 = _mm_unpackhi_epi32(input[0 * stride], input[2 * stride]);
+ __m128i temp2 = _mm_unpacklo_epi32(input[1 * stride], input[3 * stride]);
+ __m128i temp3 = _mm_unpackhi_epi32(input[1 * stride], input[3 * stride]);
+
+ output[0 * stride] = _mm_unpacklo_epi32(temp0, temp2);  // lane 0 of the four input rows
+ output[1 * stride] = _mm_unpackhi_epi32(temp0, temp2);
+ output[2 * stride] = _mm_unpacklo_epi32(temp1, temp3);
+ output[3 * stride] = _mm_unpackhi_epi32(temp1, temp3);
+}
+
+// the entire input block can be represented by a grid of 4x4 blocks
+// each 4x4 block can be represented by 4 vertical __m128i
+// we transpose each 4x4 block internally
+// and store it at its transposed position in the grid
+static INLINE void transpose_32(int txfm_size, const __m128i *input,
+ __m128i *output) {
+ const int num_per_128 = 4;  // 32-bit lanes per __m128i
+ const int row_size = txfm_size;
+ const int col_size = txfm_size / num_per_128;  // __m128i vectors per row
+ int r, c;
+
+ // transpose each 4x4 block and write it to grid position (c, r / 4)
+ for (r = 0; r < row_size; r += 4) {
+ for (c = 0; c < col_size; c++) {
+ transpose_32_4x4(col_size, &input[r * col_size + c],
+ &output[c * 4 * col_size + r / 4]);
+ }
+ }
+}
+
+static INLINE __m128i round_shift_32_sse4_1(__m128i vec, int bit) {
+ __m128i tmp, round;
+ round = _mm_set1_epi32(1 << (bit - 1));  // half of 1 << bit; requires bit >= 1
+ tmp = _mm_add_epi32(vec, round);
+ return _mm_srai_epi32(tmp, bit);  // arithmetic shift of each 32-bit lane
+}
+
+static INLINE void round_shift_array_32_sse4_1(__m128i *input, __m128i *output,
+ const int size, const int bit) {
+ if (bit > 0) {  // positive bit: rounding right shift
+ int i;
+ for (i = 0; i < size; i++) {
+ output[i] = round_shift_32_sse4_1(input[i], bit);
+ }
+ } else {  // zero or negative bit: left shift by -bit, no rounding
+ int i;
+ for (i = 0; i < size; i++) {
+ output[i] = _mm_slli_epi32(input[i], -bit);
+ }
+ }
+}
+
+// out0 = in0*w0 + in1*w1
+// out1 = -in1*w0 + in0*w1 (both results then go through round_shift_32_sse4_1)
+#define btf_32_sse4_1_type0(w0, w1, in0, in1, out0, out1, bit) \
+ do { \
+ __m128i ww0, ww1, in0_w0, in1_w1, in0_w1, in1_w0; \
+ ww0 = _mm_set1_epi32(w0); \
+ ww1 = _mm_set1_epi32(w1); \
+ in0_w0 = _mm_mullo_epi32(in0, ww0); /* keeps the low 32 bits of each product */ \
+ in1_w1 = _mm_mullo_epi32(in1, ww1); \
+ out0 = _mm_add_epi32(in0_w0, in1_w1); \
+ out0 = round_shift_32_sse4_1(out0, bit); \
+ in0_w1 = _mm_mullo_epi32(in0, ww1); /* in0/in1 are read again after out0 is set */ \
+ in1_w0 = _mm_mullo_epi32(in1, ww0); \
+ out1 = _mm_sub_epi32(in0_w1, in1_w0); \
+ out1 = round_shift_32_sse4_1(out1, bit); \
+ } while (0)
+
+// out0 = in0*w0 + in1*w1
+// out1 = in1*w0 - in0*w1 (both results then go through round_shift_32_sse4_1)
+#define btf_32_sse4_1_type1(w0, w1, in0, in1, out0, out1, bit) \
+ do { \
+ __m128i ww0, ww1, in0_w0, in1_w1, in0_w1, in1_w0; \
+ ww0 = _mm_set1_epi32(w0); \
+ ww1 = _mm_set1_epi32(w1); \
+ in0_w0 = _mm_mullo_epi32(in0, ww0); /* keeps the low 32 bits of each product */ \
+ in1_w1 = _mm_mullo_epi32(in1, ww1); \
+ out0 = _mm_add_epi32(in0_w0, in1_w1); \
+ out0 = round_shift_32_sse4_1(out0, bit); \
+ in0_w1 = _mm_mullo_epi32(in0, ww1); /* in0/in1 are read again after out0 is set */ \
+ in1_w0 = _mm_mullo_epi32(in1, ww0); \
+ out1 = _mm_sub_epi32(in1_w0, in0_w1); \
+ out1 = round_shift_32_sse4_1(out1, bit); \
+ } while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // VP10_TXMF1D_SSE2_H_
diff --git a/av1/decoder/bitreader.h b/av1/decoder/bitreader.h
new file mode 100644
index 0000000..75d6aa4
--- /dev/null
+++ b/av1/decoder/bitreader.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* The purpose of this header is to provide compile time pluggable bit reader
+ * implementations with a common interface. */
+
+#ifndef VPX10_DECODER_BITREADER_H_
+#define VPX10_DECODER_BITREADER_H_
+
+#include "./vpx_config.h"
+
+#if CONFIG_ANS  // map the vp10_read* names onto the ANS (uabs_*) decoder
+#include "av1/common/ans.h"
+#include "aom/vp8dx.h" // for vp10_decrypt_cb
+#define vp10_reader struct AnsDecoder
+#define vp10_reader_has_error ans_reader_has_error
+#define vp10_read uabs_read
+#define vp10_read_bit uabs_read_bit
+#define vp10_read_literal uabs_read_literal
+#define vp10_read_tree uabs_read_tree
+#else  // !CONFIG_ANS: map the same names onto the vpx_reader API
+#include "aom_dsp/bitreader.h"
+#define vp10_reader vpx_reader
+#define vp10_reader_has_error vpx_reader_has_error
+#define vp10_read vpx_read
+#define vp10_read_bit vpx_read_bit
+#define vp10_read_literal vpx_read_literal
+#define vp10_read_tree vpx_read_tree
+#endif  // CONFIG_ANS
+
+#endif // VPX10_DECODER_BITREADER_H_
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
new file mode 100644
index 0000000..0f90c20
--- /dev/null
+++ b/av1/decoder/decodeframe.c
@@ -0,0 +1,3882 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <stdlib.h> // qsort()
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "./vpx_scale_rtcd.h"
+#include "./vpx_config.h"
+
+#include "aom_dsp/bitreader_buffer.h"
+#include "av1/decoder/bitreader.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/mem_ops.h"
+#include "aom_scale/vpx_scale.h"
+#include "aom_util/vpx_thread.h"
+
+#include "av1/common/alloccommon.h"
+#if CONFIG_CLPF
+#include "av1/common/clpf.h"
+#endif
+#include "av1/common/common.h"
+#if CONFIG_DERING
+#include "av1/common/dering.h"
+#endif // CONFIG_DERING
+#include "av1/common/entropy.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/idct.h"
+#include "av1/common/thread_common.h"
+#include "av1/common/pred_common.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/reconintra.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/seg_common.h"
+#include "av1/common/tile_common.h"
+
+#include "av1/decoder/decodeframe.h"
+#include "av1/decoder/detokenize.h"
+#include "av1/decoder/decodemv.h"
+#include "av1/decoder/decoder.h"
+#include "av1/decoder/dsubexp.h"
+
+#define MAX_VPX_HEADER_SIZE 80
+
+static int is_compound_reference_allowed(const VP10_COMMON *cm) {
+ int i;
+ if (frame_is_intra_only(cm)) return 0;  // never allowed on intra-only frames
+ for (i = 1; i < INTER_REFS_PER_FRAME; ++i)  // compare entries 2.. against entry 1
+ if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) return 1;
+
+ return 0;  // all compared sign biases are equal
+}
+
+static void setup_compound_reference_mode(VP10_COMMON *cm) {
+#if CONFIG_EXT_REFS  // fixed assignment: four forward refs, two backward refs
+ cm->comp_fwd_ref[0] = LAST_FRAME;
+ cm->comp_fwd_ref[1] = LAST2_FRAME;
+ cm->comp_fwd_ref[2] = LAST3_FRAME;
+ cm->comp_fwd_ref[3] = GOLDEN_FRAME;
+
+ cm->comp_bwd_ref[0] = BWDREF_FRAME;
+ cm->comp_bwd_ref[1] = ALTREF_FRAME;
+#else  // !CONFIG_EXT_REFS: choose fixed/variable refs from the sign-bias pattern
+ if (cm->ref_frame_sign_bias[LAST_FRAME] ==
+ cm->ref_frame_sign_bias[GOLDEN_FRAME]) {
+ cm->comp_fixed_ref = ALTREF_FRAME;  // LAST and GOLDEN agree: they vary
+ cm->comp_var_ref[0] = LAST_FRAME;
+ cm->comp_var_ref[1] = GOLDEN_FRAME;
+ } else if (cm->ref_frame_sign_bias[LAST_FRAME] ==
+ cm->ref_frame_sign_bias[ALTREF_FRAME]) {
+ cm->comp_fixed_ref = GOLDEN_FRAME;  // LAST and ALTREF agree: they vary
+ cm->comp_var_ref[0] = LAST_FRAME;
+ cm->comp_var_ref[1] = ALTREF_FRAME;
+ } else {
+ cm->comp_fixed_ref = LAST_FRAME;  // LAST matches neither GOLDEN nor ALTREF
+ cm->comp_var_ref[0] = GOLDEN_FRAME;
+ cm->comp_var_ref[1] = ALTREF_FRAME;
+ }
+#endif // CONFIG_EXT_REFS
+}
+
+static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) {
+ return len != 0 && len <= (size_t)(end - start);  // non-empty and no longer than end - start
+}
+
+static int decode_unsigned_max(struct vpx_read_bit_buffer *rb, int max) {
+ const int data = vpx_rb_read_literal(rb, get_unsigned_bits(max));
+ return data > max ? max : data;  // clamp the decoded literal to max
+}
+
+static TX_MODE read_tx_mode(struct vpx_read_bit_buffer *rb) {
+ return vpx_rb_read_bit(rb) ? TX_MODE_SELECT : vpx_rb_read_literal(rb, 2);  // flag bit, else a 2-bit mode
+}
+
+static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp10_reader *r) {
+ int i, j;
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)  // one diff update per context...
+ for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)  // ...and per probability slot
+ vp10_diff_update_prob(r, &fc->switchable_interp_prob[j][i]);
+}
+
+static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp10_reader *r) {
+ int i;
+#if CONFIG_REF_MV  // separate newmv/zeromv/refmv/drl probability tables
+ for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
+ vp10_diff_update_prob(r, &fc->newmv_prob[i]);
+ for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i)
+ vp10_diff_update_prob(r, &fc->zeromv_prob[i]);
+ for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
+ vp10_diff_update_prob(r, &fc->refmv_prob[i]);
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
+ vp10_diff_update_prob(r, &fc->drl_prob[i]);
+#if CONFIG_EXT_INTER
+ vp10_diff_update_prob(r, &fc->new2mv_prob);
+#endif // CONFIG_EXT_INTER
+#else  // !CONFIG_REF_MV: a single inter_mode_probs table indexed by context
+ int j;
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+ for (j = 0; j < INTER_MODES - 1; ++j)
+ vp10_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
+#endif  // CONFIG_REF_MV
+}
+
+#if CONFIG_EXT_INTER
+static void read_inter_compound_mode_probs(FRAME_CONTEXT *fc, vp10_reader *r) {
+ int i, j;
+ if (vp10_read(r, GROUP_DIFF_UPDATE_PROB)) {  // one flag gates the whole table update
+ for (j = 0; j < INTER_MODE_CONTEXTS; ++j) {
+ for (i = 0; i < INTER_COMPOUND_MODES - 1; ++i) {
+ vp10_diff_update_prob(r, &fc->inter_compound_mode_probs[j][i]);
+ }
+ }
+ }
+}
+#endif // CONFIG_EXT_INTER
+
+static REFERENCE_MODE read_frame_reference_mode(
+ const VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
+ if (is_compound_reference_allowed(cm)) {  // otherwise no bits are read
+ return vpx_rb_read_bit(rb)  // first bit set: REFERENCE_MODE_SELECT
+ ? REFERENCE_MODE_SELECT
+ : (vpx_rb_read_bit(rb) ? COMPOUND_REFERENCE : SINGLE_REFERENCE);
+ } else {
+ return SINGLE_REFERENCE;
+ }
+}
+
+static void read_frame_reference_mode_probs(VP10_COMMON *cm, vp10_reader *r) {
+ FRAME_CONTEXT *const fc = cm->fc;
+ int i, j;
+
+ if (cm->reference_mode == REFERENCE_MODE_SELECT)  // comp_inter probs only in select mode
+ for (i = 0; i < COMP_INTER_CONTEXTS; ++i)
+ vp10_diff_update_prob(r, &fc->comp_inter_prob[i]);
+
+ if (cm->reference_mode != COMPOUND_REFERENCE) {  // single refs may be used
+ for (i = 0; i < REF_CONTEXTS; ++i) {
+ for (j = 0; j < (SINGLE_REFS - 1); ++j) {
+ vp10_diff_update_prob(r, &fc->single_ref_prob[i][j]);
+ }
+ }
+ }
+
+ if (cm->reference_mode != SINGLE_REFERENCE) {  // compound refs may be used
+ for (i = 0; i < REF_CONTEXTS; ++i) {
+#if CONFIG_EXT_REFS
+ for (j = 0; j < (FWD_REFS - 1); ++j)
+ vp10_diff_update_prob(r, &fc->comp_ref_prob[i][j]);
+ for (j = 0; j < (BWD_REFS - 1); ++j)
+ vp10_diff_update_prob(r, &fc->comp_bwdref_prob[i][j]);
+#else
+ for (j = 0; j < (COMP_REFS - 1); ++j)
+ vp10_diff_update_prob(r, &fc->comp_ref_prob[i][j]);
+#endif // CONFIG_EXT_REFS
+ }
+ }
+}
+
+static void update_mv_probs(vpx_prob *p, int n, vp10_reader *r) {
+ int i;
+ for (i = 0; i < n; ++i) vp10_diff_update_prob(r, &p[i]);  // one diff update per entry of p[0..n-1]
+}
+
+static void read_mv_probs(nmv_context *ctx, int allow_hp, vp10_reader *r) {
+ int i, j;
+
+ update_mv_probs(ctx->joints, MV_JOINTS - 1, r);
+
+#if CONFIG_REF_MV
+ vp10_diff_update_prob(r, &ctx->zero_rmv);
+#endif
+
+ for (i = 0; i < 2; ++i) {  // sign, class, class0 and bits probs for both components
+ nmv_component *const comp_ctx = &ctx->comps[i];
+ update_mv_probs(&comp_ctx->sign, 1, r);
+ update_mv_probs(comp_ctx->classes, MV_CLASSES - 1, r);
+ update_mv_probs(comp_ctx->class0, CLASS0_SIZE - 1, r);
+ update_mv_probs(comp_ctx->bits, MV_OFFSET_BITS, r);
+ }
+
+ for (i = 0; i < 2; ++i) {  // class0_fp and fp probs for both components
+ nmv_component *const comp_ctx = &ctx->comps[i];
+ for (j = 0; j < CLASS0_SIZE; ++j)
+ update_mv_probs(comp_ctx->class0_fp[j], MV_FP_SIZE - 1, r);
+ update_mv_probs(comp_ctx->fp, 3, r);  // NOTE(review): literal 3 presumably equals MV_FP_SIZE - 1 - confirm
+ }
+
+ if (allow_hp) {  // class0_hp and hp probs are read only when allow_hp is set
+ for (i = 0; i < 2; ++i) {
+ nmv_component *const comp_ctx = &ctx->comps[i];
+ update_mv_probs(&comp_ctx->class0_hp, 1, r);
+ update_mv_probs(&comp_ctx->hp, 1, r);
+ }
+ }
+}
+
+static void inverse_transform_block(MACROBLOCKD *xd, int plane,
+ const TX_TYPE tx_type,
+ const TX_SIZE tx_size, uint8_t *dst,
+ int stride, int eob) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ if (eob > 0) {  // nothing is transformed or cleared when eob <= 0
+ tran_low_t *const dqcoeff = pd->dqcoeff;
+ INV_TXFM_PARAM inv_txfm_param;
+ inv_txfm_param.tx_type = tx_type;
+ inv_txfm_param.tx_size = tx_size;
+ inv_txfm_param.eob = eob;
+ inv_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ inv_txfm_param.bd = xd->bd;
+ highbd_inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ inv_txfm_add(dqcoeff, dst, stride, &inv_txfm_param);
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ if (eob == 1) {  // presumably only dqcoeff[0] is non-zero here - TODO confirm
+ dqcoeff[0] = 0;
+ } else {  // zero a prefix of dqcoeff whose length depends on tx_type/tx_size/eob
+ if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
+ memset(dqcoeff, 0, 4 * 4 * num_4x4_blocks_wide_txsize_lookup[tx_size] *
+ sizeof(dqcoeff[0]));
+#if CONFIG_EXT_TX
+ else
+ memset(dqcoeff, 0, get_tx2d_size(tx_size) * sizeof(dqcoeff[0]));
+#else
+ else if (tx_size == TX_32X32 && eob <= 34)
+ memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
+ else
+ memset(dqcoeff, 0, get_tx2d_size(tx_size) * sizeof(dqcoeff[0]));
+#endif  // CONFIG_EXT_TX
+ }
+ }
+}
+
+static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd,
+#if CONFIG_ANS
+ struct AnsDecoder *const r,
+#else
+ vp10_reader *r,
+#endif // CONFIG_ANS
+ MB_MODE_INFO *const mbmi,
+ int plane, int row, int col,
+ TX_SIZE tx_size) { // Calls vp10_predict_intra_block on dst; unless mbmi->skip, also decodes tokens and inverse-transforms into dst.
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ PREDICTION_MODE mode = (plane == 0) ? mbmi->mode : mbmi->uv_mode; // plane 0 uses mode, other planes use uv_mode
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ uint8_t *dst;
+ int block_idx = (row << 1) + col; // same index form as the bmi[] lookup below
+ dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; // row/col are scaled by 4 to address dst
+
+ if (mbmi->sb_type < BLOCK_8X8)
+ if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; // sub-8x8, plane 0: mode comes from the per-block bmi[] entry
+
+ vp10_predict_intra_block(xd, pd->n4_wl, pd->n4_hl, tx_size, mode, dst,
+ pd->dst.stride, dst, pd->dst.stride, col, row,
+ plane);
+
+ if (!mbmi->skip) { // skipped blocks get prediction only
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
+ const scan_order *sc = get_scan(tx_size, tx_type, 0); // last argument is 0 here, 1 in the inter paths
+ const int eob = vp10_decode_block_tokens(xd, plane, sc, col, row, tx_size,
+ tx_type, r, mbmi->segment_id);
+ inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
+ eob);
+ }
+}
+
+#if CONFIG_VAR_TX
+static void decode_reconstruct_tx(MACROBLOCKD *const xd, vp10_reader *r,
+ MB_MODE_INFO *const mbmi, int plane,
+ BLOCK_SIZE plane_bsize, int block,
+ int blk_row, int blk_col, TX_SIZE tx_size,
+ int *eob_total) { // Recurses from tx_size down to the stored transform size, decoding and reconstructing each leaf; leaf eobs are summed into *eob_total.
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y); // index into mbmi->inter_tx_size[][]
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ const TX_SIZE plane_tx_size =
+ plane ? get_uv_tx_size_impl(mbmi->inter_tx_size[tx_row][tx_col], bsize, 0,
+ 0)
+ : mbmi->inter_tx_size[tx_row][tx_col]; // plane 0 uses the stored size directly
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0) // a negative edge distance lowers the 4x4 row limit
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0) // likewise for the 4x4 column limit
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; // start position is past the limits
+
+ if (tx_size == plane_tx_size) { // leaf: decode and reconstruct this transform block
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, plane_tx_size);
+ const scan_order *sc = get_scan(plane_tx_size, tx_type, 1);
+ const int eob =
+ vp10_decode_block_tokens(xd, plane, sc, blk_col, blk_row, plane_tx_size,
+ tx_type, r, mbmi->segment_id);
+ inverse_transform_block(
+ xd, plane, tx_type, plane_tx_size,
+ &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col],
+ pd->dst.stride, eob);
+ *eob_total += eob;
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0); // the decrement below needs bsl >= 1
+ --bsl;
+
+ for (i = 0; i < 4; ++i) { // four quadrants, each at tx_size - 1
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+ int step = num_4x4_blocks_txsize_lookup[tx_size - 1]; // block-index stride between quadrants
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
+
+ decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize, block + i * step,
+ offsetr, offsetc, tx_size - 1, eob_total);
+ }
+ }
+}
+#endif // CONFIG_VAR_TX
+
+#if !CONFIG_VAR_TX || CONFIG_SUPERTX || (CONFIG_EXT_TX && CONFIG_RECT_TX)
+static int reconstruct_inter_block(MACROBLOCKD *const xd,
+#if CONFIG_ANS
+ struct AnsDecoder *const r,
+#else
+ vp10_reader *r,
+#endif
+ int segment_id, int plane, int row, int col,
+ TX_SIZE tx_size) { // Decodes the tokens of one transform block, inverse-transforms them into the plane's dst buffer, and returns the eob.
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ int block_idx = (row << 1) + col;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, tx_size);
+ const scan_order *sc = get_scan(tx_size, tx_type, 1);
+ const int eob = vp10_decode_block_tokens(xd, plane, sc, col, row, tx_size,
+ tx_type, r, segment_id);
+
+ inverse_transform_block(xd, plane, tx_type, tx_size,
+ &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
+ pd->dst.stride, eob);
+ return eob;
+}
+#endif // !CONFIG_VAR_TX || CONFIG_SUPERTX || (CONFIG_EXT_TX && CONFIG_RECT_TX)
+
+static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi, int n4_wl,
+ int n4_hl) {
+ // Take the smaller of the two log2 4x4-unit dimensions passed in.
+ const int x = VPXMIN(n4_wl, n4_hl);
+ return VPXMIN(txsize_sqr_map[mbmi->tx_size], x); // cap the square-mapped tx_size at that dimension
+}
+
+static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) { // Zeroes every plane's above/left entropy context for the current block.
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ struct macroblockd_plane *const pd = &xd->plane[i];
+ memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_w); // n4_w entries across
+ memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_h); // n4_h entries down
+ }
+}
+
+static void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, int bwl,
+ int bhl) { // Fills each plane's n4_w/n4_h/n4_wl/n4_hl from bw/bh/bwl/bhl, reduced by that plane's subsampling.
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].n4_w = (bw << 1) >> xd->plane[i].subsampling_x; // bw doubled first; presumably 8x8 -> 4x4 units (confirm against callers)
+ xd->plane[i].n4_h = (bh << 1) >> xd->plane[i].subsampling_y;
+ xd->plane[i].n4_wl = bwl - xd->plane[i].subsampling_x; // log2 form: subtract instead of shift
+ xd->plane[i].n4_hl = bhl - xd->plane[i].subsampling_y;
+ }
+}
+
+static MB_MODE_INFO *set_offsets(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int bw, int bh, int x_mis, int y_mis, int bwl,
+ int bhl) { // Points xd at the block at (mi_row, mi_col), records bsize, fills the covered mi grid, and sets up contexts, edges and dst planes; returns the block's mbmi.
+ const int offset = mi_row * cm->mi_stride + mi_col;
+ int x, y;
+ const TileInfo *const tile = &xd->tile;
+
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = &cm->mi[offset];
+ // TODO(slavarnway): Generate sb_type based on bwl and bhl, instead of
+ // passing bsize from decode_partition().
+ xd->mi[0]->mbmi.sb_type = bsize;
+ for (y = 0; y < y_mis; ++y)
+ for (x = !y; x < x_mis; ++x) { // x starts at 1 on row 0: entry [0] was assigned above
+ xd->mi[y * cm->mi_stride + x] = xd->mi[0]; // every covered grid entry shares the first MODE_INFO
+ }
+
+ set_plane_n4(xd, bw, bh, bwl, bhl);
+
+ set_skip_context(xd, mi_row, mi_col);
+
+#if CONFIG_VAR_TX
+ xd->max_tx_size = max_txsize_lookup[bsize];
+#endif
+
+ // Distance of Mb to the various image edges. These are specified to 8th pel
+ // as they are always compared to values that are in 1/8th pel units
+ set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
+
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+ return &xd->mi[0]->mbmi;
+}
+
+#if CONFIG_SUPERTX
+static MB_MODE_INFO *set_offsets_extend(VP10_COMMON *const cm,
+ MACROBLOCKD *const xd,
+ const TileInfo *const tile,
+ BLOCK_SIZE bsize_pred, int mi_row_pred,
+ int mi_col_pred, int mi_row_ori,
+ int mi_col_ori) { // mi pointers come from the *_ori position; edge and size info come from the *_pred region. Returns the mbmi at *_ori.
+ // Used in supertx
+ // (mi_row_ori, mi_col_ori): location for mv
+ // (mi_row_pred, mi_col_pred, bsize_pred): region to predict
+ const int bw = num_8x8_blocks_wide_lookup[bsize_pred];
+ const int bh = num_8x8_blocks_high_lookup[bsize_pred];
+ const int offset = mi_row_ori * cm->mi_stride + mi_col_ori;
+ const int bwl = b_width_log2_lookup[bsize_pred];
+ const int bhl = b_height_log2_lookup[bsize_pred];
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = cm->mi + offset;
+ set_mi_row_col(xd, tile, mi_row_pred, bh, mi_col_pred, bw, cm->mi_rows,
+ cm->mi_cols);
+
+ xd->up_available = (mi_row_ori > tile->mi_row_start); // overwritten after set_mi_row_col, using the *_ori position against the tile start
+ xd->left_available = (mi_col_ori > tile->mi_col_start);
+
+ set_plane_n4(xd, bw, bh, bwl, bhl);
+
+ return &xd->mi[0]->mbmi;
+}
+
+static MB_MODE_INFO *set_mb_offsets(VP10_COMMON *const cm,
+ MACROBLOCKD *const xd, BLOCK_SIZE bsize,
+ int mi_row, int mi_col, int bw, int bh,
+ int x_mis, int y_mis) { // Points xd at (mi_row, mi_col), records bsize, fills the covered mi grid and sets edge info; no plane n4, skip-context or dst-plane setup here.
+ const int offset = mi_row * cm->mi_stride + mi_col;
+ const TileInfo *const tile = &xd->tile;
+ int x, y;
+
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = cm->mi + offset;
+ xd->mi[0]->mbmi.sb_type = bsize;
+ for (y = 0; y < y_mis; ++y)
+ for (x = !y; x < x_mis; ++x) xd->mi[y * cm->mi_stride + x] = xd->mi[0]; // x = !y skips entry [0], already assigned above
+
+ set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
+ return &xd->mi[0]->mbmi;
+}
+
+static void set_offsets_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+ const TileInfo *const tile, BLOCK_SIZE bsize,
+ int mi_row, int mi_col) { // Positions xd at (mi_row, mi_col) for bsize: mi pointers, plane n4 sizes, edge info and dst planes.
+ const int bw = num_8x8_blocks_wide_lookup[bsize];
+ const int bh = num_8x8_blocks_high_lookup[bsize];
+ const int offset = mi_row * cm->mi_stride + mi_col;
+ const int bwl = b_width_log2_lookup[bsize];
+ const int bhl = b_height_log2_lookup[bsize];
+
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = cm->mi + offset; // sb_type and the rest of the grid are left untouched here
+
+ set_plane_n4(xd, bw, bh, bwl, bhl);
+
+ set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
+
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+}
+
+static void set_param_topblock(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int txfm, int skip) { // Writes skip and tx_type (from txfm) into every in-frame mi entry covered by the block.
+ const int bw = num_8x8_blocks_wide_lookup[bsize];
+ const int bh = num_8x8_blocks_high_lookup[bsize];
+ const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); // clip the block extent to the frame
+ const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
+ const int offset = mi_row * cm->mi_stride + mi_col;
+ int x, y;
+
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = cm->mi + offset;
+
+ for (y = 0; y < y_mis; ++y)
+ for (x = 0; x < x_mis; ++x) {
+ xd->mi[y * cm->mi_stride + x]->mbmi.skip = skip;
+ xd->mi[y * cm->mi_stride + x]->mbmi.tx_type = txfm;
+ }
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); // left buffer is indexed by mi_row masked with MAX_MIB_MASK
+ set_txfm_ctxs(xd->mi[0]->mbmi.tx_size, bw, bh, xd);
+#endif
+}
+
+static void set_ref(VP10_COMMON *const cm, MACROBLOCKD *const xd, int idx,
+ int mi_row, int mi_col) { // Binds reference slot idx of the current block to its frame buffer and sets up the pre planes at (mi_row, mi_col).
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ RefBuffer *ref_buffer = &cm->frame_refs[mbmi->ref_frame[idx] - LAST_FRAME]; // frame_refs[] is indexed relative to LAST_FRAME
+ xd->block_refs[idx] = ref_buffer;
+ if (!vp10_is_valid_scale(&ref_buffer->sf)) // invalid scale factors are reported through vpx_internal_error
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Invalid scale factors");
+ vp10_setup_pre_planes(xd, idx, ref_buffer->buf, mi_row, mi_col,
+ &ref_buffer->sf);
+ xd->corrupted |= ref_buffer->buf->corrupted; // the reference buffer's corrupted flag carries over to xd
+}
+
+static void dec_predict_b_extend(
+ VP10Decoder *const pbi, MACROBLOCKD *const xd, const TileInfo *const tile,
+ int block, int mi_row_ori, int mi_col_ori, int mi_row_pred, int mi_col_pred,
+ int mi_row_top, int mi_col_top, uint8_t *dst_buf[3], int dst_stride[3],
+ BLOCK_SIZE bsize_top, BLOCK_SIZE bsize_pred, int b_sub8x8, int bextend) { // Builds the inter predictor for the *_pred region into dst_buf, using the mode info stored at *_ori.
+ // Used in supertx
+ // (mi_row_ori, mi_col_ori): location for mv
+ // (mi_row_pred, mi_col_pred, bsize_pred): region to predict
+ // (mi_row_top, mi_col_top, bsize_top): region of the top partition size
+ // block: sub location of sub8x8 blocks
+ // b_sub8x8: 1: ori is sub8x8; 0: ori is not sub8x8
+ // bextend: 1: region to predict is an extension of ori; 0: not
+ int r = (mi_row_pred - mi_row_top) * MI_SIZE; // row offset of the region from the top block's origin, scaled by MI_SIZE
+ int c = (mi_col_pred - mi_col_top) * MI_SIZE; // column offset, same scaling
+ const int mi_width_top = num_8x8_blocks_wide_lookup[bsize_top];
+ const int mi_height_top = num_8x8_blocks_high_lookup[bsize_top];
+ MB_MODE_INFO *mbmi;
+ VP10_COMMON *const cm = &pbi->common;
+
+ if (mi_row_pred < mi_row_top || mi_col_pred < mi_col_top || // region starts outside the top block or outside the frame: nothing to do
+ mi_row_pred >= mi_row_top + mi_height_top ||
+ mi_col_pred >= mi_col_top + mi_width_top || mi_row_pred >= cm->mi_rows ||
+ mi_col_pred >= cm->mi_cols)
+ return;
+
+ mbmi = set_offsets_extend(cm, xd, tile, bsize_pred, mi_row_pred, mi_col_pred,
+ mi_row_ori, mi_col_ori);
+ set_ref(cm, xd, 0, mi_row_pred, mi_col_pred);
+ if (has_second_ref(&xd->mi[0]->mbmi)) // second reference is set up only when the block has one
+ set_ref(cm, xd, 1, mi_row_pred, mi_col_pred);
+
+ if (!bextend) { // only for the block itself, not for its extensions
+ mbmi->tx_size = b_width_log2_lookup[bsize_top];
+ }
+
+ xd->plane[0].dst.stride = dst_stride[0];
+ xd->plane[1].dst.stride = dst_stride[1];
+ xd->plane[2].dst.stride = dst_stride[2];
+ xd->plane[0].dst.buf = dst_buf[0] + // each plane: base + (r, c) reduced by that plane's subsampling
+ (r >> xd->plane[0].subsampling_y) * dst_stride[0] +
+ (c >> xd->plane[0].subsampling_x);
+ xd->plane[1].dst.buf = dst_buf[1] +
+ (r >> xd->plane[1].subsampling_y) * dst_stride[1] +
+ (c >> xd->plane[1].subsampling_x);
+ xd->plane[2].dst.buf = dst_buf[2] +
+ (r >> xd->plane[2].subsampling_y) * dst_stride[2] +
+ (c >> xd->plane[2].subsampling_x);
+
+ if (!b_sub8x8)
+ vp10_build_inter_predictors_sb_extend(xd,
+#if CONFIG_EXT_INTER
+ mi_row_ori, mi_col_ori,
+#endif // CONFIG_EXT_INTER
+ mi_row_pred, mi_col_pred, bsize_pred);
+ else
+ vp10_build_inter_predictors_sb_sub8x8_extend(xd, // the sub8x8 variant additionally takes the sub-block index
+#if CONFIG_EXT_INTER
+ mi_row_ori, mi_col_ori,
+#endif // CONFIG_EXT_INTER
+ mi_row_pred, mi_col_pred,
+ bsize_pred, block);
+}
+
+static void dec_extend_dir(VP10Decoder *const pbi, MACROBLOCKD *const xd,
+ const TileInfo *const tile, int block,
+ BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, int mi_row,
+ int mi_col, int mi_row_top, int mi_col_top,
+ uint8_t *dst_buf[3], int dst_stride[3], int dir) { // Predicts the blocks bordering the bsize block at (mi_row, mi_col) on side dir, using that block's mode info.
+ // dir: 0-lower, 1-upper, 2-left, 3-right
+ // 4-lowerleft, 5-upperleft, 6-lowerright, 7-upperright
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ int xss = xd->plane[1].subsampling_x; // subsampling is read from plane 1
+ int yss = xd->plane[1].subsampling_y;
+ int b_sub8x8 = (bsize < BLOCK_8X8) ? 1 : 0;
+ BLOCK_SIZE extend_bsize;
+ int unit, mi_row_pred, mi_col_pred;
+
+ if (dir == 0 || dir == 1) { // lower / upper: a row of extension blocks
+ extend_bsize = (mi_width == 1 || bsize < BLOCK_8X8 || xss < yss)
+ ? BLOCK_8X8
+ : BLOCK_16X8;
+ unit = num_8x8_blocks_wide_lookup[extend_bsize]; // horizontal step between extension blocks
+ mi_row_pred = mi_row + ((dir == 0) ? mi_height : -1);
+ mi_col_pred = mi_col;
+
+ dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, mi_row_pred,
+ mi_col_pred, mi_row_top, mi_col_top, dst_buf,
+ dst_stride, top_bsize, extend_bsize, b_sub8x8, 1);
+
+ if (mi_width > unit) { // block is wider than one extension block: cover the rest of the row
+ int i;
+ assert(!b_sub8x8);
+ for (i = 0; i < mi_width / unit - 1; i++) {
+ mi_col_pred += unit;
+ dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, mi_row_pred,
+ mi_col_pred, mi_row_top, mi_col_top, dst_buf,
+ dst_stride, top_bsize, extend_bsize, b_sub8x8, 1);
+ }
+ }
+ } else if (dir == 2 || dir == 3) { // left / right: a column of extension blocks
+ extend_bsize = (mi_height == 1 || bsize < BLOCK_8X8 || yss < xss)
+ ? BLOCK_8X8
+ : BLOCK_8X16;
+ unit = num_8x8_blocks_high_lookup[extend_bsize]; // vertical step between extension blocks
+ mi_row_pred = mi_row;
+ mi_col_pred = mi_col + ((dir == 3) ? mi_width : -1);
+
+ dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, mi_row_pred,
+ mi_col_pred, mi_row_top, mi_col_top, dst_buf,
+ dst_stride, top_bsize, extend_bsize, b_sub8x8, 1);
+
+ if (mi_height > unit) { // block is taller than one extension block: cover the rest of the column
+ int i;
+ for (i = 0; i < mi_height / unit - 1; i++) {
+ mi_row_pred += unit;
+ dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, mi_row_pred,
+ mi_col_pred, mi_row_top, mi_col_top, dst_buf,
+ dst_stride, top_bsize, extend_bsize, b_sub8x8, 1);
+ }
+ }
+ } else { // any other dir value (4..7 per the list above): a single 8x8 corner block
+ extend_bsize = BLOCK_8X8;
+ mi_row_pred = mi_row + ((dir == 4 || dir == 6) ? mi_height : -1);
+ mi_col_pred = mi_col + ((dir == 6 || dir == 7) ? mi_width : -1);
+ dec_predict_b_extend(pbi, xd, tile, block, mi_row, mi_col, mi_row_pred,
+ mi_col_pred, mi_row_top, mi_col_top, dst_buf,
+ dst_stride, top_bsize, extend_bsize, b_sub8x8, 1);
+ }
+}
+
+static void dec_extend_all(VP10Decoder *const pbi, MACROBLOCKD *const xd,
+ const TileInfo *const tile, int block,
+ BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, int mi_row,
+ int mi_col, int mi_row_top, int mi_col_top,
+ uint8_t *dst_buf[3], int dst_stride[3]) { // Calls dec_extend_dir once per direction, dir = 0..7 in order, with otherwise identical arguments.
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 0); // lower
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 1); // upper
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 2); // left
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 3); // right
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 4); // lower-left
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 5); // upper-left
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 6); // lower-right
+ dec_extend_dir(pbi, xd, tile, block, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 7); // upper-right
+}
+
+static void dec_predict_sb_complex(VP10Decoder *const pbi,
+ MACROBLOCKD *const xd,
+ const TileInfo *const tile, int mi_row,
+ int mi_col, int mi_row_top, int mi_col_top,
+ BLOCK_SIZE bsize, BLOCK_SIZE top_bsize,
+ uint8_t *dst_buf[3], int dst_stride[3]) { // Builds the predictor of the bsize block at (mi_row, mi_col) into dst_buf: each sub-block of its partition is predicted (with extensions) and the results are smoothed across sub-block boundaries.
+ const VP10_COMMON *const cm = &pbi->common;
+ const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; // half the block width, added to mi_row/mi_col below
+ const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
+ const BLOCK_SIZE subsize = get_subsize(bsize, partition);
+#if CONFIG_EXT_PARTITION_TYPES
+ const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT); // quarter-block size used by the *_A / *_B partitions
+#endif
+ int i;
+ const int mi_offset = mi_row * cm->mi_stride + mi_col;
+ uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3]; // per-plane pointers into the scratch buffers below
+
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_TX_SQUARE * 2]); // scratch predictors for the additional sub-blocks
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf3[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
+ int dst_stride1[3] = { MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE };
+ int dst_stride2[3] = { MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE };
+ int dst_stride3[3] = { MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE };
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int len = sizeof(uint16_t); // per-plane byte offsets below are scaled by this
+ dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
+ dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_TX_SQUARE * len);
+ dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_TX_SQUARE * len);
+ dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
+ dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_TX_SQUARE * len);
+ dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_TX_SQUARE * len);
+ dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3);
+ dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAX_TX_SQUARE * len);
+ dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAX_TX_SQUARE * len);
+ } else {
+#endif
+ dst_buf1[0] = tmp_buf1;
+ dst_buf1[1] = tmp_buf1 + MAX_TX_SQUARE;
+ dst_buf1[2] = tmp_buf1 + 2 * MAX_TX_SQUARE;
+ dst_buf2[0] = tmp_buf2;
+ dst_buf2[1] = tmp_buf2 + MAX_TX_SQUARE;
+ dst_buf2[2] = tmp_buf2 + 2 * MAX_TX_SQUARE;
+ dst_buf3[0] = tmp_buf3;
+ dst_buf3[1] = tmp_buf3 + MAX_TX_SQUARE;
+ dst_buf3[2] = tmp_buf3 + 2 * MAX_TX_SQUARE;
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; // block origin lies outside the frame
+
+ xd->mi = cm->mi_grid_visible + mi_offset;
+ xd->mi[0] = cm->mi + mi_offset;
+
+ for (i = 0; i < MAX_MB_PLANE; i++) { // start with the caller's buffers as destination
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ }
+
+ switch (partition) {
+ case PARTITION_NONE:
+ assert(bsize < top_bsize);
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, bsize, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+ break;
+ case PARTITION_HORZ:
+ if (bsize == BLOCK_8X8) {
+ // For sub8x8, predict in 8x8 unit
+ // First half
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, BLOCK_8X8, 1, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+
+ // Second half
+ dec_predict_b_extend(pbi, xd, tile, 2, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ top_bsize, BLOCK_8X8, 1, 1);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 2, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+
+ // weighted average to smooth the boundary
+ xd->plane[0].dst.buf = dst_buf[0];
+ xd->plane[0].dst.stride = dst_stride[0];
+ vp10_build_masked_inter_predictor_complex( // plane 0 only in the 8x8 case
+ xd, dst_buf[0], dst_stride[0], dst_buf1[0], dst_stride1[0], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_HORZ,
+ 0);
+ } else {
+ // First half
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 0); // at top size: extend only toward the other half (dir 0 = lower)
+
+ if (mi_row + hbs < cm->mi_rows) { // second half is handled only if it starts inside the frame
+ // Second half
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col,
+ mi_row + hbs, mi_col, mi_row_top, mi_col_top,
+ dst_buf1, dst_stride1, top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1, 1); // dir 1 = upper
+
+ // weighted average to smooth the boundary
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ }
+ }
+ break;
+ case PARTITION_VERT:
+ if (bsize == BLOCK_8X8) {
+ // First half
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, BLOCK_8X8, 1, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+
+ // Second half
+ dec_predict_b_extend(pbi, xd, tile, 1, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ top_bsize, BLOCK_8X8, 1, 1);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 1, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+
+ // Smooth
+ xd->plane[0].dst.buf = dst_buf[0];
+ xd->plane[0].dst.stride = dst_stride[0];
+ vp10_build_masked_inter_predictor_complex( // plane 0 only in the 8x8 case
+ xd, dst_buf[0], dst_stride[0], dst_buf1[0], dst_stride1[0], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_VERT,
+ 0);
+ } else {
+ // First half
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 3); // dir 3 = right
+
+ // Second half
+ if (mi_col + hbs < cm->mi_cols) { // second half is handled only if it starts inside the frame
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1, top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1, 2); // dir 2 = left
+
+ // Smooth
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+ }
+ }
+ break;
+ case PARTITION_SPLIT:
+ if (bsize == BLOCK_8X8) { // four sub8x8 blocks (block index 0..3), one buffer each
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, BLOCK_8X8, 1, 0);
+ dec_predict_b_extend(pbi, xd, tile, 1, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ top_bsize, BLOCK_8X8, 1, 1);
+ dec_predict_b_extend(pbi, xd, tile, 2, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf2, dst_stride2,
+ top_bsize, BLOCK_8X8, 1, 1);
+ dec_predict_b_extend(pbi, xd, tile, 3, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf3, dst_stride3,
+ top_bsize, BLOCK_8X8, 1, 1);
+ if (bsize < top_bsize) {
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+ dec_extend_all(pbi, xd, tile, 1, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+ dec_extend_all(pbi, xd, tile, 2, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf2, dst_stride2);
+ dec_extend_all(pbi, xd, tile, 3, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf3, dst_stride3);
+ }
+ } else { // larger blocks: recurse into each quadrant that starts inside the frame
+ dec_predict_sb_complex(pbi, xd, tile, mi_row, mi_col, mi_row_top,
+ mi_col_top, subsize, top_bsize, dst_buf,
+ dst_stride);
+ if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+ dec_predict_sb_complex(pbi, xd, tile, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, subsize, top_bsize,
+ dst_buf1, dst_stride1);
+ if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols)
+ dec_predict_sb_complex(pbi, xd, tile, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, subsize, top_bsize,
+ dst_buf2, dst_stride2);
+ if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+ dec_predict_sb_complex(pbi, xd, tile, mi_row + hbs, mi_col + hbs,
+ mi_row_top, mi_col_top, subsize, top_bsize,
+ dst_buf3, dst_stride3);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) { // merge: buf with buf1, buf2 with buf3 (VERT), then buf with buf2 (HORZ)
+ if (bsize == BLOCK_8X8 && i != 0)
+ continue; // Skip <4x4 chroma smoothing
+ if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_VERT, i);
+ if (mi_row + hbs < cm->mi_rows) {
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf2[i], dst_stride2[i], dst_buf3[i], dst_stride3[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_VERT, i);
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { // right column is outside the frame: only the HORZ merge applies
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ }
+ break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A: // two bsize2 blocks on the top row, one subsize block below
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1, top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf2,
+ dst_stride2, top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf2, dst_stride2);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf2, dst_stride2,
+ 1);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) { // merge the two top blocks first
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_VERT,
+ i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) { // then merge with the bottom block
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_HORZ,
+ i);
+ }
+ break;
+ case PARTITION_VERT_A: // two bsize2 blocks in the left column, one subsize block to the right
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1, top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf2,
+ dst_stride2, top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf2,
+ dst_stride2);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf2,
+ dst_stride2, 2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) { // merge the two left blocks first
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_HORZ,
+ i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) { // then merge with the right block
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_VERT,
+ i);
+ }
+ break;
+ case PARTITION_HORZ_B: // one subsize block on top, two bsize2 blocks on the bottom row
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 0);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1, top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col + hbs,
+ mi_row + hbs, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2, top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row + hbs,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf2,
+ dst_stride2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) { // merge the two bottom blocks into dst_buf1 first
+ xd->plane[i].dst.buf = dst_buf1[i];
+ xd->plane[i].dst.stride = dst_stride1[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf1[i], dst_stride1[i], dst_buf2[i], dst_stride2[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) { // then merge the top block with that result
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_HORZ,
+ i);
+ }
+ break;
+ case PARTITION_VERT_B: // one subsize block on the left, two bsize2 blocks in the right column
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride,
+ top_bsize, subsize, 0, 0);
+ if (bsize < top_bsize)
+ dec_extend_all(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride);
+ else
+ dec_extend_dir(pbi, xd, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, 3);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1, top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1);
+
+ dec_predict_b_extend(pbi, xd, tile, 0, mi_row + hbs, mi_col + hbs,
+ mi_row + hbs, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2, top_bsize, bsize2, 0, 0);
+ dec_extend_all(pbi, xd, tile, 0, bsize2, top_bsize, mi_row + hbs,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf2,
+ dst_stride2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) { // merge the two right blocks into dst_buf1 first
+ xd->plane[i].dst.buf = dst_buf1[i];
+ xd->plane[i].dst.stride = dst_stride1[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf1[i], dst_stride1[i], dst_buf2[i], dst_stride2[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) { // then merge the left block with that result
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_VERT,
+ i);
+ }
+ break;
+#endif // CONFIG_EXT_PARTITION_TYPES
+ default: assert(0); // no other partition value is handled
+ }
+}
+
+// Gives every mode-info entry inside the block one common segment_id_supertx:
+// 0 when segmentation is off, else the minimum segment_id among the entries.
+static void set_segment_id_supertx(const VP10_COMMON *const cm,
+                                   const int mi_row, const int mi_col,
+                                   const BLOCK_SIZE bsize) {
+  // Extent of the block in 8x8 units, limited to what is left of the frame.
+  const int cols_left = cm->mi_cols - mi_col;
+  const int rows_left = cm->mi_rows - mi_row;
+  const int miw = VPXMIN(num_8x8_blocks_wide_lookup[bsize], cols_left);
+  const int mih = VPXMIN(num_8x8_blocks_high_lookup[bsize], rows_left);
+  MODE_INFO **const top_left =
+      cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
+  int shared_id = 0;
+  int r, c;
+
+  if (cm->seg.enabled) {
+    shared_id = MAX_SEGMENTS;  // start value; the assert expects a smaller id
+    for (r = 0; r < mih; r++) {
+      MODE_INFO **const row = top_left + r * cm->mi_stride;
+      for (c = 0; c < miw; c++)
+        shared_id = VPXMIN(row[c]->mbmi.segment_id, shared_id);
+    }
+    assert(0 <= shared_id && shared_id < MAX_SEGMENTS);
+  }
+  // Assign the segment id back to segment_id_supertx of every entry.
+  for (r = 0; r < mih; r++)
+    for (c = 0; c < miw; c++)
+      top_left[r * cm->mi_stride + c]->mbmi.segment_id_supertx = shared_id;
+}
+#endif // CONFIG_SUPERTX
+
+static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif // CONFIG_SUPERTX
+ int mi_row, int mi_col, vp10_reader *r,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
+ BLOCK_SIZE bsize, int bwl, int bhl) {
+ VP10_COMMON *const cm = &pbi->common;
+ const int less8x8 = bsize < BLOCK_8X8;
+ const int bw = 1 << (bwl - 1); // block width in mi units (see x_mis)
+ const int bh = 1 << (bhl - 1); // block height in mi units (see y_mis)
+ const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); // limited to mi columns left
+ const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); // limited to mi rows left
+ // Set up offsets for this block, then read its mode info from `r`.
+#if CONFIG_SUPERTX
+ MB_MODE_INFO *mbmi;
+ if (supertx_enabled) {
+ mbmi = set_mb_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis);
+ } else {
+ mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis, bwl,
+ bhl);
+ }
+#if CONFIG_EXT_PARTITION_TYPES
+ xd->mi[0]->mbmi.partition = partition;
+#endif
+ vp10_read_mode_info(pbi, xd, supertx_enabled, mi_row, mi_col, r, x_mis,
+ y_mis);
+#else
+ MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis,
+ y_mis, bwl, bhl);
+#if CONFIG_EXT_PARTITION_TYPES
+ xd->mi[0]->mbmi.partition = partition;
+#endif
+ vp10_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis);
+#endif // CONFIG_SUPERTX
+ // With chroma subsampling, a size without a valid chroma size is an error.
+ if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) {
+ const BLOCK_SIZE uv_subsize =
+ ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y];
+ if (uv_subsize == BLOCK_INVALID)
+ vpx_internal_error(xd->error_info, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid block size.");
+ }
+
+#if CONFIG_SUPERTX
+ mbmi->segment_id_supertx = MAX_SEGMENTS; // placeholder; set_segment_id_supertx() assigns the real id
+
+ if (supertx_enabled) { // stop after mode info: nothing is predicted or reconstructed here
+ xd->corrupted |= vp10_reader_has_error(r);
+ return;
+ }
+#endif // CONFIG_SUPERTX
+
+ if (mbmi->skip) {
+ dec_reset_skip_context(xd);
+ }
+ if (!is_inter_block(mbmi)) { // intra: predict and reconstruct one transform block at a time
+ int plane;
+ for (plane = 0; plane <= 1; ++plane) { // palette tokens exist for planes 0 and 1 only
+ if (mbmi->palette_mode_info.palette_size[plane])
+ vp10_decode_palette_tokens(xd, plane, r);
+ }
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const TX_SIZE tx_size =
+ plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
+ : mbmi->tx_size;
+ const int num_4x4_w = pd->n4_w;
+ const int num_4x4_h = pd->n4_h;
+ const int stepr = num_4x4_blocks_high_txsize_lookup[tx_size];
+ const int stepc = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ int row, col;
+ const int max_blocks_wide =
+ num_4x4_w + (xd->mb_to_right_edge >= 0
+ ? 0
+ : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ const int max_blocks_high =
+ num_4x4_h + (xd->mb_to_bottom_edge >= 0
+ ? 0
+ : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+ // max_blocks_* are reduced when mb_to_*_edge is negative, bounding the loops.
+ for (row = 0; row < max_blocks_high; row += stepr)
+ for (col = 0; col < max_blocks_wide; col += stepc)
+ predict_and_reconstruct_intra_block(xd, r, mbmi, plane, row, col,
+ tx_size);
+ }
+ } else {
+ // Prediction: regular inter predictors, built for at least BLOCK_8X8.
+ vp10_build_inter_predictors_sb(xd, mi_row, mi_col,
+ VPXMAX(bsize, BLOCK_8X8));
+#if CONFIG_OBMC
+ if (mbmi->motion_variation == OBMC_CAUSAL) { // build above/left predictions in temporaries, then combine
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
+ int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+
+ assert(mbmi->sb_type >= BLOCK_8X8);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int len = sizeof(uint16_t); // per-plane byte offsets scale with the 16-bit sample size
+ dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
+ dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
+ dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len);
+ dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
+ dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
+ dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len);
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ dst_buf1[0] = tmp_buf1;
+ dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
+ dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2;
+ dst_buf2[0] = tmp_buf2;
+ dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
+ dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2;
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vp10_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1,
+ dst_width1, dst_height1,
+ dst_stride1);
+ vp10_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
+ dst_width2, dst_height2, dst_stride2);
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row,
+ mi_col);
+ vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1,
+ dst_stride1, dst_buf2, dst_stride2);
+ }
+#endif // CONFIG_OBMC
+
+ // Reconstruction: residuals are only read when the block is not skipped.
+ if (!mbmi->skip) {
+ int eobtotal = 0;
+ int plane;
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int num_4x4_w = pd->n4_w;
+ const int num_4x4_h = pd->n4_h;
+ int row, col;
+#if CONFIG_VAR_TX
+ // TODO(jingning): This can be simplified for decoder performance.
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd);
+ const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ int bw = num_4x4_blocks_wide_txsize_lookup[max_tx_size];
+ int bh = num_4x4_blocks_high_txsize_lookup[max_tx_size];
+ const int step = num_4x4_blocks_txsize_lookup[max_tx_size];
+ int block = 0;
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ const TX_SIZE tx_size =
+ plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
+ : mbmi->tx_size;
+
+ if (tx_size >= TX_SIZES) { // rect txsize is used
+ const int stepr = num_4x4_blocks_high_txsize_lookup[tx_size];
+ const int stepc = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int max_blocks_wide =
+ num_4x4_w +
+ (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >>
+ (5 + pd->subsampling_x));
+ const int max_blocks_high =
+ num_4x4_h +
+ (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >>
+ (5 + pd->subsampling_y));
+
+ for (row = 0; row < max_blocks_high; row += stepr)
+ for (col = 0; col < max_blocks_wide; col += stepc)
+ eobtotal += reconstruct_inter_block(xd, r, mbmi->segment_id,
+ plane, row, col, tx_size);
+ } else {
+#endif
+ for (row = 0; row < num_4x4_h; row += bh) {
+ for (col = 0; col < num_4x4_w; col += bw) {
+ decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize, block, row,
+ col, max_tx_size, &eobtotal);
+ block += step;
+ }
+ }
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ }
+#endif
+#else
+ const TX_SIZE tx_size =
+ plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
+ : mbmi->tx_size;
+ const int stepr = num_4x4_blocks_high_txsize_lookup[tx_size];
+ const int stepc = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int max_blocks_wide =
+ num_4x4_w + (xd->mb_to_right_edge >= 0
+ ? 0
+ : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ const int max_blocks_high =
+ num_4x4_h +
+ (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >>
+ (5 + pd->subsampling_y));
+
+ for (row = 0; row < max_blocks_high; row += stepr)
+ for (col = 0; col < max_blocks_wide; col += stepc)
+ eobtotal += reconstruct_inter_block(xd, r, mbmi->segment_id, plane,
+ row, col, tx_size);
+#endif
+ }
+
+ if (!less8x8 && eobtotal == 0)
+ mbmi->has_no_coeffs = 1; // skip loopfilter
+ }
+ }
+
+ xd->corrupted |= vp10_reader_has_error(r);
+}
+
+// Builds the partition probability context from bit `bsl` of the above and
+// left partition-context entries, offset by bsl * PARTITION_PLOFFSET.
+static INLINE int dec_partition_plane_context(const MACROBLOCKD *xd, int mi_row,
+                                              int mi_col, int bsl) {
+  const int above_bit = (xd->above_seg_context[mi_col] >> bsl) & 1;
+  const int left_bit =
+      (xd->left_seg_context[mi_row & MAX_MIB_MASK] >> bsl) & 1;
+  const int neighbours = 2 * left_bit + above_bit;
+
+  return neighbours + bsl * PARTITION_PLOFFSET;
+}
+
+#if !CONFIG_EXT_PARTITION_TYPES
+static INLINE void dec_update_partition_context(MACROBLOCKD *xd, int mi_row,
+                                                int mi_col, BLOCK_SIZE subsize,
+                                                int bw) {
+  // Writes the lookup entry for `subsize` into `bw` bytes of the above and
+  // left partition contexts. Per the table's convention, partition bits of
+  // block sizes larger than the current one are one and smaller ones zero.
+  const int left_pos = mi_row & MAX_MIB_MASK;
+
+  memset(xd->above_seg_context + mi_col,
+         partition_context_lookup[subsize].above, bw);
+  memset(xd->left_seg_context + left_pos,
+         partition_context_lookup[subsize].left, bw);
+}
+#endif // !CONFIG_EXT_PARTITION_TYPES
+
+// Reads the partition type for the block at (mi_row, mi_col).
+static PARTITION_TYPE read_partition(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                     int mi_row, int mi_col, vp10_reader *r,
+                                     int has_rows, int has_cols,
+#if CONFIG_EXT_PARTITION_TYPES
+                                     BLOCK_SIZE bsize,
+#endif
+                                     int bsl) {
+  const int ctx = dec_partition_plane_context(xd, mi_row, mi_col, bsl);
+  const vpx_prob *const probs = cm->fc->partition_prob[ctx];
+  PARTITION_TYPE p;
+
+  if (has_rows && has_cols) {  // full choice, read from a partition tree
+#if CONFIG_EXT_PARTITION_TYPES
+    if (bsize <= BLOCK_8X8)
+      p = (PARTITION_TYPE)vp10_read_tree(r, vp10_partition_tree, probs);
+    else
+      p = (PARTITION_TYPE)vp10_read_tree(r, vp10_ext_partition_tree, probs);
+#else
+    p = (PARTITION_TYPE)vp10_read_tree(r, vp10_partition_tree, probs);
+#endif  // CONFIG_EXT_PARTITION_TYPES
+  } else if (has_cols) {  // !has_rows: one bit chooses SPLIT or HORZ
+    p = vp10_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ;
+  } else if (has_rows) {  // !has_cols: one bit chooses SPLIT or VERT
+    p = vp10_read(r, probs[2]) ? PARTITION_SPLIT : PARTITION_VERT;
+  } else {  // neither: nothing is read
+    p = PARTITION_SPLIT;
+  }
+
+  if (xd->counts) ++xd->counts->partition[ctx][p];
+  return p;
+}
+
+#if CONFIG_SUPERTX
+// Returns 1 without reading when SEG_LVL_SKIP is active, else the coded flag.
+static int read_skip(VP10_COMMON *cm, const MACROBLOCKD *xd, int segment_id,
+                     vp10_reader *r) {
+  int ctx, skip;
+
+  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) return 1;
+
+  ctx = vp10_get_skip_context(xd);
+  skip = vp10_read(r, cm->fc->skip_probs[ctx]);
+  if (xd->counts) ++xd->counts->skip[ctx][skip];
+  return skip;
+}
+#endif // CONFIG_SUPERTX
+
+// TODO(slavarnway): eliminate bsize and subsize in future commits
+static void decode_partition(VP10Decoder *const pbi, MACROBLOCKD *const xd,
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif
+ int mi_row, int mi_col, vp10_reader *r,
+ BLOCK_SIZE bsize, int n4x4_l2) {
+ VP10_COMMON *const cm = &pbi->common;
+ const int n8x8_l2 = n4x4_l2 - 1;
+ const int num_8x8_wh = 1 << n8x8_l2;
+ const int hbs = num_8x8_wh >> 1; // half of the block's width/height, in mi units
+ PARTITION_TYPE partition;
+ BLOCK_SIZE subsize;
+#if CONFIG_EXT_PARTITION_TYPES
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
+ const int has_rows = (mi_row + hbs) < cm->mi_rows; // lower half starts inside the frame
+ const int has_cols = (mi_col + hbs) < cm->mi_cols; // right half starts inside the frame
+#if CONFIG_SUPERTX
+ const int read_token = !supertx_enabled; // true unless a caller already enabled supertx
+ int skip = 0;
+ TX_SIZE supertx_size = b_width_log2_lookup[bsize];
+ const TileInfo *const tile = &xd->tile;
+ int txfm = DCT_DCT;
+#endif // CONFIG_SUPERTX
+ // Blocks that start outside the frame carry no data.
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+ // Read how this block is partitioned; subsize is the resulting block size.
+ partition = read_partition(cm, xd, mi_row, mi_col, r, has_rows, has_cols,
+#if CONFIG_EXT_PARTITION_TYPES
+ bsize,
+#endif
+ n8x8_l2);
+ subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition);
+#if CONFIG_SUPERTX
+ if (!frame_is_intra_only(cm) && partition != PARTITION_NONE &&
+ bsize <= MAX_SUPERTX_BLOCK_SIZE && !supertx_enabled && !xd->lossless[0]) {
+ const int supertx_context = partition_supertx_context_lookup[partition];
+ supertx_enabled =
+ vp10_read(r, cm->fc->supertx_prob[supertx_context][supertx_size]);
+ if (xd->counts)
+ xd->counts->supertx[supertx_context][supertx_size][supertx_enabled]++;
+#if CONFIG_VAR_TX
+ if (supertx_enabled) xd->supertx_size = supertx_size;
+#endif
+ }
+#endif // CONFIG_SUPERTX
+ if (!hbs) { // single 8x8: decode as one block, no recursion
+ // calculate bmode block dimensions (log 2)
+ xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT);
+ xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, r,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
+ subsize, 1, 1);
+ } else {
+ switch (partition) {
+ case PARTITION_NONE:
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, r,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
+ subsize, n4x4_l2, n4x4_l2);
+ break;
+ case PARTITION_HORZ:
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, r,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
+ subsize, n4x4_l2, n8x8_l2);
+ if (has_rows)
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row + hbs, mi_col, r,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
+ subsize, n4x4_l2, n8x8_l2);
+ break;
+ case PARTITION_VERT:
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, r,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
+ subsize, n8x8_l2, n4x4_l2);
+ if (has_cols)
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col + hbs, r,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif // CONFIG_EXT_PARTITION_TYPES
+ subsize, n8x8_l2, n4x4_l2);
+ break;
+ case PARTITION_SPLIT:
+ decode_partition(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, r, subsize, n8x8_l2);
+ decode_partition(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col + hbs, r, subsize, n8x8_l2);
+ decode_partition(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row + hbs, mi_col, r, subsize, n8x8_l2);
+ decode_partition(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ mi_row + hbs, mi_col + hbs, r, subsize, n8x8_l2);
+ break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col, r, partition, bsize2, n8x8_l2, n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col + hbs, r, partition, bsize2, n8x8_l2,
+ n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + hbs, mi_col, r, partition, subsize, n4x4_l2,
+ n8x8_l2);
+ break;
+ case PARTITION_HORZ_B:
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col, r, partition, subsize, n4x4_l2, n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + hbs, mi_col, r, partition, bsize2, n8x8_l2,
+ n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + hbs, mi_col + hbs, r, partition, bsize2, n8x8_l2,
+ n8x8_l2);
+ break;
+ case PARTITION_VERT_A:
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col, r, partition, bsize2, n8x8_l2, n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + hbs, mi_col, r, partition, bsize2, n8x8_l2,
+ n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col + hbs, r, partition, subsize, n8x8_l2,
+ n4x4_l2);
+ break;
+ case PARTITION_VERT_B:
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col, r, partition, subsize, n8x8_l2, n4x4_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row, mi_col + hbs, r, partition, bsize2, n8x8_l2,
+ n8x8_l2);
+ decode_block(pbi, xd,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif
+ mi_row + hbs, mi_col + hbs, r, partition, bsize2, n8x8_l2,
+ n8x8_l2);
+ break;
+#endif
+ default: assert(0 && "Invalid partition type");
+ }
+ }
+
+#if CONFIG_SUPERTX
+ if (supertx_enabled && read_token) { // supertx was enabled by this call, not by a caller
+ uint8_t *dst_buf[3];
+ int dst_stride[3], i;
+ int offset = mi_row * cm->mi_stride + mi_col;
+
+ set_segment_id_supertx(cm, mi_row, mi_col, bsize);
+
+ xd->mi = cm->mi_grid_visible + offset;
+ xd->mi[0] = cm->mi + offset;
+ set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[bsize], mi_col,
+ num_8x8_blocks_wide_lookup[bsize], cm->mi_rows, cm->mi_cols);
+ set_skip_context(xd, mi_row, mi_col);
+ skip = read_skip(cm, xd, xd->mi[0]->mbmi.segment_id_supertx, r);
+ if (skip) {
+ reset_skip_context(xd, bsize);
+ } else {
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(supertx_size, bsize, 1) > 1) {
+ int eset = get_ext_tx_set(supertx_size, bsize, 1);
+ if (eset > 0) {
+ txfm = vp10_read_tree(r, vp10_ext_tx_inter_tree[eset],
+ cm->fc->inter_ext_tx_prob[eset][supertx_size]);
+ if (xd->counts) ++xd->counts->inter_ext_tx[eset][supertx_size][txfm];
+ }
+ }
+#else
+ if (supertx_size < TX_32X32) {
+ txfm = vp10_read_tree(r, vp10_ext_tx_tree,
+ cm->fc->inter_ext_tx_prob[supertx_size]);
+ if (xd->counts) ++xd->counts->inter_ext_tx[supertx_size][txfm];
+ }
+#endif // CONFIG_EXT_TX
+ }
+
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ dst_buf[i] = xd->plane[i].dst.buf;
+ dst_stride[i] = xd->plane[i].dst.stride;
+ }
+ dec_predict_sb_complex(pbi, xd, tile, mi_row, mi_col, mi_row, mi_col, bsize,
+ bsize, dst_buf, dst_stride);
+
+ if (!skip) {
+ int eobtotal = 0;
+ MB_MODE_INFO *mbmi;
+ set_offsets_topblock(cm, xd, tile, bsize, mi_row, mi_col);
+ mbmi = &xd->mi[0]->mbmi;
+ mbmi->tx_type = txfm;
+ assert(mbmi->segment_id_supertx != MAX_SEGMENTS);
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ const struct macroblockd_plane *const pd = &xd->plane[i];
+ const int num_4x4_w = pd->n4_w;
+ const int num_4x4_h = pd->n4_h;
+ int row, col;
+ const TX_SIZE tx_size =
+ i ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl) : mbmi->tx_size;
+ const int stepr = num_4x4_blocks_high_txsize_lookup[tx_size];
+ const int stepc = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int max_blocks_wide =
+ num_4x4_w + (xd->mb_to_right_edge >= 0
+ ? 0
+ : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ const int max_blocks_high =
+ num_4x4_h +
+ (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >>
+ (5 + pd->subsampling_y));
+
+ for (row = 0; row < max_blocks_high; row += stepr)
+ for (col = 0; col < max_blocks_wide; col += stepc)
+ eobtotal += reconstruct_inter_block(xd, r, mbmi->segment_id_supertx,
+ i, row, col, tx_size);
+ }
+ if (!(subsize < BLOCK_8X8) && eobtotal == 0) skip = 1;
+ }
+ set_param_topblock(cm, xd, bsize, mi_row, mi_col, txfm, skip);
+ }
+#endif // CONFIG_SUPERTX
+
+#if CONFIG_EXT_PARTITION_TYPES
+ if (bsize >= BLOCK_8X8) {
+ switch (partition) {
+ case PARTITION_SPLIT:
+ if (bsize > BLOCK_8X8) break; // otherwise fall through
+ case PARTITION_NONE:
+ case PARTITION_HORZ:
+ case PARTITION_VERT:
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+ break;
+ case PARTITION_HORZ_A:
+ update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
+ update_partition_context(xd, mi_row + hbs, mi_col, subsize, subsize);
+ break;
+ case PARTITION_HORZ_B:
+ update_partition_context(xd, mi_row, mi_col, subsize, subsize);
+ update_partition_context(xd, mi_row + hbs, mi_col, bsize2, subsize);
+ break;
+ case PARTITION_VERT_A:
+ update_partition_context(xd, mi_row, mi_col, bsize2, subsize);
+ update_partition_context(xd, mi_row, mi_col + hbs, subsize, subsize);
+ break;
+ case PARTITION_VERT_B:
+ update_partition_context(xd, mi_row, mi_col, subsize, subsize);
+ update_partition_context(xd, mi_row, mi_col + hbs, bsize2, subsize);
+ break;
+ default: assert(0 && "Invalid partition type");
+ }
+ }
+#else
+ // update partition context
+ if (bsize >= BLOCK_8X8 &&
+ (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
+ dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh);
+#if DERING_REFINEMENT // NOTE(review): nested in the !CONFIG_EXT_PARTITION_TYPES branch - confirm intended
+ if (bsize == BLOCK_64X64) {
+ if (cm->dering_level != 0 && !sb_all_skip(cm, mi_row, mi_col)) {
+ cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.dering_gain =
+ vpx_read_literal(r, DERING_REFINEMENT_BITS);
+ } else {
+ cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.dering_gain =
+ 0;
+ }
+ }
+#endif // DERING_REFINEMENT
+#endif // CONFIG_EXT_PARTITION_TYPES
+}
+
+#if !CONFIG_ANS
+// Initialises bool decoder `r` over read_size bytes at `data`. A range that
+// read_is_valid() rejects is CORRUPT_FRAME; an init failure is MEM_ERROR.
+static void setup_bool_decoder(const uint8_t *data, const uint8_t *data_end,
+                               const size_t read_size,
+                               struct vpx_internal_error_info *error_info,
+                               vp10_reader *r, vpx_decrypt_cb decrypt_cb,
+                               void *decrypt_state) {
+  if (!read_is_valid(data, read_size, data_end)) {
+    vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
+                       "Truncated packet or corrupt tile length");
+  }
+  if (vpx_reader_init(r, data, read_size, decrypt_cb, decrypt_state)) {
+    vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate bool decoder %d", 1);
+  }
+}
+#else
+// Validates the byte range and starts the ANS decoder `ans` on it.
+static void setup_token_decoder(const uint8_t *data, const uint8_t *data_end,
+                                const size_t read_size,
+                                struct vpx_internal_error_info *error_info,
+                                struct AnsDecoder *const ans,
+                                vpx_decrypt_cb decrypt_cb,
+                                void *decrypt_state) {
+  (void)decrypt_cb;  // accepted but not used here
+  (void)decrypt_state;
+  if (!read_is_valid(data, read_size, data_end)) {
+    vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
+                       "Truncated packet or corrupt tile length");
+  }
+  // The length is passed on as an int, so larger sizes are rejected first.
+  if (read_size > INT_MAX || ans_read_init(ans, data, (int)read_size)) {
+    vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate token decoder %d", 1);
+  }
+}
+#endif
+
+// When the leading bit is set, reads a diff update for each probability.
+static void read_coef_probs_common(vp10_coeff_probs_model *coef_probs,
+                                   vp10_reader *r) {
+  int i, j, k, l, m;
+  if (!vp10_read_bit(r)) return;
+  for (i = 0; i < PLANE_TYPES; ++i)
+    for (j = 0; j < REF_TYPES; ++j)
+      for (k = 0; k < COEF_BANDS; ++k)
+        for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
+          for (m = 0; m < UNCONSTRAINED_NODES; ++m)
+            vp10_diff_update_prob(r, &coef_probs[i][j][k][l][m]);
+}
+
+// Reads coefficient probability updates for every size up to tx_mode's max.
+static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode,
+                            vp10_reader *r) {
+  const int last = tx_mode_to_biggest_tx_size[tx_mode];
+  int s;
+  for (s = TX_4X4; s <= last; ++s) read_coef_probs_common(fc->coef_probs[s], r);
+#if CONFIG_ANS
+  vp10_coef_pareto_cdfs(fc);
+#endif  // CONFIG_ANS
+}
+
+// Parses the segmentation syntax from `rb` into cm->seg: the enable bit, the
+// map-update flags and, when signalled, a fresh set of per-segment feature
+// data. Nothing beyond the enable bit is read when segmentation is off.
+static void setup_segmentation(VP10_COMMON *const cm,
+                               struct vpx_read_bit_buffer *rb) {
+  struct segmentation *const seg = &cm->seg;
+  int forced_map_update;
+  int segment, feature;
+
+  // Defaults that remain in place if segmentation turns out to be disabled.
+  seg->update_map = 0;
+  seg->update_data = 0;
+
+  seg->enabled = vpx_rb_read_bit(rb);
+  if (!seg->enabled) return;
+
+  // Segmentation map update: intra-only and error-resilient frames always
+  // update the map without temporal prediction, so no flag is read for them.
+  forced_map_update = frame_is_intra_only(cm) || cm->error_resilient_mode;
+  seg->update_map = forced_map_update ? 1 : vpx_rb_read_bit(rb);
+  if (seg->update_map)
+    seg->temporal_update = forced_map_update ? 0 : vpx_rb_read_bit(rb);
+
+  // Segmentation data update
+  seg->update_data = vpx_rb_read_bit(rb);
+  if (!seg->update_data) return;
+
+  // All features are cleared before the new set is read.
+  seg->abs_delta = vpx_rb_read_bit(rb);
+  vp10_clearall_segfeatures(seg);
+
+  // Every (segment, feature) pair carries an enable bit; an enabled feature
+  // is followed by its magnitude and, for signed features, a sign bit.
+  for (segment = 0; segment < MAX_SEGMENTS; segment++) {
+    for (feature = 0; feature < SEG_LVL_MAX; feature++) {
+      int data = 0;
+      if (vpx_rb_read_bit(rb)) {
+        vp10_enable_segfeature(seg, segment, feature);
+        data = decode_unsigned_max(rb, vp10_seg_feature_data_max(feature));
+        if (vp10_is_segfeature_signed(feature) && vpx_rb_read_bit(rb)) {
+          data = -data;
+        }
+      }
+      vp10_set_segdata(seg, segment, feature, data);
+    }
+  }
+}
+
+#if CONFIG_LOOP_RESTORATION
+// Parses loop-restoration parameters into cm->rst_info. Failed array
+// resizes raise VPX_CODEC_MEM_ERROR; assert() alone vanishes under NDEBUG.
+static void setup_restoration(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
+  RestorationInfo *rsi = &cm->rst_info;
+  int i, ntiles;
+  if (!vpx_rb_read_bit(rb)) {  // first bit: restoration on/off
+    rsi->restoration_type = RESTORE_NONE;
+    return;
+  }
+  if (vpx_rb_read_bit(rb)) {  // second bit: bilateral (1) or Wiener (0)
+    rsi->restoration_type = RESTORE_BILATERAL;
+    ntiles = vp10_get_restoration_ntiles(BILATERAL_TILESIZE, cm->width,
+                                         cm->height);
+    rsi->bilateral_level = (int *)vpx_realloc(
+        rsi->bilateral_level, sizeof(*rsi->bilateral_level) * ntiles);
+    if (!rsi->bilateral_level)
+      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                         "Failed to allocate bilateral restoration levels");
+    for (i = 0; i < ntiles; ++i) {  // per tile: flag, then level; -1 if unset
+      rsi->bilateral_level[i] =
+          vpx_rb_read_bit(rb)
+              ? vpx_rb_read_literal(rb, vp10_bilateral_level_bits(cm))
+              : -1;
+    }
+    return;
+  }
+  rsi->restoration_type = RESTORE_WIENER;
+  ntiles = vp10_get_restoration_ntiles(WIENER_TILESIZE, cm->width, cm->height);
+  rsi->wiener_level = (int *)vpx_realloc(
+      rsi->wiener_level, sizeof(*rsi->wiener_level) * ntiles);
+  rsi->vfilter = (int(*)[RESTORATION_HALFWIN])vpx_realloc(
+      rsi->vfilter, sizeof(*rsi->vfilter) * ntiles);
+  rsi->hfilter = (int(*)[RESTORATION_HALFWIN])vpx_realloc(
+      rsi->hfilter, sizeof(*rsi->hfilter) * ntiles);
+  if (!rsi->wiener_level || !rsi->vfilter || !rsi->hfilter)
+    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate Wiener restoration parameters");
+  for (i = 0; i < ntiles; ++i) {
+    rsi->wiener_level[i] = vpx_rb_read_bit(rb);
+    if (rsi->wiener_level[i]) {  // taps are coded as offsets from *_MINV
+      rsi->vfilter[i][0] = vpx_rb_read_literal(rb, WIENER_FILT_TAP0_BITS) +
+                           WIENER_FILT_TAP0_MINV;
+      rsi->vfilter[i][1] = vpx_rb_read_literal(rb, WIENER_FILT_TAP1_BITS) +
+                           WIENER_FILT_TAP1_MINV;
+      rsi->vfilter[i][2] = vpx_rb_read_literal(rb, WIENER_FILT_TAP2_BITS) +
+                           WIENER_FILT_TAP2_MINV;
+      rsi->hfilter[i][0] = vpx_rb_read_literal(rb, WIENER_FILT_TAP0_BITS) +
+                           WIENER_FILT_TAP0_MINV;
+      rsi->hfilter[i][1] = vpx_rb_read_literal(rb, WIENER_FILT_TAP1_BITS) +
+                           WIENER_FILT_TAP1_MINV;
+      rsi->hfilter[i][2] = vpx_rb_read_literal(rb, WIENER_FILT_TAP2_BITS) +
+                           WIENER_FILT_TAP2_MINV;
+    } else {
+      rsi->vfilter[i][0] = rsi->vfilter[i][1] = rsi->vfilter[i][2] = 0;
+      rsi->hfilter[i][0] = rsi->hfilter[i][1] = rsi->hfilter[i][2] = 0;
+    }
+  }
+}
+#endif // CONFIG_LOOP_RESTORATION
+
+// Reads the frame-level loop filter settings: level, sharpness, and the
+// optional per-reference / per-mode delta updates.
+static void setup_loopfilter(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
+  struct loopfilter *lf = &cm->lf;
+  int i;
+
+  lf->filter_level = vpx_rb_read_literal(rb, 6);
+  lf->sharpness_level = vpx_rb_read_literal(rb, 3);
+  // Deltas applied at the MB level based on mode or ref frame. Without an
+  // explicit update the stored deltas are not written here.
+  lf->mode_ref_delta_update = 0;
+  lf->mode_ref_delta_enabled = vpx_rb_read_bit(rb);
+  if (!lf->mode_ref_delta_enabled) return;
+  lf->mode_ref_delta_update = vpx_rb_read_bit(rb);
+  if (!lf->mode_ref_delta_update) return;
+  // Each delta has its own presence bit ahead of its signed literal.
+  for (i = 0; i < TOTAL_REFS_PER_FRAME; i++) {
+    if (vpx_rb_read_bit(rb))
+      lf->ref_deltas[i] = vpx_rb_read_inv_signed_literal(rb, 6);
+  }
+  for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
+    if (vpx_rb_read_bit(rb))
+      lf->mode_deltas[i] = vpx_rb_read_inv_signed_literal(rb, 6);
+  }
+}
+
+#if CONFIG_CLPF
+static void setup_clpf(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
+ cm->clpf = vpx_rb_read_literal(rb, 1); // one bit from rb, stored as read
+}
+#endif
+
+#if CONFIG_DERING
+static void setup_dering(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
+ cm->dering_level = vpx_rb_read_literal(rb, DERING_LEVEL_BITS); // frame-level value, DERING_LEVEL_BITS wide
+}
+#endif // CONFIG_DERING
+
+static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) { // flag bit, then an optional signed delta
+ return vpx_rb_read_bit(rb) ? vpx_rb_read_inv_signed_literal(rb, 6) : 0; // 0 when the flag is clear
+}
+
+// Reads the base quantizer index and the three optional delta values; with
+// CONFIG_AOM_QM also the quantization-matrix flag and its level range.
+static void setup_quantization(VP10_COMMON *const cm,
+                               struct vpx_read_bit_buffer *rb) {
+  cm->base_qindex = vpx_rb_read_literal(rb, QINDEX_BITS);
+  cm->y_dc_delta_q = read_delta_q(rb);
+  cm->uv_dc_delta_q = read_delta_q(rb);
+  cm->uv_ac_delta_q = read_delta_q(rb);
+  cm->dequant_bit_depth = cm->bit_depth;
+#if CONFIG_AOM_QM
+  // The min/max levels are only coded when matrices are in use.
+  cm->using_qmatrix = vpx_rb_read_bit(rb);
+  cm->min_qmlevel =
+      cm->using_qmatrix ? vpx_rb_read_literal(rb, QM_LEVEL_BITS) : 0;
+  cm->max_qmlevel =
+      cm->using_qmatrix ? vpx_rb_read_literal(rb, QM_LEVEL_BITS) : 0;
+#endif
+}
+
+static void setup_segmentation_dequant(VP10_COMMON *const cm) {
+ // Build y/uv dequant values (plus QM / NUQ tables when configured) per segment.
+ int i = 0; // stays 0 when segmentation is disabled; the else branch relies on it
+#if CONFIG_AOM_QM
+ int lossless;
+ int j = 0;
+ int qmlevel;
+ int using_qm = cm->using_qmatrix;
+ int minqm = cm->min_qmlevel;
+ int maxqm = cm->max_qmlevel;
+#endif
+#if CONFIG_NEW_QUANT
+ int b;
+ int dq;
+#endif // CONFIG_NEW_QUANT
+ if (cm->seg.enabled) {
+ for (i = 0; i < MAX_SEGMENTS; ++i) {
+ const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex); // per-segment quantizer index
+ cm->y_dequant[i][0] =
+ vp10_dc_quant(qindex, cm->y_dc_delta_q, cm->bit_depth);
+ cm->y_dequant[i][1] = vp10_ac_quant(qindex, 0, cm->bit_depth);
+ cm->uv_dequant[i][0] =
+ vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth);
+ cm->uv_dequant[i][1] =
+ vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth);
+#if CONFIG_AOM_QM
+ lossless = qindex == 0 && cm->y_dc_delta_q == 0 &&
+ cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
+ // NB: depends on base index so there is only 1 set per frame
+ // No quant weighting when lossless or signalled not using QM
+ qmlevel = (lossless || using_qm == 0)
+ ? NUM_QM_LEVELS - 1
+ : aom_get_qmlevel(cm->base_qindex, minqm, maxqm);
+ for (j = 0; j < TX_SIZES; ++j) {
+ cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmlevel, 0, j, 1);
+ cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmlevel, 0, j, 0);
+ cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmlevel, 1, j, 1);
+ cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmlevel, 1, j, 0);
+ }
+#endif // CONFIG_AOM_QM
+#if CONFIG_NEW_QUANT
+ for (dq = 0; dq < QUANT_PROFILES; dq++) {
+ for (b = 0; b < COEF_BANDS; ++b) { // band 0 uses the DC entry, other bands the AC entry (b != 0)
+ vp10_get_dequant_val_nuq(cm->y_dequant[i][b != 0], qindex, b,
+ cm->y_dequant_nuq[i][dq][b], NULL, dq);
+ vp10_get_dequant_val_nuq(cm->uv_dequant[i][b != 0], qindex, b,
+ cm->uv_dequant_nuq[i][dq][b], NULL, dq);
+ }
+ }
+#endif // CONFIG_NEW_QUANT
+ }
+ } else {
+ const int qindex = cm->base_qindex;
+ // When segmentation is disabled, only the first value is used. The
+ // remaining are don't cares.
+ cm->y_dequant[0][0] =
+ vp10_dc_quant(qindex, cm->y_dc_delta_q, cm->bit_depth);
+ cm->y_dequant[0][1] = vp10_ac_quant(qindex, 0, cm->bit_depth);
+ cm->uv_dequant[0][0] =
+ vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth);
+ cm->uv_dequant[0][1] =
+ vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth);
+#if CONFIG_AOM_QM
+ lossless = qindex == 0 && cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 &&
+ cm->uv_ac_delta_q == 0;
+ // No quant weighting when lossless or signalled not using QM
+ qmlevel = (lossless || using_qm == 0)
+ ? NUM_QM_LEVELS - 1
+ : aom_get_qmlevel(cm->base_qindex, minqm, maxqm);
+ for (j = 0; j < TX_SIZES; ++j) { // i is still 0 here, so this fills entry 0 only
+ cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmlevel, 0, j, 1);
+ cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmlevel, 0, j, 0);
+ cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmlevel, 1, j, 1);
+ cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmlevel, 1, j, 0);
+ }
+#endif
+#if CONFIG_NEW_QUANT
+ for (dq = 0; dq < QUANT_PROFILES; dq++) {
+ for (b = 0; b < COEF_BANDS; ++b) {
+ vp10_get_dequant_val_nuq(cm->y_dequant[0][b != 0], qindex, b,
+ cm->y_dequant_nuq[0][dq][b], NULL, dq);
+ vp10_get_dequant_val_nuq(cm->uv_dequant[0][b != 0], qindex, b,
+ cm->uv_dequant_nuq[0][dq][b], NULL, dq);
+ }
+ }
+#endif // CONFIG_NEW_QUANT
+ }
+}
+
+// Reads the frame-level interpolation filter: a set flag bit selects
+// SWITCHABLE, otherwise the filter is read as a literal of
+// (2 + CONFIG_EXT_INTERP) bits.
+static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) {
+  if (vpx_rb_read_bit(rb)) return SWITCHABLE;
+  return vpx_rb_read_literal(rb, 2 + CONFIG_EXT_INTERP);
+}
+
+// Sets the render size of 'cm'. It defaults to the coded frame size and is
+// overridden by an explicit size from the bitstream when the flag bit is set.
+static void setup_render_size(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
+  cm->render_width = cm->width;
+  cm->render_height = cm->height;
+
+  // No explicit render size signalled: keep the defaults.
+  if (!vpx_rb_read_bit(rb)) return;
+
+  vp10_read_frame_size(rb, &cm->render_width, &cm->render_height);
+}
+
+// Replaces the motion vector buffer of the current frame with a freshly
+// zeroed one holding mi_rows * mi_cols entries, and records those dimensions
+// on the frame.
+static void resize_mv_buffer(VP10_COMMON *cm) {
+  const int rows = cm->mi_rows;
+  const int cols = cm->mi_cols;
+
+  vpx_free(cm->cur_frame->mvs);
+  cm->cur_frame->mi_rows = rows;
+  cm->cur_frame->mi_cols = cols;
+  CHECK_MEM_ERROR(
+      cm, cm->cur_frame->mvs,
+      (MV_REF *)vpx_calloc(rows * cols, sizeof(*cm->cur_frame->mvs)));
+}
+
+// Brings the context buffers of 'cm' and the motion vector buffer of the
+// current frame in line with a frame of 'width' x 'height' pixels. Context
+// buffers are only reallocated when the frame grows in either dimension.
+static void resize_context_buffers(VP10_COMMON *cm, int width, int height) {
+#if CONFIG_SIZE_LIMIT
+  if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT)
+    vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+                       "Dimensions of %dx%d beyond allowed size of %dx%d.",
+                       width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT);
+#endif
+  if (cm->width != width || cm->height != height) {
+    const int rows_needed =
+        ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2;
+    const int cols_needed =
+        ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2;
+    // Allocations in vp10_alloc_context_buffers() depend on individual
+    // dimensions as well as the overall size.
+    const int must_grow =
+        cols_needed > cm->mi_cols || rows_needed > cm->mi_rows;
+
+    if (!must_grow) {
+      vp10_set_mb_mi(cm, width, height);
+    } else if (vp10_alloc_context_buffers(cm, width, height)) {
+      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                         "Failed to allocate context buffers");
+    }
+    vp10_init_context_buffers(cm);
+    cm->width = width;
+    cm->height = height;
+  }
+
+  // The MV buffer is kept only if it exists and already covers the frame.
+  if (!(cm->cur_frame->mvs != NULL && cm->mi_rows <= cm->cur_frame->mi_rows &&
+        cm->mi_cols <= cm->cur_frame->mi_cols)) {
+    resize_mv_buffer(cm);
+  }
+}
+
+// Reads the frame size and render size from the bitstream, resizes the
+// context buffers, (re)allocates the new frame buffer while holding the
+// buffer pool lock, and copies the stream format properties onto that buffer.
+static void setup_frame_size(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
+  BufferPool *const pool = cm->buffer_pool;
+  int frame_width, frame_height;
+
+  vp10_read_frame_size(rb, &frame_width, &frame_height);
+  resize_context_buffers(cm, frame_width, frame_height);
+  setup_render_size(cm, rb);
+
+  lock_buffer_pool(pool);
+  if (vpx_realloc_frame_buffer(
+          get_frame_new_buffer(cm), cm->width, cm->height, cm->subsampling_x,
+          cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+          cm->use_highbitdepth,
+#endif
+          VPX_DEC_BORDER_IN_PIXELS, cm->byte_alignment,
+          &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb,
+          pool->cb_priv) != 0) {
+    // Release the pool before reporting the failure.
+    unlock_buffer_pool(pool);
+    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate frame buffer");
+  }
+  unlock_buffer_pool(pool);
+
+  // Record the stream properties on the newly sized buffer.
+  pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width;
+  pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
+  pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space;
+  pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range;
+  pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
+  pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x;
+  pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y;
+}
+
+// Returns 1 when a reference frame has the same bit depth and the same
+// horizontal/vertical chroma subsampling as the current frame, 0 otherwise.
+static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth,
+                                          int ref_xss, int ref_yss,
+                                          vpx_bit_depth_t this_bit_depth,
+                                          int this_xss, int this_yss) {
+  if (ref_bit_depth != this_bit_depth) return 0;
+  if (ref_xss != this_xss) return 0;
+  return ref_yss == this_yss;
+}
+
+// Determines the size of the current frame when reference frames are
+// available. One flag bit is read per reference; the first set bit makes the
+// frame inherit that reference's cropped size and render size. If no bit is
+// set, the frame size and render size are read explicitly. The size is then
+// validated against the references, the context buffers are resized, the new
+// frame buffer is (re)allocated under the buffer pool lock, and the stream
+// format properties are copied onto it.
+static void setup_frame_size_with_refs(VP10_COMMON *cm,
+ struct vpx_read_bit_buffer *rb) {
+ int width, height;
+ int found = 0, i;
+ int has_valid_ref_frame = 0;
+ BufferPool *const pool = cm->buffer_pool;
+ // Stop at the first reference whose "same size" bit is set.
+ for (i = 0; i < INTER_REFS_PER_FRAME; ++i) {
+ if (vpx_rb_read_bit(rb)) {
+ YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf;
+ width = buf->y_crop_width;
+ height = buf->y_crop_height;
+ cm->render_width = buf->render_width;
+ cm->render_height = buf->render_height;
+ found = 1;
+ break;
+ }
+ }
+
+ // No reference size was selected: the sizes are coded explicitly.
+ if (!found) {
+ vp10_read_frame_size(rb, &width, &height);
+ setup_render_size(cm, rb);
+ }
+
+ if (width <= 0 || height <= 0)
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid frame size");
+
+ // Check to make sure at least one of frames that this frame references
+ // has valid dimensions.
+ for (i = 0; i < INTER_REFS_PER_FRAME; ++i) {
+ RefBuffer *const ref_frame = &cm->frame_refs[i];
+ has_valid_ref_frame |=
+ valid_ref_frame_size(ref_frame->buf->y_crop_width,
+ ref_frame->buf->y_crop_height, width, height);
+ }
+ if (!has_valid_ref_frame)
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Referenced frame has invalid size");
+ // Unlike the size check above, every reference must match the current
+ // bit depth and chroma subsampling.
+ for (i = 0; i < INTER_REFS_PER_FRAME; ++i) {
+ RefBuffer *const ref_frame = &cm->frame_refs[i];
+ if (!valid_ref_frame_img_fmt(ref_frame->buf->bit_depth,
+ ref_frame->buf->subsampling_x,
+ ref_frame->buf->subsampling_y, cm->bit_depth,
+ cm->subsampling_x, cm->subsampling_y))
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Referenced frame has incompatible color format");
+ }
+
+ resize_context_buffers(cm, width, height);
+
+ lock_buffer_pool(pool);
+ if (vpx_realloc_frame_buffer(
+ get_frame_new_buffer(cm), cm->width, cm->height, cm->subsampling_x,
+ cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VPX_DEC_BORDER_IN_PIXELS, cm->byte_alignment,
+ &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb,
+ pool->cb_priv)) {
+ // Release the pool before reporting the failure.
+ // NOTE(review): the unlock below would run a second time if
+ // vpx_internal_error() ever returned here - confirm it never does.
+ unlock_buffer_pool(pool);
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffer");
+ }
+ unlock_buffer_pool(pool);
+
+ // Record the stream properties on the newly sized buffer.
+ pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x;
+ pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y;
+ pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
+ pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space;
+ pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range;
+ pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width;
+ pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
+}
+
+// Reads the tile layout of the current frame from 'rb' into pbi->common
+// (tile_width, tile_height, tile_cols, tile_rows) and, when the frame has
+// more than one tile, the number of bytes used to code tile sizes.
+// With CONFIG_EXT_TILE the tile dimensions are coded directly; otherwise the
+// log2 of the tile column/row counts is coded and the dimensions are derived.
+static void read_tile_info(VP10Decoder *const pbi,
+ struct vpx_read_bit_buffer *const rb) {
+ VP10_COMMON *const cm = &pbi->common;
+#if CONFIG_EXT_TILE
+// Read the tile width/height
+#if CONFIG_EXT_PARTITION
+ if (cm->sb_size == BLOCK_128X128) {
+ cm->tile_width = vpx_rb_read_literal(rb, 5) + 1;
+ cm->tile_height = vpx_rb_read_literal(rb, 5) + 1;
+ } else
+#endif // CONFIG_EXT_PARTITION
+ {
+ cm->tile_width = vpx_rb_read_literal(rb, 6) + 1;
+ cm->tile_height = vpx_rb_read_literal(rb, 6) + 1;
+ }
+
+ // The coded values are scaled by the mib size (shift by mib_size_log2).
+ cm->tile_width <<= cm->mib_size_log2;
+ cm->tile_height <<= cm->mib_size_log2;
+
+ // A tile never extends beyond the frame.
+ cm->tile_width = VPXMIN(cm->tile_width, cm->mi_cols);
+ cm->tile_height = VPXMIN(cm->tile_height, cm->mi_rows);
+
+ // Get the number of tiles
+ cm->tile_cols = 1;
+ while (cm->tile_cols * cm->tile_width < cm->mi_cols) ++cm->tile_cols;
+
+ cm->tile_rows = 1;
+ while (cm->tile_rows * cm->tile_height < cm->mi_rows) ++cm->tile_rows;
+
+ if (cm->tile_cols * cm->tile_rows > 1) {
+ // Read the number of bytes used to store tile size
+ pbi->tile_col_size_bytes = vpx_rb_read_literal(rb, 2) + 1;
+ pbi->tile_size_bytes = vpx_rb_read_literal(rb, 2) + 1;
+ }
+#else
+ int min_log2_tile_cols, max_log2_tile_cols, max_ones;
+ vp10_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
+
+ // columns
+ // log2_tile_cols is coded in unary as an increment over the minimum: each
+ // set bit adds one, up to the maximum.
+ max_ones = max_log2_tile_cols - min_log2_tile_cols;
+ cm->log2_tile_cols = min_log2_tile_cols;
+ while (max_ones-- && vpx_rb_read_bit(rb)) cm->log2_tile_cols++;
+
+ if (cm->log2_tile_cols > 6)
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid number of tile columns");
+
+ // rows
+ // One or two bits give a log2_tile_rows of 0, 1 or 2.
+ cm->log2_tile_rows = vpx_rb_read_bit(rb);
+ if (cm->log2_tile_rows) cm->log2_tile_rows += vpx_rb_read_bit(rb);
+
+ cm->tile_cols = 1 << cm->log2_tile_cols;
+ cm->tile_rows = 1 << cm->log2_tile_rows;
+
+ cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+ cm->tile_width >>= cm->log2_tile_cols;
+ cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
+ cm->tile_height >>= cm->log2_tile_rows;
+
+ // round to integer multiples of superblock size
+ cm->tile_width = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2);
+ cm->tile_height = ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2);
+
+ // tile size magnitude
+ if (cm->tile_rows > 1 || cm->tile_cols > 1) {
+ pbi->tile_size_bytes = vpx_rb_read_literal(rb, 2) + 1;
+ }
+#endif // CONFIG_EXT_TILE
+}
+
+// Reads a 'sz'-byte value (1 to 4 bytes) from 'src' using the helper that
+// matches the width. Any other 'sz' trips an assertion and yields -1.
+static int mem_get_varsize(const uint8_t *src, const int sz) {
+  if (sz == 1) return src[0];
+  if (sz == 2) return mem_get_le16(src);
+  if (sz == 3) return mem_get_le24(src);
+  if (sz == 4) return mem_get_le32(src);
+
+  assert("Invalid size" && 0);
+  return -1;
+}
+
+#if CONFIG_EXT_TILE
+// Reads the size header of the tile at ('row', 'col') from '*data', records
+// the tile's data pointer and size in 'tile_buffers[row][col]' and advances
+// '*data' past the tile. 'data_end' bounds the readable data.
+//
+// If the top bit of the size header is set, the tile is in copy mode: the
+// remaining bits of the top byte give a row offset and the tile reuses the
+// buffer of the tile 'offset' rows above it in the same column.
+//
+// Problems are reported through vpx_internal_error() on 'error_info'.
+static void get_tile_buffer(const uint8_t *const data_end,
+                            struct vpx_internal_error_info *error_info,
+                            const uint8_t **data, vpx_decrypt_cb decrypt_cb,
+                            void *decrypt_state,
+                            TileBufferDec (*const tile_buffers)[MAX_TILE_COLS],
+                            int tile_size_bytes, int col, int row) {
+  size_t size;
+
+  size_t copy_size = 0;
+  const uint8_t *copy_data = NULL;
+
+  if (!read_is_valid(*data, tile_size_bytes, data_end))
+    vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
+                       "Truncated packet or corrupt tile length");
+
+  // mem_get_varsize() returns an int. Go through uint32_t so that a 4-byte
+  // header with its top bit set is not sign-extended when size_t is wider
+  // than int, which would hide the copy-mode flag tested below.
+  if (decrypt_cb) {
+    uint8_t be_data[4];
+    decrypt_cb(decrypt_state, *data, be_data, tile_size_bytes);
+
+    // Only read number of bytes in cm->tile_size_bytes.
+    size = (uint32_t)mem_get_varsize(be_data, tile_size_bytes);
+  } else {
+    size = (uint32_t)mem_get_varsize(*data, tile_size_bytes);
+  }
+
+  // The top bit indicates copy mode
+  if ((size >> (tile_size_bytes * 8 - 1)) == 1) {
+    // The remaining bits in the top byte signal the row offset
+    int offset = (size >> (tile_size_bytes - 1) * 8) & 0x7f;
+
+    // The offset comes from the bitstream: reject one that would index
+    // before the first tile row.
+    if (offset > row)
+      vpx_internal_error(
+          error_info, VPX_CODEC_CORRUPT_FRAME,
+          "Invalid row offset in tile copy mode: row=%d offset=%d", row,
+          offset);
+
+    // Currently, only use tiles in same column as reference tiles.
+    copy_data = tile_buffers[row - offset][col].data;
+    copy_size = tile_buffers[row - offset][col].size;
+    size = 0;
+  }
+
+  *data += tile_size_bytes;
+
+  if (size > (size_t)(data_end - *data))
+    vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
+                       "Truncated packet or corrupt tile size");
+
+  if (size > 0) {
+    tile_buffers[row][col].data = *data;
+    tile_buffers[row][col].size = size;
+  } else {
+    // Copy mode (or a zero-sized tile, which leaves data NULL and size 0).
+    tile_buffers[row][col].data = copy_data;
+    tile_buffers[row][col].size = copy_size;
+  }
+
+  *data += size;
+
+  tile_buffers[row][col].raw_data_end = *data;
+}
+
+// Locates the compressed data of the tiles that need decoding and records it
+// in 'tile_buffers'. 'data' .. 'data_end' is the tile payload of the frame.
+//
+// With a single tile the whole payload is that tile. Otherwise every tile
+// column except the last is preceded by its size (tile_col_size_bytes wide),
+// and each tile within a column by a tile header (see get_tile_buffer()).
+//
+// Corrupt or truncated data is reported through vpx_internal_error().
+static void get_tile_buffers(
+    VP10Decoder *pbi, const uint8_t *data, const uint8_t *data_end,
+    TileBufferDec (*const tile_buffers)[MAX_TILE_COLS]) {
+  VP10_COMMON *const cm = &pbi->common;
+  const int tile_cols = cm->tile_cols;
+  const int tile_rows = cm->tile_rows;
+  const int have_tiles = tile_cols * tile_rows > 1;
+
+  if (!have_tiles) {
+    const uint32_t tile_size = data_end - data;
+    tile_buffers[0][0].data = data;
+    tile_buffers[0][0].size = tile_size;
+    tile_buffers[0][0].raw_data_end = NULL;
+  } else {
+    // We locate only the tile buffers that are required, which are the ones
+    // specified by pbi->dec_tile_col and pbi->dec_tile_row. Also, we always
+    // need the last (bottom right) tile buffer, as we need to know where the
+    // end of the compressed frame buffer is for proper superframe decoding.
+
+    const uint8_t *tile_col_data_end[MAX_TILE_COLS];
+    const uint8_t *const data_start = data;
+
+    // NOTE(review): the clamps below allow an index equal to the tile count;
+    // confirm callers never request a row/column >= tile_rows/tile_cols.
+    const int dec_tile_row = VPXMIN(pbi->dec_tile_row, tile_rows);
+    const int single_row = pbi->dec_tile_row >= 0;
+    const int tile_rows_start = single_row ? dec_tile_row : 0;
+    const int tile_rows_end = single_row ? tile_rows_start + 1 : tile_rows;
+    const int dec_tile_col = VPXMIN(pbi->dec_tile_col, tile_cols);
+    const int single_col = pbi->dec_tile_col >= 0;
+    const int tile_cols_start = single_col ? dec_tile_col : 0;
+    const int tile_cols_end = single_col ? tile_cols_start + 1 : tile_cols;
+
+    const int tile_col_size_bytes = pbi->tile_col_size_bytes;
+    const int tile_size_bytes = pbi->tile_size_bytes;
+
+    size_t tile_col_size;
+    int r, c;
+
+    // Read tile column sizes for all columns (we need the last tile buffer)
+    for (c = 0; c < tile_cols; ++c) {
+      const int is_last = c == tile_cols - 1;
+      if (!is_last) {
+        // The column sizes come from the bitstream: make sure both the size
+        // field and the column it describes lie inside the payload, so that
+        // tile_col_data_end[] never points past 'data_end'.
+        if (!read_is_valid(data, tile_col_size_bytes, data_end))
+          vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+                             "Truncated packet or corrupt tile column length");
+        tile_col_size = (uint32_t)mem_get_varsize(data, tile_col_size_bytes);
+        data += tile_col_size_bytes;
+        if (tile_col_size > (size_t)(data_end - data))
+          vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+                             "Truncated packet or corrupt tile column size");
+        tile_col_data_end[c] = data + tile_col_size;
+      } else {
+        tile_col_size = data_end - data;
+        tile_col_data_end[c] = data_end;
+      }
+      data += tile_col_size;
+    }
+
+    data = data_start;
+
+    // Read the required tile sizes.
+    for (c = tile_cols_start; c < tile_cols_end; ++c) {
+      const int is_last = c == tile_cols - 1;
+
+      if (c > 0) data = tile_col_data_end[c - 1];
+
+      // Skip this column's own size field; the last column has none.
+      if (!is_last) data += tile_col_size_bytes;
+
+      // Get the whole of the last column, otherwise stop at the required tile.
+      for (r = 0; r < (is_last ? tile_rows : tile_rows_end); ++r) {
+        tile_buffers[r][c].col = c;
+
+        get_tile_buffer(tile_col_data_end[c], &pbi->common.error, &data,
+                        pbi->decrypt_cb, pbi->decrypt_state, tile_buffers,
+                        tile_size_bytes, c, r);
+      }
+    }
+
+    // If we have not read the last column, then read it to get the last tile.
+    if (tile_cols_end != tile_cols) {
+      c = tile_cols - 1;
+
+      data = tile_col_data_end[c - 1];
+
+      for (r = 0; r < tile_rows; ++r) {
+        tile_buffers[r][c].col = c;
+
+        get_tile_buffer(tile_col_data_end[c], &pbi->common.error, &data,
+                        pbi->decrypt_cb, pbi->decrypt_state, tile_buffers,
+                        tile_size_bytes, c, r);
+      }
+    }
+  }
+}
+#else
+// Locates the next tile in '*data', stores its data pointer and size in
+// 'buf' and advances '*data' past the tile. Every tile except the last is
+// preceded by a size field of 'tile_size_bytes' bytes (decrypted through
+// 'decrypt_cb' when one is set); the last tile takes all remaining bytes up
+// to 'data_end'. Truncated or corrupt sizes are reported through
+// vpx_internal_error() on 'error_info'.
+static void get_tile_buffer(const uint8_t *const data_end,
+                            const int tile_size_bytes, int is_last,
+                            struct vpx_internal_error_info *error_info,
+                            const uint8_t **data, vpx_decrypt_cb decrypt_cb,
+                            void *decrypt_state, TileBufferDec *const buf) {
+  size_t size;
+
+  if (!is_last) {
+    // Only 'tile_size_bytes' bytes are read below, so that is the length to
+    // validate; requiring 4 bytes would reject valid streams that use a
+    // shorter size field near the end of the packet.
+    if (!read_is_valid(*data, tile_size_bytes, data_end))
+      vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
+                         "Truncated packet or corrupt tile length");
+
+    if (decrypt_cb) {
+      uint8_t be_data[4];
+      decrypt_cb(decrypt_state, *data, be_data, tile_size_bytes);
+      size = mem_get_varsize(be_data, tile_size_bytes);
+    } else {
+      size = mem_get_varsize(*data, tile_size_bytes);
+    }
+    *data += tile_size_bytes;
+
+    if (size > (size_t)(data_end - *data))
+      vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME,
+                         "Truncated packet or corrupt tile size");
+  } else {
+    size = data_end - *data;
+  }
+
+  buf->data = *data;
+  buf->size = size;
+
+  *data += size;
+}
+
+// Walks the tiles of the frame in raster order and records where each tile's
+// compressed data lives in 'tile_buffers'. Only the bottom-right tile is
+// flagged as the last one.
+static void get_tile_buffers(
+    VP10Decoder *pbi, const uint8_t *data, const uint8_t *data_end,
+    TileBufferDec (*const tile_buffers)[MAX_TILE_COLS]) {
+  VP10_COMMON *const cm = &pbi->common;
+  const int last_col = cm->tile_cols - 1;
+  const int last_row = cm->tile_rows - 1;
+  int row, col;
+
+  for (row = 0; row <= last_row; ++row) {
+    for (col = 0; col <= last_col; ++col) {
+      TileBufferDec *const tile_buf = &tile_buffers[row][col];
+      const int final_tile = (row == last_row) && (col == last_col);
+
+      tile_buf->col = col;
+      get_tile_buffer(data_end, pbi->tile_size_bytes, final_tile, &cm->error,
+                      &data, pbi->decrypt_cb, pbi->decrypt_state, tile_buf);
+    }
+  }
+}
+#endif // CONFIG_EXT_TILE
+
+// Decodes the selected tiles of the current frame on the calling thread.
+// The function locates the tile buffers in 'data' .. 'data_end', sets up one
+// TileData per tile, decodes each tile one superblock at a time, and runs
+// loop filtering (plus CLPF / deringing when compiled in and enabled).
+//
+// Returns a pointer to the end of the consumed tile data: 'data_end' with
+// CONFIG_ANS, the recorded end of the last tile buffer with CONFIG_EXT_TILE
+// and more than one tile, and otherwise the end reported by the bit reader
+// of the last tile.
+static const uint8_t *decode_tiles(VP10Decoder *pbi, const uint8_t *data,
+ const uint8_t *data_end) {
+ VP10_COMMON *const cm = &pbi->common;
+ const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
+ const int n_tiles = tile_cols * tile_rows;
+ TileBufferDec (*const tile_buffers)[MAX_TILE_COLS] = pbi->tile_buffers;
+#if CONFIG_EXT_TILE
+ // A non-negative dec_tile_row / dec_tile_col restricts decoding to that
+ // single tile row / column.
+ const int dec_tile_row = VPXMIN(pbi->dec_tile_row, tile_rows);
+ const int single_row = pbi->dec_tile_row >= 0;
+ const int tile_rows_start = single_row ? dec_tile_row : 0;
+ const int tile_rows_end = single_row ? dec_tile_row + 1 : tile_rows;
+ const int dec_tile_col = VPXMIN(pbi->dec_tile_col, tile_cols);
+ const int single_col = pbi->dec_tile_col >= 0;
+ const int tile_cols_start = single_col ? dec_tile_col : 0;
+ const int tile_cols_end = single_col ? tile_cols_start + 1 : tile_cols;
+ const int inv_col_order = pbi->inv_tile_order && !single_col;
+ const int inv_row_order = pbi->inv_tile_order && !single_row;
+#else
+ const int tile_rows_start = 0;
+ const int tile_rows_end = tile_rows;
+ const int tile_cols_start = 0;
+ const int tile_cols_end = tile_cols;
+ const int inv_col_order = pbi->inv_tile_order;
+ const int inv_row_order = pbi->inv_tile_order;
+#endif // CONFIG_EXT_TILE
+ int tile_row, tile_col;
+
+#if CONFIG_ENTROPY
+ cm->do_subframe_update = n_tiles == 1;
+#endif // CONFIG_ENTROPY
+
+ // Allocate the loop filter worker data the first time it is needed.
+ if (cm->lf.filter_level && !cm->skip_loop_filter &&
+ pbi->lf_worker.data1 == NULL) {
+ CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
+ vpx_memalign(32, sizeof(LFWorkerData)));
+ pbi->lf_worker.hook = (VPxWorkerHook)vp10_loop_filter_worker;
+ if (pbi->max_threads > 1 && !winterface->reset(&pbi->lf_worker)) {
+ vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+ "Loop filter thread creation failed");
+ }
+ }
+
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
+ LFWorkerData *const lf_data = (LFWorkerData *)pbi->lf_worker.data1;
+ // Be sure to sync as we might be resuming after a failed frame decode.
+ winterface->sync(&pbi->lf_worker);
+ vp10_loop_filter_data_reset(lf_data, get_frame_new_buffer(cm), cm,
+ pbi->mb.plane);
+ }
+
+ assert(tile_rows <= MAX_TILE_ROWS);
+ assert(tile_cols <= MAX_TILE_COLS);
+
+ get_tile_buffers(pbi, data, data_end, tile_buffers);
+
+ // (Re)allocate the per-tile state whenever the tile count changes.
+ if (pbi->tile_data == NULL || n_tiles != pbi->allocated_tiles) {
+ vpx_free(pbi->tile_data);
+ CHECK_MEM_ERROR(cm, pbi->tile_data,
+ vpx_memalign(32, n_tiles * (sizeof(*pbi->tile_data))));
+ pbi->allocated_tiles = n_tiles;
+ }
+
+ // Load all tile information into tile_data.
+ for (tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) {
+ for (tile_col = tile_cols_start; tile_col < tile_cols_end; ++tile_col) {
+ const TileBufferDec *const buf = &tile_buffers[tile_row][tile_col];
+ TileData *const td = pbi->tile_data + tile_cols * tile_row + tile_col;
+
+ td->cm = cm;
+ td->xd = pbi->mb;
+ td->xd.corrupted = 0;
+ // Symbol counts are only collected for backward adaptation.
+ td->xd.counts =
+ cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD
+ ? &cm->counts
+ : NULL;
+ vp10_zero(td->dqcoeff);
+ vp10_tile_init(&td->xd.tile, td->cm, tile_row, tile_col);
+#if !CONFIG_ANS
+ setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
+ &td->bit_reader, pbi->decrypt_cb, pbi->decrypt_state);
+#else
+ setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
+ &td->bit_reader, pbi->decrypt_cb, pbi->decrypt_state);
+#endif
+ vp10_init_macroblockd(cm, &td->xd, td->dqcoeff);
+ td->xd.plane[0].color_index_map = td->color_index_map[0];
+ td->xd.plane[1].color_index_map = td->color_index_map[1];
+ }
+ }
+
+ // Decode the tiles; rows/columns are visited in reverse when the
+ // corresponding inv_*_order flag is set.
+ for (tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) {
+ const int row = inv_row_order ? tile_rows - 1 - tile_row : tile_row;
+ int mi_row = 0;
+ TileInfo tile_info;
+
+ vp10_tile_set_row(&tile_info, cm, row);
+
+ for (tile_col = tile_cols_start; tile_col < tile_cols_end; ++tile_col) {
+ const int col = inv_col_order ? tile_cols - 1 - tile_col : tile_col;
+ TileData *const td = pbi->tile_data + tile_cols * row + col;
+
+ vp10_tile_set_col(&tile_info, cm, col);
+
+ vp10_zero_above_context(cm, tile_info.mi_col_start, tile_info.mi_col_end);
+
+ for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
+ mi_row += cm->mib_size) {
+ int mi_col;
+
+ vp10_zero_left_context(&td->xd);
+
+ for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
+ mi_col += cm->mib_size) {
+ decode_partition(pbi, &td->xd,
+#if CONFIG_SUPERTX
+ 0,
+#endif // CONFIG_SUPERTX
+ mi_row, mi_col, &td->bit_reader, cm->sb_size,
+ b_width_log2_lookup[cm->sb_size]);
+ }
+ // Abort the frame as soon as a superblock row reports corruption.
+ pbi->mb.corrupted |= td->xd.corrupted;
+ if (pbi->mb.corrupted)
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Failed to decode tile data");
+#if CONFIG_ENTROPY
+ if (cm->do_subframe_update &&
+ cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+ if ((mi_row + MI_SIZE) %
+ (MI_SIZE *
+ VPXMAX(cm->mi_rows / MI_SIZE / COEF_PROBS_BUFS, 1)) ==
+ 0 &&
+ mi_row + MI_SIZE < cm->mi_rows &&
+ cm->coef_probs_update_idx < COEF_PROBS_BUFS - 1) {
+ vp10_partial_adapt_probs(cm, mi_row, mi_col);
+ ++cm->coef_probs_update_idx;
+ }
+ }
+#endif // CONFIG_ENTROPY
+ }
+ }
+
+ assert(mi_row > 0);
+
+#if !CONFIG_VAR_TX
+ // Loopfilter one tile row.
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
+ LFWorkerData *const lf_data = (LFWorkerData *)pbi->lf_worker.data1;
+ const int lf_start = VPXMAX(0, tile_info.mi_row_start - cm->mib_size);
+ const int lf_end = tile_info.mi_row_end - cm->mib_size;
+
+ // Delay the loopfilter if the first tile row is only
+ // a single superblock high.
+ if (lf_end <= 0) continue;
+
+ // Decoding has completed. Finish up the loop filter in this thread.
+ if (tile_info.mi_row_end >= cm->mi_rows) continue;
+
+ winterface->sync(&pbi->lf_worker);
+ lf_data->start = lf_start;
+ lf_data->stop = lf_end;
+ if (pbi->max_threads > 1) {
+ winterface->launch(&pbi->lf_worker);
+ } else {
+ winterface->execute(&pbi->lf_worker);
+ }
+ }
+
+ // After loopfiltering, the last 7 row pixels in each superblock row may
+ // still be changed by the longest loopfilter of the next superblock row.
+ if (cm->frame_parallel_decode)
+ vp10_frameworker_broadcast(pbi->cur_buf, mi_row << cm->mib_size_log2);
+#endif // !CONFIG_VAR_TX
+ }
+
+#if CONFIG_VAR_TX
+ // Loopfilter the whole frame.
+ vp10_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb,
+ cm->lf.filter_level, 0, 0);
+#else
+ // Loopfilter remaining rows in the frame.
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
+ LFWorkerData *const lf_data = (LFWorkerData *)pbi->lf_worker.data1;
+ winterface->sync(&pbi->lf_worker);
+ lf_data->start = lf_data->stop;
+ lf_data->stop = cm->mi_rows;
+ winterface->execute(&pbi->lf_worker);
+ }
+#endif // CONFIG_VAR_TX
+#if CONFIG_CLPF
+ if (cm->clpf && !cm->skip_loop_filter)
+ vp10_clpf_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
+#endif
+#if CONFIG_DERING
+ if (cm->dering_level && !cm->skip_loop_filter) {
+ vp10_dering_frame(&pbi->cur_buf->buf, cm, &pbi->mb, cm->dering_level);
+ }
+#endif // CONFIG_DERING
+
+ if (cm->frame_parallel_decode)
+ vp10_frameworker_broadcast(pbi->cur_buf, INT_MAX);
+
+#if CONFIG_EXT_TILE
+ if (n_tiles == 1) {
+#if CONFIG_ANS
+ return data_end;
+#else
+ // Find the end of the single tile buffer
+ return vpx_reader_find_end(&pbi->tile_data->bit_reader);
+#endif // CONFIG_ANS
+ } else {
+ // Return the end of the last tile buffer
+ return tile_buffers[tile_rows - 1][tile_cols - 1].raw_data_end;
+ }
+#else
+#if CONFIG_ANS
+ return data_end;
+#else
+ {
+ // Get last tile data.
+ TileData *const td = pbi->tile_data + tile_cols * tile_rows - 1;
+ return vpx_reader_find_end(&td->bit_reader);
+ }
+#endif // CONFIG_ANS
+#endif // CONFIG_EXT_TILE
+}
+
+// Worker hook that decodes one tile: every superblock inside 'tile' is
+// decoded with the state held in 'tile_data'.
+// Returns 1 when the tile decoded without being marked corrupted, and 0 when
+// it was marked corrupted or when decoding was aborted through the jump
+// buffer in tile_data->error_info.
+static int tile_worker_hook(TileWorkerData *const tile_data,
+ const TileInfo *const tile) {
+ VP10Decoder *const pbi = tile_data->pbi;
+ const VP10_COMMON *const cm = &pbi->common;
+ int mi_row, mi_col;
+
+ // setjmp() returns non-zero when control comes back through a longjmp on
+ // this buffer (presumably raised by the error reporting reached via
+ // xd.error_info - confirm). The tile is then flagged as corrupted.
+ if (setjmp(tile_data->error_info.jmp)) {
+ tile_data->error_info.setjmp = 0;
+ tile_data->xd.corrupted = 1;
+ return 0;
+ }
+
+ // Arm the jump buffer and route this tile's errors to it.
+ tile_data->error_info.setjmp = 1;
+ tile_data->xd.error_info = &tile_data->error_info;
+
+ vp10_zero_above_context(&pbi->common, tile->mi_col_start, tile->mi_col_end);
+
+ // Visit the tile in steps of cm->mib_size, rows first.
+ for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
+ mi_row += cm->mib_size) {
+ vp10_zero_left_context(&tile_data->xd);
+
+ for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
+ mi_col += cm->mib_size) {
+ decode_partition(pbi, &tile_data->xd,
+#if CONFIG_SUPERTX
+ 0,
+#endif
+ mi_row, mi_col, &tile_data->bit_reader, cm->sb_size,
+ b_width_log2_lookup[cm->sb_size]);
+ }
+ }
+ return !tile_data->xd.corrupted;
+}
+
+// qsort() comparator that orders TileBufferDec entries by size, largest
+// first. The sizes are compared directly instead of subtracted: an unsigned
+// difference narrowed to int can be truncated or wrap and so report the
+// wrong order.
+static int compare_tile_buffers(const void *a, const void *b) {
+  const TileBufferDec *const buf1 = (const TileBufferDec *)a;
+  const TileBufferDec *const buf2 = (const TileBufferDec *)b;
+  return (buf1->size < buf2->size) - (buf1->size > buf2->size);
+}
+
+// Decodes the selected tiles of the current frame using the tile worker
+// threads. Within each tile row the tile columns are handed to the workers in
+// groups of at most 'num_workers'; the last tile of every group is decoded on
+// the calling thread while the others run on workers.
+//
+// Returns a pointer to the end of the consumed tile data: the recorded end of
+// the last tile buffer with CONFIG_EXT_TILE, 'data_end' with CONFIG_ANS, and
+// otherwise the end reported by the bit reader that decoded the bottom-right
+// tile.
+static const uint8_t *decode_tiles_mt(VP10Decoder *pbi, const uint8_t *data,
+                                      const uint8_t *data_end) {
+  VP10_COMMON *const cm = &pbi->common;
+  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+  const int tile_cols = cm->tile_cols;
+  const int tile_rows = cm->tile_rows;
+  const int num_workers = VPXMIN(pbi->max_threads & ~1, tile_cols);
+  TileBufferDec (*const tile_buffers)[MAX_TILE_COLS] = pbi->tile_buffers;
+#if CONFIG_EXT_TILE
+  // A non-negative dec_tile_row / dec_tile_col restricts decoding to that
+  // single tile row / column.
+  const int dec_tile_row = VPXMIN(pbi->dec_tile_row, tile_rows);
+  const int single_row = pbi->dec_tile_row >= 0;
+  const int tile_rows_start = single_row ? dec_tile_row : 0;
+  const int tile_rows_end = single_row ? dec_tile_row + 1 : tile_rows;
+  const int dec_tile_col = VPXMIN(pbi->dec_tile_col, tile_cols);
+  const int single_col = pbi->dec_tile_col >= 0;
+  const int tile_cols_start = single_col ? dec_tile_col : 0;
+  const int tile_cols_end = single_col ? tile_cols_start + 1 : tile_cols;
+#else
+  const int tile_rows_start = 0;
+  const int tile_rows_end = tile_rows;
+  const int tile_cols_start = 0;
+  const int tile_cols_end = tile_cols;
+#endif  // CONFIG_EXT_TILE
+  int tile_row, tile_col;
+  int i;
+
+#if !(CONFIG_ANS || CONFIG_EXT_TILE)
+  int final_worker = -1;
+#endif  // !(CONFIG_ANS || CONFIG_EXT_TILE)
+
+  assert(tile_rows <= MAX_TILE_ROWS);
+  assert(tile_cols <= MAX_TILE_COLS);
+
+  assert(tile_cols * tile_rows > 1);
+
+#if CONFIG_ANS
+  // TODO(any): This might just work now. Needs to be tested.
+  abort();  // FIXME: Tile parsing broken
+#endif  // CONFIG_ANS
+
+  // TODO(jzern): See if we can remove the restriction of passing in max
+  // threads to the decoder.
+  if (pbi->num_tile_workers == 0) {
+    const int num_threads = pbi->max_threads & ~1;
+    CHECK_MEM_ERROR(cm, pbi->tile_workers,
+                    vpx_malloc(num_threads * sizeof(*pbi->tile_workers)));
+    // Ensure tile data offsets will be properly aligned. This may fail on
+    // platforms without DECLARE_ALIGNED().
+    assert((sizeof(*pbi->tile_worker_data) % 16) == 0);
+    CHECK_MEM_ERROR(
+        cm, pbi->tile_worker_data,
+        vpx_memalign(32, num_threads * sizeof(*pbi->tile_worker_data)));
+    CHECK_MEM_ERROR(cm, pbi->tile_worker_info,
+                    vpx_malloc(num_threads * sizeof(*pbi->tile_worker_info)));
+    for (i = 0; i < num_threads; ++i) {
+      VPxWorker *const worker = &pbi->tile_workers[i];
+      ++pbi->num_tile_workers;
+
+      winterface->init(worker);
+      // The last worker is not reset here; the last tile of each group is
+      // run with execute() on the calling thread below.
+      if (i < num_threads - 1 && !winterface->reset(worker)) {
+        vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+                           "Tile decoder thread creation failed");
+      }
+    }
+  }
+
+  // Reset tile decoding hook
+  for (i = 0; i < num_workers; ++i) {
+    VPxWorker *const worker = &pbi->tile_workers[i];
+    winterface->sync(worker);
+    worker->hook = (VPxWorkerHook)tile_worker_hook;
+    worker->data1 = &pbi->tile_worker_data[i];
+    worker->data2 = &pbi->tile_worker_info[i];
+  }
+
+  // Initialize thread frame counts.
+  if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+    for (i = 0; i < num_workers; ++i) {
+      TileWorkerData *const twd = (TileWorkerData *)pbi->tile_workers[i].data1;
+      vp10_zero(twd->counts);
+    }
+  }
+
+  // Load tile data into tile_buffers
+  get_tile_buffers(pbi, data, data_end, tile_buffers);
+
+  for (tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) {
+    // Sort the buffers in this tile row based on size in descending order.
+    qsort(&tile_buffers[tile_row][tile_cols_start],
+          tile_cols_end - tile_cols_start, sizeof(tile_buffers[0][0]),
+          compare_tile_buffers);
+
+    // Rearrange the tile buffers in this tile row such that per-tile group
+    // the largest, and presumably the most difficult tile will be decoded in
+    // the main thread. This should help minimize the number of instances
+    // where the main thread is waiting for a worker to complete.
+    {
+      int group_start;
+      for (group_start = tile_cols_start; group_start < tile_cols_end;
+           group_start += num_workers) {
+        // Bound each group by tile_cols_end, not tile_cols: only the columns
+        // in [tile_cols_start, tile_cols_end) were sorted and will be
+        // decoded, so buffers outside that range must not be rotated in.
+        const int group_end = VPXMIN(group_start + num_workers, tile_cols_end);
+        const TileBufferDec largest = tile_buffers[tile_row][group_start];
+        memmove(&tile_buffers[tile_row][group_start],
+                &tile_buffers[tile_row][group_start + 1],
+                (group_end - group_start - 1) * sizeof(tile_buffers[0][0]));
+        tile_buffers[tile_row][group_end - 1] = largest;
+      }
+    }
+
+    for (tile_col = tile_cols_start; tile_col < tile_cols_end;) {
+      // Launch workers for individual columns
+      for (i = 0; i < num_workers && tile_col < tile_cols_end;
+           ++i, ++tile_col) {
+        TileBufferDec *const buf = &tile_buffers[tile_row][tile_col];
+        VPxWorker *const worker = &pbi->tile_workers[i];
+        TileWorkerData *const twd = (TileWorkerData *)worker->data1;
+        TileInfo *const tile_info = (TileInfo *)worker->data2;
+
+        twd->pbi = pbi;
+        twd->xd = pbi->mb;
+        twd->xd.corrupted = 0;
+        // Symbol counts are only collected for backward adaptation.
+        twd->xd.counts =
+            cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD
+                ? &twd->counts
+                : NULL;
+        vp10_zero(twd->dqcoeff);
+        // The buffers were reordered above, so the tile column comes from
+        // the buffer itself rather than from the loop index.
+        vp10_tile_init(tile_info, cm, tile_row, buf->col);
+        vp10_tile_init(&twd->xd.tile, cm, tile_row, buf->col);
+#if !CONFIG_ANS
+        setup_bool_decoder(buf->data, data_end, buf->size, &cm->error,
+                           &twd->bit_reader, pbi->decrypt_cb,
+                           pbi->decrypt_state);
+#else
+        setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
+                            &twd->bit_reader, pbi->decrypt_cb,
+                            pbi->decrypt_state);
+#endif  // CONFIG_ANS
+        vp10_init_macroblockd(cm, &twd->xd, twd->dqcoeff);
+        twd->xd.plane[0].color_index_map = twd->color_index_map[0];
+        twd->xd.plane[1].color_index_map = twd->color_index_map[1];
+
+        worker->had_error = 0;
+        // The last tile of the group runs on the calling thread.
+        if (i == num_workers - 1 || tile_col == tile_cols_end - 1) {
+          winterface->execute(worker);
+        } else {
+          winterface->launch(worker);
+        }
+
+#if !(CONFIG_ANS || CONFIG_EXT_TILE)
+        // Remember which worker decoded the bottom-right tile; its bit
+        // reader marks the end of the frame data.
+        if (tile_row == tile_rows - 1 && buf->col == tile_cols - 1) {
+          final_worker = i;
+        }
+#endif  // !(CONFIG_ANS || CONFIG_EXT_TILE)
+      }
+
+      // Sync all workers
+      for (; i > 0; --i) {
+        VPxWorker *const worker = &pbi->tile_workers[i - 1];
+        // TODO(jzern): The tile may have specific error data associated with
+        // its vpx_internal_error_info which could be propagated to the main
+        // info in cm. Additionally once the threads have been synced and an
+        // error is detected, there's no point in continuing to decode tiles.
+        pbi->mb.corrupted |= !winterface->sync(worker);
+      }
+    }
+  }
+
+  // Accumulate thread frame counts.
+  if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+    for (i = 0; i < num_workers; ++i) {
+      TileWorkerData *const twd = (TileWorkerData *)pbi->tile_workers[i].data1;
+      vp10_accumulate_frame_counts(cm, &twd->counts);
+    }
+  }
+
+#if CONFIG_EXT_TILE
+  // Return the end of the last tile buffer
+  return tile_buffers[tile_rows - 1][tile_cols - 1].raw_data_end;
+#else
+#if CONFIG_ANS
+  return data_end;
+#else
+  assert(final_worker != -1);
+  {
+    TileWorkerData *const twd =
+        (TileWorkerData *)pbi->tile_workers[final_worker].data1;
+    return vpx_reader_find_end(&twd->bit_reader);
+  }
+#endif  // CONFIG_ANS
+#endif  // CONFIG_EXT_TILE
+}
+
+static void error_handler(void *data) {
+ VP10_COMMON *const cm = (VP10_COMMON *)data;
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet");
+}
+
+// Reads bit depth, color space/range and chroma subsampling into cm.
+static void read_bitdepth_colorspace_sampling(VP10_COMMON *cm,
+                                              struct vpx_read_bit_buffer *rb) {
+  if (cm->profile >= PROFILE_2) {
+    cm->bit_depth = vpx_rb_read_bit(rb) ? VPX_BITS_12 : VPX_BITS_10;
+  } else {
+    cm->bit_depth = VPX_BITS_8;
+  }
+#if CONFIG_VP9_HIGHBITDEPTH
+  cm->use_highbitdepth = cm->profile >= PROFILE_2;
+#endif
+  cm->color_space = vpx_rb_read_literal(rb, 3);
+  if (cm->color_space != VPX_CS_SRGB) {
+    // [16,235] (including xvycc) vs [0,255] range
+    cm->color_range = vpx_rb_read_bit(rb);
+    if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
+      cm->subsampling_x = vpx_rb_read_bit(rb);
+      cm->subsampling_y = vpx_rb_read_bit(rb);
+      if (cm->subsampling_x == 1 && cm->subsampling_y == 1)
+        vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                           "4:2:0 color not supported in profile 1 or 3");
+      if (vpx_rb_read_bit(rb))
+        vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                           "Reserved bit set");
+    } else {
+      cm->subsampling_y = cm->subsampling_x = 1;
+    }
+  } else {
+    // No range bit is coded for sRGB, which is always full range (1, i.e.
+    // [0,255]); set it so a value from an earlier frame is not inherited.
+    cm->color_range = 1;
+    if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
+      // sRGB implies 4:4:4 sampling; 4:2:2 and 4:4:0 are not allowed.
+      cm->subsampling_y = cm->subsampling_x = 0;
+      if (vpx_rb_read_bit(rb))
+        vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                           "Reserved bit set");
+    } else {
+      vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                         "4:4:4 color not supported in profile 0 or 2");
+    }
+  }
+}
+
+// Parses the uncompressed frame header. Returns the 16-bit size field that
+// ends it, or 0 when the header only asks to show an existing frame.
+static size_t read_uncompressed_header(VP10Decoder *pbi,
+                                       struct vpx_read_bit_buffer *rb) {
+  VP10_COMMON *const cm = &pbi->common;
+  MACROBLOCKD *const xd = &pbi->mb;
+  BufferPool *const pool = cm->buffer_pool;
+  RefCntBuffer *const frame_bufs = pool->frame_bufs;
+  int i, mask, ref_index = 0;
+  size_t sz;
+#if CONFIG_EXT_REFS
+  cm->last3_frame_type = cm->last2_frame_type;
+  cm->last2_frame_type = cm->last_frame_type;
+#endif  // CONFIG_EXT_REFS
+  cm->last_frame_type = cm->frame_type;
+  cm->last_intra_only = cm->intra_only;
+
+#if CONFIG_EXT_REFS
+  // NOTE: By default, every coded frame is treated as a reference frame.
+  cm->is_reference_frame = 1;
+#endif  // CONFIG_EXT_REFS
+
+  if (vpx_rb_read_literal(rb, 2) != VPX_FRAME_MARKER)
+    vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                       "Invalid frame marker");
+
+  cm->profile = vp10_read_profile(rb);
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (cm->profile >= MAX_PROFILES)
+    vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                       "Unsupported bitstream profile");
+#else
+  if (cm->profile >= PROFILE_2)
+    vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                       "Unsupported bitstream profile");
+#endif
+
+  cm->show_existing_frame = vpx_rb_read_bit(rb);
+  if (cm->show_existing_frame) {
+    // Show an existing frame directly.
+    const int frame_to_show = cm->ref_frame_map[vpx_rb_read_literal(rb, 3)];
+
+    lock_buffer_pool(pool);
+    if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) {
+      unlock_buffer_pool(pool);
+      vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                         "Buffer %d does not contain a decoded frame",
+                         frame_to_show);
+    }
+    ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show);
+    unlock_buffer_pool(pool);
+
+    cm->lf.filter_level = 0;
+    cm->show_frame = 1;
+    pbi->refresh_frame_flags = 0;
+
+    if (cm->frame_parallel_decode) {
+      for (i = 0; i < REF_FRAMES; ++i)
+        cm->next_ref_frame_map[i] = cm->ref_frame_map[i];
+    }
+
+    return 0;
+  }
+
+  cm->frame_type = (FRAME_TYPE)vpx_rb_read_bit(rb);
+  cm->show_frame = vpx_rb_read_bit(rb);
+  cm->error_resilient_mode = vpx_rb_read_bit(rb);
+
+  if (cm->frame_type == KEY_FRAME) {
+    if (!vp10_read_sync_code(rb))
+      vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                         "Invalid frame sync code");
+
+    read_bitdepth_colorspace_sampling(cm, rb);
+    pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1;
+
+    for (i = 0; i < INTER_REFS_PER_FRAME; ++i) {
+      cm->frame_refs[i].idx = INVALID_IDX;
+      cm->frame_refs[i].buf = NULL;
+    }
+
+    setup_frame_size(cm, rb);
+    if (pbi->need_resync) {
+      memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
+      pbi->need_resync = 0;
+    }
+    if (frame_is_intra_only(cm))
+      cm->allow_screen_content_tools = vpx_rb_read_bit(rb);
+  } else {
+    cm->intra_only = cm->show_frame ? 0 : vpx_rb_read_bit(rb);
+
+    if (cm->error_resilient_mode) {
+      cm->reset_frame_context = RESET_FRAME_CONTEXT_ALL;
+    } else {
+      if (cm->intra_only) {
+        cm->reset_frame_context = vpx_rb_read_bit(rb)
+                                      ? RESET_FRAME_CONTEXT_ALL
+                                      : RESET_FRAME_CONTEXT_CURRENT;
+      } else {
+        cm->reset_frame_context = vpx_rb_read_bit(rb)
+                                      ? RESET_FRAME_CONTEXT_CURRENT
+                                      : RESET_FRAME_CONTEXT_NONE;
+        if (cm->reset_frame_context == RESET_FRAME_CONTEXT_CURRENT)
+          cm->reset_frame_context = vpx_rb_read_bit(rb)
+                                        ? RESET_FRAME_CONTEXT_ALL
+                                        : RESET_FRAME_CONTEXT_CURRENT;
+      }
+    }
+
+    if (cm->intra_only) {
+      if (!vp10_read_sync_code(rb))
+        vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+                           "Invalid frame sync code");
+
+      read_bitdepth_colorspace_sampling(cm, rb);
+      pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES);
+      setup_frame_size(cm, rb);
+      if (pbi->need_resync) {
+        memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
+        pbi->need_resync = 0;
+      }
+    } else if (pbi->need_resync != 1) { /* Skip if need resync */
+      pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES);
+
+#if CONFIG_EXT_REFS
+      if (!pbi->refresh_frame_flags) {
+        // NOTE: "pbi->refresh_frame_flags == 0" indicates that the coded frame
+        //       will not be used as a reference
+        cm->is_reference_frame = 0;
+      }
+#endif  // CONFIG_EXT_REFS
+
+      for (i = 0; i < INTER_REFS_PER_FRAME; ++i) {
+        const int ref = vpx_rb_read_literal(rb, REF_FRAMES_LOG2);
+        const int idx = cm->ref_frame_map[ref];
+        RefBuffer *const ref_frame = &cm->frame_refs[i];
+        // Reject empty map slots (negative index) before indexing frame_bufs.
+        if (idx < 0)
+          vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+                             "Inter frame requests nonexistent reference");
+        ref_frame->idx = idx;
+        ref_frame->buf = &frame_bufs[idx].buf;
+        cm->ref_frame_sign_bias[LAST_FRAME + i] = vpx_rb_read_bit(rb);
+      }
+
+      setup_frame_size_with_refs(cm, rb);
+
+      cm->allow_high_precision_mv = vpx_rb_read_bit(rb);
+      cm->interp_filter = read_interp_filter(rb);
+
+      for (i = 0; i < INTER_REFS_PER_FRAME; ++i) {
+        RefBuffer *const ref_buf = &cm->frame_refs[i];
+#if CONFIG_VP9_HIGHBITDEPTH
+        vp10_setup_scale_factors_for_frame(
+            &ref_buf->sf, ref_buf->buf->y_crop_width,
+            ref_buf->buf->y_crop_height, cm->width, cm->height,
+            cm->use_highbitdepth);
+#else
+        vp10_setup_scale_factors_for_frame(
+            &ref_buf->sf, ref_buf->buf->y_crop_width,
+            ref_buf->buf->y_crop_height, cm->width, cm->height);
+#endif
+      }
+    }
+  }
+#if CONFIG_VP9_HIGHBITDEPTH
+  get_frame_new_buffer(cm)->bit_depth = cm->bit_depth;
+#endif
+  get_frame_new_buffer(cm)->color_space = cm->color_space;
+  get_frame_new_buffer(cm)->color_range = cm->color_range;
+  get_frame_new_buffer(cm)->render_width = cm->render_width;
+  get_frame_new_buffer(cm)->render_height = cm->render_height;
+
+  if (pbi->need_resync) {
+    vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+                       "Keyframe / intra-only frame required to reset decoder"
+                       " state");
+  }
+
+  if (!cm->error_resilient_mode) {
+    cm->refresh_frame_context = vpx_rb_read_bit(rb)
+                                    ? REFRESH_FRAME_CONTEXT_FORWARD
+                                    : REFRESH_FRAME_CONTEXT_BACKWARD;
+  } else {
+    cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_FORWARD;
+  }
+
+  // This flag will be overridden by the call to vp10_setup_past_independence
+  // below, forcing the use of context 0 for those frame types.
+  cm->frame_context_idx = vpx_rb_read_literal(rb, FRAME_CONTEXTS_LOG2);
+
+  // Generate next_ref_frame_map.
+  lock_buffer_pool(pool);
+  for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
+    if (mask & 1) {
+      cm->next_ref_frame_map[ref_index] = cm->new_fb_idx;
+      ++frame_bufs[cm->new_fb_idx].ref_count;
+    } else {
+      cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index];
+    }
+    // Current thread holds the reference frame.
+    if (cm->ref_frame_map[ref_index] >= 0)
+      ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count;
+    ++ref_index;
+  }
+
+  for (; ref_index < REF_FRAMES; ++ref_index) {
+    cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index];
+
+    // Current thread holds the reference frame.
+    if (cm->ref_frame_map[ref_index] >= 0)
+      ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count;
+  }
+  unlock_buffer_pool(pool);
+  pbi->hold_ref_buf = 1;
+
+  if (frame_is_intra_only(cm) || cm->error_resilient_mode)
+    vp10_setup_past_independence(cm);
+
+#if CONFIG_EXT_PARTITION
+  set_sb_size(cm, vpx_rb_read_bit(rb) ? BLOCK_128X128 : BLOCK_64X64);
+#else
+  set_sb_size(cm, BLOCK_64X64);
+#endif  // CONFIG_EXT_PARTITION
+
+  setup_loopfilter(cm, rb);
+#if CONFIG_CLPF
+  setup_clpf(cm, rb);
+#endif
+#if CONFIG_DERING
+  setup_dering(cm, rb);
+#endif
+#if CONFIG_LOOP_RESTORATION
+  setup_restoration(cm, rb);
+#endif  // CONFIG_LOOP_RESTORATION
+  setup_quantization(cm, rb);
+#if CONFIG_VP9_HIGHBITDEPTH
+  xd->bd = (int)cm->bit_depth;
+#endif
+
+#if CONFIG_ENTROPY
+  vp10_default_coef_probs(cm);
+  if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
+      cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL) {
+    for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc;
+  } else if (cm->reset_frame_context == RESET_FRAME_CONTEXT_CURRENT) {
+    cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+  }
+#endif  // CONFIG_ENTROPY
+
+  setup_segmentation(cm, rb);
+
+  for (i = 0; i < MAX_SEGMENTS; ++i) {
+    const int qindex = cm->seg.enabled
+                           ? vp10_get_qindex(&cm->seg, i, cm->base_qindex)
+                           : cm->base_qindex;
+    xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
+                      cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
+  }
+
+  setup_segmentation_dequant(cm);
+  cm->tx_mode =
+      (!cm->seg.enabled && xd->lossless[0]) ? ONLY_4X4 : read_tx_mode(rb);
+  cm->reference_mode = read_frame_reference_mode(cm, rb);
+
+  read_tile_info(pbi, rb);
+  sz = vpx_rb_read_literal(rb, 16);
+  if (sz == 0)
+    vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+                       "Invalid header size");
+
+  return sz;
+}
+
+#if CONFIG_EXT_TX
+static void read_ext_tx_probs(FRAME_CONTEXT *fc, vp10_reader *r) {
+ int i, j, k;
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ if (vp10_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_inter_ext_tx_for_txsize[s][i]) continue;
+ for (j = 0; j < num_ext_tx_set_inter[s] - 1; ++j)
+ vp10_diff_update_prob(r, &fc->inter_ext_tx_prob[s][i][j]);
+ }
+ }
+ }
+
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ if (vp10_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_intra_ext_tx_for_txsize[s][i]) continue;
+ for (j = 0; j < INTRA_MODES; ++j)
+ for (k = 0; k < num_ext_tx_set_intra[s] - 1; ++k)
+ vp10_diff_update_prob(r, &fc->intra_ext_tx_prob[s][i][j][k]);
+ }
+ }
+ }
+}
+
+#else
+
+static void read_ext_tx_probs(FRAME_CONTEXT *fc, vp10_reader *r) {
+ int i, j, k;
+ if (vp10_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ for (j = 0; j < TX_TYPES; ++j)
+ for (k = 0; k < TX_TYPES - 1; ++k)
+ vp10_diff_update_prob(r, &fc->intra_ext_tx_prob[i][j][k]);
+ }
+ }
+ if (vp10_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ for (k = 0; k < TX_TYPES - 1; ++k)
+ vp10_diff_update_prob(r, &fc->inter_ext_tx_prob[i][k]);
+ }
+ }
+}
+#endif // CONFIG_EXT_TX
+
+#if CONFIG_SUPERTX
+static void read_supertx_probs(FRAME_CONTEXT *fc, vp10_reader *r) {
+ int i, j;
+ if (vp10_read(r, GROUP_DIFF_UPDATE_PROB)) {
+ for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
+ for (j = 1; j < TX_SIZES; ++j) {
+ vp10_diff_update_prob(r, &fc->supertx_prob[i][j]);
+ }
+ }
+ }
+}
+#endif // CONFIG_SUPERTX
+
+#if CONFIG_GLOBAL_MOTION
+static void read_global_motion_params(Global_Motion_Params *params,
+ vpx_prob *probs, vp10_reader *r) {
+ GLOBAL_MOTION_TYPE gmtype =
+ vp10_read_tree(r, vp10_global_motion_types_tree, probs);
+ params->gmtype = gmtype;
+ params->motion_params.wmtype = gm_to_trans_type(gmtype);
+ switch (gmtype) {
+ case GLOBAL_ZERO: break;
+ case GLOBAL_AFFINE:
+ params->motion_params.wmmat[4] =
+ (vp10_read_primitive_symmetric(r, GM_ABS_ALPHA_BITS) *
+ GM_ALPHA_DECODE_FACTOR);
+ params->motion_params.wmmat[5] =
+ vp10_read_primitive_symmetric(r, GM_ABS_ALPHA_BITS) *
+ GM_ALPHA_DECODE_FACTOR +
+ (1 << WARPEDMODEL_PREC_BITS);
+ // fallthrough intended
+ case GLOBAL_ROTZOOM:
+ params->motion_params.wmmat[2] =
+ (vp10_read_primitive_symmetric(r, GM_ABS_ALPHA_BITS) *
+ GM_ALPHA_DECODE_FACTOR) +
+ (1 << WARPEDMODEL_PREC_BITS);
+ params->motion_params.wmmat[3] =
+ vp10_read_primitive_symmetric(r, GM_ABS_ALPHA_BITS) *
+ GM_ALPHA_DECODE_FACTOR;
+ // fallthrough intended
+ case GLOBAL_TRANSLATION:
+ params->motion_params.wmmat[0] =
+ vp10_read_primitive_symmetric(r, GM_ABS_TRANS_BITS) *
+ GM_TRANS_DECODE_FACTOR;
+ params->motion_params.wmmat[1] =
+ vp10_read_primitive_symmetric(r, GM_ABS_TRANS_BITS) *
+ GM_TRANS_DECODE_FACTOR;
+ break;
+ default: assert(0);
+ }
+}
+
+static void read_global_motion(VP10_COMMON *cm, vp10_reader *r) {
+ int frame;
+ memset(cm->global_motion, 0, sizeof(cm->global_motion));
+ for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
+ read_global_motion_params(&cm->global_motion[frame],
+ cm->fc->global_motion_types_prob, r);
+ }
+}
+#endif // CONFIG_GLOBAL_MOTION
+
+static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data,
+ size_t partition_size) {
+ VP10_COMMON *const cm = &pbi->common;
+#if CONFIG_SUPERTX
+ MACROBLOCKD *const xd = &pbi->mb;
+#endif
+ FRAME_CONTEXT *const fc = cm->fc;
+ vp10_reader r;
+ int k, i, j;
+
+#if !CONFIG_ANS
+ if (vpx_reader_init(&r, data, partition_size, pbi->decrypt_cb,
+ pbi->decrypt_state))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate bool decoder 0");
+#else
+ if (ans_read_init(&r, data, partition_size))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate compressed header ANS decoder");
+#endif // !CONFIG_ANS
+
+ if (cm->tx_mode == TX_MODE_SELECT) {
+ for (i = 0; i < TX_SIZES - 1; ++i)
+ for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
+ for (k = 0; k < i + 1; ++k)
+ vp10_diff_update_prob(&r, &fc->tx_size_probs[i][j][k]);
+ }
+
+ read_coef_probs(fc, cm->tx_mode, &r);
+
+#if CONFIG_VAR_TX
+ for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k)
+ vp10_diff_update_prob(&r, &fc->txfm_partition_prob[k]);
+#endif
+
+ for (k = 0; k < SKIP_CONTEXTS; ++k)
+ vp10_diff_update_prob(&r, &fc->skip_probs[k]);
+
+ if (cm->seg.enabled && cm->seg.update_map) {
+ if (cm->seg.temporal_update) {
+ for (k = 0; k < PREDICTION_PROBS; k++)
+ vp10_diff_update_prob(&r, &cm->fc->seg.pred_probs[k]);
+ }
+ for (k = 0; k < MAX_SEGMENTS - 1; k++)
+ vp10_diff_update_prob(&r, &cm->fc->seg.tree_probs[k]);
+ }
+
+ for (j = 0; j < INTRA_MODES; j++)
+ for (i = 0; i < INTRA_MODES - 1; ++i)
+ vp10_diff_update_prob(&r, &fc->uv_mode_prob[j][i]);
+
+#if CONFIG_EXT_PARTITION_TYPES
+ for (i = 0; i < PARTITION_TYPES - 1; ++i)
+ vp10_diff_update_prob(&r, &fc->partition_prob[0][i]);
+ for (j = 1; j < PARTITION_CONTEXTS; ++j)
+ for (i = 0; i < EXT_PARTITION_TYPES - 1; ++i)
+ vp10_diff_update_prob(&r, &fc->partition_prob[j][i]);
+#else
+ for (j = 0; j < PARTITION_CONTEXTS; ++j)
+ for (i = 0; i < PARTITION_TYPES - 1; ++i)
+ vp10_diff_update_prob(&r, &fc->partition_prob[j][i]);
+#endif // CONFIG_EXT_PARTITION_TYPES
+
+#if CONFIG_EXT_INTRA
+ for (i = 0; i < INTRA_FILTERS + 1; ++i)
+ for (j = 0; j < INTRA_FILTERS - 1; ++j)
+ vp10_diff_update_prob(&r, &fc->intra_filter_probs[i][j]);
+#endif // CONFIG_EXT_INTRA
+
+ if (frame_is_intra_only(cm)) {
+ vp10_copy(cm->kf_y_prob, vp10_kf_y_mode_prob);
+ for (k = 0; k < INTRA_MODES; k++)
+ for (j = 0; j < INTRA_MODES; j++)
+ for (i = 0; i < INTRA_MODES - 1; ++i)
+ vp10_diff_update_prob(&r, &cm->kf_y_prob[k][j][i]);
+ } else {
+#if !CONFIG_REF_MV
+ nmv_context *const nmvc = &fc->nmvc;
+#endif
+
+ read_inter_mode_probs(fc, &r);
+
+#if CONFIG_EXT_INTER
+ read_inter_compound_mode_probs(fc, &r);
+ if (cm->reference_mode != COMPOUND_REFERENCE) {
+ for (i = 0; i < BLOCK_SIZE_GROUPS; i++) {
+ if (is_interintra_allowed_bsize_group(i)) {
+ vp10_diff_update_prob(&r, &fc->interintra_prob[i]);
+ }
+ }
+ for (i = 0; i < BLOCK_SIZE_GROUPS; i++) {
+ for (j = 0; j < INTERINTRA_MODES - 1; j++)
+ vp10_diff_update_prob(&r, &fc->interintra_mode_prob[i][j]);
+ }
+ for (i = 0; i < BLOCK_SIZES; i++) {
+ if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i)) {
+ vp10_diff_update_prob(&r, &fc->wedge_interintra_prob[i]);
+ }
+ }
+ }
+ if (cm->reference_mode != SINGLE_REFERENCE) {
+ for (i = 0; i < BLOCK_SIZES; i++) {
+ if (is_interinter_wedge_used(i)) {
+ vp10_diff_update_prob(&r, &fc->wedge_interinter_prob[i]);
+ }
+ }
+ }
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i) {
+ for (j = 0; j < MOTION_VARIATIONS - 1; ++j)
+ vp10_diff_update_prob(&r, &fc->motvar_prob[i][j]);
+ }
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+ if (cm->interp_filter == SWITCHABLE) read_switchable_interp_probs(fc, &r);
+
+ for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
+ vp10_diff_update_prob(&r, &fc->intra_inter_prob[i]);
+
+ if (cm->reference_mode != SINGLE_REFERENCE)
+ setup_compound_reference_mode(cm);
+
+ read_frame_reference_mode_probs(cm, &r);
+
+ for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
+ for (i = 0; i < INTRA_MODES - 1; ++i)
+ vp10_diff_update_prob(&r, &fc->y_mode_prob[j][i]);
+
+#if CONFIG_REF_MV
+ for (i = 0; i < NMV_CONTEXTS; ++i)
+ read_mv_probs(&fc->nmvc[i], cm->allow_high_precision_mv, &r);
+#else
+ read_mv_probs(nmvc, cm->allow_high_precision_mv, &r);
+#endif
+ read_ext_tx_probs(fc, &r);
+#if CONFIG_SUPERTX
+ if (!xd->lossless[0]) read_supertx_probs(fc, &r);
+#endif
+#if CONFIG_GLOBAL_MOTION
+ read_global_motion(cm, &r);
+#endif // CONFIG_GLOBAL_MOTION
+ }
+
+ return vp10_reader_has_error(&r);
+}
+
+#ifdef NDEBUG
+#define debug_check_frame_counts(cm) (void)0
+#else // !NDEBUG
+// Counts should only be incremented when frame_parallel_decoding_mode and
+// error_resilient_mode are disabled.
+static void debug_check_frame_counts(const VP10_COMMON *const cm) {
+ FRAME_COUNTS zero_counts;
+ vp10_zero(zero_counts);
+ assert(cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_BACKWARD ||
+ cm->error_resilient_mode);
+ assert(!memcmp(cm->counts.y_mode, zero_counts.y_mode,
+ sizeof(cm->counts.y_mode)));
+ assert(!memcmp(cm->counts.uv_mode, zero_counts.uv_mode,
+ sizeof(cm->counts.uv_mode)));
+ assert(!memcmp(cm->counts.partition, zero_counts.partition,
+ sizeof(cm->counts.partition)));
+ assert(!memcmp(cm->counts.coef, zero_counts.coef, sizeof(cm->counts.coef)));
+ assert(!memcmp(cm->counts.eob_branch, zero_counts.eob_branch,
+ sizeof(cm->counts.eob_branch)));
+ assert(!memcmp(cm->counts.switchable_interp, zero_counts.switchable_interp,
+ sizeof(cm->counts.switchable_interp)));
+ assert(!memcmp(cm->counts.inter_mode, zero_counts.inter_mode,
+ sizeof(cm->counts.inter_mode)));
+#if CONFIG_EXT_INTER
+ assert(!memcmp(cm->counts.inter_compound_mode,
+ zero_counts.inter_compound_mode,
+ sizeof(cm->counts.inter_compound_mode)));
+ assert(!memcmp(cm->counts.interintra, zero_counts.interintra,
+ sizeof(cm->counts.interintra)));
+ assert(!memcmp(cm->counts.wedge_interintra, zero_counts.wedge_interintra,
+ sizeof(cm->counts.wedge_interintra)));
+ assert(!memcmp(cm->counts.wedge_interinter, zero_counts.wedge_interinter,
+ sizeof(cm->counts.wedge_interinter)));
+#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ assert(!memcmp(cm->counts.motvar, zero_counts.motvar,
+ sizeof(cm->counts.motvar)));
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter,
+ sizeof(cm->counts.intra_inter)));
+ assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter,
+ sizeof(cm->counts.comp_inter)));
+ assert(!memcmp(cm->counts.single_ref, zero_counts.single_ref,
+ sizeof(cm->counts.single_ref)));
+ assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref,
+ sizeof(cm->counts.comp_ref)));
+#if CONFIG_EXT_REFS
+ assert(!memcmp(cm->counts.comp_bwdref, zero_counts.comp_bwdref,
+ sizeof(cm->counts.comp_bwdref)));
+#endif // CONFIG_EXT_REFS
+ assert(!memcmp(&cm->counts.tx_size, &zero_counts.tx_size,
+ sizeof(cm->counts.tx_size)));
+ assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
+#if CONFIG_REF_MV
+ assert(
+ !memcmp(&cm->counts.mv[0], &zero_counts.mv[0], sizeof(cm->counts.mv[0])));
+ assert(
+ !memcmp(&cm->counts.mv[1], &zero_counts.mv[1], sizeof(cm->counts.mv[0])));
+#else
+ assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv)));
+#endif
+ assert(!memcmp(cm->counts.inter_ext_tx, zero_counts.inter_ext_tx,
+ sizeof(cm->counts.inter_ext_tx)));
+ assert(!memcmp(cm->counts.intra_ext_tx, zero_counts.intra_ext_tx,
+ sizeof(cm->counts.intra_ext_tx)));
+}
+#endif // NDEBUG
+
+static struct vpx_read_bit_buffer *init_read_bit_buffer(
+ VP10Decoder *pbi, struct vpx_read_bit_buffer *rb, const uint8_t *data,
+ const uint8_t *data_end, uint8_t clear_data[MAX_VPX_HEADER_SIZE]) {
+ rb->bit_offset = 0;
+ rb->error_handler = error_handler;
+ rb->error_handler_data = &pbi->common;
+ if (pbi->decrypt_cb) {
+ const int n = (int)VPXMIN(MAX_VPX_HEADER_SIZE, data_end - data);
+ pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n);
+ rb->bit_buffer = clear_data;
+ rb->bit_buffer_end = clear_data + n;
+ } else {
+ rb->bit_buffer = data;
+ rb->bit_buffer_end = data_end;
+ }
+ return rb;
+}
+
+//------------------------------------------------------------------------------
+
+int vp10_read_sync_code(struct vpx_read_bit_buffer *const rb) {
+ return vpx_rb_read_literal(rb, 8) == VP10_SYNC_CODE_0 &&
+ vpx_rb_read_literal(rb, 8) == VP10_SYNC_CODE_1 &&
+ vpx_rb_read_literal(rb, 8) == VP10_SYNC_CODE_2;
+}
+
+void vp10_read_frame_size(struct vpx_read_bit_buffer *rb, int *width,
+ int *height) {
+ *width = vpx_rb_read_literal(rb, 16) + 1;
+ *height = vpx_rb_read_literal(rb, 16) + 1;
+}
+
+BITSTREAM_PROFILE vp10_read_profile(struct vpx_read_bit_buffer *rb) {
+ int profile = vpx_rb_read_bit(rb);
+ profile |= vpx_rb_read_bit(rb) << 1;
+ if (profile > 2) profile += vpx_rb_read_bit(rb);
+ return (BITSTREAM_PROFILE)profile;
+}
+
+void vp10_decode_frame(VP10Decoder *pbi, const uint8_t *data,
+ const uint8_t *data_end, const uint8_t **p_data_end) {
+ VP10_COMMON *const cm = &pbi->common;
+ MACROBLOCKD *const xd = &pbi->mb;
+ struct vpx_read_bit_buffer rb;
+ int context_updated = 0;
+ uint8_t clear_data[MAX_VPX_HEADER_SIZE];
+ const size_t first_partition_size = read_uncompressed_header(
+ pbi, init_read_bit_buffer(pbi, &rb, data, data_end, clear_data));
+ YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
+ xd->cur_buf = new_fb;
+#if CONFIG_GLOBAL_MOTION
+ xd->global_motion = cm->global_motion;
+#endif // CONFIG_GLOBAL_MOTION
+
+ if (!first_partition_size) {
+// showing a frame directly
+#if CONFIG_EXT_REFS
+ if (cm->show_existing_frame)
+ *p_data_end = data + vpx_rb_bytes_read(&rb);
+ else
+#endif // CONFIG_EXT_REFS
+ *p_data_end = data + (cm->profile <= PROFILE_2 ? 1 : 2);
+
+ return;
+ }
+
+ data += vpx_rb_bytes_read(&rb);
+ if (!read_is_valid(data, first_partition_size, data_end))
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet or corrupt header length");
+
+ cm->use_prev_frame_mvs =
+ !cm->error_resilient_mode && cm->width == cm->last_width &&
+ cm->height == cm->last_height && !cm->last_intra_only &&
+ cm->last_show_frame && (cm->last_frame_type != KEY_FRAME);
+#if CONFIG_EXT_REFS
+ // NOTE(zoeliu): As cm->prev_frame can take neither a frame of
+ // show_existing_frame=1, nor can it take a frame not used as
+ // a reference, it is probable that by the time it is being
+ // referred to, the frame buffer it originally points to may
+ // already get expired and have been reassigned to the current
+ // newly coded frame. Hence, we need to check whether this is
+ // the case, and if yes, we have 2 choices:
+ // (1) Simply disable the use of previous frame mvs; or
+ // (2) Have cm->prev_frame point to one reference frame buffer,
+ // e.g. LAST_FRAME.
+ if (cm->use_prev_frame_mvs && !dec_is_ref_frame_buf(pbi, cm->prev_frame)) {
+ // Reassign the LAST_FRAME buffer to cm->prev_frame.
+ RefBuffer *last_fb_ref_buf = &cm->frame_refs[LAST_FRAME - LAST_FRAME];
+ cm->prev_frame = &cm->buffer_pool->frame_bufs[last_fb_ref_buf->idx];
+ }
+#endif // CONFIG_EXT_REFS
+
+ vp10_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
+
+ *cm->fc = cm->frame_contexts[cm->frame_context_idx];
+ if (!cm->fc->initialized)
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Uninitialized entropy context.");
+
+ vp10_zero(cm->counts);
+
+ xd->corrupted = 0;
+ new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size);
+ if (new_fb->corrupted)
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Decode failed. Frame data header is corrupted.");
+
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
+ vp10_loop_filter_frame_init(cm, cm->lf.filter_level);
+ }
+
+ // If encoded in frame parallel mode, frame context is ready after decoding
+ // the frame header.
+ if (cm->frame_parallel_decode &&
+ cm->refresh_frame_context != REFRESH_FRAME_CONTEXT_BACKWARD) {
+ VPxWorker *const worker = pbi->frame_worker_owner;
+ FrameWorkerData *const frame_worker_data = worker->data1;
+ if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_FORWARD) {
+ context_updated = 1;
+ cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+ }
+ vp10_frameworker_lock_stats(worker);
+ pbi->cur_buf->row = -1;
+ pbi->cur_buf->col = -1;
+ frame_worker_data->frame_context_ready = 1;
+ // Signal the main thread that context is ready.
+ vp10_frameworker_signal_stats(worker);
+ vp10_frameworker_unlock_stats(worker);
+ }
+
+#if CONFIG_ENTROPY
+ vp10_copy(cm->starting_coef_probs, cm->fc->coef_probs);
+ cm->coef_probs_update_idx = 0;
+#endif // CONFIG_ENTROPY
+
+ if (pbi->max_threads > 1
+#if CONFIG_EXT_TILE
+ && pbi->dec_tile_col < 0 // Decoding all columns
+#endif // CONFIG_EXT_TILE
+ && cm->tile_cols > 1) {
+ // Multi-threaded tile decoder
+ *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
+ if (!xd->corrupted) {
+ if (!cm->skip_loop_filter) {
+ // If multiple threads are used to decode tiles, then we use those
+ // threads to do parallel loopfiltering.
+ vp10_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane,
+ cm->lf.filter_level, 0, 0, pbi->tile_workers,
+ pbi->num_tile_workers, &pbi->lf_row_sync);
+ }
+ } else {
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Decode failed. Frame data is corrupted.");
+ }
+ } else {
+ *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
+ }
+#if CONFIG_LOOP_RESTORATION
+ if (cm->rst_info.restoration_type != RESTORE_NONE) {
+ vp10_loop_restoration_init(&cm->rst_internal, &cm->rst_info,
+ cm->frame_type == KEY_FRAME, cm->width,
+ cm->height);
+ vp10_loop_restoration_rows(new_fb, cm, 0, cm->mi_rows, 0);
+ }
+#endif // CONFIG_LOOP_RESTORATION
+
+ if (!xd->corrupted) {
+ if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+#if CONFIG_ENTROPY
+ cm->partial_prob_update = 0;
+#endif // CONFIG_ENTROPY
+ vp10_adapt_coef_probs(cm);
+ vp10_adapt_intra_frame_probs(cm);
+
+ if (!frame_is_intra_only(cm)) {
+ vp10_adapt_inter_frame_probs(cm);
+ vp10_adapt_mv_probs(cm, cm->allow_high_precision_mv);
+ }
+ } else {
+ debug_check_frame_counts(cm);
+ }
+ } else {
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Decode failed. Frame data is corrupted.");
+ }
+
+ // Non frame parallel update frame context here.
+ if (!cm->error_resilient_mode && !context_updated)
+ cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+}
diff --git a/av1/decoder/decodeframe.h b/av1/decoder/decodeframe.h
new file mode 100644
index 0000000..7fdff0b
--- /dev/null
+++ b/av1/decoder/decodeframe.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_DECODER_DECODEFRAME_H_
+#define VP10_DECODER_DECODEFRAME_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10Decoder;
+struct vpx_read_bit_buffer;
+
+int vp10_read_sync_code(struct vpx_read_bit_buffer *const rb);
+void vp10_read_frame_size(struct vpx_read_bit_buffer *rb, int *width,
+ int *height);
+BITSTREAM_PROFILE vp10_read_profile(struct vpx_read_bit_buffer *rb);
+
+void vp10_decode_frame(struct VP10Decoder *pbi, const uint8_t *data,
+ const uint8_t *data_end, const uint8_t **p_data_end);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_DECODER_DECODEFRAME_H_
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
new file mode 100644
index 0000000..ef776a0
--- /dev/null
+++ b/av1/decoder/decodemv.c
@@ -0,0 +1,1782 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/common.h"
+#include "av1/common/entropy.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/entropymv.h"
+#include "av1/common/mvref_common.h"
+#include "av1/common/pred_common.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/seg_common.h"
+
+#include "av1/decoder/decodemv.h"
+#include "av1/decoder/decodeframe.h"
+
+#include "aom_dsp/vpx_dsp_common.h"
+
+// Reads an integer coded in l - 1 or l bits, where l = get_unsigned_bits(n).
+static INLINE int read_uniform(vp10_reader *r, int n) {
+  const int l = get_unsigned_bits(n);
+  const int m = (1 << l) - n;
+  int v;
+
+  assert(l != 0);  // Checked before l - 1 is used as a literal width.
+  v = vp10_read_literal(r, l - 1);
+  // Values below m are complete; the rest take one more low-order bit.
+  if (v < m) return v;
+  return (v << 1) - m + vp10_read_literal(r, 1);
+}
+
+static PREDICTION_MODE read_intra_mode(vp10_reader *r, const vpx_prob *p) {
+ return (PREDICTION_MODE)vp10_read_tree(r, vp10_intra_mode_tree, p);
+}
+
+// Decodes an intra mode from y_mode_prob[size_group]; tallied if counting.
+static PREDICTION_MODE read_intra_mode_y(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                         vp10_reader *r, int size_group) {
+  const vpx_prob *const probs = cm->fc->y_mode_prob[size_group];
+  const PREDICTION_MODE mode = read_intra_mode(r, probs);
+  if (xd->counts) ++xd->counts->y_mode[size_group][mode];
+  return mode;
+}
+
+// Decodes an intra mode from uv_mode_prob[y_mode]; when xd->counts is set the
+// result is tallied in counts->uv_mode.
+static PREDICTION_MODE read_intra_mode_uv(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                          vp10_reader *r,
+                                          PREDICTION_MODE y_mode) {
+  const PREDICTION_MODE mode = read_intra_mode(r, cm->fc->uv_mode_prob[y_mode]);
+  if (xd->counts) ++xd->counts->uv_mode[y_mode][mode];
+  return mode;
+}
+
+#if CONFIG_EXT_INTER
+// Decodes a mode from vp10_interintra_mode_tree; tallied if counting.
+static INTERINTRA_MODE read_interintra_mode(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                            vp10_reader *r, int size_group) {
+  const INTERINTRA_MODE mode = (INTERINTRA_MODE)vp10_read_tree(
+      r, vp10_interintra_mode_tree, cm->fc->interintra_mode_prob[size_group]);
+  if (xd->counts) ++xd->counts->interintra_mode[size_group][mode];
+  return mode;
+}
+#endif // CONFIG_EXT_INTER
+
+static PREDICTION_MODE read_inter_mode(VP10_COMMON *cm, MACROBLOCKD *xd,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ MB_MODE_INFO *mbmi,
+#endif
+ vp10_reader *r, int16_t ctx) {
+#if CONFIG_REF_MV
+ FRAME_COUNTS *counts = xd->counts;
+ int16_t mode_ctx = ctx & NEWMV_CTX_MASK;
+ vpx_prob mode_prob = cm->fc->newmv_prob[mode_ctx];
+ // The mode is a cascade of binary decisions. First: NEW-type mode or not.
+ if (vp10_read(r, mode_prob) == 0) {
+ if (counts) ++counts->newmv_mode[mode_ctx][0];
+ // With EXT_INTER, one more bit is read unless has_second_ref() is true.
+#if CONFIG_EXT_INTER
+ if (has_second_ref(mbmi)) {
+#endif // CONFIG_EXT_INTER
+ return NEWMV;
+#if CONFIG_EXT_INTER
+ } else {
+ mode_prob = cm->fc->new2mv_prob;
+ if (vp10_read(r, mode_prob) == 0) {
+ if (counts) ++counts->new2mv_mode[0];
+ return NEWMV;
+ } else {
+ if (counts) ++counts->new2mv_mode[1];
+ return NEWFROMNEARMV;
+ }
+ }
+#endif // CONFIG_EXT_INTER
+ }
+ if (counts) ++counts->newmv_mode[mode_ctx][1];
+ // Second: the all-zero flag in ctx yields ZEROMV without reading a bit.
+ if (ctx & (1 << ALL_ZERO_FLAG_OFFSET)) return ZEROMV;
+
+ mode_ctx = (ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
+
+ mode_prob = cm->fc->zeromv_prob[mode_ctx];
+ if (vp10_read(r, mode_prob) == 0) {
+ if (counts) ++counts->zeromv_mode[mode_ctx][0];
+ return ZEROMV;
+ }
+ if (counts) ++counts->zeromv_mode[mode_ctx][1];
+ // Third: NEARESTMV versus NEARMV; the skip flags in ctx override mode_ctx.
+ mode_ctx = (ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
+
+ if (ctx & (1 << SKIP_NEARESTMV_OFFSET)) mode_ctx = 6;
+ if (ctx & (1 << SKIP_NEARMV_OFFSET)) mode_ctx = 7;
+ if (ctx & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) mode_ctx = 8;
+
+ mode_prob = cm->fc->refmv_prob[mode_ctx];
+
+ if (vp10_read(r, mode_prob) == 0) {
+ if (counts) ++counts->refmv_mode[mode_ctx][0];
+
+ return NEARESTMV;
+ } else {
+ if (counts) ++counts->refmv_mode[mode_ctx][1];
+ return NEARMV;
+ }
+
+ // Not reached: both branches of the if/else above return.
+ assert(0);
+#else
+ const int mode =
+ vp10_read_tree(r, vp10_inter_mode_tree, cm->fc->inter_mode_probs[ctx]);
+ FRAME_COUNTS *counts = xd->counts;
+ if (counts) ++counts->inter_mode[ctx][mode];
+
+ return NEARESTMV + mode;
+#endif
+}
+
+#if CONFIG_REF_MV
+static void read_drl_idx(const VP10_COMMON *cm, MACROBLOCKD *xd,
+ MB_MODE_INFO *mbmi, vp10_reader *r) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+ mbmi->ref_mv_idx = 0;
+ // NEWMV: walk idx 0 and 1; a 0 bit stops with ref_mv_idx = idx.
+ if (mbmi->mode == NEWMV) {
+ int idx;
+ for (idx = 0; idx < 2; ++idx) {
+ if (xd->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], idx);
+ vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx];
+ if (!vp10_read(r, drl_prob)) {
+ mbmi->ref_mv_idx = idx;
+ if (xd->counts) ++xd->counts->drl_mode[drl_ctx][0];
+ return;
+ }
+ mbmi->ref_mv_idx = idx + 1;
+ if (xd->counts) ++xd->counts->drl_mode[drl_ctx][1];
+ }
+ }
+ }
+ // NEARMV: same walk over idx 1 and 2, with ref_mv_idx stored one lower.
+ if (mbmi->mode == NEARMV) {
+ int idx;
+ // Offset the NEARESTMV mode.
+ // TODO(jingning): Unify the two syntax decoding loops after the NEARESTMV
+ // mode is factored in.
+ for (idx = 1; idx < 3; ++idx) {
+ if (xd->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx = vp10_drl_ctx(xd->ref_mv_stack[ref_frame_type], idx);
+ vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx];
+ if (!vp10_read(r, drl_prob)) {
+ mbmi->ref_mv_idx = idx - 1;
+ if (xd->counts) ++xd->counts->drl_mode[drl_ctx][0];
+ return;
+ }
+ mbmi->ref_mv_idx = idx;
+ if (xd->counts) ++xd->counts->drl_mode[drl_ctx][1];
+ }
+ }
+ }
+}
+#endif
+
+#if CONFIG_EXT_INTER
+// Decodes a compound inter mode from inter_compound_mode_probs[ctx], tallying
+// it when xd->counts is set; the result is offset from NEAREST_NEARESTMV.
+static PREDICTION_MODE read_inter_compound_mode(VP10_COMMON *cm,
+                                                MACROBLOCKD *xd, vp10_reader *r,
+                                                int16_t ctx) {
+  const int tree_idx = vp10_read_tree(r, vp10_inter_compound_mode_tree,
+                                      cm->fc->inter_compound_mode_probs[ctx]);
+  const PREDICTION_MODE mode = NEAREST_NEARESTMV + tree_idx;
+  if (xd->counts) ++xd->counts->inter_compound_mode[ctx][tree_idx];
+  assert(is_inter_compound_mode(mode));
+  return mode;
+}
+#endif // CONFIG_EXT_INTER
+
+static int read_segment_id(vp10_reader *r,
+ const struct segmentation_probs *segp) {
+ return vp10_read_tree(r, vp10_segment_tree, segp->tree_probs);
+}
+
+#if CONFIG_VAR_TX
+static void read_tx_size_vartx(VP10_COMMON *cm, MACROBLOCKD *xd,
+ MB_MODE_INFO *mbmi, FRAME_COUNTS *counts,
+ TX_SIZE tx_size, int blk_row, int blk_col,
+ vp10_reader *r) {
+ int is_split = 0;
+ const int tx_row = blk_row >> 1;
+ const int tx_col = blk_col >> 1;
+ int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ int ctx = txfm_partition_context(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, tx_size);
+ TX_SIZE (*const inter_tx_size)
+ [MAX_MIB_SIZE] =
+ (TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
+ // Shrink the limits past the frame edge (>> 5: presumably 4x4 units).
+ if (xd->mb_to_bottom_edge < 0) max_blocks_high += xd->mb_to_bottom_edge >> 5;
+ if (xd->mb_to_right_edge < 0) max_blocks_wide += xd->mb_to_right_edge >> 5;
+ // Nothing is coded for a sub-block that starts outside those limits.
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+
+ is_split = vp10_read(r, cm->fc->txfm_partition_prob[ctx]);
+
+ if (is_split) {
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ if (counts) ++counts->txfm_partition[ctx][1];
+ // A split TX_8X8 becomes TX_4X4 directly; there is no further recursion.
+ if (tx_size == TX_8X8) {
+ inter_tx_size[0][0] = TX_4X4;
+ mbmi->tx_size = TX_4X4;
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, TX_4X4);
+ return;
+ }
+ // Otherwise recurse into the four quadrants with tx_size - 1.
+ assert(bsl > 0);
+ --bsl;
+ for (i = 0; i < 4; ++i) {
+ int offsetr = blk_row + ((i >> 1) << bsl);
+ int offsetc = blk_col + ((i & 0x01) << bsl);
+ read_tx_size_vartx(cm, xd, mbmi, counts, tx_size - 1, offsetr, offsetc,
+ r);
+ }
+ } else {
+ int idx, idy;
+ inter_tx_size[0][0] = tx_size;
+ for (idy = 0; idy < num_4x4_blocks_high_txsize_lookup[tx_size] / 2; ++idy)
+ for (idx = 0; idx < num_4x4_blocks_wide_txsize_lookup[tx_size] / 2; ++idx)
+ inter_tx_size[idy][idx] = tx_size;
+ mbmi->tx_size = tx_size;
+ if (counts) ++counts->txfm_partition[ctx][0];
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, tx_size);
+ }
+}
+#endif
+
+// Decodes a transform size for category tx_size_cat; tallied if counting.
+static TX_SIZE read_selected_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                     int tx_size_cat, vp10_reader *r) {
+  const int ctx = get_tx_size_context(xd);
+  const int coded = vp10_read_tree(r, vp10_tx_size_tree[tx_size_cat],
+                                   cm->fc->tx_size_probs[tx_size_cat][ctx]);
+  if (xd->counts) ++xd->counts->tx_size[tx_size_cat][ctx][coded];
+  return (TX_SIZE)coded;
+}
+
+// Returns the transform size of an intra block: TX_4X4 for lossless segments
+// and blocks below 8x8, a decoded size under TX_MODE_SELECT, and otherwise
+// the size tx_size_from_tx_mode() derives from cm->tx_mode.
+static TX_SIZE read_tx_size_intra(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                  vp10_reader *r) {
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const BLOCK_SIZE bsize = mbmi->sb_type;
+  TX_SIZE tx_size;
+
+  if (xd->lossless[mbmi->segment_id] || bsize < BLOCK_8X8) return TX_4X4;
+  if (cm->tx_mode != TX_MODE_SELECT)
+    return tx_size_from_tx_mode(bsize, cm->tx_mode, 0);
+
+  // The size is coded explicitly in the bitstream.
+  tx_size = read_selected_tx_size(cm, xd, intra_tx_size_cat_lookup[bsize], r);
+  assert(tx_size <= max_txsize_lookup[bsize]);
+  return tx_size;
+}
+
+// Returns the transform size of an inter block (decoded only if selectable).
+static TX_SIZE read_tx_size_inter(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                  int allow_select, vp10_reader *r) {
+  const TX_MODE tx_mode = cm->tx_mode;
+  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  if (xd->lossless[xd->mi[0]->mbmi.segment_id]) return TX_4X4;
+  if (bsize < BLOCK_8X8) {
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+    assert(IMPLIES(tx_mode == ONLY_4X4, bsize == BLOCK_4X4));
+    return max_txsize_rect_lookup[bsize];
+#else
+    return TX_4X4;
+#endif
+  }
+  if (!allow_select || tx_mode != TX_MODE_SELECT)
+    return tx_size_from_tx_mode(bsize, tx_mode, 1);
+  {
+    const TX_SIZE coded_tx_size =
+        read_selected_tx_size(cm, xd, inter_tx_size_cat_lookup[bsize], r);
+#if !CONFIG_RECT_TX
+    assert(coded_tx_size <= max_txsize_lookup[bsize]);
+#else
+    if (coded_tx_size > max_txsize_lookup[bsize]) {
+      assert(coded_tx_size == max_txsize_lookup[bsize] + 1);
+      return max_txsize_rect_lookup[bsize];
+    }
+#endif  // !CONFIG_RECT_TX
+    return coded_tx_size;
+  }
+}
+
+// Returns the smallest id in the x_mis by y_mis block of segment_ids.
+static int dec_get_segment_id(const VP10_COMMON *cm, const uint8_t *segment_ids,
+                              int mi_offset, int x_mis, int y_mis) {
+  int row, col, min_id = INT_MAX;
+  for (row = 0; row < y_mis; ++row) {
+    const int base = mi_offset + row * cm->mi_cols;
+    for (col = 0; col < x_mis; ++col)
+      min_id = VPXMIN(min_id, segment_ids[base + col]);
+  }
+  assert(min_id >= 0 && min_id < MAX_SEGMENTS);
+  return min_id;
+}
+
+// Stores segment_id over an x_mis by y_mis block of current_frame_seg_map.
+static void set_segment_id(VP10_COMMON *cm, int mi_offset, int x_mis, int y_mis,
+                           int segment_id) {
+  int row, col;
+  uint8_t *const map = cm->current_frame_seg_map;
+  assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
+  for (row = 0; row < y_mis; ++row)
+    for (col = 0; col < x_mis; ++col)
+      map[mi_offset + row * cm->mi_cols + col] = segment_id;
+}
+
+// Returns 0 when segmentation is disabled. Otherwise decodes a segment id,
+// tallies it in counts->seg.tree_total when xd->counts is set, writes it to
+// the current segment map over the block, and returns it.
+static int read_intra_segment_id(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+                                 int mi_offset, int x_mis, int y_mis,
+                                 vp10_reader *r) {
+  const struct segmentation *const seg = &cm->seg;
+  int segment_id;
+
+  if (!seg->enabled) return 0;  // Default for disabled segmentation
+  assert(seg->update_map && !seg->temporal_update);
+
+  segment_id = read_segment_id(r, &cm->fc->seg);
+  if (xd->counts) ++xd->counts->seg.tree_total[segment_id];
+  set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
+  return segment_id;
+}
+
+// Copies a block of ids from last_segment_ids, or zeros if that is NULL.
+static void copy_segment_id(const VP10_COMMON *cm,
+                            const uint8_t *last_segment_ids,
+                            uint8_t *current_segment_ids, int mi_offset,
+                            int x_mis, int y_mis) {
+  int row, col;
+  for (row = 0; row < y_mis; ++row)
+    for (col = 0; col < x_mis; ++col) {
+      const int idx = mi_offset + row * cm->mi_cols + col;
+      current_segment_ids[idx] = last_segment_ids ? last_segment_ids[idx] : 0;
+    }
+}
+
+static int read_inter_segment_id(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+ int mi_row, int mi_col, vp10_reader *r) {
+ struct segmentation *const seg = &cm->seg;
+ FRAME_COUNTS *counts = xd->counts;
+ struct segmentation_probs *const segp = &cm->fc->seg;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int predicted_segment_id, segment_id;
+ const int mi_offset = mi_row * cm->mi_cols + mi_col;
+ const int bw = num_8x8_blocks_wide_lookup[mbmi->sb_type];
+ const int bh = num_8x8_blocks_high_lookup[mbmi->sb_type];
+ // x_mis / y_mis: block extent clamped to cm->mi_cols / cm->mi_rows.
+ // TODO(slavarnway): move x_mis, y_mis into xd ?????
+ const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh);
+
+ if (!seg->enabled) return 0; // Default for disabled segmentation
+ // Predict from last_frame_seg_map when it is non-NULL, otherwise use 0.
+ predicted_segment_id = cm->last_frame_seg_map
+ ? dec_get_segment_id(cm, cm->last_frame_seg_map,
+ mi_offset, x_mis, y_mis)
+ : 0;
+ // No map update: carry the ids over and return the prediction.
+ if (!seg->update_map) {
+ copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map,
+ mi_offset, x_mis, y_mis);
+ return predicted_segment_id;
+ }
+ // Map update: with temporal_update a flag picks prediction or explicit id.
+ if (seg->temporal_update) {
+ const int ctx = vp10_get_pred_context_seg_id(xd);
+ const vpx_prob pred_prob = segp->pred_probs[ctx];
+ mbmi->seg_id_predicted = vp10_read(r, pred_prob);
+ if (counts) ++counts->seg.pred[ctx][mbmi->seg_id_predicted];
+ if (mbmi->seg_id_predicted) {
+ segment_id = predicted_segment_id;
+ } else {
+ segment_id = read_segment_id(r, segp);
+ if (counts) ++counts->seg.tree_mispred[segment_id];
+ }
+ } else {
+ segment_id = read_segment_id(r, segp);
+ if (counts) ++counts->seg.tree_total[segment_id];
+ }
+ set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
+ return segment_id;
+}
+
+// Returns 1 without reading when SEG_LVL_SKIP is active for segment_id;
+// otherwise decodes the skip flag and tallies it when xd->counts is set.
+static int read_skip(VP10_COMMON *cm, const MACROBLOCKD *xd, int segment_id,
+                     vp10_reader *r) {
+  int ctx, skip;
+  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) return 1;
+
+  ctx = vp10_get_skip_context(xd);
+  skip = vp10_read(r, cm->fc->skip_probs[ctx]);
+  if (xd->counts) ++xd->counts->skip[ctx][skip];
+  return skip;
+}
+
+static void read_palette_mode_info(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+ vp10_reader *r) {
+ MODE_INFO *const mi = xd->mi[0];
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ int i, n, palette_ctx = 0;
+ PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ // Palette 0 is only considered when mbmi->mode is DC_PRED.
+ if (mbmi->mode == DC_PRED) {
+ if (above_mi)
+ palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ if (left_mi)
+ palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ if (vp10_read(
+ r,
+ vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx])) {
+ pmi->palette_size[0] =
+ vp10_read_tree(r, vp10_palette_size_tree,
+ vp10_default_palette_y_size_prob[bsize - BLOCK_8X8]) +
+ 2;
+ n = pmi->palette_size[0];
+ for (i = 0; i < n; ++i)
+ pmi->palette_colors[i] = vp10_read_literal(r, cm->bit_depth);
+ // Only the first entry of the colour index map is read here.
+ xd->plane[0].color_index_map[0] = read_uniform(r, n);
+ assert(xd->plane[0].color_index_map[0] < n);
+ }
+ }
+ // Palette 1 needs uv_mode == DC_PRED; two colours are read per entry.
+ if (mbmi->uv_mode == DC_PRED) {
+ if (vp10_read(
+ r, vp10_default_palette_uv_mode_prob[pmi->palette_size[0] > 0])) {
+ pmi->palette_size[1] =
+ vp10_read_tree(r, vp10_palette_size_tree,
+ vp10_default_palette_uv_size_prob[bsize - BLOCK_8X8]) +
+ 2;
+ n = pmi->palette_size[1];
+ for (i = 0; i < n; ++i) {
+ pmi->palette_colors[PALETTE_MAX_SIZE + i] =
+ vp10_read_literal(r, cm->bit_depth);
+ pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] =
+ vp10_read_literal(r, cm->bit_depth);
+ }
+ xd->plane[1].color_index_map[0] = read_uniform(r, n);
+ assert(xd->plane[1].color_index_map[0] < n);
+ }
+ }
+}
+
+#if CONFIG_EXT_INTRA
+static void read_ext_intra_mode_info(VP10_COMMON *const cm,
+ MACROBLOCKD *const xd, vp10_reader *r) {
+ MODE_INFO *const mi = xd->mi[0];
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ FRAME_COUNTS *counts = xd->counts;
+ // When ALLOW_FILTER_INTRA_MODES is 0 nothing is read from the bitstream.
+#if !ALLOW_FILTER_INTRA_MODES
+ return;
+#endif
+ if (mbmi->mode == DC_PRED && mbmi->palette_mode_info.palette_size[0] == 0) {
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] =
+ vp10_read(r, cm->fc->ext_intra_probs[0]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[0] =
+ read_uniform(r, FILTER_INTRA_MODES);
+ }
+ if (counts)
+ ++counts->ext_intra[0][mbmi->ext_intra_mode_info.use_ext_intra_mode[0]];
+ }
+ if (mbmi->uv_mode == DC_PRED &&
+ mbmi->palette_mode_info.palette_size[1] == 0) {
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ vp10_read(r, cm->fc->ext_intra_probs[1]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ read_uniform(r, FILTER_INTRA_MODES);
+ }
+ if (counts)
+ ++counts->ext_intra[1][mbmi->ext_intra_mode_info.use_ext_intra_mode[1]];
+ }
+}
+
+static void read_intra_angle_info(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+ vp10_reader *r) {
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ const int ctx = vp10_get_pred_context_intra_interp(xd);
+ int p_angle;
+ // Nothing is read for blocks smaller than BLOCK_8X8.
+ if (bsize < BLOCK_8X8) return;
+ // mbmi->mode: an angle delta, then the intra filter if it is switchable.
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) {
+ mbmi->angle_delta[0] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+ p_angle = mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+ if (vp10_is_intra_filter_switchable(p_angle)) {
+ FRAME_COUNTS *counts = xd->counts;
+ mbmi->intra_filter = vp10_read_tree(r, vp10_intra_filter_tree,
+ cm->fc->intra_filter_probs[ctx]);
+ if (counts) ++counts->intra_filter[ctx][mbmi->intra_filter];
+ } else {
+ mbmi->intra_filter = INTRA_FILTER_LINEAR;
+ }
+ }
+ // mbmi->uv_mode: only an angle delta is read.
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
+ mbmi->angle_delta[1] =
+ read_uniform(r, 2 * MAX_ANGLE_DELTAS + 1) - MAX_ANGLE_DELTAS;
+ }
+}
+#endif // CONFIG_EXT_INTRA
+
+static void read_intra_frame_mode_info(VP10_COMMON *const cm,
+ MACROBLOCKD *const xd, int mi_row,
+ int mi_col, vp10_reader *r) {
+ MODE_INFO *const mi = xd->mi[0];
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const MODE_INFO *above_mi = xd->above_mi;
+ const MODE_INFO *left_mi = xd->left_mi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ int i;
+ const int mi_offset = mi_row * cm->mi_cols + mi_col;
+ const int bw = xd->plane[0].n4_w >> 1;
+ const int bh = xd->plane[0].n4_h >> 1;
+
+ // TODO(slavarnway): move x_mis, y_mis into xd ?????
+ const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh);
+ // Read order: segment id, skip flag, transform size, then the modes.
+ mbmi->segment_id = read_intra_segment_id(cm, xd, mi_offset, x_mis, y_mis, r);
+ mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
+ mbmi->tx_size = read_tx_size_intra(cm, xd, r);
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->ref_frame[1] = NONE;
+ // Sub-8x8 sizes read one mode per sub-block; mbmi->mode is the last read.
+ switch (bsize) {
+ case BLOCK_4X4:
+ for (i = 0; i < 4; ++i)
+ mi->bmi[i].as_mode =
+ read_intra_mode(r, get_y_mode_probs(cm, mi, above_mi, left_mi, i));
+ mbmi->mode = mi->bmi[3].as_mode;
+ break;
+ case BLOCK_4X8:
+ mi->bmi[0].as_mode = mi->bmi[2].as_mode =
+ read_intra_mode(r, get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
+ mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode =
+ read_intra_mode(r, get_y_mode_probs(cm, mi, above_mi, left_mi, 1));
+ break;
+ case BLOCK_8X4:
+ mi->bmi[0].as_mode = mi->bmi[1].as_mode =
+ read_intra_mode(r, get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
+ mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode =
+ read_intra_mode(r, get_y_mode_probs(cm, mi, above_mi, left_mi, 2));
+ break;
+ default:
+ mbmi->mode =
+ read_intra_mode(r, get_y_mode_probs(cm, mi, above_mi, left_mi, 0));
+ }
+
+ mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
+#if CONFIG_EXT_INTRA
+ read_intra_angle_info(cm, xd, r);
+#endif // CONFIG_EXT_INTRA
+ mbmi->palette_mode_info.palette_size[0] = 0;
+ mbmi->palette_mode_info.palette_size[1] = 0;
+ if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools)
+ read_palette_mode_info(cm, xd, r);
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+ if (bsize >= BLOCK_8X8) read_ext_intra_mode_info(cm, xd, r);
+#endif // CONFIG_EXT_INTRA
+ // The transform type is only read or set here when FIXED_TX_TYPE is 0.
+ if (!FIXED_TX_TYPE) {
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, mbmi->sb_type, 0) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
+ ALLOW_INTRA_EXT_TX) {
+ FRAME_COUNTS *counts = xd->counts;
+ int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type, 0);
+ if (eset > 0) {
+ mbmi->tx_type = vp10_read_tree(
+ r, vp10_ext_tx_intra_tree[eset],
+ cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode]);
+ if (counts)
+ ++counts
+ ->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode][mbmi->tx_type];
+ }
+ } else {
+ mbmi->tx_type = DCT_DCT;
+ }
+#else
+ if (mbmi->tx_size < TX_32X32 && cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ FRAME_COUNTS *counts = xd->counts;
+ TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode];
+ mbmi->tx_type =
+ vp10_read_tree(r, vp10_ext_tx_tree,
+ cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]);
+ if (counts)
+ ++counts->intra_ext_tx[mbmi->tx_size][tx_type_nom][mbmi->tx_type];
+ } else {
+ mbmi->tx_type = DCT_DCT;
+ }
+#endif // CONFIG_EXT_TX
+ }
+}
+
+static int read_mv_component(vp10_reader *r, const nmv_component *mvcomp,
+ int usehp) {
+ int mag, d, fr, hp;
+ const int sign = vp10_read(r, mvcomp->sign);
+ const int mv_class = vp10_read_tree(r, vp10_mv_class_tree, mvcomp->classes);
+ const int class0 = mv_class == MV_CLASS_0;
+ // Read order: sign, class, integer bits, fraction, then the hp bit.
+ // Integer part
+ if (class0) {
+ d = vp10_read_tree(r, vp10_mv_class0_tree, mvcomp->class0);
+ mag = 0;
+ } else {
+ int i;
+ const int n = mv_class + CLASS0_BITS - 1; // number of bits
+ // Assemble d from n individually coded bits, least significant first.
+ d = 0;
+ for (i = 0; i < n; ++i) d |= vp10_read(r, mvcomp->bits[i]) << i;
+ mag = CLASS0_SIZE << (mv_class + 2);
+ }
+
+ // Fractional part
+ fr = vp10_read_tree(r, vp10_mv_fp_tree,
+ class0 ? mvcomp->class0_fp[d] : mvcomp->fp);
+
+ // High precision part (if hp is not used, the default value of the hp is 1)
+ hp = usehp ? vp10_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp) : 1;
+
+ // Result
+ mag += ((d << 3) | (fr << 1) | hp) + 1;
+ return sign ? -mag : mag;
+}
+
+static INLINE void read_mv(vp10_reader *r, MV *mv, const MV *ref,
+#if CONFIG_REF_MV
+ int is_compound,
+#endif
+ const nmv_context *ctx, nmv_context_counts *counts,
+ int allow_hp) {
+ MV_JOINT_TYPE joint_type;
+ const int use_hp = allow_hp && vp10_use_mv_hp(ref);
+ MV diff = { 0, 0 };
+ // Read the joint type; it gates the per-component reads further down.
+#if CONFIG_REF_MV && !CONFIG_EXT_INTER
+ if (is_compound) {
+ int is_zero_rmv = vp10_read(r, ctx->zero_rmv);
+ if (is_zero_rmv) {
+ joint_type = MV_JOINT_ZERO;
+ } else {
+ joint_type =
+ (MV_JOINT_TYPE)vp10_read_tree(r, vp10_mv_joint_tree, ctx->joints);
+ }
+ } else {
+ joint_type =
+ (MV_JOINT_TYPE)vp10_read_tree(r, vp10_mv_joint_tree, ctx->joints);
+ }
+#else
+ joint_type =
+ (MV_JOINT_TYPE)vp10_read_tree(r, vp10_mv_joint_tree, ctx->joints);
+#endif
+
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ (void)is_compound;
+#endif
+
+ if (mv_joint_vertical(joint_type))
+ diff.row = read_mv_component(r, &ctx->comps[0], use_hp);
+
+ if (mv_joint_horizontal(joint_type))
+ diff.col = read_mv_component(r, &ctx->comps[1], use_hp);
+ // Pass the difference to vp10_inc_mv(), then add it to the reference.
+ vp10_inc_mv(&diff, counts, use_hp);
+
+ mv->row = ref->row + diff.row;
+ mv->col = ref->col + diff.col;
+}
+
+// Returns cm->reference_mode unless it is REFERENCE_MODE_SELECT, in which case
+// the per-block mode is decoded and tallied when xd->counts is set.
+static REFERENCE_MODE read_block_reference_mode(VP10_COMMON *cm,
+                                                const MACROBLOCKD *xd,
+                                                vp10_reader *r) {
+  REFERENCE_MODE mode;
+  int ctx;
+  if (cm->reference_mode != REFERENCE_MODE_SELECT) return cm->reference_mode;
+
+  ctx = vp10_get_reference_mode_context(cm, xd);
+  mode = (REFERENCE_MODE)vp10_read(r, cm->fc->comp_inter_prob[ctx]);
+  if (xd->counts) ++xd->counts->comp_inter[ctx][mode];
+  return mode;  // SINGLE_REFERENCE or COMPOUND_REFERENCE
+}
+
+// Read the reference frame
+static void read_ref_frames(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+ vp10_reader *r, int segment_id,
+ MV_REFERENCE_FRAME ref_frame[2]) {
+ FRAME_CONTEXT *const fc = cm->fc;
+ FRAME_COUNTS *counts = xd->counts;
+ // An active SEG_LVL_REF_FRAME feature fixes ref_frame[0]; nothing is read.
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ ref_frame[0] = (MV_REFERENCE_FRAME)get_segdata(&cm->seg, segment_id,
+ SEG_LVL_REF_FRAME);
+ ref_frame[1] = NONE;
+ } else {
+ const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r);
+ // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding
+ if (mode == COMPOUND_REFERENCE) {
+#if CONFIG_EXT_REFS
+ const int idx = cm->ref_frame_sign_bias[cm->comp_bwd_ref[0]];
+#else
+ const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+#endif // CONFIG_EXT_REFS
+ const int ctx = vp10_get_pred_context_comp_ref_p(cm, xd);
+ const int bit = vp10_read(r, fc->comp_ref_prob[ctx][0]);
+
+ if (counts) ++counts->comp_ref[ctx][0][bit];
+
+#if CONFIG_EXT_REFS
+ // Decode forward references.
+ if (!bit) {
+ const int ctx1 = vp10_get_pred_context_comp_ref_p1(cm, xd);
+ const int bit1 = vp10_read(r, fc->comp_ref_prob[ctx1][1]);
+ if (counts) ++counts->comp_ref[ctx1][1][bit1];
+ ref_frame[!idx] = cm->comp_fwd_ref[bit1 ? 0 : 1];
+ } else {
+ const int ctx2 = vp10_get_pred_context_comp_ref_p2(cm, xd);
+ const int bit2 = vp10_read(r, fc->comp_ref_prob[ctx2][2]);
+ if (counts) ++counts->comp_ref[ctx2][2][bit2];
+ ref_frame[!idx] = cm->comp_fwd_ref[bit2 ? 3 : 2];
+ }
+
+ // Decode backward references.
+ {
+ const int ctx_bwd = vp10_get_pred_context_comp_bwdref_p(cm, xd);
+ const int bit_bwd = vp10_read(r, fc->comp_bwdref_prob[ctx_bwd][0]);
+ if (counts) ++counts->comp_bwdref[ctx_bwd][0][bit_bwd];
+ ref_frame[idx] = cm->comp_bwd_ref[bit_bwd];
+ }
+#else
+ ref_frame[!idx] = cm->comp_var_ref[bit];
+ ref_frame[idx] = cm->comp_fixed_ref;
+#endif // CONFIG_EXT_REFS
+ } else if (mode == SINGLE_REFERENCE) {
+#if CONFIG_EXT_REFS
+ const int ctx0 = vp10_get_pred_context_single_ref_p1(xd);
+ const int bit0 = vp10_read(r, fc->single_ref_prob[ctx0][0]);
+ if (counts) ++counts->single_ref[ctx0][0][bit0];
+
+ if (bit0) {
+ const int ctx1 = vp10_get_pred_context_single_ref_p2(xd);
+ const int bit1 = vp10_read(r, fc->single_ref_prob[ctx1][1]);
+ if (counts) ++counts->single_ref[ctx1][1][bit1];
+ ref_frame[0] = bit1 ? ALTREF_FRAME : BWDREF_FRAME;
+ } else {
+ const int ctx2 = vp10_get_pred_context_single_ref_p3(xd);
+ const int bit2 = vp10_read(r, fc->single_ref_prob[ctx2][2]);
+ if (counts) ++counts->single_ref[ctx2][2][bit2];
+ if (bit2) {
+ const int ctx4 = vp10_get_pred_context_single_ref_p5(xd);
+ const int bit4 = vp10_read(r, fc->single_ref_prob[ctx4][4]);
+ if (counts) ++counts->single_ref[ctx4][4][bit4];
+ ref_frame[0] = bit4 ? GOLDEN_FRAME : LAST3_FRAME;
+ } else {
+ const int ctx3 = vp10_get_pred_context_single_ref_p4(xd);
+ const int bit3 = vp10_read(r, fc->single_ref_prob[ctx3][3]);
+ if (counts) ++counts->single_ref[ctx3][3][bit3];
+ ref_frame[0] = bit3 ? LAST2_FRAME : LAST_FRAME;
+ }
+ }
+#else
+ const int ctx0 = vp10_get_pred_context_single_ref_p1(xd);
+ const int bit0 = vp10_read(r, fc->single_ref_prob[ctx0][0]);
+ if (counts) ++counts->single_ref[ctx0][0][bit0];
+
+ if (bit0) {
+ const int ctx1 = vp10_get_pred_context_single_ref_p2(xd);
+ const int bit1 = vp10_read(r, fc->single_ref_prob[ctx1][1]);
+ if (counts) ++counts->single_ref[ctx1][1][bit1];
+ ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME;
+ } else {
+ ref_frame[0] = LAST_FRAME;
+ }
+#endif // CONFIG_EXT_REFS
+ // A single-reference block leaves the second slot empty.
+ ref_frame[1] = NONE;
+ } else {
+ assert(0 && "Invalid prediction mode.");
+ }
+ }
+}
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+// Decodes the motion variation of the current block, or returns
+// SIMPLE_TRANSLATION without reading when is_motvar_allowed() rejects it.
+static MOTION_VARIATION read_motvar_block(VP10_COMMON *const cm,
+                                          MACROBLOCKD *const xd,
+                                          vp10_reader *r) {
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const BLOCK_SIZE bsize = mbmi->sb_type;
+  MOTION_VARIATION motvar;
+
+  if (!is_motvar_allowed(mbmi)) return SIMPLE_TRANSLATION;
+
+  motvar = (MOTION_VARIATION)vp10_read_tree(r, vp10_motvar_tree,
+                                            cm->fc->motvar_prob[bsize]);
+  if (xd->counts) ++xd->counts->motvar[bsize][motvar];
+  return motvar;
+}
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+// Returns cm->interp_filter unless it is SWITCHABLE, in which case a per-block
+// filter is decoded and tallied; EXT_INTERP may short-circuit the read.
+static INLINE INTERP_FILTER read_interp_filter(VP10_COMMON *const cm,
+                                               MACROBLOCKD *const xd,
+#if CONFIG_DUAL_FILTER
+                                               int dir,
+#endif
+                                               vp10_reader *r) {
+  INTERP_FILTER type;
+  int ctx;
+#if CONFIG_EXT_INTERP
+  if (!vp10_is_interp_needed(xd)) return EIGHTTAP_REGULAR;
+#endif
+  if (cm->interp_filter != SWITCHABLE) return cm->interp_filter;
+#if CONFIG_DUAL_FILTER
+  ctx = vp10_get_pred_context_switchable_interp(xd, dir);
+#else
+  ctx = vp10_get_pred_context_switchable_interp(xd);
+#endif
+  type = (INTERP_FILTER)vp10_read_tree(r, vp10_switchable_interp_tree,
+                                       cm->fc->switchable_interp_prob[ctx]);
+  if (xd->counts) ++xd->counts->switchable_interp[ctx][type];
+  return type;
+}
+
+static void read_intra_block_mode_info(VP10_COMMON *const cm,
+ MACROBLOCKD *const xd, MODE_INFO *mi,
+ vp10_reader *r) {
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const BLOCK_SIZE bsize = mi->mbmi.sb_type;
+ int i;
+ // Mark the block as intra: INTRA_FRAME in slot 0, NONE in slot 1.
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->ref_frame[1] = NONE;
+ // Sub-8x8 sizes read one mode per sub-block, all with size group 0.
+ switch (bsize) {
+ case BLOCK_4X4:
+ for (i = 0; i < 4; ++i)
+ mi->bmi[i].as_mode = read_intra_mode_y(cm, xd, r, 0);
+ mbmi->mode = mi->bmi[3].as_mode;
+ break;
+ case BLOCK_4X8:
+ mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, xd, r, 0);
+ mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode =
+ read_intra_mode_y(cm, xd, r, 0);
+ break;
+ case BLOCK_8X4:
+ mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, xd, r, 0);
+ mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode =
+ read_intra_mode_y(cm, xd, r, 0);
+ break;
+ default:
+ mbmi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]);
+ }
+
+ mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
+#if CONFIG_EXT_INTRA
+ read_intra_angle_info(cm, xd, r);
+#endif // CONFIG_EXT_INTRA
+ mbmi->palette_mode_info.palette_size[0] = 0;
+ mbmi->palette_mode_info.palette_size[1] = 0;
+ if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools)
+ read_palette_mode_info(cm, xd, r);
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+ if (bsize >= BLOCK_8X8) read_ext_intra_mode_info(cm, xd, r);
+#endif // CONFIG_EXT_INTRA
+}
+
+static INLINE int is_mv_valid(const MV *mv) {
+  const int row_ok = mv->row > MV_LOW && mv->row < MV_UPP;  // exclusive bounds
+  return row_ok && mv->col > MV_LOW && mv->col < MV_UPP;
+}
+
+static INLINE int assign_mv(VP10_COMMON *cm, MACROBLOCKD *xd, // Sets mv[] for 'mode'; returns 0 for an unknown mode or when a newly read MV fails is_mv_valid(), else 1.
+ PREDICTION_MODE mode,
+#if CONFIG_REF_MV
+ int block, // index into xd->mi[0]->bmi[], used only to pick pred_mv for sub-8x8 blocks
+#endif // CONFIG_REF_MV
+ int_mv mv[2], int_mv ref_mv[2], // mv: output; ref_mv: predictor handed to read_mv()
+ int_mv nearest_mv[2], int_mv near_mv[2],
+ int is_compound, int allow_hp, vp10_reader *r) {
+ int i;
+ int ret = 1; // cleared as soon as one decoded MV is out of range
+#if CONFIG_REF_MV
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ BLOCK_SIZE bsize = mbmi->sb_type;
+ int_mv *pred_mv = // where the predictor actually used gets recorded
+ (bsize >= BLOCK_8X8) ? mbmi->pred_mv : xd->mi[0]->bmi[block].pred_mv_s8;
+#endif // CONFIG_REF_MV
+
+ switch (mode) {
+#if CONFIG_EXT_INTER
+ case NEWFROMNEARMV: // shares the NEWMV path; only the ref_mv supplied by the caller differs
+#endif // CONFIG_EXT_INTER
+ case NEWMV: { // one MV per reference is read from the bitstream
+ FRAME_COUNTS *counts = xd->counts;
+#if !CONFIG_REF_MV
+ nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+#endif // !CONFIG_REF_MV
+ for (i = 0; i < 1 + is_compound; ++i) {
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[i]],
+ xd->ref_mv_stack[mbmi->ref_frame[i]]);
+ nmv_context_counts *const mv_counts =
+ counts ? &counts->mv[nmv_ctx] : NULL;
+ read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv,
+#if CONFIG_REF_MV // NOTE(review): always true here, this is already inside CONFIG_REF_MV
+ is_compound,
+#endif
+ &cm->fc->nmvc[nmv_ctx], mv_counts, allow_hp);
+#else
+ read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts,
+ allow_hp);
+#endif // CONFIG_REF_MV
+ ret = ret && is_mv_valid(&mv[i].as_mv);
+
+#if CONFIG_REF_MV
+ pred_mv[i].as_int = ref_mv[i].as_int;
+#endif
+ }
+ break;
+ }
+ case NEARESTMV: { // copy the nearest candidate for each reference in use
+ mv[0].as_int = nearest_mv[0].as_int;
+ if (is_compound) mv[1].as_int = nearest_mv[1].as_int;
+
+#if CONFIG_REF_MV
+ pred_mv[0].as_int = nearest_mv[0].as_int;
+ if (is_compound) pred_mv[1].as_int = nearest_mv[1].as_int;
+#endif
+ break;
+ }
+ case NEARMV: { // copy the near candidate for each reference in use
+ mv[0].as_int = near_mv[0].as_int;
+ if (is_compound) mv[1].as_int = near_mv[1].as_int;
+
+#if CONFIG_REF_MV
+ pred_mv[0].as_int = near_mv[0].as_int;
+ if (is_compound) pred_mv[1].as_int = near_mv[1].as_int;
+#endif
+ break;
+ }
+ case ZEROMV: { // zero motion, nothing is read
+ mv[0].as_int = 0;
+ if (is_compound) mv[1].as_int = 0;
+
+#if CONFIG_REF_MV
+ pred_mv[0].as_int = 0;
+ if (is_compound) pred_mv[1].as_int = 0;
+#endif
+ break;
+ }
+#if CONFIG_EXT_INTER
+ case NEW_NEWMV: { // compound modes from here on never write pred_mv
+ FRAME_COUNTS *counts = xd->counts;
+#if !CONFIG_REF_MV
+ nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+#endif
+ assert(is_compound);
+ for (i = 0; i < 2; ++i) {
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[i]],
+ xd->ref_mv_stack[mbmi->ref_frame[i]]);
+ nmv_context_counts *const mv_counts =
+ counts ? &counts->mv[nmv_ctx] : NULL;
+ read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, is_compound,
+ &cm->fc->nmvc[nmv_ctx], mv_counts, allow_hp);
+#else
+ read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts,
+ allow_hp);
+#endif
+ ret = ret && is_mv_valid(&mv[i].as_mv);
+ }
+ break;
+ }
+ case NEAREST_NEARESTMV: {
+ assert(is_compound);
+ mv[0].as_int = nearest_mv[0].as_int;
+ mv[1].as_int = nearest_mv[1].as_int;
+ break;
+ }
+ case NEAREST_NEARMV: {
+ assert(is_compound);
+ mv[0].as_int = nearest_mv[0].as_int;
+ mv[1].as_int = near_mv[1].as_int;
+ break;
+ }
+ case NEAR_NEARESTMV: {
+ assert(is_compound);
+ mv[0].as_int = near_mv[0].as_int;
+ mv[1].as_int = nearest_mv[1].as_int;
+ break;
+ }
+ case NEAR_NEARMV: {
+ assert(is_compound);
+ mv[0].as_int = near_mv[0].as_int;
+ mv[1].as_int = near_mv[1].as_int;
+ break;
+ }
+ case NEW_NEARESTMV: { // mv[0] is read, mv[1] is the nearest candidate
+ FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[0]],
+ xd->ref_mv_stack[mbmi->ref_frame[0]]);
+ nmv_context_counts *const mv_counts =
+ counts ? &counts->mv[nmv_ctx] : NULL;
+ read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, is_compound,
+ &cm->fc->nmvc[nmv_ctx], mv_counts, allow_hp);
+#else
+ nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+ read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, &cm->fc->nmvc, mv_counts,
+ allow_hp);
+#endif
+ assert(is_compound);
+ ret = ret && is_mv_valid(&mv[0].as_mv);
+ mv[1].as_int = nearest_mv[1].as_int;
+ break;
+ }
+ case NEAREST_NEWMV: { // mv[0] is the nearest candidate, mv[1] is read
+ FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[1]],
+ xd->ref_mv_stack[mbmi->ref_frame[1]]);
+ nmv_context_counts *const mv_counts =
+ counts ? &counts->mv[nmv_ctx] : NULL;
+ mv[0].as_int = nearest_mv[0].as_int;
+ read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv, is_compound,
+ &cm->fc->nmvc[nmv_ctx], mv_counts, allow_hp);
+#else
+ nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+ mv[0].as_int = nearest_mv[0].as_int;
+ read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv, &cm->fc->nmvc, mv_counts,
+ allow_hp);
+#endif
+ assert(is_compound);
+ ret = ret && is_mv_valid(&mv[1].as_mv);
+ break;
+ }
+ case NEAR_NEWMV: { // mv[0] is the near candidate, mv[1] is read
+ FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[1]],
+ xd->ref_mv_stack[mbmi->ref_frame[1]]);
+ nmv_context_counts *const mv_counts =
+ counts ? &counts->mv[nmv_ctx] : NULL;
+ mv[0].as_int = near_mv[0].as_int;
+ read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv, is_compound,
+ &cm->fc->nmvc[nmv_ctx], mv_counts, allow_hp);
+#else
+ nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+ mv[0].as_int = near_mv[0].as_int;
+ read_mv(r, &mv[1].as_mv, &ref_mv[1].as_mv, &cm->fc->nmvc, mv_counts,
+ allow_hp);
+#endif
+ assert(is_compound);
+
+ ret = ret && is_mv_valid(&mv[1].as_mv);
+ break;
+ }
+ case NEW_NEARMV: { // mv[0] is read, mv[1] is the near candidate
+ FRAME_COUNTS *counts = xd->counts;
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(xd->ref_mv_count[mbmi->ref_frame[0]],
+ xd->ref_mv_stack[mbmi->ref_frame[0]]);
+ nmv_context_counts *const mv_counts =
+ counts ? &counts->mv[nmv_ctx] : NULL;
+ read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, is_compound,
+ &cm->fc->nmvc[nmv_ctx], mv_counts, allow_hp);
+#else
+ nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
+ read_mv(r, &mv[0].as_mv, &ref_mv[0].as_mv, &cm->fc->nmvc, mv_counts,
+ allow_hp);
+#endif
+ assert(is_compound);
+ ret = ret && is_mv_valid(&mv[0].as_mv);
+ mv[1].as_int = near_mv[1].as_int;
+ break;
+ }
+ case ZERO_ZEROMV: {
+ assert(is_compound);
+ mv[0].as_int = 0;
+ mv[1].as_int = 0;
+ break;
+ }
+#endif // CONFIG_EXT_INTER
+ default: { return 0; } // any other mode is reported as a failure
+ }
+ return ret;
+}
+
+static int read_is_inter_block(VP10_COMMON *const cm, MACROBLOCKD *const xd,
+                               int segment_id, vp10_reader *r) {
+  int context, inter_flag;
+  // An active SEG_LVL_REF_FRAME feature decides the answer; vp10_read is skipped.
+  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
+    return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME;
+  // Otherwise decode the flag with the context-selected probability.
+  context = vp10_get_intra_inter_context(xd);
+  inter_flag = vp10_read(r, cm->fc->intra_inter_prob[context]);
+  if (xd->counts) ++xd->counts->intra_inter[context][inter_flag];
+  return inter_flag;
+}
+
+static void fpm_sync(void *const data, int mi_row) {
+  VP10Decoder *const dec = (VP10Decoder *)data;  // opaque callback argument
+  const int row = mi_row << dec->common.mib_size_log2;  // scaled row to wait on
+  vp10_frameworker_wait(dec->frame_worker_owner, dec->common.prev_frame, row);
+}
+
+static void read_inter_block_mode_info(VP10Decoder *const pbi, // Decodes reference frames, prediction mode(s), motion vectors and the config-dependent inter side info of one inter block into mi / mi->mbmi.
+ MACROBLOCKD *const xd,
+ MODE_INFO *const mi,
+#if (CONFIG_OBMC || CONFIG_EXT_INTER) && CONFIG_SUPERTX
+ int mi_row, int mi_col, vp10_reader *r,
+ int supertx_enabled) {
+#else
+ int mi_row, int mi_col, vp10_reader *r) {
+#endif // (CONFIG_OBMC || CONFIG_EXT_INTER) && CONFIG_SUPERTX
+ VP10_COMMON *const cm = &pbi->common;
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ const int allow_hp = cm->allow_high_precision_mv;
+ int_mv nearestmv[2], nearmv[2];
+ int_mv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
+#if CONFIG_EXT_INTER
+ int mv_idx;
+#endif // CONFIG_EXT_INTER
+ int ref, is_compound;
+ int16_t inter_mode_ctx[MODE_CTX_REF_FRAMES];
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ int16_t compound_inter_mode_ctx[MODE_CTX_REF_FRAMES];
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ int16_t mode_ctx = 0;
+ MV_REFERENCE_FRAME ref_frame;
+
+ mbmi->palette_mode_info.palette_size[0] = 0; // both palette sizes are reset for an inter block
+ mbmi->palette_mode_info.palette_size[1] = 0;
+
+ read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame);
+ is_compound = has_second_ref(mbmi);
+
+ for (ref = 0; ref < 1 + is_compound; ++ref) { // bind each reference buffer in use and set up its prediction planes
+ MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+ RefBuffer *ref_buf = &cm->frame_refs[frame - LAST_FRAME];
+
+ xd->block_refs[ref] = ref_buf;
+ if ((!vp10_is_valid_scale(&ref_buf->sf)))
+ vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+ "Reference frame has invalid dimensions");
+ vp10_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, &ref_buf->sf);
+ }
+
+ for (ref_frame = LAST_FRAME; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) { // candidates are gathered for every reference frame, not only the ones in use
+ vp10_find_mv_refs(cm, xd, mi, ref_frame,
+#if CONFIG_REF_MV
+ &xd->ref_mv_count[ref_frame], xd->ref_mv_stack[ref_frame],
+#if CONFIG_EXT_INTER
+ compound_inter_mode_ctx,
+#endif // CONFIG_EXT_INTER
+#endif
+ ref_mvs[ref_frame], mi_row, mi_col, fpm_sync, (void *)pbi,
+ inter_mode_ctx);
+ }
+
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (is_compound)
+ mode_ctx = compound_inter_mode_ctx[mbmi->ref_frame[0]];
+ else
+#endif // CONFIG_EXT_INTER
+ mode_ctx =
+ vp10_mode_context_analyzer(inter_mode_ctx, mbmi->ref_frame, bsize, -1);
+ mbmi->ref_mv_idx = 0;
+#else
+ mode_ctx = inter_mode_ctx[mbmi->ref_frame[0]];
+#endif
+
+ if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { // segment-level skip forces ZEROMV and is rejected for sub-8x8 blocks
+ mbmi->mode = ZEROMV;
+ if (bsize < BLOCK_8X8) {
+ vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+ "Invalid usage of segement feature on small blocks");
+ return;
+ }
+ } else {
+ if (bsize >= BLOCK_8X8) { // sub-8x8 blocks read their modes per sub-block further down
+#if CONFIG_EXT_INTER
+ if (is_compound)
+ mbmi->mode = read_inter_compound_mode(cm, xd, r, mode_ctx);
+ else
+#endif // CONFIG_EXT_INTER
+ mbmi->mode = read_inter_mode(cm, xd,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ mbmi,
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ r, mode_ctx);
+#if CONFIG_REF_MV
+ if (mbmi->mode == NEARMV || mbmi->mode == NEWMV)
+ read_drl_idx(cm, xd, mbmi, r);
+#endif
+ }
+ }
+
+#if CONFIG_EXT_INTER
+ if (bsize < BLOCK_8X8 ||
+ (mbmi->mode != ZEROMV && mbmi->mode != ZERO_ZEROMV)) {
+#else
+ if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
+#endif // CONFIG_EXT_INTER
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ vp10_find_best_ref_mvs(allow_hp, ref_mvs[mbmi->ref_frame[ref]],
+ &nearestmv[ref], &nearmv[ref]);
+ }
+ }
+
+#if CONFIG_REF_MV
+ if (mbmi->ref_mv_idx > 0) { // a non-zero ref_mv_idx replaces nearmv[0] with a later stack entry
+ int_mv cur_mv =
+ xd->ref_mv_stack[mbmi->ref_frame[0]][1 + mbmi->ref_mv_idx].this_mv;
+ nearmv[0] = cur_mv;
+ }
+
+#if CONFIG_EXT_INTER
+ if (is_compound && bsize >= BLOCK_8X8 && mbmi->mode != ZERO_ZEROMV) {
+#else
+ if (is_compound && bsize >= BLOCK_8X8 && mbmi->mode != NEWMV &&
+ mbmi->mode != ZEROMV) {
+#endif // CONFIG_EXT_INTER
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame); // compound candidates come from the stack indexed by this combined type
+
+#if CONFIG_EXT_INTER
+ if (xd->ref_mv_count[ref_frame_type] > 0) {
+#else
+ if (xd->ref_mv_count[ref_frame_type] == 1 && mbmi->mode == NEARESTMV) {
+#endif // CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER
+ if (mbmi->mode == NEAREST_NEARESTMV) {
+#endif // CONFIG_EXT_INTER
+ nearestmv[0] = xd->ref_mv_stack[ref_frame_type][0].this_mv;
+ nearestmv[1] = xd->ref_mv_stack[ref_frame_type][0].comp_mv;
+ lower_mv_precision(&nearestmv[0].as_mv, allow_hp);
+ lower_mv_precision(&nearestmv[1].as_mv, allow_hp);
+#if CONFIG_EXT_INTER
+ } else if (mbmi->mode == NEAREST_NEWMV || mbmi->mode == NEAREST_NEARMV) {
+ nearestmv[0] = xd->ref_mv_stack[ref_frame_type][0].this_mv;
+ lower_mv_precision(&nearestmv[0].as_mv, allow_hp);
+ } else if (mbmi->mode == NEW_NEARESTMV || mbmi->mode == NEAR_NEARESTMV) {
+ nearestmv[1] = xd->ref_mv_stack[ref_frame_type][0].comp_mv;
+ lower_mv_precision(&nearestmv[1].as_mv, allow_hp);
+ }
+#endif // CONFIG_EXT_INTER
+ }
+
+#if CONFIG_EXT_INTER
+ if (xd->ref_mv_count[ref_frame_type] > 1) {
+ if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEAR_NEARESTMV ||
+ mbmi->mode == NEAR_NEARMV) {
+ nearmv[0] = xd->ref_mv_stack[ref_frame_type][1].this_mv;
+ lower_mv_precision(&nearmv[0].as_mv, allow_hp);
+ }
+
+ if (mbmi->mode == NEW_NEARMV || mbmi->mode == NEAREST_NEARMV ||
+ mbmi->mode == NEAR_NEARMV) {
+ nearmv[1] = xd->ref_mv_stack[ref_frame_type][1].comp_mv;
+ lower_mv_precision(&nearmv[1].as_mv, allow_hp);
+ }
+ }
+#else
+ if (xd->ref_mv_count[ref_frame_type] > 1) {
+ int ref_mv_idx = 1 + mbmi->ref_mv_idx;
+ nearestmv[0] = xd->ref_mv_stack[ref_frame_type][0].this_mv;
+ nearestmv[1] = xd->ref_mv_stack[ref_frame_type][0].comp_mv;
+ nearmv[0] = xd->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
+ nearmv[1] = xd->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
+ }
+#endif // CONFIG_EXT_INTER
+ }
+#endif
+
+#if !CONFIG_EXT_INTERP && !CONFIG_DUAL_FILTER
+ mbmi->interp_filter = read_interp_filter(cm, xd, r);
+#endif // !CONFIG_EXT_INTERP && !CONFIG_DUAL_FILTER
+
+ if (bsize < BLOCK_8X8) { // sub-8x8: a mode and MV(s) are decoded for each sub-block j
+ const int num_4x4_w = 1 << xd->bmode_blocks_wl;
+ const int num_4x4_h = 1 << xd->bmode_blocks_hl;
+ int idx, idy;
+ PREDICTION_MODE b_mode;
+ int_mv nearest_sub8x8[2], near_sub8x8[2];
+#if CONFIG_EXT_INTER
+ int_mv ref_mv[2][2];
+#endif // CONFIG_EXT_INTER
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
+ int_mv block[2];
+ const int j = idy * 2 + idx; // index into mi->bmi[]
+ int_mv ref_mv_s8[2];
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (!is_compound)
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(inter_mode_ctx, mbmi->ref_frame,
+ bsize, j);
+#endif
+#if CONFIG_EXT_INTER
+ if (is_compound)
+ b_mode = read_inter_compound_mode(cm, xd, r, mode_ctx);
+ else
+#endif // CONFIG_EXT_INTER
+ b_mode = read_inter_mode(cm, xd,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ mbmi,
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ r, mode_ctx);
+
+#if CONFIG_EXT_INTER
+ mv_idx = (b_mode == NEWFROMNEARMV) ? 1 : 0; // selects which row of ref_mv[][] is passed to assign_mv()
+
+ if (b_mode != ZEROMV && b_mode != ZERO_ZEROMV) {
+#else
+ if (b_mode != ZEROMV) {
+#endif // CONFIG_EXT_INTER
+#if CONFIG_REF_MV
+ CANDIDATE_MV ref_mv_stack[2][MAX_REF_MV_STACK_SIZE];
+ uint8_t ref_mv_count[2];
+#endif
+ for (ref = 0; ref < 1 + is_compound; ++ref)
+#if CONFIG_EXT_INTER
+ {
+ int_mv mv_ref_list[MAX_MV_REF_CANDIDATES];
+ vp10_update_mv_context(xd, mi, mbmi->ref_frame[ref], mv_ref_list, j,
+ mi_row, mi_col, NULL);
+#endif // CONFIG_EXT_INTER
+ vp10_append_sub8x8_mvs_for_idx(
+ cm, xd, j, ref, mi_row, mi_col,
+#if CONFIG_REF_MV
+ ref_mv_stack[ref], &ref_mv_count[ref],
+#endif
+#if CONFIG_EXT_INTER
+ mv_ref_list,
+#endif // CONFIG_EXT_INTER
+ &nearest_sub8x8[ref], &near_sub8x8[ref]);
+#if CONFIG_EXT_INTER
+ if (have_newmv_in_inter_mode(b_mode)) {
+ mv_ref_list[0].as_int = nearest_sub8x8[ref].as_int;
+ mv_ref_list[1].as_int = near_sub8x8[ref].as_int;
+ vp10_find_best_ref_mvs(allow_hp, mv_ref_list, &ref_mv[0][ref],
+ &ref_mv[1][ref]);
+ }
+ }
+#endif // CONFIG_EXT_INTER
+ }
+
+ for (ref = 0; ref < 1 + is_compound && b_mode != ZEROMV; ++ref) {
+#if CONFIG_REF_MV
+ ref_mv_s8[ref] = nearest_sub8x8[ref];
+ lower_mv_precision(&ref_mv_s8[ref].as_mv, allow_hp);
+#else
+ ref_mv_s8[ref] = nearestmv[ref];
+#endif
+ }
+#if CONFIG_EXT_INTER
+ (void)ref_mv_s8; // unused in this configuration: ref_mv[mv_idx] is passed instead
+#endif
+
+ if (!assign_mv(cm, xd, b_mode,
+#if CONFIG_REF_MV
+ j,
+#endif
+ block,
+#if CONFIG_EXT_INTER
+ ref_mv[mv_idx],
+#else
+ ref_mv_s8,
+#endif // CONFIG_EXT_INTER
+ nearest_sub8x8, near_sub8x8, is_compound, allow_hp, r)) {
+ xd->corrupted |= 1;
+ break; // NOTE(review): leaves only the inner idx loop; the idy loop keeps going
+ };
+
+ mi->bmi[j].as_mv[0].as_int = block[0].as_int;
+ if (is_compound) mi->bmi[j].as_mv[1].as_int = block[1].as_int;
+
+ if (num_4x4_h == 2) mi->bmi[j + 2] = mi->bmi[j]; // duplicate into the entry one row down
+ if (num_4x4_w == 2) mi->bmi[j + 1] = mi->bmi[j]; // duplicate into the entry one column right
+ }
+ }
+
+#if CONFIG_REF_MV
+ mbmi->pred_mv[0].as_int = mi->bmi[3].pred_mv_s8[0].as_int;
+ mbmi->pred_mv[1].as_int = mi->bmi[3].pred_mv_s8[1].as_int;
+#endif
+ mi->mbmi.mode = b_mode; // block-level mode is the last sub-block mode decoded
+
+ mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; // block-level MVs are taken from sub-block 3
+ mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
+ } else {
+ int ref;
+ int_mv ref_mv[2];
+ ref_mv[0] = nearestmv[0];
+ ref_mv[1] = nearestmv[1];
+
+ for (ref = 0; ref < 1 + is_compound && mbmi->mode == NEWMV; ++ref) { // loop body runs for NEWMV only
+#if CONFIG_REF_MV
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+ if (xd->ref_mv_count[ref_frame_type] > 1) {
+ ref_mv[ref] =
+ (ref == 0)
+ ? xd->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx].this_mv
+ : xd->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx].comp_mv;
+ clamp_mv_ref(&ref_mv[ref].as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+ }
+#endif
+ nearestmv[ref] = ref_mv[ref];
+ }
+
+ xd->corrupted |= // assign_mv() returning 0 marks the block as corrupted
+ !assign_mv(cm, xd, mbmi->mode,
+#if CONFIG_REF_MV
+ 0,
+#endif
+ mbmi->mv,
+#if CONFIG_EXT_INTER
+ mbmi->mode == NEWFROMNEARMV ? nearmv : nearestmv,
+#else
+ ref_mv,
+#endif // CONFIG_EXT_INTER
+ nearestmv, nearmv, is_compound, allow_hp, r);
+ }
+
+#if CONFIG_EXT_INTER
+ mbmi->use_wedge_interintra = 0;
+ if (cm->reference_mode != COMPOUND_REFERENCE &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif
+ is_interintra_allowed(mbmi)) {
+ const int bsize_group = size_group_lookup[bsize];
+ const int interintra = vp10_read(r, cm->fc->interintra_prob[bsize_group]);
+ if (xd->counts) xd->counts->interintra[bsize_group][interintra]++;
+ assert(mbmi->ref_frame[1] == NONE);
+ if (interintra) {
+ const INTERINTRA_MODE interintra_mode =
+ read_interintra_mode(cm, xd, r, bsize_group);
+ mbmi->ref_frame[1] = INTRA_FRAME; // inter-intra is recorded as a second "reference" of INTRA_FRAME
+ mbmi->interintra_mode = interintra_mode;
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+ mbmi->angle_delta[0] = 0;
+ mbmi->angle_delta[1] = 0;
+ mbmi->intra_filter = INTRA_FILTER_LINEAR;
+#endif // CONFIG_EXT_INTRA
+ if (is_interintra_wedge_used(bsize)) {
+ mbmi->use_wedge_interintra =
+ vp10_read(r, cm->fc->wedge_interintra_prob[bsize]);
+ if (xd->counts)
+ xd->counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
+ if (mbmi->use_wedge_interintra) {
+ mbmi->interintra_wedge_index =
+ vp10_read_literal(r, get_wedge_bits_lookup(bsize));
+ mbmi->interintra_wedge_sign = 0;
+ }
+ }
+ }
+ }
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ mbmi->motion_variation = SIMPLE_TRANSLATION; // default, kept when the guarded read below is skipped
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif // CONFIG_SUPERTX
+#if CONFIG_EXT_INTER
+ if (mbmi->ref_frame[1] != INTRA_FRAME)
+#endif // CONFIG_EXT_INTER
+ mbmi->motion_variation = read_motvar_block(cm, xd, r);
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+#if CONFIG_EXT_INTER
+ mbmi->use_wedge_interinter = 0;
+ if (cm->reference_mode != SINGLE_REFERENCE &&
+ is_inter_compound_mode(mbmi->mode) &&
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ !(is_motvar_allowed(mbmi) &&
+ mbmi->motion_variation != SIMPLE_TRANSLATION) &&
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ is_interinter_wedge_used(bsize)) {
+ mbmi->use_wedge_interinter =
+ vp10_read(r, cm->fc->wedge_interinter_prob[bsize]);
+ if (xd->counts)
+ xd->counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++;
+ if (mbmi->use_wedge_interinter) {
+ mbmi->interinter_wedge_index =
+ vp10_read_literal(r, get_wedge_bits_lookup(bsize));
+ mbmi->interinter_wedge_sign = vp10_read_bit(r);
+ }
+ }
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_DUAL_FILTER
+ for (ref = 0; ref < 2; ++ref) {
+ mbmi->interp_filter[ref] = (cm->interp_filter == SWITCHABLE)
+ ? EIGHTTAP_REGULAR
+ : cm->interp_filter;
+
+ if (has_subpel_mv_component(xd->mi[0], xd, ref) ||
+ (mbmi->ref_frame[1] > INTRA_FRAME &&
+ has_subpel_mv_component(xd->mi[0], xd, ref + 2)))
+ mbmi->interp_filter[ref] = read_interp_filter(cm, xd, ref, r);
+ }
+ // The index system works as:
+ // (0, 1) -> (vertical, horizontal) filter types for the first ref frame.
+ // (2, 3) -> (vertical, horizontal) filter types for the second ref frame.
+ mbmi->interp_filter[2] = mbmi->interp_filter[0];
+ mbmi->interp_filter[3] = mbmi->interp_filter[1];
+#else
+#if CONFIG_EXT_INTERP
+ mbmi->interp_filter = read_interp_filter(cm, xd, r);
+#endif // CONFIG_EXT_INTERP
+#endif // CONFIG_DUAL_FILTER
+}
+
+static void read_inter_frame_mode_info(VP10Decoder *const pbi, // Reads segment id, skip flag, intra/inter flag, transform size, block mode info and transform type for one block of a non-intra-only frame.
+ MACROBLOCKD *const xd,
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif // CONFIG_SUPERTX
+ int mi_row, int mi_col, vp10_reader *r) {
+ VP10_COMMON *const cm = &pbi->common;
+ MODE_INFO *const mi = xd->mi[0];
+ MB_MODE_INFO *const mbmi = &mi->mbmi;
+ int inter_block = 1; // stays 1 when supertx_enabled bypasses read_is_inter_block() below
+#if CONFIG_VAR_TX
+ BLOCK_SIZE bsize = mbmi->sb_type;
+#endif // CONFIG_VAR_TX
+
+ mbmi->mv[0].as_int = 0; // both MVs start at zero before anything is decoded
+ mbmi->mv[1].as_int = 0;
+ mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r);
+#if CONFIG_SUPERTX
+ if (!supertx_enabled) {
+#endif // CONFIG_SUPERTX
+ mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
+ inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
+
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+ if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && !mbmi->skip &&
+ inter_block) { // transform sizes are read per max_tx_size-sized unit across the block
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ const int bs = num_4x4_blocks_wide_lookup[txb_size];
+ const int width = num_4x4_blocks_wide_lookup[bsize];
+ const int height = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ for (idy = 0; idy < height; idy += bs)
+ for (idx = 0; idx < width; idx += bs)
+ read_tx_size_vartx(cm, xd, mbmi, xd->counts, max_tx_size, idy, idx,
+ r);
+ if (xd->counts) {
+ const int ctx = get_tx_size_context(xd);
+ ++xd->counts->tx_size[max_tx_size - TX_8X8][ctx][mbmi->tx_size];
+ }
+ } else {
+ if (inter_block)
+ mbmi->tx_size = read_tx_size_inter(cm, xd, !mbmi->skip, r);
+ else
+ mbmi->tx_size = read_tx_size_intra(cm, xd, r);
+
+ if (inter_block) { // one size for the whole block, copied into every inter_tx_size entry it covers
+ const int width = num_4x4_blocks_wide_lookup[bsize];
+ const int height = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ for (idy = 0; idy < height; ++idy)
+ for (idx = 0; idx < width; ++idx)
+ mbmi->inter_tx_size[idy >> 1][idx >> 1] = mbmi->tx_size;
+ }
+
+ set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, xd);
+ }
+#else
+ if (inter_block)
+ mbmi->tx_size = read_tx_size_inter(cm, xd, !mbmi->skip, r);
+ else
+ mbmi->tx_size = read_tx_size_intra(cm, xd, r);
+#endif // CONFIG_VAR_TX
+#if CONFIG_SUPERTX
+ }
+#if CONFIG_VAR_TX
+ else if (inter_block) { // supertx: no size is read, xd->supertx_size is used throughout
+ const int width = num_4x4_blocks_wide_lookup[bsize];
+ const int height = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ xd->mi[0]->mbmi.tx_size = xd->supertx_size;
+ for (idy = 0; idy < height; ++idy)
+ for (idx = 0; idx < width; ++idx)
+ xd->mi[0]->mbmi.inter_tx_size[idy >> 1][idx >> 1] = xd->supertx_size;
+ }
+#endif // CONFIG_VAR_TX
+#endif // CONFIG_SUPERTX
+
+ if (inter_block)
+ read_inter_block_mode_info(pbi, xd,
+#if (CONFIG_OBMC || CONFIG_EXT_INTER) && CONFIG_SUPERTX
+
+ mi, mi_row, mi_col, r, supertx_enabled);
+#else
+ mi, mi_row, mi_col, r);
+#endif // (CONFIG_OBMC || CONFIG_EXT_INTER) && CONFIG_SUPERTX
+ else
+ read_intra_block_mode_info(cm, xd, mi, r);
+
+ if (!FIXED_TX_TYPE) { // tx_type is only touched here when FIXED_TX_TYPE is 0
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, mbmi->sb_type, inter_block) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif // CONFIG_SUPERTX
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ int eset = get_ext_tx_set(mbmi->tx_size, mbmi->sb_type, inter_block);
+ FRAME_COUNTS *counts = xd->counts;
+
+ if (inter_block) {
+ if (eset > 0) { // NOTE(review): tx_type is not written on this path when eset == 0 - confirm that cannot occur
+ mbmi->tx_type = vp10_read_tree(
+ r, vp10_ext_tx_inter_tree[eset],
+ cm->fc->inter_ext_tx_prob[eset][txsize_sqr_map[mbmi->tx_size]]);
+ if (counts)
+ ++counts->inter_ext_tx[eset][txsize_sqr_map[mbmi->tx_size]]
+ [mbmi->tx_type];
+ }
+ } else if (ALLOW_INTRA_EXT_TX) {
+ if (eset > 0) {
+ mbmi->tx_type = vp10_read_tree(
+ r, vp10_ext_tx_intra_tree[eset],
+ cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode]);
+ if (counts)
+ ++counts->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode]
+ [mbmi->tx_type];
+ }
+ }
+ } else {
+ mbmi->tx_type = DCT_DCT; // nothing is signalled, DCT_DCT is used
+ }
+#else
+ if (mbmi->tx_size < TX_32X32 && cm->base_qindex > 0 && !mbmi->skip &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif // CONFIG_SUPERTX
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ FRAME_COUNTS *counts = xd->counts;
+ if (inter_block) {
+ mbmi->tx_type = vp10_read_tree(
+ r, vp10_ext_tx_tree, cm->fc->inter_ext_tx_prob[mbmi->tx_size]);
+ if (counts) ++counts->inter_ext_tx[mbmi->tx_size][mbmi->tx_type];
+ } else {
+ const TX_TYPE tx_type_nom = intra_mode_to_tx_type_context[mbmi->mode]; // probability context derived from the intra mode
+ mbmi->tx_type = vp10_read_tree(
+ r, vp10_ext_tx_tree,
+ cm->fc->intra_ext_tx_prob[mbmi->tx_size][tx_type_nom]);
+ if (counts)
+ ++counts->intra_ext_tx[mbmi->tx_size][tx_type_nom][mbmi->tx_type];
+ }
+ } else {
+ mbmi->tx_type = DCT_DCT; // nothing is signalled, DCT_DCT is used
+ }
+#endif // CONFIG_EXT_TX
+ }
+}
+
+void vp10_read_mode_info(VP10Decoder *const pbi, MACROBLOCKD *xd,
+#if CONFIG_SUPERTX
+                         int supertx_enabled,
+#endif  // CONFIG_SUPERTX
+                         int mi_row, int mi_col, vp10_reader *r, int x_mis,
+                         int y_mis) {
+  VP10_COMMON *const cm = &pbi->common;
+  MODE_INFO *const mi = xd->mi[0];
+  MV_REF *const block_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
+  int row, col;
+
+  if (!frame_is_intra_only(cm)) {
+    read_inter_frame_mode_info(pbi, xd,
+#if CONFIG_SUPERTX
+                               supertx_enabled,
+#endif  // CONFIG_SUPERTX
+                               mi_row, mi_col, r);
+    // Replicate the decoded references and MVs over the x_mis by y_mis area.
+    for (row = 0; row < y_mis; ++row) {
+      for (col = 0; col < x_mis; ++col) {
+        MV_REF *const dst = &block_mvs[row * cm->mi_cols + col];
+        dst->ref_frame[0] = mi->mbmi.ref_frame[0];
+        dst->ref_frame[1] = mi->mbmi.ref_frame[1];
+        dst->mv[0].as_int = mi->mbmi.mv[0].as_int;
+        dst->mv[1].as_int = mi->mbmi.mv[1].as_int;
+      }
+    }
+    return;
+  }
+
+  read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r);
+#if CONFIG_REF_MV
+  // Intra-only frame: both reference slots of the covered area become NONE.
+  for (row = 0; row < y_mis; ++row) {
+    for (col = 0; col < x_mis; ++col) {
+      MV_REF *const dst = &block_mvs[row * cm->mi_cols + col];
+      dst->ref_frame[0] = dst->ref_frame[1] = NONE;
+    }
+  }
+#endif
+}
diff --git a/av1/decoder/decodemv.h b/av1/decoder/decodemv.h
new file mode 100644
index 0000000..59fdd70
--- /dev/null
+++ b/av1/decoder/decodemv.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_DECODER_DECODEMV_H_
+#define VP10_DECODER_DECODEMV_H_
+
+#include "av1/decoder/bitreader.h"
+
+#include "av1/decoder/decoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_read_mode_info(VP10Decoder *const pbi, MACROBLOCKD *xd, // Reads the mode info of the block at (mi_row, mi_col) from r into xd->mi[0].
+#if CONFIG_SUPERTX
+ int supertx_enabled,
+#endif // CONFIG_SUPERTX
+
+ int mi_row, int mi_col, vp10_reader *r, int x_mis, // x_mis, y_mis: width and height of the MV_REF area filled in for this block
+ int y_mis);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_DECODER_DECODEMV_H_
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
new file mode 100644
index 0000000..4cea36b
--- /dev/null
+++ b/av1/decoder/decoder.c
@@ -0,0 +1,549 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "./vpx_scale_rtcd.h"
+
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/system_state.h"
+#include "aom_ports/vpx_once.h"
+#include "aom_ports/vpx_timer.h"
+#include "aom_scale/vpx_scale.h"
+#include "aom_util/vpx_thread.h"
+
+#include "av1/common/alloccommon.h"
+#include "av1/common/loopfilter.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/reconintra.h"
+
+#include "av1/decoder/decodeframe.h"
+#include "av1/decoder/decoder.h"
+#include "av1/decoder/detokenize.h"
+
+static void initialize_dec(void) {
+  // Latch that keeps the setup below from running again once it has finished.
+  static volatile int init_done = 0;
+  if (init_done) return;
+
+  vp10_rtcd();
+  vpx_dsp_rtcd();
+  vpx_scale_rtcd();
+  vp10_init_intra_predictors();
+#if CONFIG_EXT_INTER
+  vp10_init_wedge_masks();
+#endif  // CONFIG_EXT_INTER
+  init_done = 1;
+}
+
+static void vp10_dec_setup_mi(VP10_COMMON *cm) {
+  const size_t entries = cm->mi_stride * (cm->mi_rows + 1);  // grid cells to clear
+  memset(cm->mi_grid_base, 0, entries * sizeof(*cm->mi_grid_base));
+  cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
+  cm->mi = cm->mip + cm->mi_stride + 1;  // same offset as the grid view above
+}
+
+static int vp10_dec_alloc_mi(VP10_COMMON *cm, int mi_size) {
+  // Returns 1 as soon as an allocation fails, 0 once both have succeeded.
+  cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
+  if (cm->mip == NULL) return 1;
+  cm->mi_alloc_size = mi_size;
+  cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *));
+  return cm->mi_grid_base == NULL;  // cm->mip is not freed here on failure
+}
+
+static void vp10_dec_free_mi(VP10_COMMON *cm) {  // free and NULL both MI arrays
+  vpx_free(cm->mi_grid_base);
+  cm->mi_grid_base = NULL;
+  vpx_free(cm->mip);
+  cm->mip = NULL;
+}
+
+VP10Decoder *vp10_decoder_create(BufferPool *const pool) { // Allocates and initialises a decoder bound to 'pool'; returns NULL on failure.
+ VP10Decoder *volatile const pbi = vpx_memalign(32, sizeof(*pbi));
+ VP10_COMMON *volatile const cm = pbi ? &pbi->common : NULL;
+
+ if (!cm) return NULL; // cm is NULL exactly when the pbi allocation failed
+
+ vp10_zero(*pbi);
+
+ if (setjmp(cm->error.jmp)) { // presumably where CHECK_MEM_ERROR failures below land - confirm against its definition
+ cm->error.setjmp = 0;
+ vp10_decoder_remove(pbi); // NOTE(review): vp10_decoder_remove() does not free cm->fc / cm->frame_contexts - confirm they are released elsewhere
+ return NULL;
+ }
+
+ cm->error.setjmp = 1;
+
+ CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
+ CHECK_MEM_ERROR(
+ cm, cm->frame_contexts,
+ (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
+
+ pbi->need_resync = 1;
+ once(initialize_dec);
+
+ // Initialize the references to not point to any frame buffers.
+ memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
+ memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map));
+
+ cm->current_video_frame = 0;
+ pbi->ready_for_new_data = 1;
+ pbi->common.buffer_pool = pool;
+
+ cm->bit_depth = VPX_BITS_8;
+ cm->dequant_bit_depth = VPX_BITS_8;
+
+ cm->alloc_mi = vp10_dec_alloc_mi; // install the decoder's MODE_INFO allocation callbacks
+ cm->free_mi = vp10_dec_free_mi;
+ cm->setup_mi = vp10_dec_setup_mi;
+
+ vp10_loop_filter_init(cm);
+
+#if CONFIG_AOM_QM
+ aom_qm_init(cm);
+#endif // CONFIG_AOM_QM
+#if CONFIG_LOOP_RESTORATION
+ vp10_loop_restoration_precal();
+#endif // CONFIG_LOOP_RESTORATION
+
+ cm->error.setjmp = 0; // flag cleared again before the decoder is handed back
+
+ vpx_get_worker_interface()->init(&pbi->lf_worker);
+
+ return pbi;
+}
+
+void vp10_decoder_remove(VP10Decoder *pbi) {
+  int worker_idx;
+
+  if (pbi == NULL) return;
+
+  // End the loop-filter worker, then free its data1 payload and the tile data.
+  vpx_get_worker_interface()->end(&pbi->lf_worker);
+  vpx_free(pbi->lf_worker.data1);
+  vpx_free(pbi->tile_data);
+
+  // End every tile worker before the arrays that hold them are freed.
+  for (worker_idx = 0; worker_idx < pbi->num_tile_workers; ++worker_idx)
+    vpx_get_worker_interface()->end(&pbi->tile_workers[worker_idx]);
+  vpx_free(pbi->tile_worker_data);
+  vpx_free(pbi->tile_worker_info);
+  vpx_free(pbi->tile_workers);
+
+  // Row-sync state is only deallocated when num_tile_workers is positive.
+  if (pbi->num_tile_workers > 0) vp10_loop_filter_dealloc(&pbi->lf_row_sync);
+
+  vpx_free(pbi);
+}
+
+static int equal_dimensions(const YV12_BUFFER_CONFIG *a,
+                            const YV12_BUFFER_CONFIG *b) {  // 1 iff Y and UV match
+  if (a->y_height != b->y_height || a->y_width != b->y_width) return 0;
+  return a->uv_height == b->uv_height && a->uv_width == b->uv_width;
+}
+
+vpx_codec_err_t vp10_copy_reference_dec(VP10Decoder *pbi,
+                                        VPX_REFFRAME ref_frame_flag,
+                                        YV12_BUFFER_CONFIG *sd) {
+  VP10_COMMON *cm = &pbi->common;
+  const YV12_BUFFER_CONFIG *cfg;
+
+  /* TODO(jkoleszar): The decoder has no real knowledge of how the encoder
+   * uses its frame buffers. This stub only keeps the vpxenc --test-decode
+   * functionality working until dedicated controls replace it. Only the
+   * 'last' reference can be copied out; any other flag is an error.
+   */
+  if (ref_frame_flag != VPX_LAST_FLAG) {
+    vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame");
+    return cm->error.error_code;
+  }
+  cfg = get_ref_frame(cm, 0);
+  if (cfg == NULL) {
+    vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+                       "No 'last' reference frame");
+    return VPX_CODEC_ERROR;
+  }
+  if (equal_dimensions(cfg, sd))
+    vpx_yv12_copy_frame(cfg, sd);
+  else
+    vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+                       "Incorrect buffer dimensions");
+  return cm->error.error_code;
+}
+
+vpx_codec_err_t vp10_set_reference_dec(VP10_COMMON *cm,
+                                       VPX_REFFRAME ref_frame_flag,
+                                       YV12_BUFFER_CONFIG *sd) {
+  int map_slot = -1;  // position in cm->ref_frame_map; -1 = unknown flag
+  int idx;
+  YV12_BUFFER_CONFIG *ref_buf = NULL;
+
+  // TODO(jkoleszar): The decoder has no real knowledge of how the encoder
+  // uses its frame buffers. This stub only keeps the vpxenc --test-decode
+  // functionality working until dedicated controls replace it.
+
+  // (Yunqing) The slot chosen for each flag relies on this encoder setup:
+  //   cpi->lst_fb_idx = 0;
+  // #if CONFIG_EXT_REFS
+  //   cpi->lst2_fb_idx = 1;
+  //   cpi->lst3_fb_idx = 2;
+  //   cpi->gld_fb_idx = 3;
+  //   cpi->bwd_fb_idx = 4;
+  //   cpi->alt_fb_idx = 5;
+  // #else  // CONFIG_EXT_REFS
+  //   cpi->gld_fb_idx = 1;
+  //   cpi->alt_fb_idx = 2;
+  // #endif  // CONFIG_EXT_REFS
+
+  // TODO(zoeliu): Revisit this code and reconsider what may be assumed about
+  // the virtual indexes of the reference frame buffers.
+  if (ref_frame_flag == VPX_LAST_FLAG) {
+    map_slot = 0;
+#if CONFIG_EXT_REFS
+  } else if (ref_frame_flag == VPX_LAST2_FLAG) {
+    map_slot = 1;
+  } else if (ref_frame_flag == VPX_LAST3_FLAG) {
+    map_slot = 2;
+  } else if (ref_frame_flag == VPX_GOLD_FLAG) {
+    map_slot = 3;
+  } else if (ref_frame_flag == VPX_BWD_FLAG) {
+    map_slot = 4;
+  } else if (ref_frame_flag == VPX_ALT_FLAG) {
+    map_slot = 5;
+#else
+  } else if (ref_frame_flag == VPX_GOLD_FLAG) {
+    map_slot = 1;
+  } else if (ref_frame_flag == VPX_ALT_FLAG) {
+    map_slot = 2;
+#endif  // CONFIG_EXT_REFS
+  }
+
+  // No branch matched: the flag names no reference handled here.
+  if (map_slot < 0) {
+    vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame");
+    return cm->error.error_code;
+  }
+
+  idx = cm->ref_frame_map[map_slot];
+  if (idx < 0 || idx >= FRAME_BUFFERS) {
+    vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+                       "Invalid reference frame map");
+    return cm->error.error_code;
+  }
+
+  // Overwrite the pooled reference buffer, but only when dimensions agree.
+  ref_buf = &cm->buffer_pool->frame_bufs[idx].buf;
+  if (equal_dimensions(ref_buf, sd))
+    vpx_yv12_copy_frame(sd, ref_buf);
+  else
+    vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+                       "Incorrect buffer dimensions");
+
+  return cm->error.error_code;
+}
+
+/* If any buffer updating is signaled it should be done here. */
+static void swap_frame_buffers(VP10Decoder *pbi) {
+ int ref_index = 0, mask;
+ VP10_COMMON *const cm = &pbi->common;
+ BufferPool *const pool = cm->buffer_pool;
+ RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+
+ lock_buffer_pool(pool);
+ for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { // bit i <-> slot i
+ const int old_idx = cm->ref_frame_map[ref_index];
+ // Current thread releases the holding of reference frame.
+ decrease_ref_count(old_idx, frame_bufs, pool);
+
+ // Release the reference frame holding in the reference map for the decoding
+ // of the next frame.
+ if (mask & 1) decrease_ref_count(old_idx, frame_bufs, pool);
+ cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
+ ++ref_index;
+ }
+
+ // Current thread releases the holding of reference frame.
+ for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) {
+ const int old_idx = cm->ref_frame_map[ref_index];
+ decrease_ref_count(old_idx, frame_bufs, pool);
+ cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
+ }
+
+ unlock_buffer_pool(pool);
+ pbi->hold_ref_buf = 0; // the references taken for this frame are released
+ cm->frame_to_show = get_frame_new_buffer(cm);
+
+ // TODO(zoeliu): To fix the ref frame buffer update for the scenario of
+ // cm->frame_parallel_decode == 1
+ if (!cm->frame_parallel_decode || !cm->show_frame) {
+ lock_buffer_pool(pool);
+ --frame_bufs[cm->new_fb_idx].ref_count; // plain decrement, no release_fb_cb
+ unlock_buffer_pool(pool);
+ }
+
+ // Invalidate these references until the next frame starts.
+ for (ref_index = 0; ref_index < INTER_REFS_PER_FRAME; ref_index++) {
+ cm->frame_refs[ref_index].idx = INVALID_IDX;
+ cm->frame_refs[ref_index].buf = NULL;
+ }
+}
+
+int vp10_receive_compressed_data(VP10Decoder *pbi, size_t size,
+ const uint8_t **psource) { // decodes one frame; returns 0 on success
+ VP10_COMMON *volatile const cm = &pbi->common;
+ BufferPool *volatile const pool = cm->buffer_pool;
+ RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs;
+ const uint8_t *source = *psource;
+ int retcode = 0; // never modified below; the success path returns it as 0
+ cm->error.error_code = VPX_CODEC_OK;
+
+ if (size == 0) {
+ // This is used to signal that we are missing frames.
+ // We do not know if the missing frame(s) was supposed to update
+ // any of the reference buffers, but we act conservative and
+ // mark only the last buffer as corrupted.
+ //
+ // TODO(jkoleszar): Error concealment is undefined and non-normative
+ // at this point, but if it becomes so, [0] may not always be the correct
+ // thing to do here.
+ if (cm->frame_refs[0].idx > 0) { // NOTE(review): index 0 is skipped; confirm
+ assert(cm->frame_refs[0].buf != NULL);
+ cm->frame_refs[0].buf->corrupted = 1;
+ }
+ }
+
+ pbi->ready_for_new_data = 0;
+
+ // Find a free buffer for the new frame, releasing the reference previously
+ // held.
+
+ // Check if the previous frame was a frame without any references to it.
+ // Release frame buffer if not decoding in frame parallel mode.
+ if (!cm->frame_parallel_decode && cm->new_fb_idx >= 0 &&
+ frame_bufs[cm->new_fb_idx].ref_count == 0)
+ pool->release_fb_cb(pool->cb_priv,
+ &frame_bufs[cm->new_fb_idx].raw_frame_buffer);
+
+ // Find a free frame buffer. Return error if can not find any.
+ cm->new_fb_idx = get_free_fb(cm);
+ if (cm->new_fb_idx == INVALID_IDX) return VPX_CODEC_MEM_ERROR;
+
+ // Assign a MV array to the frame buffer.
+ cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
+
+ pbi->hold_ref_buf = 0;
+ if (cm->frame_parallel_decode) {
+ VPxWorker *const worker = pbi->frame_worker_owner;
+ vp10_frameworker_lock_stats(worker);
+ frame_bufs[cm->new_fb_idx].frame_worker_owner = worker;
+ // Reset decoding progress.
+ pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
+ pbi->cur_buf->row = -1;
+ pbi->cur_buf->col = -1;
+ vp10_frameworker_unlock_stats(worker);
+ } else {
+ pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
+ }
+
+ if (setjmp(cm->error.jmp)) { // non-zero only when re-entered via longjmp
+ const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+ int i;
+
+ cm->error.setjmp = 0;
+ pbi->ready_for_new_data = 1;
+
+ // Synchronize all threads immediately as a subsequent decode call may
+ // cause a resize invalidating some allocations.
+ winterface->sync(&pbi->lf_worker);
+ for (i = 0; i < pbi->num_tile_workers; ++i) {
+ winterface->sync(&pbi->tile_workers[i]);
+ }
+
+ lock_buffer_pool(pool);
+ // Release all the reference buffers if worker thread is holding them.
+ if (pbi->hold_ref_buf == 1) {
+ int ref_index = 0, mask;
+ for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
+ const int old_idx = cm->ref_frame_map[ref_index];
+ // Current thread releases the holding of reference frame.
+ decrease_ref_count(old_idx, frame_bufs, pool);
+
+ // Release the reference frame holding in the reference map for the
+ // decoding of the next frame.
+ if (mask & 1) decrease_ref_count(old_idx, frame_bufs, pool);
+ ++ref_index;
+ }
+
+ // Current thread releases the holding of reference frame.
+ for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) {
+ const int old_idx = cm->ref_frame_map[ref_index];
+ decrease_ref_count(old_idx, frame_bufs, pool);
+ }
+ pbi->hold_ref_buf = 0;
+ }
+ // Release current frame.
+ decrease_ref_count(cm->new_fb_idx, frame_bufs, pool);
+ unlock_buffer_pool(pool);
+
+ vpx_clear_system_state();
+ return -1; // decode failed; the error details are left in cm->error
+ }
+
+ cm->error.setjmp = 1; // flag cleared again on both exit paths of this call
+ vp10_decode_frame(pbi, source, source + size, psource);
+
+ swap_frame_buffers(pbi);
+
+#if CONFIG_EXT_TILE
+ // For now, we only extend the frame borders when the whole frame is decoded.
+ // Later, if needed, extend the border for the decoded tile on the frame
+ // border.
+ if (pbi->dec_tile_row == -1 && pbi->dec_tile_col == -1)
+#endif // CONFIG_EXT_TILE
+ vpx_extend_frame_inner_borders(cm->frame_to_show);
+
+ vpx_clear_system_state();
+
+ if (!cm->show_existing_frame) {
+ cm->last_show_frame = cm->show_frame;
+
+#if CONFIG_EXT_REFS
+ // NOTE: It is not supposed to ref to any frame not used as reference
+ if (cm->is_reference_frame)
+#endif // CONFIG_EXT_REFS
+ cm->prev_frame = cm->cur_frame;
+
+ if (cm->seg.enabled && !cm->frame_parallel_decode)
+ vp10_swap_current_and_last_seg_map(cm);
+ }
+
+ // Update progress in frame parallel decode.
+ if (cm->frame_parallel_decode) {
+ // Need to lock the mutex here as another thread may
+ // be accessing this buffer.
+ VPxWorker *const worker = pbi->frame_worker_owner;
+ FrameWorkerData *const frame_worker_data = worker->data1;
+ vp10_frameworker_lock_stats(worker);
+
+ if (cm->show_frame) {
+ cm->current_video_frame++;
+ }
+ frame_worker_data->frame_decoded = 1;
+ frame_worker_data->frame_context_ready = 1;
+ vp10_frameworker_signal_stats(worker);
+ vp10_frameworker_unlock_stats(worker);
+ } else {
+ cm->last_width = cm->width;
+ cm->last_height = cm->height;
+ if (cm->show_frame) {
+ cm->current_video_frame++;
+ }
+ }
+
+ cm->error.setjmp = 0;
+ return retcode;
+}
+
+int vp10_get_raw_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd) {
+ VP10_COMMON *const cm = &pbi->common;
+ // Hands the pending decoded frame to the caller. Returns 0 after copying
+ // *cm->frame_to_show into *sd, or -1 when there is no frame to output.
+ if (pbi->ready_for_new_data == 1) return -1; // nothing pending
+
+ // The pending frame is consumed by this call whether or not it is shown.
+ pbi->ready_for_new_data = 1;
+
+ /* no raw frame to show!!! */
+ if (!cm->show_frame) return -1;
+
+ *sd = *cm->frame_to_show;
+ vpx_clear_system_state();
+ return 0;
+}
+
+int vp10_get_frame_to_show(VP10Decoder *pbi, YV12_BUFFER_CONFIG *frame) {
+ VP10_COMMON *const cm = &pbi->common;
+ // Copies *cm->frame_to_show into *frame; -1 if not shown or not available.
+ const int available = cm->show_frame && cm->frame_to_show;
+ if (!available) return -1;
+ *frame = *cm->frame_to_show;
+ return 0;
+}
+
+vpx_codec_err_t vp10_parse_superframe_index(const uint8_t *data, size_t data_sz,
+ uint32_t sizes[8], int *count,
+ vpx_decrypt_cb decrypt_cb,
+ void *decrypt_state) {
+ // Parses the superframe index, if any, at the end of data[0..data_sz).
+ // On return *count is the frame count (0 when no index is present) and
+ // sizes[0..*count) holds the frame sizes in bytes. A chunk ending with a byte
+ // matching 0xc0 is invalid unless it is a superframe index (an encoder must
+ // append a 0 byte to real data ending in 0xc0), so a short chunk or a missing
+ // front marker yields VPX_CODEC_CORRUPT_FRAME; otherwise VPX_CODEC_OK.
+ uint8_t marker;
+ size_t frame_sz_sum = 0;
+
+ assert(data_sz);
+ marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1);
+ *count = 0;
+
+ if ((marker & 0xe0) == 0xc0) {
+ const uint32_t frames = (marker & 0x7) + 1;
+ const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+ const size_t index_sz = 2 + mag * (frames - 1);
+
+ // This chunk is marked as having a superframe index but doesn't have
+ // enough data for it, thus it's an invalid superframe index.
+ if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;
+
+ {
+ const uint8_t marker2 =
+ read_marker(decrypt_cb, decrypt_state, data + data_sz - index_sz);
+
+ // This chunk is marked as having a superframe index but doesn't have
+ // the matching marker byte at the front of the index therefore it's an
+ // invalid chunk.
+ if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME;
+ }
+
+ {
+ // Found a valid superframe index.
+ uint32_t i, j;
+ const uint8_t *x = &data[data_sz - index_sz + 1];
+ // Only the (frames - 1) stored sizes, at most 7 * 4 bytes, lie between
+ // the two markers; decrypting frames * mag bytes would overrun data.
+ uint8_t clear_buffer[32];
+ assert(sizeof(clear_buffer) >= (frames - 1) * mag);
+ if (decrypt_cb) {
+ decrypt_cb(decrypt_state, x, clear_buffer, (frames - 1) * mag);
+ x = clear_buffer;
+ }
+
+ for (i = 0; i < frames - 1; ++i) {
+ uint32_t this_sz = 0;
+ // Widen each byte before shifting so a shift by 24 cannot overflow int.
+ for (j = 0; j < mag; ++j) this_sz |= (uint32_t)(*x++) << (j * 8);
+ this_sz += 1; // sizes are stored minus one
+ sizes[i] = this_sz;
+ frame_sz_sum += this_sz;
+ }
+ sizes[i] = (uint32_t)(data_sz - index_sz - frame_sz_sum); // remainder
+ *count = frames;
+ }
+ }
+ return VPX_CODEC_OK;
+}
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
new file mode 100644
index 0000000..47a5a7b
--- /dev/null
+++ b/av1/decoder/decoder.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_DECODER_DECODER_H_
+#define VP10_DECODER_DECODER_H_
+
+#include "./vpx_config.h"
+
+#include "aom/vpx_codec.h"
+#include "av1/decoder/bitreader.h"
+#include "aom_scale/yv12config.h"
+#include "aom_util/vpx_thread.h"
+
+#include "av1/common/thread_common.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/decoder/dthread.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// TODO(hkuang): combine this with TileWorkerData.
+typedef struct TileData {
+ VP10_COMMON *cm;
+ vp10_reader bit_reader;
+ DECLARE_ALIGNED(16, MACROBLOCKD, xd);
+ /* dqcoeff is shared by all planes, so the planes must be decoded serially. */
+ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]); // two maps
+} TileData;
+
+typedef struct TileWorkerData {
+ struct VP10Decoder *pbi;
+ vp10_reader bit_reader;
+ FRAME_COUNTS counts;
+ DECLARE_ALIGNED(16, MACROBLOCKD, xd);
+ /* dqcoeff is shared by all planes, so the planes must be decoded serially. */
+ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
+ DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]); // two maps
+ struct vpx_internal_error_info error_info;
+} TileWorkerData;
+
+typedef struct TileBufferDec {
+ const uint8_t *data; // NOTE(review): presumably start of the tile's bytes
+ size_t size; // NOTE(review): presumably byte length of data; confirm
+ const uint8_t *raw_data_end; // The end of the raw tile buffer in the
+ // bit stream.
+ int col; // only used with multi-threaded decoding
+} TileBufferDec;
+
+typedef struct VP10Decoder {
+ DECLARE_ALIGNED(16, MACROBLOCKD, mb);
+
+ DECLARE_ALIGNED(16, VP10_COMMON, common);
+
+ int ready_for_new_data; // 0 once a frame is received; 1 after output or error
+
+ int refresh_frame_flags; // bitmask: bit i refers to slot i of ref_frame_map
+
+ // TODO(hkuang): Combine this with cur_buf in macroblockd as they are
+ // the same.
+ RefCntBuffer *cur_buf; // Current decoding frame buffer.
+
+ VPxWorker *frame_worker_owner; // frame_worker that owns this pbi.
+ VPxWorker lf_worker;
+ VPxWorker *tile_workers;
+ TileWorkerData *tile_worker_data;
+ TileInfo *tile_worker_info;
+ int num_tile_workers; // number of entries in tile_workers
+
+ TileData *tile_data;
+ int allocated_tiles;
+
+ TileBufferDec tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
+
+ VP10LfSync lf_row_sync;
+
+ vpx_decrypt_cb decrypt_cb;
+ void *decrypt_state; // passed back to decrypt_cb as its first argument
+
+ int max_threads;
+ int inv_tile_order;
+ int need_resync; // wait for key/intra-only frame.
+ int hold_ref_buf; // 1 while held reference buffers must be freed on error.
+
+ int tile_size_bytes;
+#if CONFIG_EXT_TILE
+ int tile_col_size_bytes;
+ int dec_tile_row, dec_tile_col; // both -1 when the whole frame is decoded
+#endif // CONFIG_EXT_TILE
+} VP10Decoder;
+
+int vp10_receive_compressed_data(struct VP10Decoder *pbi, size_t size,
+ const uint8_t **dest);
+
+int vp10_get_raw_frame(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd);
+
+int vp10_get_frame_to_show(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *frame);
+
+vpx_codec_err_t vp10_copy_reference_dec(struct VP10Decoder *pbi,
+ VPX_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd);
+
+vpx_codec_err_t vp10_set_reference_dec(VP10_COMMON *cm,
+ VPX_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd);
+
+static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb,
+ void *decrypt_state, const uint8_t *data) {
+ // Returns the byte at data, run through decrypt_cb first when one is set.
+ uint8_t plain;
+ if (!decrypt_cb) return *data;
+
+ decrypt_cb(decrypt_state, data, &plain, 1);
+ return plain;
+}
+
+// This function is exposed for use in tests, as well as the inlined function
+// "read_marker".
+vpx_codec_err_t vp10_parse_superframe_index(const uint8_t *data, size_t data_sz,
+ uint32_t sizes[8], int *count,
+ vpx_decrypt_cb decrypt_cb,
+ void *decrypt_state);
+
+struct VP10Decoder *vp10_decoder_create(BufferPool *const pool);
+
+void vp10_decoder_remove(struct VP10Decoder *pbi);
+
+static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs,
+ BufferPool *const pool) {
+ RefCntBuffer *buf;
+ // Negative indices are ignored.
+ if (idx < 0) return;
+
+ buf = &frame_bufs[idx];
+ --buf->ref_count;
+ // A worker may only get a free framebuffer index when calling get_free_fb;
+ // the private buffer is not set up until the header has been decoded, so an
+ // error during header decoding leaves no valid priv buffer to release.
+ if (buf->ref_count != 0 || !buf->raw_frame_buffer.priv) return;
+ pool->release_fb_cb(pool->cb_priv, &buf->raw_frame_buffer);
+}
+
+#if CONFIG_EXT_REFS
+static INLINE int dec_is_ref_frame_buf(VP10Decoder *const pbi,
+ RefCntBuffer *frame_buf) {
+ // Returns 1 when frame_buf backs any valid entry of frame_refs, else 0.
+ int ref;
+ for (ref = 0; ref < INTER_REFS_PER_FRAME; ++ref) {
+ const int buf_idx = pbi->common.frame_refs[ref].idx;
+ if (buf_idx == INVALID_IDX) continue;
+ if (frame_buf == &pbi->common.buffer_pool->frame_bufs[buf_idx]) return 1;
+ }
+ return 0;
+}
+#endif // CONFIG_EXT_REFS
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_DECODER_DECODER_H_
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
new file mode 100644
index 0000000..0fba999
--- /dev/null
+++ b/av1/decoder/detokenize.c
@@ -0,0 +1,536 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+
+#include "av1/common/ans.h"
+#include "av1/common/blockd.h"
+#include "av1/common/common.h"
+#include "av1/common/entropy.h"
+#include "av1/common/idct.h"
+
+#include "av1/decoder/detokenize.h"
+
+#define EOB_CONTEXT_NODE 0
+#define ZERO_CONTEXT_NODE 1
+#define ONE_CONTEXT_NODE 2
+#define LOW_VAL_CONTEXT_NODE 0
+#define TWO_CONTEXT_NODE 1
+#define THREE_CONTEXT_NODE 2
+#define HIGH_LOW_CONTEXT_NODE 3
+#define CAT_ONE_CONTEXT_NODE 4
+#define CAT_THREEFOUR_CONTEXT_NODE 5
+#define CAT_THREE_CONTEXT_NODE 6
+#define CAT_FIVE_CONTEXT_NODE 7
+
+#define INCREMENT_COUNT(token) \
+ do { \
+ if (counts) ++coef_counts[band][ctx][token]; \
+ } while (0)
+
+#if !CONFIG_ANS
+static INLINE int read_coeff(const vpx_prob *probs, int n, vp10_reader *r) {
+ int bits = 0; // accumulates n decoded bits, most significant first
+ for (; n > 0; --n, ++probs) bits = (bits << 1) | vp10_read(r, *probs);
+ return bits;
+}
+
+#if CONFIG_AOM_QM
+static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
+ tran_low_t *dqcoeff, TX_SIZE tx_size, TX_TYPE tx_type,
+ const int16_t *dq, int ctx, const int16_t *scan,
+ const int16_t *nb, vp10_reader *r,
+ const qm_val_t *iqm[2][TX_SIZES])
+#else
+static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
+ tran_low_t *dqcoeff, TX_SIZE tx_size, TX_TYPE tx_type,
+ const int16_t *dq,
+#if CONFIG_NEW_QUANT
+ dequant_val_type_nuq *dq_val,
+#endif // CONFIG_NEW_QUANT
+ int ctx, const int16_t *scan, const int16_t *nb,
+ vp10_reader *r)
+#endif
+{ // Decodes one block's tokens from r into dqcoeff; returns the end position.
+ FRAME_COUNTS *counts = xd->counts;
+ const int max_eob = get_tx2d_size(tx_size);
+ const FRAME_CONTEXT *const fc = xd->fc;
+ const int ref = is_inter_block(&xd->mi[0]->mbmi);
+#if CONFIG_AOM_QM
+ const qm_val_t *iqmatrix = iqm[!ref][tx_size];
+#endif
+ int band, c = 0; // c: current position in scan order
+ const int tx_size_ctx = txsize_sqr_map[tx_size];
+ const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
+ fc->coef_probs[tx_size_ctx][type][ref];
+ const vpx_prob *prob;
+ unsigned int(*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
+ unsigned int(*eob_branch_count)[COEFF_CONTEXTS];
+ uint8_t token_cache[MAX_TX_SQUARE];
+ const uint8_t *band_translate = get_band_translate(tx_size);
+ int dq_shift;
+ int v, token;
+ int16_t dqv = dq[0]; // dq[0] for the first position, dq[1] afterwards
+#if CONFIG_NEW_QUANT
+ const tran_low_t *dqv_val = &dq_val[0][0];
+#endif // CONFIG_NEW_QUANT
+ const uint8_t *cat1_prob;
+ const uint8_t *cat2_prob;
+ const uint8_t *cat3_prob;
+ const uint8_t *cat4_prob;
+ const uint8_t *cat5_prob;
+ const uint8_t *cat6_prob;
+
+ if (counts) { // the count tables are set, and later used, only when non-NULL
+ coef_counts = counts->coef[tx_size_ctx][type][ref];
+ eob_branch_count = counts->eob_branch[tx_size_ctx][type][ref];
+ }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->bd > VPX_BITS_8) {
+ if (xd->bd == VPX_BITS_10) {
+ cat1_prob = vp10_cat1_prob_high10;
+ cat2_prob = vp10_cat2_prob_high10;
+ cat3_prob = vp10_cat3_prob_high10;
+ cat4_prob = vp10_cat4_prob_high10;
+ cat5_prob = vp10_cat5_prob_high10;
+ cat6_prob = vp10_cat6_prob_high10;
+ } else {
+ cat1_prob = vp10_cat1_prob_high12;
+ cat2_prob = vp10_cat2_prob_high12;
+ cat3_prob = vp10_cat3_prob_high12;
+ cat4_prob = vp10_cat4_prob_high12;
+ cat5_prob = vp10_cat5_prob_high12;
+ cat6_prob = vp10_cat6_prob_high12;
+ }
+ } else {
+ cat1_prob = vp10_cat1_prob;
+ cat2_prob = vp10_cat2_prob;
+ cat3_prob = vp10_cat3_prob;
+ cat4_prob = vp10_cat4_prob;
+ cat5_prob = vp10_cat5_prob;
+ cat6_prob = vp10_cat6_prob;
+ }
+#else
+ cat1_prob = vp10_cat1_prob;
+ cat2_prob = vp10_cat2_prob;
+ cat3_prob = vp10_cat3_prob;
+ cat4_prob = vp10_cat4_prob;
+ cat5_prob = vp10_cat5_prob;
+ cat6_prob = vp10_cat6_prob;
+#endif
+
+ dq_shift = get_tx_scale(xd, tx_type, tx_size);
+
+ while (c < max_eob) {
+ int val = -1;
+ band = *band_translate++;
+ prob = coef_probs[band][ctx];
+ if (counts) ++eob_branch_count[band][ctx];
+ if (!vp10_read(r, prob[EOB_CONTEXT_NODE])) { // end-of-block: stop decoding
+ INCREMENT_COUNT(EOB_MODEL_TOKEN);
+ break;
+ }
+
+#if CONFIG_NEW_QUANT
+ dqv_val = &dq_val[band][0];
+#endif // CONFIG_NEW_QUANT
+
+ while (!vp10_read(r, prob[ZERO_CONTEXT_NODE])) { // run of zero tokens
+ INCREMENT_COUNT(ZERO_TOKEN);
+ dqv = dq[1];
+ token_cache[scan[c]] = 0;
+ ++c;
+ if (c >= max_eob) return c; // zero tokens at the end (no eob token)
+ ctx = get_coef_context(nb, token_cache, c);
+ band = *band_translate++;
+ prob = coef_probs[band][ctx];
+#if CONFIG_NEW_QUANT
+ dqv_val = &dq_val[band][0];
+#endif // CONFIG_NEW_QUANT
+ }
+
+ if (!vp10_read(r, prob[ONE_CONTEXT_NODE])) {
+ INCREMENT_COUNT(ONE_TOKEN);
+ token = ONE_TOKEN;
+ val = 1;
+ } else {
+ INCREMENT_COUNT(TWO_TOKEN);
+ token = vp10_read_tree(r, vp10_coef_con_tree,
+ vp10_pareto8_full[prob[PIVOT_NODE] - 1]);
+ switch (token) {
+ case TWO_TOKEN:
+ case THREE_TOKEN:
+ case FOUR_TOKEN: val = token; break;
+ case CATEGORY1_TOKEN:
+ val = CAT1_MIN_VAL + read_coeff(cat1_prob, 1, r);
+ break;
+ case CATEGORY2_TOKEN:
+ val = CAT2_MIN_VAL + read_coeff(cat2_prob, 2, r);
+ break;
+ case CATEGORY3_TOKEN:
+ val = CAT3_MIN_VAL + read_coeff(cat3_prob, 3, r);
+ break;
+ case CATEGORY4_TOKEN:
+ val = CAT4_MIN_VAL + read_coeff(cat4_prob, 4, r);
+ break;
+ case CATEGORY5_TOKEN:
+ val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, r);
+ break;
+ case CATEGORY6_TOKEN: {
+ const int skip_bits = TX_SIZES - 1 - txsize_sqr_up_map[tx_size];
+ const uint8_t *cat6p = cat6_prob + skip_bits; // skip leading probs
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (xd->bd) {
+ case VPX_BITS_8:
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 14 - skip_bits, r);
+ break;
+ case VPX_BITS_10:
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 16 - skip_bits, r);
+ break;
+ case VPX_BITS_12:
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 18 - skip_bits, r);
+ break;
+ default: assert(0); return -1;
+ }
+#else
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 14 - skip_bits, r);
+#endif
+ break;
+ }
+ }
+ }
+#if CONFIG_NEW_QUANT
+
+ v = vp10_dequant_abscoeff_nuq(val, dqv, dqv_val);
+ v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
+#else
+#if CONFIG_AOM_QM
+ dqv = ((iqmatrix[scan[c]] * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >>
+ AOM_QM_BITS;
+#endif
+ v = (val * dqv) >> dq_shift; // dequantized magnitude; sign is read next
+#endif // CONFIG_NEW_QUANT
+
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+#if CONFIG_VP9_HIGHBITDEPTH
+ dqcoeff[scan[c]] = highbd_check_range((vp10_read_bit(r) ? -v : v), xd->bd);
+#else
+ dqcoeff[scan[c]] = check_range(vp10_read_bit(r) ? -v : v);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#else
+ dqcoeff[scan[c]] = vp10_read_bit(r) ? -v : v;
+#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
+ token_cache[scan[c]] = vp10_pt_energy_class[token];
+ ++c;
+ ctx = get_coef_context(nb, token_cache, c);
+ dqv = dq[1];
+ }
+
+ return c;
+}
+#else // !CONFIG_ANS
+static INLINE int read_coeff(const vpx_prob *const probs, int n,
+ struct AnsDecoder *const ans) {
+ int bits = 0, k = 0; // bits: accumulator filled most significant bit first
+ for (; k < n; ++k) bits = (bits << 1) | uabs_read(ans, probs[k]);
+ return bits;
+}
+
+static int decode_coefs_ans(const MACROBLOCKD *const xd, PLANE_TYPE type,
+ tran_low_t *dqcoeff, TX_SIZE tx_size,
+ TX_TYPE tx_type, const int16_t *dq,
+#if CONFIG_NEW_QUANT
+ dequant_val_type_nuq *dq_val,
+#endif // CONFIG_NEW_QUANT
+ int ctx, const int16_t *scan, const int16_t *nb,
+ struct AnsDecoder *const ans) { // ANS variant; returns the end position
+ FRAME_COUNTS *counts = xd->counts;
+ const int max_eob = get_tx2d_size(tx_size);
+ const FRAME_CONTEXT *const fc = xd->fc;
+ const int ref = is_inter_block(&xd->mi[0]->mbmi);
+ int band, c = 0; // c: current position in scan order
+ int skip_eob = 0; // 1 right after a zero token: no EOB check is read then
+ const int tx_size_ctx = txsize_sqr_map[tx_size];
+ const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
+ fc->coef_probs[tx_size_ctx][type][ref];
+ const rans_dec_lut(*coef_cdfs)[COEFF_CONTEXTS] =
+ fc->coef_cdfs[tx_size_ctx][type][ref];
+ const vpx_prob *prob;
+ const rans_dec_lut *cdf;
+ unsigned int(*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
+ unsigned int(*eob_branch_count)[COEFF_CONTEXTS];
+ uint8_t token_cache[MAX_TX_SQUARE];
+ const uint8_t *band_translate = get_band_translate(tx_size);
+ int dq_shift;
+ int v, token;
+ int16_t dqv = dq[0]; // dq[0] for the first position, dq[1] afterwards
+#if CONFIG_NEW_QUANT
+ const tran_low_t *dqv_val = &dq_val[0][0];
+#endif // CONFIG_NEW_QUANT
+ const uint8_t *cat1_prob;
+ const uint8_t *cat2_prob;
+ const uint8_t *cat3_prob;
+ const uint8_t *cat4_prob;
+ const uint8_t *cat5_prob;
+ const uint8_t *cat6_prob;
+
+ dq_shift = get_tx_scale(xd, tx_type, tx_size);
+
+ if (counts) { // the count tables are set, and later used, only when non-NULL
+ coef_counts = counts->coef[tx_size_ctx][type][ref];
+ eob_branch_count = counts->eob_branch[tx_size_ctx][type][ref];
+ }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->bd > VPX_BITS_8) {
+ if (xd->bd == VPX_BITS_10) {
+ cat1_prob = vp10_cat1_prob_high10;
+ cat2_prob = vp10_cat2_prob_high10;
+ cat3_prob = vp10_cat3_prob_high10;
+ cat4_prob = vp10_cat4_prob_high10;
+ cat5_prob = vp10_cat5_prob_high10;
+ cat6_prob = vp10_cat6_prob_high10;
+ } else {
+ cat1_prob = vp10_cat1_prob_high12;
+ cat2_prob = vp10_cat2_prob_high12;
+ cat3_prob = vp10_cat3_prob_high12;
+ cat4_prob = vp10_cat4_prob_high12;
+ cat5_prob = vp10_cat5_prob_high12;
+ cat6_prob = vp10_cat6_prob_high12;
+ }
+ } else {
+ cat1_prob = vp10_cat1_prob;
+ cat2_prob = vp10_cat2_prob;
+ cat3_prob = vp10_cat3_prob;
+ cat4_prob = vp10_cat4_prob;
+ cat5_prob = vp10_cat5_prob;
+ cat6_prob = vp10_cat6_prob;
+ }
+#else
+ cat1_prob = vp10_cat1_prob;
+ cat2_prob = vp10_cat2_prob;
+ cat3_prob = vp10_cat3_prob;
+ cat4_prob = vp10_cat4_prob;
+ cat5_prob = vp10_cat5_prob;
+ cat6_prob = vp10_cat6_prob;
+#endif
+
+ while (c < max_eob) {
+ int val = -1;
+ band = *band_translate++;
+ prob = coef_probs[band][ctx];
+ if (!skip_eob) {
+ if (counts) ++eob_branch_count[band][ctx];
+ if (!uabs_read(ans, prob[EOB_CONTEXT_NODE])) { // end-of-block: stop
+ INCREMENT_COUNT(EOB_MODEL_TOKEN);
+ break;
+ }
+ }
+
+#if CONFIG_NEW_QUANT
+ dqv_val = &dq_val[band][0];
+#endif // CONFIG_NEW_QUANT
+
+ cdf = &coef_cdfs[band][ctx];
+ token = ZERO_TOKEN + rans_read(ans, *cdf);
+ if (token == ZERO_TOKEN) {
+ INCREMENT_COUNT(ZERO_TOKEN);
+ token_cache[scan[c]] = 0;
+ skip_eob = 1;
+ } else {
+ INCREMENT_COUNT(ONE_TOKEN + (token > ONE_TOKEN));
+ switch (token) {
+ case ONE_TOKEN:
+ case TWO_TOKEN:
+ case THREE_TOKEN:
+ case FOUR_TOKEN: val = token; break;
+ case CATEGORY1_TOKEN:
+ val = CAT1_MIN_VAL + read_coeff(cat1_prob, 1, ans);
+ break;
+ case CATEGORY2_TOKEN:
+ val = CAT2_MIN_VAL + read_coeff(cat2_prob, 2, ans);
+ break;
+ case CATEGORY3_TOKEN:
+ val = CAT3_MIN_VAL + read_coeff(cat3_prob, 3, ans);
+ break;
+ case CATEGORY4_TOKEN:
+ val = CAT4_MIN_VAL + read_coeff(cat4_prob, 4, ans);
+ break;
+ case CATEGORY5_TOKEN:
+ val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, ans);
+ break;
+ case CATEGORY6_TOKEN: {
+ const int skip_bits = TX_SIZES - 1 - txsize_sqr_up_map[tx_size];
+ const uint8_t *cat6p = cat6_prob + skip_bits; // skip leading probs
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (xd->bd) {
+ case VPX_BITS_8:
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 14 - skip_bits, ans);
+ break;
+ case VPX_BITS_10:
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 16 - skip_bits, ans);
+ break;
+ case VPX_BITS_12:
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 18 - skip_bits, ans);
+ break;
+ default: assert(0); return -1;
+ }
+#else
+ val = CAT6_MIN_VAL + read_coeff(cat6p, 14 - skip_bits, ans);
+#endif
+ } break;
+ }
+#if CONFIG_NEW_QUANT
+ v = vp10_dequant_abscoeff_nuq(val, dqv, dqv_val);
+ v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
+#else
+ v = (val * dqv) >> dq_shift; // dequantized magnitude; sign is read next
+#endif // CONFIG_NEW_QUANT
+
+#if CONFIG_COEFFICIENT_RANGE_CHECKING
+#if CONFIG_VP9_HIGHBITDEPTH
+ dqcoeff[scan[c]] =
+ highbd_check_range((uabs_read_bit(ans) ? -v : v), xd->bd);
+#else
+ dqcoeff[scan[c]] = check_range(uabs_read_bit(ans) ? -v : v);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#else
+ dqcoeff[scan[c]] = uabs_read_bit(ans) ? -v : v;
+#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
+ token_cache[scan[c]] = vp10_pt_energy_class[token];
+ skip_eob = 0;
+ }
+ ++c;
+ ctx = get_coef_context(nb, token_cache, c);
+ dqv = dq[1];
+ }
+
+ return c;
+}
+#endif // !CONFIG_ANS
+
+// TODO(slavarnway): Decoder-side version of vp10_set_context. Modify
+// vp10_set_context after testing is complete, then delete this version.
+// Sets the above/left entropy contexts covered by one transform block.
+static void dec_set_contexts(const MACROBLOCKD *xd,
+ struct macroblockd_plane *pd, TX_SIZE tx_size,
+ int has_eob, int aoff, int loff) {
+ ENTROPY_CONTEXT *const a = pd->above_context + aoff;
+ ENTROPY_CONTEXT *const l = pd->left_context + loff;
+ const int tx_w_in_blocks = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int tx_h_in_blocks = num_4x4_blocks_high_txsize_lookup[tx_size];
+
+ // above
+ if (has_eob && xd->mb_to_right_edge < 0) {
+ int i;
+ const int blocks_wide = // n4_w reduced by the (negative) shifted edge
+ pd->n4_w + (xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ int above_contexts = tx_w_in_blocks;
+ if (above_contexts + aoff > blocks_wide)
+ above_contexts = blocks_wide - aoff;
+
+ for (i = 0; i < above_contexts; ++i) a[i] = has_eob;
+ for (i = above_contexts; i < tx_w_in_blocks; ++i) a[i] = 0; // past the edge
+ } else {
+ memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_w_in_blocks);
+ }
+
+ // left
+ if (has_eob && xd->mb_to_bottom_edge < 0) {
+ int i;
+ const int blocks_high = // n4_h reduced by the (negative) shifted edge
+ pd->n4_h + (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+ int left_contexts = tx_h_in_blocks;
+ if (left_contexts + loff > blocks_high) left_contexts = blocks_high - loff;
+
+ for (i = 0; i < left_contexts; ++i) l[i] = has_eob;
+ for (i = left_contexts; i < tx_h_in_blocks; ++i) l[i] = 0; // past the edge
+ } else {
+ memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_h_in_blocks);
+ }
+}
+
+void vp10_decode_palette_tokens(MACROBLOCKD *const xd, int plane,
+ vp10_reader *r) {
+ const int pidx = plane != 0; // plane 0 -> entry 0, any other plane -> entry 1
+ struct macroblockd_plane *const pd = &xd->plane[pidx];
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ const int rows = (4 * num_4x4_blocks_high_lookup[bsize]) >> pd->subsampling_y;
+ const int cols = (4 * num_4x4_blocks_wide_lookup[bsize]) >> pd->subsampling_x;
+ const int n = mbmi->palette_mode_info.palette_size[pidx];
+ uint8_t *const color_map = pd->color_index_map;
+ const vpx_prob (*const prob)[PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] =
+ plane ? vp10_default_palette_uv_color_prob
+ : vp10_default_palette_y_color_prob;
+ int color_order[PALETTE_MAX_SIZE];
+ int row, col;
+
+ for (row = 0; row < rows; ++row) {
+ // Position (0, 0) is not decoded here, so the first row starts at column 1.
+ for (col = (row == 0); col < cols; ++col) {
+ const int color_ctx = vp10_get_palette_color_context(
+ color_map, cols, row, col, n, color_order);
+ const int color_idx = vp10_read_tree(r, vp10_palette_color_tree[n - 2],
+ prob[n - 2][color_ctx]);
+ assert(color_idx >= 0 && color_idx < n);
+ color_map[row * cols + col] = color_order[color_idx];
+ }
+ }
+}
+
+int vp10_decode_block_tokens(MACROBLOCKD *const xd, int plane,
+ const scan_order *sc, int x, int y,
+ TX_SIZE tx_size, TX_TYPE tx_type,
+#if CONFIG_ANS
+ struct AnsDecoder *const r,
+#else
+ vp10_reader *r,
+#endif // CONFIG_ANS
+ int seg_id) { // decodes one transform block of the plane; returns its eob
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int16_t *const dequant = pd->seg_dequant[seg_id];
+ const int ctx = // initial coefficient context from the above/left contexts
+ get_entropy_context(tx_size, pd->above_context + x, pd->left_context + y);
+#if CONFIG_NEW_QUANT
+ int dq = get_dq_profile_from_ctx(ctx);
+#endif // CONFIG_NEW_QUANT
+
+#if !CONFIG_ANS
+#if CONFIG_AOM_QM
+ const int eob =
+ decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, tx_type, dequant,
+ ctx, sc->scan, sc->neighbors, r, pd->seg_iqmatrix[seg_id]);
+#else
+ const int eob =
+ decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, tx_type, dequant,
+#if CONFIG_NEW_QUANT
+ pd->seg_dequant_nuq[seg_id][dq],
+#endif // CONFIG_NEW_QUANT
+ ctx, sc->scan, sc->neighbors, r);
+#endif // CONFIG_AOM_QM
+#else
+ const int eob = decode_coefs_ans(xd, pd->plane_type, pd->dqcoeff, tx_size,
+ tx_type, dequant,
+#if CONFIG_NEW_QUANT
+ pd->seg_dequant_nuq[seg_id][dq],
+#endif // CONFIG_NEW_QUANT
+ ctx, sc->scan, sc->neighbors, r);
+#endif // !CONFIG_ANS
+ dec_set_contexts(xd, pd, tx_size, eob > 0, x, y); // record "has coefficients"
+ /*
+ vp10_set_contexts(xd, pd,
+ get_plane_block_size(xd->mi[0]->mbmi.sb_type, pd),
+ tx_size, eob > 0, x, y);
+ */
+ return eob;
+}
diff --git a/av1/decoder/detokenize.h b/av1/decoder/detokenize.h
new file mode 100644
index 0000000..279c193
--- /dev/null
+++ b/av1/decoder/detokenize.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_DECODER_DETOKENIZE_H_
+#define VP10_DECODER_DETOKENIZE_H_
+
+#include "av1/decoder/decoder.h"
+#include "av1/common/ans.h"
+#include "av1/common/scan.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_decode_palette_tokens(MACROBLOCKD *const xd, int plane,
+ vp10_reader *r);
+int vp10_decode_block_tokens(MACROBLOCKD *const xd, int plane,
+ const scan_order *sc, int x, int y,
+ TX_SIZE tx_size, TX_TYPE tx_type,
+#if CONFIG_ANS
+ struct AnsDecoder *const r,
+#else
+ vp10_reader *r,
+#endif // CONFIG_ANS
+ int seg_id);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_DECODER_DETOKENIZE_H_
diff --git a/av1/decoder/dsubexp.c b/av1/decoder/dsubexp.c
new file mode 100644
index 0000000..146a1de
--- /dev/null
+++ b/av1/decoder/dsubexp.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/entropy.h"
+
+#include "av1/decoder/dsubexp.h"
+
+static int inv_recenter_nonneg(int v, int m) {
+  if (v > 2 * m) return v;  // Beyond the folded range: passed through as is.
+  if (v & 1) return m - ((v + 1) >> 1);  // Odd codes land below m.
+  return m + (v >> 1);  // Even codes land at or above m.
+}
+
+static int decode_uniform(vp10_reader *r) {
+  enum { kBits = 8, kCutoff = (1 << kBits) - 190 };
+  const int head = vp10_read_literal(r, kBits - 1);
+  if (head < kCutoff) return head;  // Short code: the literal is the value.
+  return (head << 1) - kCutoff + vp10_read_bit(r);  // One more bit is read.
+}
+
+static int inv_remap_prob(int v, int m) {
+  // Read-only lookup table: never written, so it is declared const.
+  static const uint8_t inv_map_table[MAX_PROB - 1] = {
+    7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, 189,
+    202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11,
+    12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27,
+    28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+    44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60,
+    61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 73, 74, 75, 76,
+    77, 78, 79, 80, 81, 82, 83, 84, 86, 87, 88, 89, 90, 91, 92,
+    93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
+    109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 125,
+    126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141,
+    142, 143, 144, 145, 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157,
+    158, 159, 160, 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173,
+    174, 175, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190,
+    191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
+    207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, 222,
+    223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238,
+    239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
+  };
+  // v is the decoded table index; m is the probability being updated.
+  assert(v < (int)(sizeof(inv_map_table) / sizeof(inv_map_table[0])));
+  v = inv_map_table[v];
+  m--;
+  // Lower half recenters upward from 1; upper half mirrors down from MAX_PROB.
+  if ((m << 1) <= MAX_PROB) return 1 + inv_recenter_nonneg(v, m);
+  return MAX_PROB - inv_recenter_nonneg(v, MAX_PROB - 1 - m);
+}
+
+static int decode_term_subexp(vp10_reader *r) {
+  if (vp10_read_bit(r) == 0) return vp10_read_literal(r, 4);  // 4-bit literal.
+  if (vp10_read_bit(r) == 0) return 16 + vp10_read_literal(r, 4);
+  if (vp10_read_bit(r) == 0) return 32 + vp10_read_literal(r, 5);
+  return 64 + decode_uniform(r);  // Three set escape bits: uniform code.
+}
+
+void vp10_diff_update_prob(vp10_reader *r, vpx_prob *p) {
+  int delta_index;
+  if (!vp10_read(r, DIFF_UPDATE_PROB)) return;  // No update: *p is kept.
+  delta_index = decode_term_subexp(r);
+  *p = (vpx_prob)inv_remap_prob(delta_index, *p);
+}
+
+int vp10_read_primitive_symmetric(vp10_reader *r, unsigned int mag_bits) {
+  int negative, magnitude;
+  // A cleared first bit codes the value zero; nothing else is read.
+  if (!vp10_read_bit(r)) return 0;
+  // The sign bit is read before the magnitude, which is stored minus one.
+  negative = vp10_read_bit(r);
+  magnitude = vp10_read_literal(r, mag_bits) + 1;
+  return negative > 0 ? -magnitude : magnitude;
+}
diff --git a/av1/decoder/dsubexp.h b/av1/decoder/dsubexp.h
new file mode 100644
index 0000000..b8980f7
--- /dev/null
+++ b/av1/decoder/dsubexp.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_DECODER_DSUBEXP_H_
+#define VP10_DECODER_DSUBEXP_H_
+
+#include "av1/decoder/bitreader.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_diff_update_prob(vp10_reader *r, vpx_prob *p);
+
+// mag_bits is number of bits for magnitude. The alphabet is of size
+// 2 * 2^mag_bits + 1, symmetric around 0, where one bit is used to
+// indicate 0 or non-zero, mag_bits bits are used to indicate magnitude
+// and 1 more bit for the sign if non-zero.
+int vp10_read_primitive_symmetric(vp10_reader *r, unsigned int mag_bits);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+#endif // VP10_DECODER_DSUBEXP_H_
diff --git a/av1/decoder/dthread.c b/av1/decoder/dthread.c
new file mode 100644
index 0000000..d9a2ce1
--- /dev/null
+++ b/av1/decoder/dthread.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "aom_mem/vpx_mem.h"
+#include "av1/common/reconinter.h"
+#include "av1/decoder/dthread.h"
+#include "av1/decoder/decoder.h"
+
+// #define DEBUG_THREAD
+
+// TODO(hkuang): Clean up all the #ifdef in this file.
+void vp10_frameworker_lock_stats(VPxWorker *const worker) {
+#if !CONFIG_MULTITHREAD
+  (void)worker;  // Nothing to lock when threading is compiled out.
+#else
+  FrameWorkerData *const fw_data = (FrameWorkerData *)worker->data1;
+  pthread_mutex_lock(&fw_data->stats_mutex);
+#endif
+}
+
+void vp10_frameworker_unlock_stats(VPxWorker *const worker) {
+#if !CONFIG_MULTITHREAD
+  (void)worker;  // Nothing to unlock when threading is compiled out.
+#else
+  FrameWorkerData *const fw_data = (FrameWorkerData *)worker->data1;
+  pthread_mutex_unlock(&fw_data->stats_mutex);
+#endif
+}
+
+void vp10_frameworker_signal_stats(VPxWorker *const worker) {
+#if !CONFIG_MULTITHREAD
+  (void)worker;  // No condition variable exists without threading.
+#else
+  FrameWorkerData *const fw_data = (FrameWorkerData *)worker->data1;
+
+// TODO(hkuang): Fix the pthread_cond_broadcast in windows wrapper.
+#if !defined(_WIN32) || HAVE_PTHREAD_H
+  pthread_cond_broadcast(&fw_data->stats_cond);
+#else
+  // Windows build without pthread.h: signal one waiter instead.
+  pthread_cond_signal(&fw_data->stats_cond);
+#endif
+#endif  // CONFIG_MULTITHREAD
+}
+
+// This macro prevents thread_sanitizer from reporting known concurrent writes.
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#define BUILDING_WITH_TSAN
+#endif
+#endif
+
+// TODO(hkuang): worker is used only for debug output and error reporting.
+void vp10_frameworker_wait(VPxWorker *const worker, RefCntBuffer *const ref_buf,
+                           int row) {
+#if CONFIG_MULTITHREAD
+  if (!ref_buf) return;
+
+#ifndef BUILDING_WITH_TSAN
+  // Unlocked fast path: racy by design (tsan reports it), but it avoids taking
+  // the owner's mutex when the needed rows are already decoded.
+  if (ref_buf->row >= row && ref_buf->buf.corrupted != 1) return;
+#endif
+
+  {
+    // Find the worker thread that owns the reference frame. If the reference
+    // frame has been fully decoded, it may not have owner.
+    VPxWorker *const ref_worker = ref_buf->frame_worker_owner;
+    FrameWorkerData *const ref_worker_data =
+        (FrameWorkerData *)ref_worker->data1;
+    const VP10Decoder *const pbi = ref_worker_data->pbi;
+    int ref_corrupted;
+
+#ifdef DEBUG_THREAD
+    {
+      FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
+      printf("%d %p worker is waiting for %d %p worker (%d) ref %d \r\n",
+             worker_data->worker_id, worker, ref_worker_data->worker_id,
+             ref_buf->frame_worker_owner, row, ref_buf->row);
+    }
+#endif
+
+    vp10_frameworker_lock_stats(ref_worker);
+    while (ref_buf->row < row && pbi->cur_buf == ref_buf &&
+           ref_buf->buf.corrupted != 1) {
+      pthread_cond_wait(&ref_worker_data->stats_cond,
+                        &ref_worker_data->stats_mutex);
+    }
+    // Sample under the lock so the mutex is released exactly once on all paths.
+    ref_corrupted = (ref_buf->buf.corrupted == 1);
+    vp10_frameworker_unlock_stats(ref_worker);
+    if (ref_corrupted) {
+      FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
+      vpx_internal_error(&worker_data->pbi->common.error,
+                         VPX_CODEC_CORRUPT_FRAME,
+                         "Worker %p failed to decode frame", worker);
+    }
+  }
+#else
+  (void)worker;
+  (void)ref_buf;
+  (void)row;
+#endif  // CONFIG_MULTITHREAD
+}
+
+void vp10_frameworker_broadcast(RefCntBuffer *const buf, int row) {
+#if !CONFIG_MULTITHREAD
+  (void)buf;
+  (void)row;
+#else
+  VPxWorker *const owner = buf->frame_worker_owner;
+
+#ifdef DEBUG_THREAD
+  {
+    FrameWorkerData *const owner_data = (FrameWorkerData *)owner->data1;
+    printf("%d %p worker decode to (%d) \r\n", owner_data->worker_id,
+           buf->frame_worker_owner, row);
+  }
+#endif
+
+  vp10_frameworker_lock_stats(owner);
+  buf->row = row;  // Progress is published while the stats lock is held.
+  vp10_frameworker_signal_stats(owner);
+  vp10_frameworker_unlock_stats(owner);
+#endif  // CONFIG_MULTITHREAD
+}
+
+void vp10_frameworker_copy_context(VPxWorker *const dst_worker,
+ VPxWorker *const src_worker) {
+#if CONFIG_MULTITHREAD
+ FrameWorkerData *const src_worker_data = (FrameWorkerData *)src_worker->data1;
+ FrameWorkerData *const dst_worker_data = (FrameWorkerData *)dst_worker->data1;
+ VP10_COMMON *const src_cm = &src_worker_data->pbi->common;
+ VP10_COMMON *const dst_cm = &dst_worker_data->pbi->common;
+ int i;
+
+ // Block on the source's condition variable until its context is flagged ready.
+ vp10_frameworker_lock_stats(src_worker);
+ while (!src_worker_data->frame_context_ready) {
+ pthread_cond_wait(&src_worker_data->stats_cond,
+ &src_worker_data->stats_mutex);
+ }
+
+ dst_cm->last_frame_seg_map = src_cm->seg.enabled
+ ? src_cm->current_frame_seg_map
+ : src_cm->last_frame_seg_map;
+ dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync;
+ vp10_frameworker_unlock_stats(src_worker); // Copies below run unlocked.
+
+ dst_cm->bit_depth = src_cm->bit_depth;
+#if CONFIG_VP9_HIGHBITDEPTH
+ dst_cm->use_highbitdepth = src_cm->use_highbitdepth;
+#endif
+#if CONFIG_EXT_REFS
+// TODO(zoeliu): To handle parallel decoding
+#endif // CONFIG_EXT_REFS
+ dst_cm->prev_frame =
+ src_cm->show_existing_frame ? src_cm->prev_frame : src_cm->cur_frame;
+ dst_cm->last_width =
+ !src_cm->show_existing_frame ? src_cm->width : src_cm->last_width;
+ dst_cm->last_height =
+ !src_cm->show_existing_frame ? src_cm->height : src_cm->last_height;
+ dst_cm->subsampling_x = src_cm->subsampling_x;
+ dst_cm->subsampling_y = src_cm->subsampling_y;
+ dst_cm->frame_type = src_cm->frame_type;
+ dst_cm->last_show_frame = !src_cm->show_existing_frame
+ ? src_cm->show_frame
+ : src_cm->last_show_frame;
+ for (i = 0; i < REF_FRAMES; ++i) // dst starts from the source's *next* map.
+ dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i];
+
+ memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr,
+ (MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh));
+ dst_cm->lf.last_sharpness_level = src_cm->lf.sharpness_level;
+ dst_cm->lf.filter_level = src_cm->lf.filter_level;
+ // NOTE(review): the two lengths below have no sizeof factor; this presumably
+ // relies on 1-byte delta elements - confirm against the lf struct.
+ memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, TOTAL_REFS_PER_FRAME);
+ memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
+ dst_cm->seg = src_cm->seg;
+ memcpy(dst_cm->frame_contexts, src_cm->frame_contexts,
+ FRAME_CONTEXTS * sizeof(dst_cm->frame_contexts[0]));
+#else
+ (void)dst_worker;
+ (void)src_worker;
+#endif // CONFIG_MULTITHREAD
+}
diff --git a/av1/decoder/dthread.h b/av1/decoder/dthread.h
new file mode 100644
index 0000000..ef548b6
--- /dev/null
+++ b/av1/decoder/dthread.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_DECODER_DTHREAD_H_
+#define VP10_DECODER_DTHREAD_H_
+
+#include "./vpx_config.h"
+#include "aom_util/vpx_thread.h"
+#include "aom/internal/vpx_codec_internal.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10Common;
+struct VP10Decoder;
+
+// WorkerData for the FrameWorker thread. It contains all the information of
+// the worker and decode structures for decoding a frame.
+typedef struct FrameWorkerData {
+ struct VP10Decoder *pbi; // dthread.c reads pbi->common and pbi->cur_buf.
+ const uint8_t *data;
+ const uint8_t *data_end;
+ size_t data_size;
+ void *user_priv;
+ int result;
+ int worker_id; // Printed in the DEBUG_THREAD traces.
+ int received_frame;
+
+ // scratch_buffer is used in frame parallel mode only.
+ // It is used to make a copy of the compressed data.
+ uint8_t *scratch_buffer;
+ size_t scratch_buffer_size;
+
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t stats_mutex; // Taken by vp10_frameworker_lock_stats().
+ pthread_cond_t stats_cond; // Signalled by vp10_frameworker_signal_stats().
+#endif
+
+ int frame_context_ready; // Current frame's context is ready to read.
+ int frame_decoded; // Finished decoding current frame.
+} FrameWorkerData;
+
+void vp10_frameworker_lock_stats(VPxWorker *const worker);
+void vp10_frameworker_unlock_stats(VPxWorker *const worker);
+void vp10_frameworker_signal_stats(VPxWorker *const worker);
+
+// Wait until ref_buf has been decoded to row in real pixel unit.
+// Note: worker may already finish decoding ref_buf and release it in order to
+// start decoding next frame. So need to check whether worker is still decoding
+// ref_buf.
+void vp10_frameworker_wait(VPxWorker *const worker, RefCntBuffer *const ref_buf,
+ int row);
+
+// FrameWorker broadcasts its decoding progress so other workers that are
+// waiting on it can resume decoding.
+void vp10_frameworker_broadcast(RefCntBuffer *const buf, int row);
+
+// Copy necessary decoding context from src worker to dst worker.
+void vp10_frameworker_copy_context(VPxWorker *const dst_worker,
+ VPxWorker *const src_worker);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_DECODER_DTHREAD_H_
diff --git a/av1/encoder/aq_complexity.c b/av1/encoder/aq_complexity.c
new file mode 100644
index 0000000..173556e
--- /dev/null
+++ b/av1/encoder/aq_complexity.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <math.h>
+
+#include "av1/encoder/aq_complexity.h"
+#include "av1/encoder/aq_variance.h"
+#include "av1/encoder/encodeframe.h"
+#include "av1/common/seg_common.h"
+#include "av1/encoder/segmentation.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_ports/system_state.h"
+
+#define AQ_C_SEGMENTS 5
+#define DEFAULT_AQ2_SEG 3 // Neutral Q segment
+#define AQ_C_STRENGTHS 3
+static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
+ { 1.75, 1.25, 1.05, 1.00, 0.90 }, // Rows: AQ strength; columns: segment.
+ { 2.00, 1.50, 1.15, 1.00, 0.85 },
+ { 2.50, 1.75, 1.25, 1.00, 0.80 } // Passed to vp10_compute_qdelta_by_rate().
+};
+static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
+ { 0.15, 0.30, 0.55, 2.00, 100.0 }, // Multiplies the block's target rate.
+ { 0.20, 0.40, 0.65, 2.00, 100.0 },
+ { 0.25, 0.50, 0.75, 2.00, 100.0 }
+};
+static const double aq_c_var_thresholds[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = {
+ { -4.0, -3.0, -2.0, 100.00, 100.0 }, // Added to the low-variance threshold.
+ { -3.5, -2.5, -1.5, 100.00, 100.0 },
+ { -3.0, -2.0, -1.0, 100.00, 100.0 }
+};
+
+#define DEFAULT_COMPLEXITY 64
+
+static int get_aq_c_strength(int q_index, vpx_bit_depth_t bit_depth) {
+  // Approximate base quantizer (truncated to int).
+  const int approx_q = vp10_ac_quant(q_index, 0, bit_depth) / 4;
+  return approx_q > 25 ? 2 : (approx_q > 10 ? 1 : 0);  // Row of the AQ tables.
+}
+
+void vp10_setup_in_frame_q_adj(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  struct segmentation *const seg = &cm->seg;
+  int aq_strength;
+  int segment;
+
+  // Make SURE use of floating point in this function is safe.
+  vpx_clear_system_state();
+
+  // Segments are only set up again on intra-only or error-resilient frames,
+  // on alt-ref refreshes, and on golden refreshes whose source is not an
+  // alt-ref frame. Any other frame leaves the segmentation untouched.
+  if (!frame_is_intra_only(cm) && !cm->error_resilient_mode &&
+      !cpi->refresh_alt_ref_frame &&
+      !(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))
+    return;
+
+  aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth);
+
+  // Reset every map entry to the neutral segment and clear old features.
+  memset(cpi->segmentation_map, DEFAULT_AQ2_SEG, cm->mi_rows * cm->mi_cols);
+  vp10_clearall_segfeatures(seg);
+
+  // Segmentation only makes sense if the target bits per SB is above a
+  // threshold. Below this the overheads will usually outweigh any benefit.
+  if (cpi->rc.sb64_target_rate < 256) {
+    vp10_disable_segmentation(seg);
+    return;
+  }
+
+  vp10_enable_segmentation(seg);
+
+  // Select delta coding method.
+  seg->abs_delta = SEGMENT_DELTADATA;
+
+  // The neutral segment has its Q feature off, so it uses the baseline Q.
+  vp10_disable_segfeature(seg, DEFAULT_AQ2_SEG, SEG_LVL_ALT_Q);
+
+  // Give each of the other segments its own in-frame Q adjustment.
+  for (segment = 0; segment < AQ_C_SEGMENTS; ++segment) {
+    int qindex_delta;
+
+    if (segment == DEFAULT_AQ2_SEG) continue;
+
+    qindex_delta = vp10_compute_qdelta_by_rate(
+        &cpi->rc, cm->frame_type, cm->base_qindex,
+        aq_c_q_adj_factor[aq_strength][segment], cm->bit_depth);
+    // For AQ complexity mode, we don't allow Q0 in a segment if the base
+    // Q is not 0. Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment
+    // Q delta is sometimes applied without going back around the rd loop.
+    // This could lead to an illegal combination of partition size and q.
+    if (cm->base_qindex != 0 && cm->base_qindex + qindex_delta == 0)
+      qindex_delta = 1 - cm->base_qindex;
+    if (cm->base_qindex + qindex_delta <= 0) continue;
+    vp10_enable_segfeature(seg, segment, SEG_LVL_ALT_Q);
+    vp10_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta);
+  }
+}
+
+#define DEFAULT_LV_THRESH 10.0
+#define MIN_DEFAULT_LV_THRESH 8.0
+#define VAR_STRENGTH_STEP 0.25
+// Select a segment for the current block.
+// The choice of segment for a block depends on the ratio of the projected
+// bits for the block vs a target average and its spatial complexity.
+void vp10_caq_select_segment(VP10_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
+                             int mi_row, int mi_col, int projected_rate) {
+  VP10_COMMON *const cm = &cpi->common;
+
+  const int mi_offset = mi_row * cm->mi_cols + mi_col;
+  // Block extent clipped to what remains of the frame from (mi_row, mi_col).
+  const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
+  const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]);
+  // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh).
+  // It is converted to bits * 256 units. The product is formed in 64 bits
+  // because sb64_target_rate * xmis * ymis * 256 can exceed INT_MAX.
+  const int64_t target_rate =
+      ((int64_t)cpi->rc.sb64_target_rate * xmis * ymis * 256) /
+      (cm->mib_size * cm->mib_size);
+  const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth);
+  double logvar;
+  double low_var_thresh;
+  unsigned char segment;
+  int x, y;
+  int i;
+
+  // Make sure use of floating point below is safe.
+  vpx_clear_system_state();
+  low_var_thresh = (cpi->oxcf.pass == 2) ? VPXMAX(cpi->twopass.mb_av_energy,
+                                                  MIN_DEFAULT_LV_THRESH)
+                                         : DEFAULT_LV_THRESH;
+
+  vp10_setup_src_planes(mb, cpi->Source, mi_row, mi_col);
+  logvar = vp10_log_block_var(cpi, mb, bs);
+
+  // Pick the first segment whose rate and variance limits both admit the block.
+  segment = AQ_C_SEGMENTS - 1;  // Just in case no break out below.
+  for (i = 0; i < AQ_C_SEGMENTS; ++i) {
+    // Test rate against a threshold value and variance against a threshold.
+    // Increasing segment number (higher variance and complexity) = higher Q.
+    if ((projected_rate < target_rate * aq_c_transitions[aq_strength][i]) &&
+        (logvar < (low_var_thresh + aq_c_var_thresholds[aq_strength][i]))) {
+      segment = i;
+      break;
+    }
+  }
+
+  // Fill in the entries in the segment map corresponding to this SB64.
+  for (y = 0; y < ymis; y++) {
+    for (x = 0; x < xmis; x++) {
+      cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment;
+    }
+  }
+}
diff --git a/av1/encoder/aq_complexity.h b/av1/encoder/aq_complexity.h
new file mode 100644
index 0000000..db85406
--- /dev/null
+++ b/av1/encoder/aq_complexity.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_AQ_COMPLEXITY_H_
+#define VP10_ENCODER_AQ_COMPLEXITY_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "av1/common/enums.h"
+
+struct VP10_COMP;
+struct macroblock;
+
+// Select a segment for the current Block.
+void vp10_caq_select_segment(struct VP10_COMP *cpi, struct macroblock *,
+ BLOCK_SIZE bs, int mi_row, int mi_col,
+ int projected_rate);
+
+// This function sets up a set of segments with delta Q values around
+// the baseline frame quantizer.
+void vp10_setup_in_frame_q_adj(struct VP10_COMP *cpi);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_AQ_COMPLEXITY_H_
diff --git a/av1/encoder/aq_cyclicrefresh.c b/av1/encoder/aq_cyclicrefresh.c
new file mode 100644
index 0000000..b7897f9
--- /dev/null
+++ b/av1/encoder/aq_cyclicrefresh.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <math.h>
+
+#include "av1/common/seg_common.h"
+#include "av1/encoder/aq_cyclicrefresh.h"
+#include "av1/encoder/ratectrl.h"
+#include "av1/encoder/segmentation.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_ports/system_state.h"
+
+struct CYCLIC_REFRESH {
+ // Percentage of blocks per frame that are targeted as candidates
+ // for cyclic refresh.
+ int percent_refresh;
+ // Maximum q-delta as percentage of base q.
+ int max_qdelta_perc;
+ // Superblock starting index for cycling through the frame.
+ int sb_index;
+ // Controls how long block will need to wait to be refreshed again, in
+ // excess of the cycle time, i.e., in the case of all zero motion, block
+ // will be refreshed every (100/percent_refresh + time_for_refresh) frames.
+ int time_for_refresh;
+ // Target number of (8x8) blocks that are set for delta-q.
+ int target_num_seg_blocks;
+ // Actual number of (8x8) blocks that were applied delta-q.
+ int actual_num_seg1_blocks;
+ int actual_num_seg2_blocks;
+ // RD mult. parameters for segment 1.
+ int rdmult;
+ // Cyclic refresh map.
+ signed char *map;
+ // Map of the last q a block was coded at.
+ uint8_t *last_coded_q_map;
+ // Thresholds applied to the projected rate/distortion of the coding block,
+ // when deciding whether block should be refreshed.
+ int64_t thresh_rate_sb;
+ int64_t thresh_dist_sb;
+ // Threshold applied to the motion vector (in units of 1/8 pel) of the
+ // coding block, when deciding whether block should be refreshed.
+ int16_t motion_thresh;
+ // Rate target ratio to set q delta.
+ double rate_ratio_qdelta;
+ // Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2.
+ int rate_boost_fac;
+ double low_content_avg; // NOTE(review): not used in the code visible here.
+ int qindex_delta[3]; // Q delta per segment, indexed by mbmi->segment_id.
+};
+
+CYCLIC_REFRESH *vp10_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
+  size_t q_map_bytes;
+  CYCLIC_REFRESH *const refresh = vpx_calloc(1, sizeof(*refresh));
+  if (refresh == NULL) return NULL;
+
+  // One zero-initialized map entry per mi position.
+  refresh->map = vpx_calloc(mi_rows * mi_cols, sizeof(*refresh->map));
+  if (refresh->map == NULL) goto fail;
+
+  q_map_bytes = mi_rows * mi_cols * sizeof(*refresh->last_coded_q_map);
+  refresh->last_coded_q_map = vpx_malloc(q_map_bytes);
+  if (refresh->last_coded_q_map == NULL) goto fail;
+  // Every entry starts at MAXQ, which must fit in the byte memset() writes.
+  assert(MAXQ <= 255);
+  memset(refresh->last_coded_q_map, MAXQ, q_map_bytes);
+  return refresh;
+
+fail:
+  vp10_cyclic_refresh_free(refresh);  // Also frees any map already allocated.
+  return NULL;
+}
+
+void vp10_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
+  if (cr != NULL) vpx_free(cr->map);  // A NULL cr is accepted and ignored.
+  if (cr != NULL) vpx_free(cr->last_coded_q_map);
+  vpx_free(cr);
+}
+
+// Check if we should turn off cyclic refresh based on bitrate condition.
+static int apply_cyclic_refresh_bitrate(const VP10_COMMON *cm,
+                                        const RATE_CONTROL *rc) {
+  // Cyclic refresh is switched off when the bits available per frame are not
+  // sufficiently larger than the bit cost of segmentation. That cost should
+  // scale with the number of seg blocks, so available bits are compared to
+  // the block count:
+  //   average bits available per frame = avg_frame_bandwidth
+  //   number of (8x8) blocks in frame  = mi_rows * mi_cols
+  const float factor = 0.25;
+  const int number_blocks = cm->mi_rows * cm->mi_cols;
+  // At 30fps this turns off at roughly 12kbps for CIF, 36kbps for VGA and
+  // 100kbps for HD/720p.
+  const int starved = rc->avg_frame_bandwidth < factor * number_blocks;
+  // Very small frames (below QCIF) are excluded too, so that the fraction of
+  // superblocks refreshed per frame does not become too large.
+  const int tiny_frame = number_blocks / 64 < 5;
+
+  return !(starved || tiny_frame);
+}
+
+// Check if this coding block, of size bsize, should be considered for refresh
+// (lower-qp coding). Decision can be based on various factors, such as
+// size of the coding block (i.e., below min_block size rejected), coding
+// mode, and rate/distortion.
+static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
+                                const MB_MODE_INFO *mbmi, int64_t rate,
+                                int64_t dist, int bsize) {
+  const MV mv = mbmi->mv[0].as_mv;
+  const int limit = cr->motion_thresh;
+  const int large_mv = mv.row > limit || mv.row < -limit ||
+                       mv.col > limit || mv.col < -limit;
+
+  // Stay in the base segment when the projected distortion is above the
+  // threshold and the block either moves a lot or is intra coded.
+  if (dist > cr->thresh_dist_sb && (large_mv || !is_inter_block(mbmi)))
+    return CR_SEGMENT_ID_BASE;
+
+  // More aggressive delta-q for bigger blocks with zero motion.
+  if (bsize >= BLOCK_16X16 && rate < cr->thresh_rate_sb &&
+      is_inter_block(mbmi) && mbmi->mv[0].as_int == 0 &&
+      cr->rate_boost_fac > 10)
+    return CR_SEGMENT_ID_BOOST2;
+
+  // Everything else is accepted for the regular refresh segment.
+  return CR_SEGMENT_ID_BOOST1;
+}
+
+// Compute delta-q for the segment.
+static int compute_deltaq(const VP10_COMP *cpi, int q, double rate_factor) {
+  const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+  // Cap on how far q may drop, as a percentage of q.
+  const int max_drop = cr->max_qdelta_perc * q / 100;
+  const int raw_delta = vp10_compute_qdelta_by_rate(
+      &cpi->rc, cpi->common.frame_type, q, rate_factor, cpi->common.bit_depth);
+
+  // Only a drop larger than the cap is clamped.
+  return (-raw_delta > max_drop) ? -max_drop : raw_delta;
+}
+
+// For the just encoded frame, estimate the bits, incorporating the delta-q
+// from non-base segment. For now ignore effect of multiple segments
+// (with different delta-q). Note this function is called in the postencode
+// (called from rc_update_rate_correction_factors()).
+int vp10_cyclic_refresh_estimate_bits_at_q(const VP10_COMP *cpi,
+                                           double correction_factor) {
+  const VP10_COMMON *const cm = &cpi->common;
+  const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+  const int mbs = cm->MBs;
+  const int num8x8bl = mbs << 2;
+  // Weight for non-base segments: use actual number of blocks refreshed in
+  // previous/just encoded frame. Note number of blocks here is in 8x8 units.
+  const double w_seg1 = (double)cr->actual_num_seg1_blocks / num8x8bl;
+  const double w_seg2 = (double)cr->actual_num_seg2_blocks / num8x8bl;
+  // Whole-frame estimates at the base q and at each boosted segment's q.
+  const int bits_base =
+      vp10_estimate_bits_at_q(cm->frame_type, cm->base_qindex, mbs,
+                              correction_factor, cm->bit_depth);
+  const int bits_seg1 = vp10_estimate_bits_at_q(
+      cm->frame_type, cm->base_qindex + cr->qindex_delta[1], mbs,
+      correction_factor, cm->bit_depth);
+  const int bits_seg2 = vp10_estimate_bits_at_q(
+      cm->frame_type, cm->base_qindex + cr->qindex_delta[2], mbs,
+      correction_factor, cm->bit_depth);
+
+  // Take segment weighted average for estimated bits; the remaining weight
+  // goes to the base segment.
+  return (int)((1.0 - w_seg1 - w_seg2) * bits_base + w_seg1 * bits_seg1 +
+               w_seg2 * bits_seg2);
+}
+
+// Prior to encoding the frame, estimate the bits per mb, for a given q = i and
+// a corresponding delta-q (for segment 1). This function is called in the
+// rc_regulate_q() to set the base qp index.
+// Note: the segment map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or
+// to 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock, prior to encoding.
+int vp10_cyclic_refresh_rc_bits_per_mb(const VP10_COMP *cpi, int i,
+                                       double correction_factor) {
+  const VP10_COMMON *const cm = &cpi->common;
+  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+  const int num8x8bl = cm->MBs << 2;
+  // Weight for segment prior to encoding: take the average of the target
+  // number for the frame to be encoded and the actual from the previous
+  // frame (both boosted segments counted together).
+  const double seg_weight =
+      (double)((cr->target_num_seg_blocks + cr->actual_num_seg1_blocks +
+                cr->actual_num_seg2_blocks) >> 1) / num8x8bl;
+  // Compute delta-q corresponding to qindex i.
+  const int deltaq = compute_deltaq(cpi, i, cr->rate_ratio_qdelta);
+  // Bits per mb at qindex i itself, and at i shifted by the segment delta.
+  const int bits_plain = vp10_rc_bits_per_mb(cm->frame_type, i,
+                                             correction_factor, cm->bit_depth);
+  const int bits_boosted =
+      vp10_rc_bits_per_mb(cm->frame_type, i + deltaq, correction_factor,
+                          cm->bit_depth);
+
+  // Take segment weighted average for bits per mb.
+  return (int)((1.0 - seg_weight) * bits_plain +
+               seg_weight * bits_boosted);
+
+}
+
+// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
+// check if we should reset the segment_id, and update the cyclic_refresh map
+// and segmentation map.
+void vp10_cyclic_refresh_update_segment(VP10_COMP *const cpi,
+ MB_MODE_INFO *const mbmi, int mi_row,
+ int mi_col, BLOCK_SIZE bsize,
+ int64_t rate, int64_t dist, int skip) {
+ const VP10_COMMON *const cm = &cpi->common;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ const int bw = num_8x8_blocks_wide_lookup[bsize];
+ const int bh = num_8x8_blocks_high_lookup[bsize];
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); // Clipped to the frame.
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); // Clipped to the frame.
+ const int block_index = mi_row * cm->mi_cols + mi_col;
+ const int refresh_this_block =
+ candidate_refresh_aq(cr, mbmi, rate, dist, bsize); // A CR_SEGMENT_ID_* value.
+ // Default is to not update the refresh map.
+ int new_map_value = cr->map[block_index];
+ int x = 0;
+ int y = 0;
+
+ // If this block is labeled for refresh, check if we should reset the
+ // segment_id.
+ if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
+ mbmi->segment_id = refresh_this_block;
+ // Reset segment_id if will be skipped.
+ if (skip) mbmi->segment_id = CR_SEGMENT_ID_BASE;
+ }
+
+ // Update the cyclic refresh map, to be used for setting segmentation map
+ // for the next frame. If the block will be refreshed this frame, mark it
+ // as clean. The magnitude of the -ve influences how long before we consider
+ // it for refresh again.
+ if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
+ new_map_value = -cr->time_for_refresh;
+ } else if (refresh_this_block) {
+ // Else if it is accepted as candidate for refresh, and has not already
+ // been refreshed (marked as 1) then mark it as a candidate for cleanup
+ // for future time (marked as 0), otherwise don't update it.
+ if (cr->map[block_index] == 1) new_map_value = 0;
+ } else {
+ // Leave it marked as block that is not candidate for refresh.
+ new_map_value = 1;
+ }
+
+ // Update entries in the cyclic refresh map with new_map_value, and
+ // copy mbmi->segment_id into global segmentation map.
+ for (y = 0; y < ymis; y++)
+ for (x = 0; x < xmis; x++) {
+ int map_offset = block_index + y * cm->mi_cols + x;
+ cr->map[map_offset] = new_map_value;
+ cpi->segmentation_map[map_offset] = mbmi->segment_id;
+ // Inter skip blocks were clearly not coded at the current qindex, so
+ // don't update the map for them. For cases where motion is non-zero or
+ // the reference frame isn't the previous frame, the previous value in
+ // the map for this spatial location is not entirely correct.
+ if ((!is_inter_block(mbmi) || !skip) &&
+ mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) {
+ cr->last_coded_q_map[map_offset] = clamp(
+ cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ);
+ } else if (is_inter_block(mbmi) && skip &&
+ mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) {
+ cr->last_coded_q_map[map_offset] = // Keep the lower of new and stored q.
+ VPXMIN(clamp(cm->base_qindex + cr->qindex_delta[mbmi->segment_id],
+ 0, MAXQ),
+ cr->last_coded_q_map[map_offset]);
+ }
+ }
+}
+
+// Update the actual number of blocks that were applied the segment delta q.
+void vp10_cyclic_refresh_postencode(VP10_COMP *const cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ unsigned char *const seg_map = cpi->segmentation_map;
+ int mi_row, mi_col;
+ cr->actual_num_seg1_blocks = 0;
+ cr->actual_num_seg2_blocks = 0;
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row++)
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
+ if (cyclic_refresh_segment_id(seg_map[mi_row * cm->mi_cols + mi_col]) ==
+ CR_SEGMENT_ID_BOOST1)
+ cr->actual_num_seg1_blocks++;
+ else if (cyclic_refresh_segment_id(
+ seg_map[mi_row * cm->mi_cols + mi_col]) ==
+ CR_SEGMENT_ID_BOOST2)
+ cr->actual_num_seg2_blocks++;
+ }
+}
+
+// Set golden frame update interval, for 1 pass CBR mode.
+void vp10_cyclic_refresh_set_golden_update(VP10_COMP *const cpi) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ // Set minimum gf_interval for GF update to a multiple (== 4) of refresh
+ // period. Depending on past encoding stats, GF flag may be reset and update
+ // may not occur until next baseline_gf_interval.
+ if (cr->percent_refresh > 0)
+ rc->baseline_gf_interval = 4 * (100 / cr->percent_refresh);
+ else
+ rc->baseline_gf_interval = 40;
+}
+
+// Update some encoding stats (from the just encoded frame). If this frame's
+// background has high motion, refresh the golden frame. Otherwise, if the
+// golden reference is to be updated check if we should NOT update the golden
+// ref.
+void vp10_cyclic_refresh_check_golden_update(VP10_COMP *const cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ int mi_row, mi_col;
+ double fraction_low = 0.0;
+ int low_content_frame = 0;
+
+ MODE_INFO **mi;
+ RATE_CONTROL *const rc = &cpi->rc;
+ const int rows = cm->mi_rows, cols = cm->mi_cols;
+ int cnt1 = 0, cnt2 = 0;
+ int force_gf_refresh = 0;
+
+ for (mi_row = 0; mi_row < rows; mi_row++) {
+ mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
+
+ for (mi_col = 0; mi_col < cols; mi_col++) {
+ int16_t abs_mvr = mi[0]->mbmi.mv[0].as_mv.row >= 0
+ ? mi[0]->mbmi.mv[0].as_mv.row
+ : -1 * mi[0]->mbmi.mv[0].as_mv.row;
+ int16_t abs_mvc = mi[0]->mbmi.mv[0].as_mv.col >= 0
+ ? mi[0]->mbmi.mv[0].as_mv.col
+ : -1 * mi[0]->mbmi.mv[0].as_mv.col;
+
+ // Calculate the motion of the background.
+ if (abs_mvr <= 16 && abs_mvc <= 16) {
+ cnt1++;  // both mv components within 16
+ if (abs_mvr == 0 && abs_mvc == 0) cnt2++;  // zero mv
+ }
+ mi++;
+
+ // Accumulate low_content_frame.
+ if (cr->map[mi_row * cols + mi_col] < 1) low_content_frame++;
+ }
+ }
+
+ // For video conference clips, if the background has high motion in current
+ // frame because of the camera movement, set this frame as the golden frame.
+ // Use 70% and 5% as the thresholds for golden frame refreshing.
+ // Also, force this frame as a golden update frame if this frame will change
+ // the resolution (resize_pending != 0).
+ if (cpi->resize_pending != 0 ||
+ (cnt1 * 100 > (70 * rows * cols) && cnt2 * 20 < cnt1)) {
+ vp10_cyclic_refresh_set_golden_update(cpi);
+ rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+
+ if (rc->frames_till_gf_update_due > rc->frames_to_key)
+ rc->frames_till_gf_update_due = rc->frames_to_key;
+ cpi->refresh_golden_frame = 1;
+ force_gf_refresh = 1;
+ }
+
+ fraction_low = (double)low_content_frame / (rows * cols);
+ // Update average.
+ cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4;
+ if (!force_gf_refresh && cpi->refresh_golden_frame == 1) {
+ // Don't update golden reference if the amount of low_content for the
+ // current encoded frame is small, or if the recursive average of the
+ // low_content over the update interval window falls below threshold.
+ if (fraction_low < 0.8 || cr->low_content_avg < 0.7)
+ cpi->refresh_golden_frame = 0;
+ // Reset for next interval.
+ cr->low_content_avg = fraction_low;
+ }
+}
+
+// Update the segmentation map, and related quantities: cyclic refresh map,
+// refresh sb_index, and target number of blocks to be refreshed.
+// The map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or to
+// 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock.
+// Blocks labeled as BOOST1 may later get set to BOOST2 (during the
+// encoding of the superblock).
+static void cyclic_refresh_update_map(VP10_COMP *const cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ unsigned char *const seg_map = cpi->segmentation_map;
+ int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame;
+ int xmis, ymis, x, y;
+ memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols);
+ sb_cols = (cm->mi_cols + cm->mib_size - 1) / cm->mib_size;
+ sb_rows = (cm->mi_rows + cm->mib_size - 1) / cm->mib_size;
+ sbs_in_frame = sb_cols * sb_rows;
+ // Number of target blocks to get the q delta (segment 1).
+ block_count = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100;
+ // Set the segmentation map: cycle through the superblocks, starting at
+ // cr->sb_index, and stopping when either block_count blocks have been found
+ // to be refreshed, or we have passed through whole frame.
+ assert(cr->sb_index < sbs_in_frame);
+ i = cr->sb_index;
+ cr->target_num_seg_blocks = 0;
+ do {
+ int sum_map = 0;
+ // Get the mi_row/mi_col corresponding to superblock index i.
+ int sb_row_index = (i / sb_cols);
+ int sb_col_index = i - sb_row_index * sb_cols;
+ int mi_row = sb_row_index * cm->mib_size;
+ int mi_col = sb_col_index * cm->mib_size;
+ int qindex_thresh =
+ cpi->oxcf.content == VPX_CONTENT_SCREEN
+ ? vp10_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
+ : 0;
+ assert(mi_row >= 0 && mi_row < cm->mi_rows);
+ assert(mi_col >= 0 && mi_col < cm->mi_cols);
+ bl_index = mi_row * cm->mi_cols + mi_col;
+ // Loop through all MI blocks in superblock and update map.
+ xmis = VPXMIN(cm->mi_cols - mi_col, cm->mib_size);
+ ymis = VPXMIN(cm->mi_rows - mi_row, cm->mib_size);
+ for (y = 0; y < ymis; y++) {
+ for (x = 0; x < xmis; x++) {
+ const int bl_index2 = bl_index + y * cm->mi_cols + x;
+ // If the block is a candidate for clean up then mark it
+ // for possible boost/refresh (segment 1). The segment id may get
+ // reset to 0 later if block gets coded anything other than ZEROMV.
+ if (cr->map[bl_index2] == 0) {
+ if (cr->last_coded_q_map[bl_index2] > qindex_thresh) sum_map++;
+ } else if (cr->map[bl_index2] < 0) {
+ cr->map[bl_index2]++;
+ }
+ }
+ }
+ // Enforce constant segment over superblock.
+ // If segment is at least half of superblock, set to 1.
+ if (sum_map >= xmis * ymis / 2) {
+ for (y = 0; y < ymis; y++)
+ for (x = 0; x < xmis; x++) {
+ seg_map[bl_index + y * cm->mi_cols + x] = CR_SEGMENT_ID_BOOST1;
+ }
+ cr->target_num_seg_blocks += xmis * ymis;
+ }
+ i++;
+ if (i == sbs_in_frame) {
+ i = 0;
+ }
+ } while (cr->target_num_seg_blocks < block_count && i != cr->sb_index);
+ cr->sb_index = i;
+}
+
+// Set cyclic refresh parameters.
+void vp10_cyclic_refresh_update_parameters(VP10_COMP *const cpi) {
+ const RATE_CONTROL *const rc = &cpi->rc;
+ const VP10_COMMON *const cm = &cpi->common;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ cr->percent_refresh = 10;
+ cr->max_qdelta_perc = 50;
+ cr->time_for_refresh = 0;
+ // Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4)
+ // periods of the refresh cycle, after a key frame.
+ if (rc->frames_since_key < 4 * cr->percent_refresh)
+ cr->rate_ratio_qdelta = 3.0;
+ else
+ cr->rate_ratio_qdelta = 2.0;
+ // Adjust some parameters for low resolutions at low bitrates.
+ if (cm->width <= 352 && cm->height <= 288 && rc->avg_frame_bandwidth < 3400) {
+ cr->motion_thresh = 4;
+ cr->rate_boost_fac = 10;
+ } else {
+ cr->motion_thresh = 32;
+ cr->rate_boost_fac = 17;
+ }
+}
+
+// Setup cyclic background refresh: set delta q and segmentation map.
+void vp10_cyclic_refresh_setup(VP10_COMP *const cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ struct segmentation *const seg = &cm->seg;
+ const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
+ if (cm->current_video_frame == 0) cr->low_content_avg = 0.0;
+ // Don't apply refresh on key frame or enhancement layer frames.
+ if (!apply_cyclic_refresh || cm->frame_type == KEY_FRAME) {
+ // Set segmentation map to 0 and disable.
+ unsigned char *const seg_map = cpi->segmentation_map;
+ memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
+ vp10_disable_segmentation(&cm->seg);
+ if (cm->frame_type == KEY_FRAME) {
+ memset(cr->last_coded_q_map, MAXQ,
+ cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
+ cr->sb_index = 0;
+ }
+ return;
+ } else {
+ int qindex_delta = 0;
+ int qindex2;
+ const double q = vp10_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
+ vpx_clear_system_state();
+ // Set rate threshold to some multiple (set to 4 for now) of the target
+ // rate (target is given by sb64_target_rate and scaled by 256).
+ cr->thresh_rate_sb = ((int64_t)(rc->sb64_target_rate) << 8) << 2;
+ // Distortion threshold, quadratic in Q, scale factor to be adjusted.
+ // q will not exceed 457, so (q * q) is within 32bit; see:
+ // vp10_convert_qindex_to_q(), vp10_ac_quant(), ac_qlookup*[].
+ cr->thresh_dist_sb = ((int64_t)(q * q)) << 2;
+
+ // Set up segmentation.
+ // Clear down the segment map.
+ vp10_enable_segmentation(&cm->seg);
+ vp10_clearall_segfeatures(seg);
+ // Select delta coding method.
+ seg->abs_delta = SEGMENT_DELTADATA;
+
+ // Note: setting temporal_update has no effect, as the seg-map coding method
+ // (temporal or spatial) is determined in
+ // vp10_choose_segmap_coding_method(),
+ // based on the coding cost of each method. For error_resilient mode on the
+ // last_frame_seg_map is set to 0, so if temporal coding is used, it is
+ // relative to 0 previous map.
+ // seg->temporal_update = 0;
+
+ // Segment BASE "Q" feature is disabled so it defaults to the baseline Q.
+ vp10_disable_segfeature(seg, CR_SEGMENT_ID_BASE, SEG_LVL_ALT_Q);
+ // Use segment BOOST1 for in-frame Q adjustment.
+ vp10_enable_segfeature(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q);
+ // Use segment BOOST2 for more aggressive in-frame Q adjustment.
+ vp10_enable_segfeature(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q);
+
+ // Set the q delta for segment BOOST1.
+ qindex_delta = compute_deltaq(cpi, cm->base_qindex, cr->rate_ratio_qdelta);
+ cr->qindex_delta[1] = qindex_delta;
+
+ // Compute rd-mult for segment BOOST1.
+ qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ);
+
+ cr->rdmult = vp10_compute_rd_mult(cpi, qindex2);
+
+ vp10_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, qindex_delta);
+
+ // Set a more aggressive (higher) q delta for segment BOOST2.
+ qindex_delta = compute_deltaq(
+ cpi, cm->base_qindex,
+ VPXMIN(CR_MAX_RATE_TARGET_RATIO,
+ 0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta));
+ cr->qindex_delta[2] = qindex_delta;
+ vp10_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);
+
+ // Update the segmentation and refresh map.
+ cyclic_refresh_update_map(cpi);
+ }
+}
+
+int vp10_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) {
+ return cr->rdmult;
+}
+
+void vp10_cyclic_refresh_reset_resize(VP10_COMP *const cpi) {
+ const VP10_COMMON *const cm = &cpi->common;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
+ cr->sb_index = 0;
+ cpi->refresh_golden_frame = 1;
+}
diff --git a/av1/encoder/aq_cyclicrefresh.h b/av1/encoder/aq_cyclicrefresh.h
new file mode 100644
index 0000000..24491fc
--- /dev/null
+++ b/av1/encoder/aq_cyclicrefresh.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_AQ_CYCLICREFRESH_H_
+#define VP10_ENCODER_AQ_CYCLICREFRESH_H_
+
+#include "av1/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The segment ids used in cyclic refresh: from base (no boost) to increasing
+// boost (higher delta-qp).
+#define CR_SEGMENT_ID_BASE 0
+#define CR_SEGMENT_ID_BOOST1 1
+#define CR_SEGMENT_ID_BOOST2 2
+
+// Maximum rate target ratio for setting segment delta-qp.
+#define CR_MAX_RATE_TARGET_RATIO 4.0
+
+struct VP10_COMP;
+
+struct CYCLIC_REFRESH;
+typedef struct CYCLIC_REFRESH CYCLIC_REFRESH;
+
+CYCLIC_REFRESH *vp10_cyclic_refresh_alloc(int mi_rows, int mi_cols);
+
+void vp10_cyclic_refresh_free(CYCLIC_REFRESH *cr);
+
+// Estimate the bits, incorporating the delta-q from segment 1, after encoding
+// the frame.
+int vp10_cyclic_refresh_estimate_bits_at_q(const struct VP10_COMP *cpi,
+ double correction_factor);
+
+// Estimate the bits per mb, for a given q = i and a corresponding delta-q
+// (for segment 1), prior to encoding the frame.
+int vp10_cyclic_refresh_rc_bits_per_mb(const struct VP10_COMP *cpi, int i,
+ double correction_factor);
+
+// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
+// check if we should reset the segment_id, and update the cyclic_refresh map
+// and segmentation map.
+void vp10_cyclic_refresh_update_segment(struct VP10_COMP *const cpi,
+ MB_MODE_INFO *const mbmi, int mi_row,
+ int mi_col, BLOCK_SIZE bsize,
+ int64_t rate, int64_t dist, int skip);
+
+// Update the segmentation map and related quantities. NOTE(review): the only
+// implementation seen is static cyclic_refresh_update_map(); confirm or drop.
+void vp10_cyclic_refresh_update__map(struct VP10_COMP *const cpi);
+
+// Update the actual number of blocks that were applied the segment delta q.
+void vp10_cyclic_refresh_postencode(struct VP10_COMP *const cpi);
+
+// Set golden frame update interval, for 1 pass CBR mode.
+void vp10_cyclic_refresh_set_golden_update(struct VP10_COMP *const cpi);
+
+// Check if we should not update golden reference, based on past refresh stats.
+void vp10_cyclic_refresh_check_golden_update(struct VP10_COMP *const cpi);
+
+// Set/update global/frame level refresh parameters.
+void vp10_cyclic_refresh_update_parameters(struct VP10_COMP *const cpi);
+
+// Setup cyclic background refresh: set delta q and segmentation map.
+void vp10_cyclic_refresh_setup(struct VP10_COMP *const cpi);
+
+int vp10_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr);
+
+void vp10_cyclic_refresh_reset_resize(struct VP10_COMP *const cpi);
+
+static INLINE int cyclic_refresh_segment_id_boosted(int segment_id) {
+ return segment_id == CR_SEGMENT_ID_BOOST1 ||
+ segment_id == CR_SEGMENT_ID_BOOST2;
+}
+
+static INLINE int cyclic_refresh_segment_id(int segment_id) {
+  // Pass the two boost ids through unchanged; everything else is BASE.
+  switch (segment_id) {
+    case CR_SEGMENT_ID_BOOST1:
+    case CR_SEGMENT_ID_BOOST2: return segment_id;
+    default: return CR_SEGMENT_ID_BASE;
+  }
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_AQ_CYCLICREFRESH_H_
diff --git a/av1/encoder/aq_variance.c b/av1/encoder/aq_variance.c
new file mode 100644
index 0000000..2a529a1
--- /dev/null
+++ b/av1/encoder/aq_variance.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "aom_ports/mem.h"
+
+#include "av1/encoder/aq_variance.h"
+
+#include "av1/common/seg_common.h"
+#include "av1/encoder/ratectrl.h"
+#include "av1/encoder/rd.h"
+#include "av1/encoder/segmentation.h"
+#include "aom_ports/system_state.h"
+
+#define ENERGY_MIN (-4)
+#define ENERGY_MAX (1)
+#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1)
+#define ENERGY_IN_BOUNDS(energy) \
+ assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX)
+
+static const double rate_ratio[MAX_SEGMENTS] = { 2.5, 2.0, 1.5, 1.0,
+ 0.75, 1.0, 1.0, 1.0 };
+static const int segment_id[ENERGY_SPAN] = { 0, 1, 1, 2, 3, 4 };
+
+#define SEGMENT_ID(i) segment_id[(i)-ENERGY_MIN]
+
+DECLARE_ALIGNED(16, static const uint8_t, vp10_all_zeros[MAX_SB_SIZE]) = { 0 };
+#if CONFIG_VP9_HIGHBITDEPTH
+DECLARE_ALIGNED(16, static const uint16_t,
+ vp10_highbd_all_zeros[MAX_SB_SIZE]) = { 0 };
+#endif
+
+unsigned int vp10_vaq_segment_id(int energy) {
+ ENERGY_IN_BOUNDS(energy);
+ return SEGMENT_ID(energy);
+}
+
+void vp10_vaq_frame_setup(VP10_COMP *cpi) {
+ VP10_COMMON *cm = &cpi->common;
+ struct segmentation *seg = &cm->seg;
+ int i;
+
+ if (frame_is_intra_only(cm) || cm->error_resilient_mode ||
+ cpi->refresh_alt_ref_frame ||
+ (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
+ cpi->vaq_refresh = 1;
+
+ vp10_enable_segmentation(seg);
+ vp10_clearall_segfeatures(seg);
+
+ seg->abs_delta = SEGMENT_DELTADATA;
+
+ vpx_clear_system_state();
+
+ for (i = 0; i < MAX_SEGMENTS; ++i) {
+ int qindex_delta =
+ vp10_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex,
+ rate_ratio[i], cm->bit_depth);
+
+ // We don't allow qindex 0 in a segment if the base value is not 0.
+ // Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment
+ // Q delta is sometimes applied without going back around the rd loop.
+ // This could lead to an illegal combination of partition size and q.
+ if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) {
+ qindex_delta = -cm->base_qindex + 1;
+ }
+
+ // No need to enable SEG_LVL_ALT_Q for this segment.
+ if (rate_ratio[i] == 1.0) {
+ continue;
+ }
+
+ vp10_set_segdata(seg, i, SEG_LVL_ALT_Q, qindex_delta);
+ vp10_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
+ }
+ }
+}
+
+/* TODO(agrange, paulwilkins): The block_variance calls the unoptimized versions
+ * of variance() and highbd_8_variance(). It should not.
+ */
+static void aq_variance(const uint8_t *a, int a_stride, const uint8_t *b,
+ int b_stride, int w, int h, unsigned int *sse,
+ int *sum) {
+ int i, j;
+
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void aq_highbd_variance64(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride, int w, int h,
+ uint64_t *sse, uint64_t *sum) {
+ int i, j;
+
+ uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+ uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+static void aq_highbd_8_variance(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride, int w, int h,
+ unsigned int *sse, int *sum) {
+ uint64_t sse_long = 0;
+ uint64_t sum_long = 0;
+ aq_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
+ *sse = (unsigned int)sse_long;
+ *sum = (int)sum_long;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static unsigned int block_variance(VP10_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bs) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ unsigned int var, sse;
+ int right_overflow =
+ (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0;
+ int bottom_overflow =
+ (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0;
+
+ if (right_overflow || bottom_overflow) {
+ const int bw = 8 * num_8x8_blocks_wide_lookup[bs] - right_overflow;
+ const int bh = 8 * num_8x8_blocks_high_lookup[bs] - bottom_overflow;
+ int avg;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ aq_highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride,
+ CONVERT_TO_BYTEPTR(vp10_highbd_all_zeros), 0, bw, bh,
+ &sse, &avg);
+ sse >>= 2 * (xd->bd - 8);
+ avg >>= (xd->bd - 8);
+ } else {
+ aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, vp10_all_zeros,
+ 0, bw, bh, &sse, &avg);
+ }
+#else
+ aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, vp10_all_zeros, 0,
+ bw, bh, &sse, &avg);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ var = sse - (((int64_t)avg * avg) / (bw * bh));
+ return (256 * var) / (bw * bh);
+ } else {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
+ CONVERT_TO_BYTEPTR(vp10_highbd_all_zeros), 0,
+ &sse);
+ } else {
+ var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
+ vp10_all_zeros, 0, &sse);
+ }
+#else
+ var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
+ vp10_all_zeros, 0, &sse);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ return (256 * var) >> num_pels_log2_lookup[bs];
+ }
+}
+
+double vp10_log_block_var(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
+ unsigned int var = block_variance(cpi, x, bs);
+ vpx_clear_system_state();
+ return log(var + 1.0);
+}
+
+#define DEFAULT_E_MIDPOINT 10.0
+int vp10_block_energy(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
+ double energy;
+ double energy_midpoint;
+ vpx_clear_system_state();
+ energy_midpoint =
+ (cpi->oxcf.pass == 2) ? cpi->twopass.mb_av_energy : DEFAULT_E_MIDPOINT;
+ energy = vp10_log_block_var(cpi, x, bs) - energy_midpoint;
+ return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX);
+}
diff --git a/av1/encoder/aq_variance.h b/av1/encoder/aq_variance.h
new file mode 100644
index 0000000..a30a449
--- /dev/null
+++ b/av1/encoder/aq_variance.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_AQ_VARIANCE_H_
+#define VP10_ENCODER_AQ_VARIANCE_H_
+
+#include "av1/encoder/encoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+unsigned int vp10_vaq_segment_id(int energy);
+void vp10_vaq_frame_setup(VP10_COMP *cpi);
+
+int vp10_block_energy(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
+double vp10_log_block_var(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_AQ_VARIANCE_H_
diff --git a/av1/encoder/arm/neon/dct_neon.c b/av1/encoder/arm/neon/dct_neon.c
new file mode 100644
index 0000000..1d77bec
--- /dev/null
+++ b/av1/encoder/arm/neon/dct_neon.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "av1/common/blockd.h"
+#include "aom_dsp/txfm_common.h"
+
+void vp10_fdct8x8_quant_neon(
+ const int16_t *input, int stride, int16_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+ int16_t temp_buffer[64];
+ (void)coeff_ptr;
+
+ vpx_fdct8x8_neon(input, temp_buffer, stride);
+ vp10_quantize_fp_neon(temp_buffer, n_coeffs, skip_block, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr,
+ dequant_ptr, eob_ptr, scan_ptr, iscan_ptr);
+}
diff --git a/av1/encoder/arm/neon/error_neon.c b/av1/encoder/arm/neon/error_neon.c
new file mode 100644
index 0000000..34805d3
--- /dev/null
+++ b/av1/encoder/arm/neon/error_neon.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "./vp10_rtcd.h"
+
+int64_t vp10_block_error_fp_neon(const int16_t *coeff, const int16_t *dqcoeff,
+ int block_size) {
+ int64x2_t error = vdupq_n_s64(0);
+
+ assert(block_size >= 8);
+ assert((block_size % 8) == 0);
+
+ do {
+ const int16x8_t c = vld1q_s16(coeff);
+ const int16x8_t d = vld1q_s16(dqcoeff);
+ const int16x8_t diff = vsubq_s16(c, d);
+ const int16x4_t diff_lo = vget_low_s16(diff);
+ const int16x4_t diff_hi = vget_high_s16(diff);
+ // diff is 15-bits, the squares 30, so we can store 2 in 31-bits before
+ // accumulating them in 64-bits.
+ const int32x4_t err0 = vmull_s16(diff_lo, diff_lo);
+ const int32x4_t err1 = vmlal_s16(err0, diff_hi, diff_hi);
+ const int64x2_t err2 = vaddl_s32(vget_low_s32(err1), vget_high_s32(err1));
+ error = vaddq_s64(error, err2);
+ coeff += 8;
+ dqcoeff += 8;
+ block_size -= 8;
+ } while (block_size != 0);
+
+ return vgetq_lane_s64(error, 0) + vgetq_lane_s64(error, 1);
+}
diff --git a/av1/encoder/arm/neon/quantize_neon.c b/av1/encoder/arm/neon/quantize_neon.c
new file mode 100644
index 0000000..db85b4d
--- /dev/null
+++ b/av1/encoder/arm/neon/quantize_neon.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include <math.h>
+
+#include "aom_mem/vpx_mem.h"
+
+#include "av1/common/quant_common.h"
+#include "av1/common/seg_common.h"
+
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/quantize.h"
+#include "av1/encoder/rd.h"
+
+void vp10_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
+ int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
+ // TODO(jingning) Decide the need of these arguments after the
+ // quantization process is completed.
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ (void)scan;
+
+ if (!skip_block) {
+ // Quantization pass: lane 0 of the first vector uses the DC entries
+ // (index 0) of round/quant/dequant; all other lanes use the AC entries.
+ int i;
+ const int16x8_t v_zero = vdupq_n_s16(0);
+ const int16x8_t v_one = vdupq_n_s16(1);
+ int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1);
+ int16x8_t v_round = vmovq_n_s16(round_ptr[1]);
+ int16x8_t v_quant = vmovq_n_s16(quant_ptr[1]);
+ int16x8_t v_dequant = vmovq_n_s16(dequant_ptr[1]);
+ // adjust for dc
+ v_round = vsetq_lane_s16(round_ptr[0], v_round, 0);
+ v_quant = vsetq_lane_s16(quant_ptr[0], v_quant, 0);
+ v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0);
+ // process dc and the first seven ac coeffs
+ {
+ const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
+ const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]);
+ const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
+ const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+ const int32x4_t v_tmp_lo =
+ vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
+ const int32x4_t v_tmp_hi =
+ vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
+ const int16x8_t v_tmp2 =
+ vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
+ const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
+ const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
+ const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
+ const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
+ const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
+ const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
+ v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
+ vst1q_s16(&qcoeff_ptr[0], v_qcoeff);
+ vst1q_s16(&dqcoeff_ptr[0], v_dqcoeff);
+ v_round = vmovq_n_s16(round_ptr[1]);
+ v_quant = vmovq_n_s16(quant_ptr[1]);
+ v_dequant = vmovq_n_s16(dequant_ptr[1]);
+ }
+ // now process the rest of the ac coeffs
+ for (i = 8; i < count; i += 8) {
+ const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
+ const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]);
+ const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
+ const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+ const int32x4_t v_tmp_lo =
+ vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
+ const int32x4_t v_tmp_hi =
+ vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
+ const int16x8_t v_tmp2 =
+ vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
+ const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
+ const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
+ const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
+ const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
+ const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
+ const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
+ v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
+ vst1q_s16(&qcoeff_ptr[i], v_qcoeff);
+ vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff);
+ }
+ {
+ const int16x4_t v_eobmax_3210 = vmax_s16(
+ vget_low_s16(v_eobmax_76543210), vget_high_s16(v_eobmax_76543210));
+ const int64x1_t v_eobmax_xx32 =
+ vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
+ const int16x4_t v_eobmax_tmp =
+ vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
+ const int64x1_t v_eobmax_xxx3 =
+ vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
+ const int16x4_t v_eobmax_final =
+ vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
+
+ *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0);
+ }
+ } else {
+ memset(qcoeff_ptr, 0, count * sizeof(int16_t));
+ memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
+ *eob_ptr = 0;
+ }
+}
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
new file mode 100644
index 0000000..30699b4
--- /dev/null
+++ b/av1/encoder/bitstream.c
@@ -0,0 +1,3631 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "aom/vpx_encoder.h"
+#include "aom_dsp/bitwriter_buffer.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem_ops.h"
+#include "aom_ports/system_state.h"
+#include "aom_util/debug_util.h"
+
+#if CONFIG_CLPF
+#include "av1/common/clpf.h"
+#endif
+#if CONFIG_DERING
+#include "av1/common/dering.h"
+#endif // CONFIG_DERING
+#include "av1/common/entropy.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/entropymv.h"
+#include "av1/common/mvref_common.h"
+#include "av1/common/pred_common.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/seg_common.h"
+#include "av1/common/tile_common.h"
+
+#if CONFIG_ANS
+#include "av1/encoder/buf_ans.h"
+#endif // CONFIG_ANS
+#include "av1/encoder/bitstream.h"
+#include "av1/encoder/cost.h"
+#include "av1/encoder/encodemv.h"
+#include "av1/encoder/mcomp.h"
+#include "av1/encoder/segmentation.h"
+#include "av1/encoder/subexp.h"
+#include "av1/encoder/tokenize.h"
+
+static const struct vp10_token intra_mode_encodings[INTRA_MODES] = {  // entries look like { value, len } bit codes - confirm against struct vp10_token
+ { 0, 1 }, { 6, 3 }, { 28, 5 }, { 30, 5 }, { 58, 6 },
+ { 59, 6 }, { 126, 7 }, { 127, 7 }, { 62, 6 }, { 2, 2 }
+};
+#if CONFIG_EXT_INTERP
+static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
+ { { 0, 1 }, { 4, 3 }, { 6, 3 }, { 5, 3 }, { 7, 3 } };  // five entries in the EXT_INTERP build
+#else
+static const struct vp10_token switchable_interp_encodings[SWITCHABLE_FILTERS] =
+ { { 0, 1 }, { 2, 2 }, { 3, 2 } };  // three entries otherwise
+#endif // CONFIG_EXT_INTERP
+#if CONFIG_EXT_PARTITION_TYPES
+static const struct vp10_token ext_partition_encodings[EXT_PARTITION_TYPES] = {
+ { 0, 1 }, { 4, 3 }, { 12, 4 }, { 7, 3 },
+ { 10, 4 }, { 11, 4 }, { 26, 5 }, { 27, 5 }
+};
+#endif
+static const struct vp10_token partition_encodings[PARTITION_TYPES] = {
+ { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 }
+};
+#if !CONFIG_REF_MV  // only the non-REF_MV write_inter_mode() path reads this table
+static const struct vp10_token inter_mode_encodings[INTER_MODES] =
+#if CONFIG_EXT_INTER
+ { { 2, 2 }, { 6, 3 }, { 0, 1 }, { 14, 4 }, { 15, 4 } };
+#else
+ { { 2, 2 }, { 6, 3 }, { 0, 1 }, { 7, 3 } };
+#endif // CONFIG_EXT_INTER
+#endif
+#if CONFIG_EXT_INTER
+static const struct vp10_token
+ inter_compound_mode_encodings[INTER_COMPOUND_MODES] = {  // indexed by INTER_COMPOUND_OFFSET(mode)
+ { 2, 2 }, { 50, 6 }, { 51, 6 }, { 24, 5 }, { 52, 6 },
+ { 53, 6 }, { 54, 6 }, { 55, 6 }, { 0, 1 }, { 7, 3 }
+ };
+#endif // CONFIG_EXT_INTER
+static const struct vp10_token palette_size_encodings[] = {  // indexed by (palette size - 2) in write_palette_mode_info()
+ { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 62, 6 }, { 63, 6 },
+};
+static const struct vp10_token
+ palette_color_encodings[PALETTE_MAX_SIZE - 1][PALETTE_MAX_SIZE] = {  // row = (number of colors - 2), column = color token
+ { { 0, 1 }, { 1, 1 } }, // 2 colors
+ { { 0, 1 }, { 2, 2 }, { 3, 2 } }, // 3 colors
+ { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 } }, // 4 colors
+ { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 } }, // 5 colors
+ { { 0, 1 },
+ { 2, 2 },
+ { 6, 3 },
+ { 14, 4 },
+ { 30, 5 },
+ { 31, 5 } }, // 6 colors
+ { { 0, 1 },
+ { 2, 2 },
+ { 6, 3 },
+ { 14, 4 },
+ { 30, 5 },
+ { 62, 6 },
+ { 63, 6 } }, // 7 colors
+ { { 0, 1 },
+ { 2, 2 },
+ { 6, 3 },
+ { 14, 4 },
+ { 30, 5 },
+ { 62, 6 },
+ { 126, 7 },
+ { 127, 7 } }, // 8 colors
+ };
+
+static const struct vp10_token tx_size_encodings[TX_SIZES - 1][TX_SIZES] = {  // row = tx size category, column = coded tx size
+ { { 0, 1 }, { 1, 1 } }, // Max tx_size is 8X8
+ { { 0, 1 }, { 2, 2 }, { 3, 2 } }, // Max tx_size is 16X16
+ { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 } }, // Max tx_size is 32X32
+};
+
+static INLINE void write_uniform(vp10_writer *w, int n, int v) {  // Writes v with l - 1 or l bits, where l = get_unsigned_bits(n).
+ int l = get_unsigned_bits(n);
+ int m = (1 << l) - n;  // values below m use the short (l - 1 bit) form
+ if (l == 0) return;  // nothing is written when l is 0
+ if (v < m) {
+ vp10_write_literal(w, v, l - 1);
+ } else {
+ vp10_write_literal(w, m + ((v - m) >> 1), l - 1);  // l - 1 bit prefix ...
+ vp10_write_literal(w, (v - m) & 1, 1);  // ... followed by the low bit of (v - m)
+ }
+}
+
+#if CONFIG_EXT_TX  // the non-const tables below are filled in by vp10_encode_token_init()
+static struct vp10_token ext_tx_inter_encodings[EXT_TX_SETS_INTER][TX_TYPES];
+static struct vp10_token ext_tx_intra_encodings[EXT_TX_SETS_INTRA][TX_TYPES];
+#else
+static struct vp10_token ext_tx_encodings[TX_TYPES];
+#endif // CONFIG_EXT_TX
+#if CONFIG_GLOBAL_MOTION
+static struct vp10_token global_motion_types_encodings[GLOBAL_MOTION_TYPES];
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_EXT_INTRA
+static struct vp10_token intra_filter_encodings[INTRA_FILTERS];
+#endif // CONFIG_EXT_INTRA
+#if CONFIG_EXT_INTER
+static struct vp10_token interintra_mode_encodings[INTERINTRA_MODES];
+#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+static struct vp10_token motvar_encodings[MOTION_VARIATIONS];
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+void vp10_encode_token_init(void) {  // Builds the file-local token tables from their coding trees.
+#if CONFIG_EXT_TX
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {  // set 0 is skipped here, as in update_ext_tx_probs()
+ vp10_tokens_from_tree(ext_tx_inter_encodings[s], vp10_ext_tx_inter_tree[s]);
+ }
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {  // likewise starts at set 1
+ vp10_tokens_from_tree(ext_tx_intra_encodings[s], vp10_ext_tx_intra_tree[s]);
+ }
+#else
+ vp10_tokens_from_tree(ext_tx_encodings, vp10_ext_tx_tree);
+#endif // CONFIG_EXT_TX
+#if CONFIG_EXT_INTRA
+ vp10_tokens_from_tree(intra_filter_encodings, vp10_intra_filter_tree);
+#endif // CONFIG_EXT_INTRA
+#if CONFIG_EXT_INTER
+ vp10_tokens_from_tree(interintra_mode_encodings, vp10_interintra_mode_tree);
+#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ vp10_tokens_from_tree(motvar_encodings, vp10_motvar_tree);
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+#if CONFIG_GLOBAL_MOTION
+ vp10_tokens_from_tree(global_motion_types_encodings,
+ vp10_global_motion_types_tree);
+#endif // CONFIG_GLOBAL_MOTION
+}
+
+static void write_intra_mode(vp10_writer *w, PREDICTION_MODE mode,  // Writes one intra prediction mode as a tree token using probs.
+ const vpx_prob *probs) {
+ vp10_write_token(w, vp10_intra_mode_tree, probs, &intra_mode_encodings[mode]);  // table is indexed directly by mode
+}
+
+#if CONFIG_EXT_INTER
+static void write_interintra_mode(vp10_writer *w, INTERINTRA_MODE mode,  // Writes one inter-intra mode as a tree token using probs.
+ const vpx_prob *probs) {
+ vp10_write_token(w, vp10_interintra_mode_tree, probs,
+ &interintra_mode_encodings[mode]);  // table is populated by vp10_encode_token_init()
+}
+#endif // CONFIG_EXT_INTER
+
+static void write_inter_mode(VP10_COMMON *cm, vp10_writer *w,  // Writes the inter prediction mode; binary decisions with REF_MV, one tree token otherwise.
+ PREDICTION_MODE mode,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ int is_compound,
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ const int16_t mode_ctx) {  // mode_ctx packs several sub-contexts, extracted below by mask and shift
+#if CONFIG_REF_MV
+ const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
+ const vpx_prob newmv_prob = cm->fc->newmv_prob[newmv_ctx];
+#if CONFIG_EXT_INTER
+ vp10_write(w, mode != NEWMV && mode != NEWFROMNEARMV, newmv_prob);  // first decision: 1 means "not a new-MV mode"
+
+ if (!is_compound && (mode == NEWMV || mode == NEWFROMNEARMV))
+ vp10_write(w, mode == NEWFROMNEARMV, cm->fc->new2mv_prob);  // single-reference only: NEWMV vs NEWFROMNEARMV
+
+ if (mode != NEWMV && mode != NEWFROMNEARMV) {
+#else
+ vp10_write(w, mode != NEWMV, newmv_prob);  // first decision: 1 means "not NEWMV"
+
+ if (mode != NEWMV) {
+#endif // CONFIG_EXT_INTER
+ const int16_t zeromv_ctx = (mode_ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
+ const vpx_prob zeromv_prob = cm->fc->zeromv_prob[zeromv_ctx];
+
+ if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {  // flag set: mode must be ZEROMV, so nothing more is written
+ assert(mode == ZEROMV);
+ return;
+ }
+
+ vp10_write(w, mode != ZEROMV, zeromv_prob);  // second decision: 1 means "not ZEROMV"
+
+ if (mode != ZEROMV) {
+ int16_t refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK;
+ vpx_prob refmv_prob;
+
+ if (mode_ctx & (1 << SKIP_NEARESTMV_OFFSET)) refmv_ctx = 6;  // these flags override the context; a later test wins
+ if (mode_ctx & (1 << SKIP_NEARMV_OFFSET)) refmv_ctx = 7;
+ if (mode_ctx & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) refmv_ctx = 8;
+
+ refmv_prob = cm->fc->refmv_prob[refmv_ctx];
+ vp10_write(w, mode != NEARESTMV, refmv_prob);  // third decision: 1 means "not NEARESTMV"
+ }
+ }
+#else
+ const vpx_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx];
+ assert(is_inter_mode(mode));
+ vp10_write_token(w, vp10_inter_mode_tree, inter_probs,
+ &inter_mode_encodings[INTER_OFFSET(mode)]);  // table index is the mode's INTER_OFFSET
+#endif
+}
+
+#if CONFIG_REF_MV
+static void write_drl_idx(const VP10_COMMON *cm, const MB_MODE_INFO *mbmi,  // Writes mbmi->ref_mv_idx as a run of binary flags for NEWMV and NEARMV blocks.
+ const MB_MODE_INFO_EXT *mbmi_ext, vp10_writer *w) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+
+ assert(mbmi->ref_mv_idx < 3);
+
+ if (mbmi->mode == NEWMV) {
+ int idx;
+ for (idx = 0; idx < 2; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {  // a flag is written only if a further candidate exists
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx];
+
+ vp10_write(w, mbmi->ref_mv_idx != idx, drl_prob);  // 0 selects idx, 1 moves on to the next candidate
+ if (mbmi->ref_mv_idx == idx) return;
+ }
+ }
+ return;
+ }
+
+ if (mbmi->mode == NEARMV) {
+ int idx;
+ // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
+ for (idx = 1; idx < 3; ++idx) {  // same scheme as above, with candidate positions shifted by one
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ vpx_prob drl_prob = cm->fc->drl_prob[drl_ctx];
+
+ vp10_write(w, mbmi->ref_mv_idx != (idx - 1), drl_prob);
+ if (mbmi->ref_mv_idx == (idx - 1)) return;
+ }
+ }
+ return;
+ }
+}  // any other mode writes nothing
+#endif
+
+#if CONFIG_EXT_INTER
+static void write_inter_compound_mode(VP10_COMMON *cm, vp10_writer *w,  // Writes a compound inter mode as one tree token.
+ PREDICTION_MODE mode,
+ const int16_t mode_ctx) {
+ const vpx_prob *const inter_compound_probs =
+ cm->fc->inter_compound_mode_probs[mode_ctx];  // probabilities are selected by mode_ctx
+
+ assert(is_inter_compound_mode(mode));
+ vp10_write_token(w, vp10_inter_compound_mode_tree, inter_compound_probs,
+ &inter_compound_mode_encodings[INTER_COMPOUND_OFFSET(mode)]);
+}
+#endif // CONFIG_EXT_INTER
+
+static void encode_unsigned_max(struct vpx_write_bit_buffer *wb, int data,  // Writes data as a literal of get_unsigned_bits(max) bits.
+ int max) {
+ vpx_wb_write_literal(wb, data, get_unsigned_bits(max));  // no range check on data is done here
+}
+
+static void prob_diff_update(const vpx_tree_index *tree,  // Writes conditional diff updates for the n - 1 node probabilities of tree.
+ vpx_prob probs[/*n - 1*/],
+ const unsigned int counts[/*n - 1*/], int n,
+ vp10_writer *w) {
+ int i;
+ unsigned int branch_ct[32][2];  // fixed-size scratch; capacity is guarded by the assert below
+
+ // Assuming max number of probabilities <= 32
+ assert(n <= 32);
+
+ vp10_tree_probs_from_distribution(tree, branch_ct, counts);  // fills branch_ct from the symbol counts
+ for (i = 0; i < n - 1; ++i)
+ vp10_cond_prob_diff_update(w, &probs[i], branch_ct[i]);
+}
+
+static int prob_diff_update_savings(const vpx_tree_index *tree,  // Returns the summed update savings over the n - 1 node probabilities; writes nothing.
+ vpx_prob probs[/*n - 1*/],
+ const unsigned int counts[/*n - 1*/],
+ int n) {
+ int i;
+ unsigned int branch_ct[32][2];  // fixed-size scratch; capacity is guarded by the assert below
+ int savings = 0;
+
+ // Assuming max number of probabilities <= 32
+ assert(n <= 32);
+ vp10_tree_probs_from_distribution(tree, branch_ct, counts);
+ for (i = 0; i < n - 1; ++i) {
+ savings += vp10_cond_prob_diff_update_savings(&probs[i], branch_ct[i]);
+ }
+ return savings;  // callers compare this total against a threshold before updating
+}
+
+#if CONFIG_VAR_TX
+static void write_tx_size_vartx(const VP10_COMMON *cm, const MACROBLOCKD *xd,  // Recursively writes split flags for the transform partitioning at (blk_row, blk_col).
+ const MB_MODE_INFO *mbmi, TX_SIZE tx_size,
+ int blk_row, int blk_col, vp10_writer *w) {
+ const int tx_row = blk_row >> 1;  // blk_* halved to index inter_tx_size and the txfm contexts
+ const int tx_col = blk_col >> 1;
+ int max_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ int ctx = txfm_partition_context(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, tx_size);
+
+ if (xd->mb_to_bottom_edge < 0) max_blocks_high += xd->mb_to_bottom_edge >> 5;  // negative edge distance shrinks the limit
+ if (xd->mb_to_right_edge < 0) max_blocks_wide += xd->mb_to_right_edge >> 5;
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;  // outside the limit: nothing is written
+
+ if (tx_size == mbmi->inter_tx_size[tx_row][tx_col]) {
+ vp10_write(w, 0, cm->fc->txfm_partition_prob[ctx]);  // 0 = no further split at this size
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, tx_size);
+ } else {
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+ vp10_write(w, 1, cm->fc->txfm_partition_prob[ctx]);  // 1 = split
+
+ if (tx_size == TX_8X8) {  // splitting 8x8 gives 4x4; no recursion from here
+ txfm_partition_update(xd->above_txfm_context + tx_col,
+ xd->left_txfm_context + tx_row, TX_4X4);
+ return;
+ }
+
+ assert(bsl > 0);
+ --bsl;
+ for (i = 0; i < 4; ++i) {  // visit the four quadrants: bit 1 of i = row half, bit 0 = column half
+ int offsetr = blk_row + ((i >> 1) << bsl);
+ int offsetc = blk_col + ((i & 0x01) << bsl);
+ write_tx_size_vartx(cm, xd, mbmi, tx_size - 1, offsetr, offsetc, w);
+ }
+ }
+}
+
+static void update_txfm_partition_probs(VP10_COMMON *cm, vp10_writer *w,  // Writes a conditional diff update for every txfm partition context.
+ FRAME_COUNTS *counts) {
+ int k;
+ for (k = 0; k < TXFM_PARTITION_CONTEXTS; ++k)
+ vp10_cond_prob_diff_update(w, &cm->fc->txfm_partition_prob[k],
+ counts->txfm_partition[k]);  // counts come from the caller-supplied FRAME_COUNTS
+}
+#endif
+
+static void write_selected_tx_size(const VP10_COMMON *cm, const MACROBLOCKD *xd,  // Writes the transform size of the current block (xd->mi[0]) as a tree token.
+ vp10_writer *w) {
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ // For sub8x8 blocks the tx_size symbol does not need to be sent
+ if (bsize >= BLOCK_8X8) {
+ const TX_SIZE tx_size = mbmi->tx_size;
+ const int is_inter = is_inter_block(mbmi);
+ const int tx_size_ctx = get_tx_size_context(xd);
+ const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
+ : intra_tx_size_cat_lookup[bsize];  // category picks the tree, probabilities and table row
+ const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];  // the coded symbol is the mapped size, not tx_size itself
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(mbmi)));
+ assert(
+ IMPLIES(is_rect_tx(tx_size), tx_size == max_txsize_rect_lookup[bsize]));
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+
+ vp10_write_token(w, vp10_tx_size_tree[tx_size_cat],
+ cm->fc->tx_size_probs[tx_size_cat][tx_size_ctx],
+ &tx_size_encodings[tx_size_cat][coded_tx_size]);
+ }
+}
+
+#if CONFIG_REF_MV
+static void update_inter_mode_probs(VP10_COMMON *cm, vp10_writer *w,  // Writes conditional diff updates for the REF_MV inter mode probabilities.
+ FRAME_COUNTS *counts) {
+ int i;
+ for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)  // one pass per probability family, in this fixed order
+ vp10_cond_prob_diff_update(w, &cm->fc->newmv_prob[i],
+ counts->newmv_mode[i]);
+ for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i)
+ vp10_cond_prob_diff_update(w, &cm->fc->zeromv_prob[i],
+ counts->zeromv_mode[i]);
+ for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
+ vp10_cond_prob_diff_update(w, &cm->fc->refmv_prob[i],
+ counts->refmv_mode[i]);
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
+ vp10_cond_prob_diff_update(w, &cm->fc->drl_prob[i], counts->drl_mode[i]);
+#if CONFIG_EXT_INTER
+ vp10_cond_prob_diff_update(w, &cm->fc->new2mv_prob, counts->new2mv_mode);  // single probability, no context index
+#endif // CONFIG_EXT_INTER
+}
+#endif
+
+#if CONFIG_EXT_INTER
+static void update_inter_compound_mode_probs(VP10_COMMON *cm, vp10_writer *w) {  // Writes a group flag, then the compound mode probability updates if worthwhile.
+ const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
+ vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);  // cost difference between writing the flag as 1 and as 0
+ int i;
+ int savings = 0;
+ int do_update = 0;
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {  // first pass: total the savings over all contexts
+ savings += prob_diff_update_savings(
+ vp10_inter_compound_mode_tree, cm->fc->inter_compound_mode_probs[i],
+ cm->counts.inter_compound_mode[i], INTER_COMPOUND_MODES);
+ }
+ do_update = savings > savings_thresh;
+ vp10_write(w, do_update, GROUP_DIFF_UPDATE_PROB);  // the flag is always written
+ if (do_update) {
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {  // second pass: emit the updates
+ prob_diff_update(
+ vp10_inter_compound_mode_tree, cm->fc->inter_compound_mode_probs[i],
+ cm->counts.inter_compound_mode[i], INTER_COMPOUND_MODES, w);
+ }
+ }
+}
+#endif // CONFIG_EXT_INTER
+
+static int write_skip(const VP10_COMMON *cm, const MACROBLOCKD *xd,  // Writes the skip flag unless the segment forces it; returns the effective skip value.
+ int segment_id, const MODE_INFO *mi, vp10_writer *w) {
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
+ return 1;  // segment-level skip: nothing is written
+ } else {
+ const int skip = mi->mbmi.skip;
+ vp10_write(w, skip, vp10_get_skip_prob(cm, xd));
+ return skip;
+ }
+}
+
+static void update_skip_probs(VP10_COMMON *cm, vp10_writer *w,  // Writes a conditional diff update for every skip context.
+ FRAME_COUNTS *counts) {
+ int k;
+
+ for (k = 0; k < SKIP_CONTEXTS; ++k)
+ vp10_cond_prob_diff_update(w, &cm->fc->skip_probs[k], counts->skip[k]);
+}
+
+static void update_switchable_interp_probs(VP10_COMMON *cm, vp10_writer *w,  // Writes probability updates for the switchable interpolation filter tree, per context.
+ FRAME_COUNTS *counts) {
+ int j;
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
+ prob_diff_update(vp10_switchable_interp_tree,
+ cm->fc->switchable_interp_prob[j],
+ counts->switchable_interp[j], SWITCHABLE_FILTERS, w);  // no group flag here, unlike update_ext_tx_probs()
+}
+
+#if CONFIG_EXT_TX
+static void update_ext_tx_probs(VP10_COMMON *cm, vp10_writer *w) {  // Per transform set: writes a group flag, then the probability updates if worthwhile.
+ const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
+ vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);  // cost difference between writing the flag as 1 and as 0
+ int i, j;
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {  // inter sets; set 0 is not coded here
+ int savings = 0;
+ int do_update = 0;
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_inter_ext_tx_for_txsize[s][i]) continue;  // set s is not used at this transform size
+ savings += prob_diff_update_savings(
+ vp10_ext_tx_inter_tree[s], cm->fc->inter_ext_tx_prob[s][i],
+ cm->counts.inter_ext_tx[s][i], num_ext_tx_set_inter[s]);
+ }
+ do_update = savings > savings_thresh;
+ vp10_write(w, do_update, GROUP_DIFF_UPDATE_PROB);  // one flag per inter set, always written
+ if (do_update) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_inter_ext_tx_for_txsize[s][i]) continue;
+ prob_diff_update(
+ vp10_ext_tx_inter_tree[s], cm->fc->inter_ext_tx_prob[s][i],
+ cm->counts.inter_ext_tx[s][i], num_ext_tx_set_inter[s], w);
+ }
+ }
+ }
+
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {  // intra sets; probabilities also depend on the intra mode j
+ int savings = 0;
+ int do_update = 0;
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_intra_ext_tx_for_txsize[s][i]) continue;
+ for (j = 0; j < INTRA_MODES; ++j)
+ savings += prob_diff_update_savings(
+ vp10_ext_tx_intra_tree[s], cm->fc->intra_ext_tx_prob[s][i][j],
+ cm->counts.intra_ext_tx[s][i][j], num_ext_tx_set_intra[s]);
+ }
+ do_update = savings > savings_thresh;
+ vp10_write(w, do_update, GROUP_DIFF_UPDATE_PROB);  // one flag per intra set, always written
+ if (do_update) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ if (!use_intra_ext_tx_for_txsize[s][i]) continue;
+ for (j = 0; j < INTRA_MODES; ++j)
+ prob_diff_update(
+ vp10_ext_tx_intra_tree[s], cm->fc->intra_ext_tx_prob[s][i][j],
+ cm->counts.intra_ext_tx[s][i][j], num_ext_tx_set_intra[s], w);
+ }
+ }
+ }
+}
+
+#else
+
+static void update_ext_tx_probs(VP10_COMMON *cm, vp10_writer *w) {  // Non-EXT_TX build: one group flag plus updates for intra, then the same for inter.
+ const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
+ vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);  // cost difference between writing the flag as 1 and as 0
+ int i, j;
+
+ int savings = 0;
+ int do_update = 0;
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {  // intra: total the savings over sizes and the second index j
+ for (j = 0; j < TX_TYPES; ++j)
+ savings += prob_diff_update_savings(
+ vp10_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j],
+ cm->counts.intra_ext_tx[i][j], TX_TYPES);
+ }
+ do_update = savings > savings_thresh;
+ vp10_write(w, do_update, GROUP_DIFF_UPDATE_PROB);  // intra group flag, always written
+ if (do_update) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ for (j = 0; j < TX_TYPES; ++j)
+ prob_diff_update(vp10_ext_tx_tree, cm->fc->intra_ext_tx_prob[i][j],
+ cm->counts.intra_ext_tx[i][j], TX_TYPES, w);
+ }
+ }
+ savings = 0;  // restart the total for the inter probabilities
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ savings +=
+ prob_diff_update_savings(vp10_ext_tx_tree, cm->fc->inter_ext_tx_prob[i],
+ cm->counts.inter_ext_tx[i], TX_TYPES);
+ }
+ do_update = savings > savings_thresh;
+ vp10_write(w, do_update, GROUP_DIFF_UPDATE_PROB);  // inter group flag, always written
+ if (do_update) {
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ prob_diff_update(vp10_ext_tx_tree, cm->fc->inter_ext_tx_prob[i],
+ cm->counts.inter_ext_tx[i], TX_TYPES, w);
+ }
+ }
+}
+#endif // CONFIG_EXT_TX
+
+static void pack_palette_tokens(vp10_writer *w, const TOKENEXTRA **tp, int n,  // Writes num palette color tokens for an n-color palette and advances *tp past them.
+ int num) {
+ int i;
+ const TOKENEXTRA *p = *tp;
+
+ for (i = 0; i < num; ++i) {
+ vp10_write_token(w, vp10_palette_color_tree[n - 2], p->context_tree,  // tree and table row are chosen by n - 2
+ &palette_color_encodings[n - 2][p->token]);
+ ++p;
+ }
+
+ *tp = p;  // hand the advanced token pointer back to the caller
+}
+
+#if CONFIG_SUPERTX
+static void update_supertx_probs(VP10_COMMON *cm, vp10_writer *w) {  // Writes a group flag, then the supertx probability updates if worthwhile.
+ const int savings_thresh = vp10_cost_one(GROUP_DIFF_UPDATE_PROB) -
+ vp10_cost_zero(GROUP_DIFF_UPDATE_PROB);  // cost difference between writing the flag as 1 and as 0
+ int i, j;
+ int savings = 0;
+ int do_update = 0;
+ for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
+ for (j = 1; j < TX_SIZES; ++j) {  // j starts at 1: index 0 is never examined or updated
+ savings += vp10_cond_prob_diff_update_savings(&cm->fc->supertx_prob[i][j],
+ cm->counts.supertx[i][j]);
+ }
+ }
+ do_update = savings > savings_thresh;
+ vp10_write(w, do_update, GROUP_DIFF_UPDATE_PROB);  // the flag is always written
+ if (do_update) {
+ for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; ++i) {
+ for (j = 1; j < TX_SIZES; ++j) {
+ vp10_cond_prob_diff_update(w, &cm->fc->supertx_prob[i][j],
+ cm->counts.supertx[i][j]);
+ }
+ }
+ }
+}
+#endif // CONFIG_SUPERTX
+
+#if !CONFIG_ANS
+static void pack_mb_tokens(vp10_writer *w, const TOKENEXTRA **tp,  // Writes coefficient tokens from *tp until stop or EOSB_TOKEN, then stores the new position in *tp.
+ const TOKENEXTRA *const stop,
+ vpx_bit_depth_t bit_depth, const TX_SIZE tx) {
+ const TOKENEXTRA *p = *tp;
+#if CONFIG_VAR_TX
+ int count = 0;
+ const int seg_eob = get_tx2d_size(tx);  // token limit for one transform block in the VAR_TX build
+#endif
+
+ while (p < stop && p->token != EOSB_TOKEN) {
+ const int t = p->token;
+ const struct vp10_token *const a = &vp10_coef_encodings[t];
+ int v = a->value;
+ int n = a->len;
+#if CONFIG_VP9_HIGHBITDEPTH
+ const vp10_extra_bit *b;
+ if (bit_depth == VPX_BITS_12)  // the extra-bit table depends on the bit depth
+ b = &vp10_extra_bits_high12[t];
+ else if (bit_depth == VPX_BITS_10)
+ b = &vp10_extra_bits_high10[t];
+ else
+ b = &vp10_extra_bits[t];
+#else
+ const vp10_extra_bit *const b = &vp10_extra_bits[t];
+ (void)bit_depth;  // unused without high bit depth support
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ /* skip one or two nodes */
+ if (p->skip_eob_node)
+ n -= p->skip_eob_node;  // the EOB decision is not written; shorten the code length instead
+ else
+ vp10_write(w, t != EOB_TOKEN, p->context_tree[0]);
+
+ if (t != EOB_TOKEN) {
+ vp10_write(w, t != ZERO_TOKEN, p->context_tree[1]);
+
+ if (t != ZERO_TOKEN) {
+ vp10_write(w, t != ONE_TOKEN, p->context_tree[2]);
+
+ if (t != ONE_TOKEN) {  // larger tokens: remaining bits are coded with the pareto table row
+ int len = UNCONSTRAINED_NODES - p->skip_eob_node;
+ vp10_write_tree(w, vp10_coef_con_tree,
+ vp10_pareto8_full[p->context_tree[PIVOT_NODE] - 1], v,
+ n - len, 0);
+ }
+ }
+ }
+
+ if (b->base_val) {  // this token carries extra bits
+ const int e = p->extra, l = b->len;
+ int skip_bits = (b->base_val == CAT6_MIN_VAL)
+ ? TX_SIZES - 1 - txsize_sqr_up_map[tx]
+ : 0;  // CAT6 only: leading bits left unwritten, count depends on the transform size
+
+ if (l) {
+ const unsigned char *pb = b->prob;
+ int v = e >> 1;  // NOTE(review): shadows the outer v; all but the lowest bit of extra
+ int n = l; /* number of bits in v, assumed nonzero */
+ int i = 0;
+
+ do {
+ const int bb = (v >> --n) & 1;  // bits are taken most significant first
+ if (skip_bits) {
+ skip_bits--;
+ assert(!bb);  // a skipped bit must be zero
+ } else {
+ vp10_write(w, bb, pb[i >> 1]);
+ }
+ i = b->tree[i + bb];
+ } while (n);
+ }
+
+ vp10_write_bit(w, e & 1);  // lowest bit of extra, written last (presumably the sign - confirm)
+ }
+ ++p;
+
+#if CONFIG_VAR_TX
+ ++count;
+ if (t == EOB_TOKEN || count == seg_eob) break;  // VAR_TX: stop after one transform block
+#endif
+ }
+
+ *tp = p;
+}
+#else
+// Serializes coefficient tokens in forward order using a buffered ANS coder,
+// from *tp until stop or EOSB_TOKEN; the new position is stored in *tp.
+static void pack_mb_tokens(struct BufAnsCoder *ans, const TOKENEXTRA **tp,
+ const TOKENEXTRA *const stop,
+ vpx_bit_depth_t bit_depth, const TX_SIZE tx) {
+ const TOKENEXTRA *p = *tp;
+#if CONFIG_VAR_TX
+ int count = 0;
+ const int seg_eob = get_tx2d_size(tx);  // was 16 << (tx << 1), which only holds for square tx; matches the non-ANS variant
+#endif // CONFIG_VAR_TX
+
+ while (p < stop && p->token != EOSB_TOKEN) {
+ const int t = p->token;
+#if CONFIG_VP9_HIGHBITDEPTH
+ const vp10_extra_bit *b;
+ if (bit_depth == VPX_BITS_12)  // the extra-bit table depends on the bit depth
+ b = &vp10_extra_bits_high12[t];
+ else if (bit_depth == VPX_BITS_10)
+ b = &vp10_extra_bits_high10[t];
+ else
+ b = &vp10_extra_bits[t];
+#else
+ const vp10_extra_bit *const b = &vp10_extra_bits[t];
+ (void)bit_depth;  // unused without high bit depth support
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ /* skip one or two nodes */
+ if (!p->skip_eob_node)
+ buf_uabs_write(ans, t != EOB_TOKEN, p->context_tree[0]);  // EOB decision, written as a binary symbol
+
+ if (t != EOB_TOKEN) {
+ struct rans_sym s;
+ const rans_dec_lut *token_cdf = p->token_cdf;
+ assert(token_cdf);
+ s.cum_prob = (*token_cdf)[t - ZERO_TOKEN];  // cdf is indexed from ZERO_TOKEN
+ s.prob = (*token_cdf)[t - ZERO_TOKEN + 1] - s.cum_prob;  // width of this token's cdf interval
+ buf_rans_write(ans, &s);
+
+ if (b->base_val) {  // this token carries extra bits
+ const int e = p->extra, l = b->len;
+ int skip_bits = (b->base_val == CAT6_MIN_VAL)
+ ? TX_SIZES - 1 - txsize_sqr_up_map[tx]
+ : 0;  // CAT6 only: leading bits left unwritten, count depends on the transform size
+
+ if (l) {
+ const unsigned char *pb = b->prob;
+ int v = e >> 1;  // all but the lowest bit of extra
+ int n = l; /* number of bits in v, assumed nonzero */
+ int i = 0;
+
+ do {
+ const int bb = (v >> --n) & 1;  // bits are taken most significant first
+ if (skip_bits) {
+ skip_bits--;
+ assert(!bb);  // a skipped bit must be zero
+ } else {
+ buf_uabs_write(ans, bb, pb[i >> 1]);
+ }
+ i = b->tree[i + bb];
+ } while (n);
+ }
+
+ buf_uabs_write(ans, e & 1, 128);  // lowest bit of extra, written last with fixed probability 128
+ }
+ }
+ ++p;
+
+#if CONFIG_VAR_TX
+ ++count;
+ if (t == EOB_TOKEN || count == seg_eob) break;  // VAR_TX: stop after one transform block
+#endif // CONFIG_VAR_TX
+ }
+
+ *tp = p;
+}
+#endif // !CONFIG_ANS
+
+#if CONFIG_VAR_TX
+static void pack_txb_tokens(vp10_writer *w, const TOKENEXTRA **tp,  // Recursively walks the transform partitioning of one plane and packs tokens for each leaf block.
+ const TOKENEXTRA *const tok_end, MACROBLOCKD *xd,
+ MB_MODE_INFO *mbmi, int plane,
+ BLOCK_SIZE plane_bsize, vpx_bit_depth_t bit_depth,
+ int block, int blk_row, int blk_col,  // block is only forwarded to the recursive call
+ TX_SIZE tx_size) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);  // index into mbmi->inter_tx_size
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ TX_SIZE plane_tx_size;
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+ if (xd->mb_to_bottom_edge < 0)  // negative edge distance shrinks the limit
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;  // outside the limit: nothing is packed
+
+ plane_tx_size = plane ? get_uv_tx_size_impl(
+ mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0)
+ : mbmi->inter_tx_size[tx_row][tx_col];  // plane 0 uses the stored size directly
+
+ if (tx_size == plane_tx_size) {
+ pack_mb_tokens(w, tp, tok_end, bit_depth, tx_size);  // leaf: pack this transform block
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {  // visit the four quadrants: bit 1 of i = row half, bit 0 = column half
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+ int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
+
+ pack_txb_tokens(w, tp, tok_end, xd, mbmi, plane, plane_bsize, bit_depth,
+ block + i * step, offsetr, offsetc, tx_size - 1);
+ }
+ }
+}
+#endif
+
+static void write_segment_id(vp10_writer *w, const struct segmentation *seg,  // Writes segment_id with the segment tree; does nothing unless enabled and update_map are both set.
+ const struct segmentation_probs *segp,
+ int segment_id) {
+ if (seg->enabled && seg->update_map)
+ vp10_write_tree(w, vp10_segment_tree, segp->tree_probs, segment_id, 3, 0);  // segment_id is passed as a 3-bit value
+}
+
+// Writes the reference frame selection of the current block (xd->mi[0]).
+static void write_ref_frames(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+ vp10_writer *w) {
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const int is_compound = has_second_ref(mbmi);
+ const int segment_id = mbmi->segment_id;
+
+ // When the segment fixes the reference frame (SEG_LVL_REF_FRAME active),
+ // nothing is written; otherwise the reference selection is coded below.
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ assert(!is_compound);
+ assert(mbmi->ref_frame[0] ==
+ get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
+ } else {
+ // does the feature use compound prediction or not
+ // (if not specified at the frame/segment level)
+ if (cm->reference_mode == REFERENCE_MODE_SELECT) {
+ vp10_write(w, is_compound, vp10_get_reference_mode_prob(cm, xd));
+ } else {
+ assert((!is_compound) == (cm->reference_mode == SINGLE_REFERENCE));
+ }
+
+ if (is_compound) {
+#if CONFIG_EXT_REFS
+ const int bit = (mbmi->ref_frame[0] == GOLDEN_FRAME ||
+ mbmi->ref_frame[0] == LAST3_FRAME);  // first decision: {GOLDEN, LAST3} versus the other first references
+ const int bit_bwd = mbmi->ref_frame[1] == ALTREF_FRAME;  // second reference: ALTREF or not
+#else // CONFIG_EXT_REFS
+ const int bit = mbmi->ref_frame[0] == GOLDEN_FRAME;
+#endif // CONFIG_EXT_REFS
+
+ vp10_write(w, bit, vp10_get_pred_prob_comp_ref_p(cm, xd));
+
+#if CONFIG_EXT_REFS
+ if (!bit) {
+ const int bit1 = mbmi->ref_frame[0] == LAST_FRAME;
+ vp10_write(w, bit1, vp10_get_pred_prob_comp_ref_p1(cm, xd));
+ } else {
+ const int bit2 = mbmi->ref_frame[0] == GOLDEN_FRAME;
+ vp10_write(w, bit2, vp10_get_pred_prob_comp_ref_p2(cm, xd));
+ }
+ vp10_write(w, bit_bwd, vp10_get_pred_prob_comp_bwdref_p(cm, xd));
+#endif // CONFIG_EXT_REFS
+ } else {
+#if CONFIG_EXT_REFS
+ const int bit0 = (mbmi->ref_frame[0] == ALTREF_FRAME ||
+ mbmi->ref_frame[0] == BWDREF_FRAME);  // first decision: {ALTREF, BWDREF} versus the rest
+ vp10_write(w, bit0, vp10_get_pred_prob_single_ref_p1(cm, xd));
+
+ if (bit0) {
+ const int bit1 = mbmi->ref_frame[0] == ALTREF_FRAME;
+ vp10_write(w, bit1, vp10_get_pred_prob_single_ref_p2(cm, xd));
+ } else {
+ const int bit2 = (mbmi->ref_frame[0] == LAST3_FRAME ||
+ mbmi->ref_frame[0] == GOLDEN_FRAME);  // {LAST3, GOLDEN} versus the remaining references
+ vp10_write(w, bit2, vp10_get_pred_prob_single_ref_p3(cm, xd));
+
+ if (!bit2) {
+ const int bit3 = mbmi->ref_frame[0] != LAST_FRAME;
+ vp10_write(w, bit3, vp10_get_pred_prob_single_ref_p4(cm, xd));
+ } else {
+ const int bit4 = mbmi->ref_frame[0] != LAST3_FRAME;
+ vp10_write(w, bit4, vp10_get_pred_prob_single_ref_p5(cm, xd));
+ }
+ }
+#else // CONFIG_EXT_REFS
+ const int bit0 = mbmi->ref_frame[0] != LAST_FRAME;
+ vp10_write(w, bit0, vp10_get_pred_prob_single_ref_p1(cm, xd));
+
+ if (bit0) {  // not LAST: one more decision, GOLDEN or not
+ const int bit1 = mbmi->ref_frame[0] != GOLDEN_FRAME;
+ vp10_write(w, bit1, vp10_get_pred_prob_single_ref_p2(cm, xd));
+ }
+#endif // CONFIG_EXT_REFS
+ }
+ }
+}
+
+#if CONFIG_EXT_INTRA
+static void write_ext_intra_mode_info(const VP10_COMMON *const cm,  // Writes the ext-intra flag and mode for index 0 (gated on mode) and index 1 (gated on uv_mode).
+ const MB_MODE_INFO *const mbmi,
+ vp10_writer *w) {
+#if !ALLOW_FILTER_INTRA_MODES
+ return;  // nothing is written when filter intra modes are compiled out; the code below is then unreachable
+#endif
+ if (mbmi->mode == DC_PRED && mbmi->palette_mode_info.palette_size[0] == 0) {  // only for DC_PRED with no palette at index 0
+ vp10_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[0],
+ cm->fc->ext_intra_probs[0]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
+ EXT_INTRA_MODE mode = mbmi->ext_intra_mode_info.ext_intra_mode[0];
+ write_uniform(w, FILTER_INTRA_MODES, mode);
+ }
+ }
+
+ if (mbmi->uv_mode == DC_PRED &&
+ mbmi->palette_mode_info.palette_size[1] == 0) {  // same gate, using uv_mode and palette index 1
+ vp10_write(w, mbmi->ext_intra_mode_info.use_ext_intra_mode[1],
+ cm->fc->ext_intra_probs[1]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1]) {
+ EXT_INTRA_MODE mode = mbmi->ext_intra_mode_info.ext_intra_mode[1];
+ write_uniform(w, FILTER_INTRA_MODES, mode);
+ }
+ }
+}
+
+static void write_intra_angle_info(const VP10_COMMON *cm, const MACROBLOCKD *xd,  // Writes angle deltas (and, where switchable, the intra filter) for the current block.
+ vp10_writer *w) {
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ const int intra_filter_ctx = vp10_get_pred_context_intra_interp(xd);
+ int p_angle;
+
+ if (bsize < BLOCK_8X8) return;  // nothing is written for sub-8x8 blocks
+
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) {
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[0]);  // delta is offset by MAX_ANGLE_DELTAS before coding
+ p_angle = mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+ if (vp10_is_intra_filter_switchable(p_angle)) {
+ vp10_write_token(w, vp10_intra_filter_tree,
+ cm->fc->intra_filter_probs[intra_filter_ctx],
+ &intra_filter_encodings[mbmi->intra_filter]);
+ }
+ }
+
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {  // uv_mode case: delta only, no filter token
+ write_uniform(w, 2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+ }
+}
+#endif // CONFIG_EXT_INTRA
+
+static void write_switchable_interp_filter(VP10_COMP *cpi,  // Writes the block's interpolation filter(s) when the frame filter is SWITCHABLE, and counts the choice in cpi.
+ const MACROBLOCKD *xd,
+ vp10_writer *w) {
+ VP10_COMMON *const cm = &cpi->common;
+ const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_DUAL_FILTER
+ int dir;
+#endif
+ if (cm->interp_filter == SWITCHABLE) {
+#if CONFIG_EXT_INTERP
+#if CONFIG_DUAL_FILTER
+ if (!vp10_is_interp_needed(xd)) {  // nothing is written when no interpolation is needed
+ assert(mbmi->interp_filter[0] == EIGHTTAP_REGULAR);
+ return;
+ }
+#else
+ if (!vp10_is_interp_needed(xd)) {
+#if CONFIG_DUAL_FILTER  // NOTE(review): never taken - this is already inside the #else of CONFIG_DUAL_FILTER
+ assert(mbmi->interp_filter[0] == EIGHTTAP_REGULAR);
+ assert(mbmi->interp_filter[1] == EIGHTTAP_REGULAR);
+#else
+ assert(mbmi->interp_filter == EIGHTTAP_REGULAR);
+#endif
+ return;
+ }
+#endif // CONFIG_DUAL_FILTER
+#endif // CONFIG_EXT_INTERP
+#if CONFIG_DUAL_FILTER
+ for (dir = 0; dir < 2; ++dir) {  // one filter per direction, written only if that direction has a subpel MV component
+ if (has_subpel_mv_component(xd->mi[0], xd, dir) ||
+ (mbmi->ref_frame[1] > INTRA_FRAME &&
+ has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {  // dir + 2 is checked when a second reference is present
+ const int ctx = vp10_get_pred_context_switchable_interp(xd, dir);
+ vp10_write_token(
+ w, vp10_switchable_interp_tree, cm->fc->switchable_interp_prob[ctx],
+ &switchable_interp_encodings[mbmi->interp_filter[dir]]);
+ ++cpi->interp_filter_selected[0][mbmi->interp_filter[dir]];  // side effect: encoder-side usage counter
+ }
+ }
+#else
+ {
+ const int ctx = vp10_get_pred_context_switchable_interp(xd);
+ vp10_write_token(w, vp10_switchable_interp_tree,
+ cm->fc->switchable_interp_prob[ctx],
+ &switchable_interp_encodings[mbmi->interp_filter]);
+ ++cpi->interp_filter_selected[0][mbmi->interp_filter];  // side effect: encoder-side usage counter
+ }
+#endif
+ }
+}
+
+static void write_palette_mode_info(const VP10_COMMON *cm,  // Writes palette usage, size, colors and first color index for index 0 and index 1 of the palette info.
+ const MACROBLOCKD *xd,
+ const MODE_INFO *const mi, vp10_writer *w) {
+ const MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const MODE_INFO *const above_mi = xd->above_mi;
+ const MODE_INFO *const left_mi = xd->left_mi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;  // used as bsize - BLOCK_8X8 below; presumably callers ensure bsize >= BLOCK_8X8 - confirm
+ const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ int palette_ctx = 0;
+ int n, i;
+
+ if (mbmi->mode == DC_PRED) {
+ n = pmi->palette_size[0];
+ if (above_mi)  // context = number of available neighbors (above, left) with a non-empty palette at index 0
+ palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ if (left_mi)
+ palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ vp10_write(
+ w, n > 0,
+ vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx]);
+ if (n > 0) {
+ vp10_write_token(w, vp10_palette_size_tree,
+ vp10_default_palette_y_size_prob[bsize - BLOCK_8X8],
+ &palette_size_encodings[n - 2]);  // size table is indexed by n - 2
+ for (i = 0; i < n; ++i)
+ vp10_write_literal(w, pmi->palette_colors[i], cm->bit_depth);  // each color is a bit_depth-bit literal
+ write_uniform(w, n, pmi->palette_first_color_idx[0]);
+ }
+ }
+
+ if (mbmi->uv_mode == DC_PRED) {
+ n = pmi->palette_size[1];
+ vp10_write(w, n > 0,
+ vp10_default_palette_uv_mode_prob[pmi->palette_size[0] > 0]);  // probability depends on whether index 0 has a palette
+ if (n > 0) {
+ vp10_write_token(w, vp10_palette_size_tree,
+ vp10_default_palette_uv_size_prob[bsize - BLOCK_8X8],
+ &palette_size_encodings[n - 2]);
+ for (i = 0; i < n; ++i) {  // two colors per entry, at offsets PALETTE_MAX_SIZE and 2 * PALETTE_MAX_SIZE
+ vp10_write_literal(w, pmi->palette_colors[PALETTE_MAX_SIZE + i],
+ cm->bit_depth);
+ vp10_write_literal(w, pmi->palette_colors[2 * PALETTE_MAX_SIZE + i],
+ cm->bit_depth);
+ }
+ write_uniform(w, n, pmi->palette_first_color_idx[1]);
+ }
+ }
+}
+// Packs mode info, motion vectors and tx type of one block (non intra-only frames).
+static void pack_inter_mode_mvs(VP10_COMP *cpi, const MODE_INFO *mi,
+#if CONFIG_SUPERTX
+ int supertx_enabled, // nonzero: several fields below are not coded here
+#endif
+ vp10_writer *w) {
+ VP10_COMMON *const cm = &cpi->common;
+#if !CONFIG_REF_MV
+ const nmv_context *nmvc = &cm->fc->nmvc;
+#endif // !CONFIG_REF_MV
+ const MACROBLOCK *x = &cpi->td.mb;
+ const MACROBLOCKD *xd = &x->e_mbd;
+ const struct segmentation *const seg = &cm->seg;
+ const struct segmentation_probs *const segp = &cm->fc->seg;
+ const MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+ const PREDICTION_MODE mode = mbmi->mode;
+ const int segment_id = mbmi->segment_id;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ const int allow_hp = cm->allow_high_precision_mv;
+ const int is_inter = is_inter_block(mbmi);
+ const int is_compound = has_second_ref(mbmi);
+ int skip, ref;
+ // Segment id: coded only while the segmentation map is being updated.
+ if (seg->update_map) {
+ if (seg->temporal_update) {
+ const int pred_flag = mbmi->seg_id_predicted;
+ vpx_prob pred_prob = vp10_get_pred_prob_seg_id(segp, xd);
+ vp10_write(w, pred_flag, pred_prob);
+ if (!pred_flag) write_segment_id(w, seg, segp, segment_id);
+ } else {
+ write_segment_id(w, seg, segp, segment_id);
+ }
+ }
+ // Skip flag; with supertx enabled it is taken from mbmi instead of coded.
+#if CONFIG_SUPERTX
+ if (supertx_enabled)
+ skip = mbmi->skip;
+ else
+ skip = write_skip(cm, xd, segment_id, mi, w);
+#else
+ skip = write_skip(cm, xd, segment_id, mi, w);
+#endif // CONFIG_SUPERTX
+ // Intra/inter flag, unless SEG_LVL_REF_FRAME is active for the segment.
+#if CONFIG_SUPERTX
+ if (!supertx_enabled) // applies to the if statement after the #endif
+#endif // CONFIG_SUPERTX
+ if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
+ vp10_write(w, is_inter, vp10_get_intra_inter_prob(cm, xd));
+ // Transform size: TX_MODE_SELECT, >= 8x8, not lossless, not a skipped inter.
+ if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif // CONFIG_SUPERTX
+ !(is_inter && skip) && !xd->lossless[segment_id]) {
+#if CONFIG_VAR_TX
+ if (is_inter) { // This implies skip flag is 0.
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ const int txb_size = txsize_to_bsize[max_tx_size];
+ const int bs = num_4x4_blocks_wide_lookup[txb_size];
+ const int width = num_4x4_blocks_wide_lookup[bsize];
+ const int height = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ for (idy = 0; idy < height; idy += bs)
+ for (idx = 0; idx < width; idx += bs)
+ write_tx_size_vartx(cm, xd, mbmi, max_tx_size, idy, idx, w);
+ } else {
+ set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, xd);
+ write_selected_tx_size(cm, xd, w);
+ }
+ } else { // tx size not coded: only the txfm contexts are set
+ set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, xd);
+#else
+ write_selected_tx_size(cm, xd, w);
+#endif // CONFIG_VAR_TX
+ }
+ // Intra blocks: Y mode(s), UV mode and the optional intra tools.
+ if (!is_inter) {
+ if (bsize >= BLOCK_8X8) {
+ write_intra_mode(w, mode, cm->fc->y_mode_prob[size_group_lookup[bsize]]);
+ } else {
+ int idx, idy;
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
+ const PREDICTION_MODE b_mode = mi->bmi[idy * 2 + idx].as_mode;
+ write_intra_mode(w, b_mode, cm->fc->y_mode_prob[0]);
+ }
+ }
+ }
+ write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mode]);
+#if CONFIG_EXT_INTRA
+ write_intra_angle_info(cm, xd, w);
+#endif // CONFIG_EXT_INTRA
+ if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools)
+ write_palette_mode_info(cm, xd, mi, w);
+#if CONFIG_EXT_INTRA
+ if (bsize >= BLOCK_8X8) write_ext_intra_mode_info(cm, mbmi, w);
+#endif // CONFIG_EXT_INTRA
+ } else { // Inter blocks: reference frames, mode(s) and motion vectors.
+ int16_t mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
+ write_ref_frames(cm, xd, w);
+
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (is_compound)
+ mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
+ else
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, bsize, -1);
+#endif // CONFIG_REF_MV
+
+ // If segment skip is not enabled code the mode.
+ if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
+ if (bsize >= BLOCK_8X8) {
+#if CONFIG_EXT_INTER
+ if (is_inter_compound_mode(mode))
+ write_inter_compound_mode(cm, w, mode, mode_ctx);
+ else if (is_inter_singleref_mode(mode))
+#endif // CONFIG_EXT_INTER
+ write_inter_mode(cm, w, mode,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ is_compound,
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ mode_ctx);
+
+#if CONFIG_REF_MV
+ if (mode == NEARMV || mode == NEWMV)
+ write_drl_idx(cm, mbmi, mbmi_ext, w);
+#endif // CONFIG_REF_MV
+ }
+ }
+
+#if !CONFIG_EXT_INTERP && !CONFIG_DUAL_FILTER
+ write_switchable_interp_filter(cpi, xd, w);
+#endif // !CONFIG_EXT_INTERP && !CONFIG_DUAL_FILTER
+ // Below 8x8 a mode, and MVs for the NEW* modes, is coded per sub-block.
+ if (bsize < BLOCK_8X8) {
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
+ const int j = idy * 2 + idx;
+ const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (!is_compound)
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, bsize, j);
+#endif // CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (is_inter_compound_mode(b_mode))
+ write_inter_compound_mode(cm, w, b_mode, mode_ctx);
+ else if (is_inter_singleref_mode(b_mode))
+#endif // CONFIG_EXT_INTER
+ write_inter_mode(cm, w, b_mode,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ has_second_ref(mbmi),
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ mode_ctx);
+
+#if CONFIG_EXT_INTER
+ if (b_mode == NEWMV || b_mode == NEWFROMNEARMV ||
+ b_mode == NEW_NEWMV) {
+#else
+ if (b_mode == NEWMV) {
+#endif // CONFIG_EXT_INTER
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+#if CONFIG_REF_MV
+ int nmv_ctx =
+ vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[ref]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[ref]]);
+ const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif // CONFIG_REF_MV
+ vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv,
+#if CONFIG_EXT_INTER
+ &mi->bmi[j].ref_mv[ref].as_mv,
+#if CONFIG_REF_MV
+ is_compound,
+#endif
+#else
+#if CONFIG_REF_MV
+ &mi->bmi[j].pred_mv_s8[ref].as_mv, is_compound,
+#else
+ &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv,
+#endif // CONFIG_REF_MV
+#endif // CONFIG_EXT_INTER
+ nmvc, allow_hp);
+ }
+ }
+#if CONFIG_EXT_INTER
+ else if (b_mode == NEAREST_NEWMV || b_mode == NEAR_NEWMV) { // only MV 1 is coded
+#if CONFIG_REF_MV
+ int nmv_ctx =
+ vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
+ const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
+ vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[1].as_mv,
+ &mi->bmi[j].ref_mv[1].as_mv,
+#if CONFIG_REF_MV
+ is_compound,
+#endif
+ nmvc, allow_hp);
+ } else if (b_mode == NEW_NEARESTMV || b_mode == NEW_NEARMV) { // only MV 0 is coded
+#if CONFIG_REF_MV
+ int nmv_ctx =
+ vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
+ const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
+ vp10_encode_mv(cpi, w, &mi->bmi[j].as_mv[0].as_mv,
+ &mi->bmi[j].ref_mv[0].as_mv,
+#if CONFIG_REF_MV
+ is_compound,
+#endif
+ nmvc, allow_hp);
+ }
+#endif // CONFIG_EXT_INTER
+ }
+ }
+ } else { // >= 8x8: the mode was coded above, only the MVs remain
+#if CONFIG_EXT_INTER
+ if (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV) {
+#else
+ if (mode == NEWMV) {
+#endif // CONFIG_EXT_INTER
+ int_mv ref_mv;
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+#if CONFIG_REF_MV
+ int nmv_ctx =
+ vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[ref]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[ref]]);
+ const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
+ ref_mv = mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0];
+#if CONFIG_EXT_INTER
+ if (mode == NEWFROMNEARMV) // coded against ref_mvs[..][1] instead of [0]
+ vp10_encode_mv(cpi, w, &mbmi->mv[ref].as_mv,
+ &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][1].as_mv,
+#if CONFIG_REF_MV
+ is_compound,
+#endif
+ nmvc, allow_hp);
+ else
+#endif // CONFIG_EXT_INTER
+ vp10_encode_mv(cpi, w, &mbmi->mv[ref].as_mv, &ref_mv.as_mv,
+#if CONFIG_REF_MV
+ is_compound,
+#endif
+ nmvc, allow_hp);
+ }
+#if CONFIG_EXT_INTER
+ } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) { // only MV 1 is coded
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
+ const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
+ vp10_encode_mv(cpi, w, &mbmi->mv[1].as_mv,
+ &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv,
+#if CONFIG_REF_MV
+ is_compound,
+#endif
+ nmvc, allow_hp);
+ } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) { // only MV 0 is coded
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
+ const nmv_context *nmvc = &cm->fc->nmvc[nmv_ctx];
+#endif
+ vp10_encode_mv(cpi, w, &mbmi->mv[0].as_mv,
+ &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv,
+#if CONFIG_REF_MV
+ is_compound,
+#endif
+ nmvc, allow_hp);
+#endif // CONFIG_EXT_INTER
+ }
+ }
+
+#if CONFIG_EXT_INTER
+ if (cpi->common.reference_mode != COMPOUND_REFERENCE &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif // CONFIG_SUPERTX
+ is_interintra_allowed(mbmi)) {
+ const int interintra = mbmi->ref_frame[1] == INTRA_FRAME;
+ const int bsize_group = size_group_lookup[bsize];
+ vp10_write(w, interintra, cm->fc->interintra_prob[bsize_group]);
+ if (interintra) {
+ write_interintra_mode(w, mbmi->interintra_mode,
+ cm->fc->interintra_mode_prob[bsize_group]);
+ if (is_interintra_wedge_used(bsize)) {
+ vp10_write(w, mbmi->use_wedge_interintra,
+ cm->fc->wedge_interintra_prob[bsize]);
+ if (mbmi->use_wedge_interintra) {
+ vp10_write_literal(w, mbmi->interintra_wedge_index,
+ get_wedge_bits_lookup(bsize));
+ assert(mbmi->interintra_wedge_sign == 0);
+ }
+ }
+ }
+ }
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif // CONFIG_SUPERTX
+#if CONFIG_EXT_INTER
+ if (mbmi->ref_frame[1] != INTRA_FRAME)
+#endif // CONFIG_EXT_INTER
+ if (is_motvar_allowed(mbmi)) {
+ // TODO(debargha): Might want to only emit this if SEG_LVL_SKIP
+ // is not active, and assume SIMPLE_TRANSLATION in the decoder if
+ // it is active.
+ assert(mbmi->motion_variation < MOTION_VARIATIONS);
+ vp10_write_token(w, vp10_motvar_tree, cm->fc->motvar_prob[bsize],
+ &motvar_encodings[mbmi->motion_variation]);
+ }
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+#if CONFIG_EXT_INTER
+ if (cpi->common.reference_mode != SINGLE_REFERENCE &&
+ is_inter_compound_mode(mbmi->mode) &&
+#if CONFIG_OBMC
+ !(is_motvar_allowed(mbmi) &&
+ mbmi->motion_variation != SIMPLE_TRANSLATION) &&
+#endif // CONFIG_OBMC
+ is_interinter_wedge_used(bsize)) {
+ vp10_write(w, mbmi->use_wedge_interinter,
+ cm->fc->wedge_interinter_prob[bsize]);
+ if (mbmi->use_wedge_interinter) {
+ vp10_write_literal(w, mbmi->interinter_wedge_index,
+ get_wedge_bits_lookup(bsize));
+ vp10_write_bit(w, mbmi->interinter_wedge_sign);
+ }
+ }
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_EXT_INTERP || CONFIG_DUAL_FILTER
+ write_switchable_interp_filter(cpi, xd, w);
+#endif // CONFIG_EXT_INTERP || CONFIG_DUAL_FILTER
+ }
+ // Transform type.
+ if (!FIXED_TX_TYPE) {
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif // CONFIG_SUPERTX
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ int eset = get_ext_tx_set(mbmi->tx_size, bsize, is_inter);
+ if (is_inter) {
+ assert(ext_tx_used_inter[eset][mbmi->tx_type]);
+ if (eset > 0)
+ vp10_write_token(
+ w, vp10_ext_tx_inter_tree[eset],
+ cm->fc->inter_ext_tx_prob[eset][txsize_sqr_map[mbmi->tx_size]],
+ &ext_tx_inter_encodings[eset][mbmi->tx_type]);
+ } else if (ALLOW_INTRA_EXT_TX) {
+ if (eset > 0)
+ vp10_write_token(
+ w, vp10_ext_tx_intra_tree[eset],
+ cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode],
+ &ext_tx_intra_encodings[eset][mbmi->tx_type]);
+ }
+ }
+#else
+ if (mbmi->tx_size < TX_32X32 && cm->base_qindex > 0 && !mbmi->skip &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif // CONFIG_SUPERTX
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ if (is_inter) {
+ vp10_write_token(w, vp10_ext_tx_tree,
+ cm->fc->inter_ext_tx_prob[mbmi->tx_size],
+ &ext_tx_encodings[mbmi->tx_type]);
+ } else {
+ vp10_write_token(
+ w, vp10_ext_tx_tree,
+ cm->fc->intra_ext_tx_prob
+ [mbmi->tx_size][intra_mode_to_tx_type_context[mbmi->mode]],
+ &ext_tx_encodings[mbmi->tx_type]);
+ }
+ } else { // no tx type is coded: a non-skipped block must then use DCT_DCT
+ if (!mbmi->skip) {
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif // CONFIG_SUPERTX
+ assert(mbmi->tx_type == DCT_DCT);
+ }
+ }
+#endif // CONFIG_EXT_TX
+ }
+}
+
+// Writes the mode info of one block of an intra-only frame: optional segment
+// id, skip flag, optional tx size, Y and UV intra modes, optional palette
+// and ext-intra info, and finally the transform type.
+static void write_mb_modes_kf(const VP10_COMMON *cm, const MACROBLOCKD *xd,
+                              MODE_INFO **mi_8x8, vp10_writer *w) {
+  const MODE_INFO *const mi = mi_8x8[0];
+  const MB_MODE_INFO *const mbmi = &mi->mbmi;
+  const MODE_INFO *const above = xd->above_mi;
+  const MODE_INFO *const left = xd->left_mi;
+  const BLOCK_SIZE bsize = mbmi->sb_type;
+  const int at_least_8x8 = bsize >= BLOCK_8X8;
+
+  if (cm->seg.update_map)
+    write_segment_id(w, &cm->seg, &cm->fc->seg, mbmi->segment_id);
+
+  write_skip(cm, xd, mbmi->segment_id, mi, w);
+
+  if (at_least_8x8 && cm->tx_mode == TX_MODE_SELECT &&
+      !xd->lossless[mbmi->segment_id])
+    write_selected_tx_size(cm, xd, w);
+
+  if (at_least_8x8) {
+    write_intra_mode(w, mbmi->mode, get_y_mode_probs(cm, mi, above, left, 0));
+  } else {
+    // One Y mode per distinct sub-block of the 2x2 grid indexed by r * 2 + c.
+    const int step_w = num_4x4_blocks_wide_lookup[bsize];
+    const int step_h = num_4x4_blocks_high_lookup[bsize];
+    int r, c;
+    for (r = 0; r < 2; r += step_h)
+      for (c = 0; c < 2; c += step_w)
+        write_intra_mode(w, mi->bmi[r * 2 + c].as_mode,
+                         get_y_mode_probs(cm, mi, above, left, r * 2 + c));
+  }
+
+  write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mbmi->mode]);
+#if CONFIG_EXT_INTRA
+  write_intra_angle_info(cm, xd, w);
+#endif  // CONFIG_EXT_INTRA
+  if (at_least_8x8 && cm->allow_screen_content_tools)
+    write_palette_mode_info(cm, xd, mi, w);
+#if CONFIG_EXT_INTRA
+  if (at_least_8x8) write_ext_intra_mode_info(cm, mbmi, w);
+#endif  // CONFIG_EXT_INTRA
+
+  // Transform type; nothing is coded when the type is fixed.
+  if (FIXED_TX_TYPE) return;
+
+#if CONFIG_EXT_TX
+  if (get_ext_tx_types(mbmi->tx_size, bsize, 0) > 1 && cm->base_qindex > 0 &&
+      !mbmi->skip &&
+      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
+      ALLOW_INTRA_EXT_TX) {
+    const int eset = get_ext_tx_set(mbmi->tx_size, bsize, 0);
+    if (eset > 0)
+      vp10_write_token(
+          w, vp10_ext_tx_intra_tree[eset],
+          cm->fc->intra_ext_tx_prob[eset][mbmi->tx_size][mbmi->mode],
+          &ext_tx_intra_encodings[eset][mbmi->tx_type]);
+  }
+#else
+  if (mbmi->tx_size < TX_32X32 && cm->base_qindex > 0 && !mbmi->skip &&
+      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+    vp10_write_token(
+        w, vp10_ext_tx_tree,
+        cm->fc->intra_ext_tx_prob[mbmi->tx_size]
+                                 [intra_mode_to_tx_type_context[mbmi->mode]],
+        &ext_tx_encodings[mbmi->tx_type]);
+  }
+#endif  // CONFIG_EXT_TX
+}
+// Calls write_modes_b(); supertx_enabled is dropped without CONFIG_SUPERTX.
+#if CONFIG_SUPERTX
+#define write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, \
+ mi_row, mi_col) \
+ write_modes_b(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col)
+#else
+#define write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, \
+ mi_row, mi_col) \
+ write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col)
+#endif // CONFIG_SUPERTX
+// Writes one block: mode info, then palette tokens and coefficient tokens.
+static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile,
+ vp10_writer *w, const TOKENEXTRA **tok,
+ const TOKENEXTRA *const tok_end,
+#if CONFIG_SUPERTX
+ int supertx_enabled, // nonzero: coefficient tokens are not written here
+#endif // CONFIG_SUPERTX
+ int mi_row, int mi_col) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+ MODE_INFO *m;
+ int plane;
+ int bh, bw;
+#if CONFIG_ANS
+ (void)tok;
+ (void)tok_end;
+ (void)plane;
+#endif // CONFIG_ANS
+ // Point xd at the mode info of the block at (mi_row, mi_col).
+ xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
+ m = xd->mi[0];
+
+ assert(m->mbmi.sb_type <= cm->sb_size);
+
+ bh = num_8x8_blocks_high_lookup[m->mbmi.sb_type];
+ bw = num_8x8_blocks_wide_lookup[m->mbmi.sb_type];
+
+ cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
+
+ set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
+ if (frame_is_intra_only(cm)) {
+ write_mb_modes_kf(cm, xd, xd->mi, w);
+ } else {
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+#endif // CONFIG_VAR_TX
+#if CONFIG_EXT_INTERP
+ // vp10_is_interp_needed needs the ref frame buffers set up to look
+ // up if they are scaled. vp10_is_interp_needed is in turn needed by
+ // write_switchable_interp_filter, which is called by pack_inter_mode_mvs.
+ set_ref_ptrs(cm, xd, m->mbmi.ref_frame[0], m->mbmi.ref_frame[1]);
+#endif // CONFIG_EXT_INTERP
+#if 0
+ // NOTE(zoeliu): For debug
+ if (cm->current_video_frame == FRAME_TO_CHECK && cm->show_frame == 1) {
+ const PREDICTION_MODE mode = m->mbmi.mode;
+ const int segment_id = m->mbmi.segment_id;
+ const BLOCK_SIZE bsize = m->mbmi.sb_type;
+
+ // For sub8x8, simply dump out the first sub8x8 block info
+ const PREDICTION_MODE b_mode =
+ (bsize < BLOCK_8X8) ? m->bmi[0].as_mode : -1;
+ const int mv_x = (bsize < BLOCK_8X8) ?
+ m->bmi[0].as_mv[0].as_mv.row : m->mbmi.mv[0].as_mv.row;
+ const int mv_y = (bsize < BLOCK_8X8) ?
+ m->bmi[0].as_mv[0].as_mv.col : m->mbmi.mv[0].as_mv.col;
+
+ printf("Before pack_inter_mode_mvs(): "
+ "Frame=%d, (mi_row,mi_col)=(%d,%d), "
+ "mode=%d, segment_id=%d, bsize=%d, b_mode=%d, "
+ "mv[0]=(%d, %d), ref[0]=%d, ref[1]=%d\n",
+ cm->current_video_frame, mi_row, mi_col,
+ mode, segment_id, bsize, b_mode, mv_x, mv_y,
+ m->mbmi.ref_frame[0], m->mbmi.ref_frame[1]);
+ }
+#endif // 0
+ pack_inter_mode_mvs(cpi, m,
+#if CONFIG_SUPERTX
+ supertx_enabled,
+#endif // CONFIG_SUPERTX
+ w);
+ }
+ // Palette tokens, for each of the two palettes that is in use.
+ for (plane = 0; plane <= 1; ++plane) {
+ if (m->mbmi.palette_mode_info.palette_size[plane] > 0) {
+ const int rows = (4 * num_4x4_blocks_high_lookup[m->mbmi.sb_type]) >>
+ (xd->plane[plane].subsampling_y);
+ const int cols = (4 * num_4x4_blocks_wide_lookup[m->mbmi.sb_type]) >>
+ (xd->plane[plane].subsampling_x);
+ assert(*tok < tok_end);
+ pack_palette_tokens(w, tok, m->mbmi.palette_mode_info.palette_size[plane],
+ rows * cols - 1);
+ assert(*tok < tok_end + m->mbmi.skip);
+ }
+ }
+
+#if CONFIG_SUPERTX
+ if (supertx_enabled) return;
+#endif // CONFIG_SUPERTX
+ // Coefficient tokens; every plane's run must end with an EOSB_TOKEN.
+ if (!m->mbmi.skip) {
+ assert(*tok < tok_end);
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_VAR_TX
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ MB_MODE_INFO *mbmi = &m->mbmi;
+ BLOCK_SIZE bsize = mbmi->sb_type;
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd);
+
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+ int row, col;
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ TX_SIZE tx_size =
+ plane ? get_uv_tx_size(mbmi, &xd->plane[plane]) : mbmi->tx_size;
+
+ if (is_inter_block(mbmi) && tx_size < TX_SIZES) {
+#else
+ if (is_inter_block(mbmi)) {
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+ const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bw = num_4x4_blocks_wide_lookup[txb_size]; // shadows the outer bw
+ int block = 0;
+ const int step = num_4x4_blocks_txsize_lookup[max_tx_size];
+ for (row = 0; row < num_4x4_h; row += bw) {
+ for (col = 0; col < num_4x4_w; col += bw) {
+ pack_txb_tokens(w, tok, tok_end, xd, mbmi, plane, plane_bsize,
+ cm->bit_depth, block, row, col, max_tx_size);
+ block += step;
+ }
+ }
+ } else {
+ TX_SIZE tx = plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane])
+ : m->mbmi.tx_size;
+ BLOCK_SIZE txb_size = txsize_to_bsize[tx];
+ int bw = num_4x4_blocks_wide_lookup[txb_size]; // shadows the outer bw
+ int bh = num_4x4_blocks_high_lookup[txb_size]; // shadows the outer bh
+
+ for (row = 0; row < num_4x4_h; row += bh)
+ for (col = 0; col < num_4x4_w; col += bw)
+ pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+ }
+#else
+ TX_SIZE tx =
+ plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane]) : m->mbmi.tx_size;
+ pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+#endif // CONFIG_VAR_TX
+ assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
+ (*tok)++;
+ }
+ }
+}
+
+// Codes partition type p of the block at (mi_row, mi_col). Where the block
+// crosses the frame edge only a SPLIT flag is coded, or nothing at all.
+static void write_partition(const VP10_COMMON *const cm,
+                            const MACROBLOCKD *const xd, int hbs, int mi_row,
+                            int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize,
+                            vp10_writer *w) {
+  const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
+  const vpx_prob *const probs = cm->fc->partition_prob[ctx];
+  const int rows_fit = mi_row + hbs < cm->mi_rows;
+  const int cols_fit = mi_col + hbs < cm->mi_cols;
+
+  if (rows_fit && cols_fit) {
+#if CONFIG_EXT_PARTITION_TYPES
+    if (bsize > BLOCK_8X8)
+      vp10_write_token(w, vp10_ext_partition_tree, probs,
+                       &ext_partition_encodings[p]);
+    else
+#endif  // CONFIG_EXT_PARTITION_TYPES
+      vp10_write_token(w, vp10_partition_tree, probs, &partition_encodings[p]);
+  } else if (rows_fit) {  // mi_col + hbs is past the last column
+    assert(p == PARTITION_SPLIT || p == PARTITION_VERT);
+    vp10_write(w, p == PARTITION_SPLIT, probs[2]);
+  } else if (cols_fit) {  // mi_row + hbs is past the last row
+    assert(p == PARTITION_SPLIT || p == PARTITION_HORZ);
+    vp10_write(w, p == PARTITION_SPLIT, probs[1]);
+  } else {
+    assert(p == PARTITION_SPLIT);
+  }
+}
+// Calls write_modes_sb(); supertx_enabled is dropped without CONFIG_SUPERTX.
+#if CONFIG_SUPERTX
+#define write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, \
+ mi_row, mi_col, bsize) \
+ write_modes_sb(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row, mi_col, \
+ bsize)
+#else
+#define write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, \
+ mi_row, mi_col, bsize) \
+ write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, bsize)
+#endif // CONFIG_SUPERTX
+// Recursively writes the partition tree of the bsize block at (mi_row, mi_col).
+static void write_modes_sb(VP10_COMP *const cpi, const TileInfo *const tile,
+ vp10_writer *const w, const TOKENEXTRA **tok,
+ const TOKENEXTRA *const tok_end,
+#if CONFIG_SUPERTX
+ int supertx_enabled, // nonzero: an enclosing partition already enabled supertx
+#endif // CONFIG_SUPERTX
+ int mi_row, int mi_col, BLOCK_SIZE bsize) {
+ const VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+ const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
+ const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
+ const BLOCK_SIZE subsize = get_subsize(bsize, partition);
+#if CONFIG_SUPERTX
+ const int mi_offset = mi_row * cm->mi_stride + mi_col;
+ MB_MODE_INFO *mbmi;
+ const int pack_token = !supertx_enabled; // 1 if supertx was still off on entry
+ TX_SIZE supertx_size;
+ int plane;
+#endif // CONFIG_SUPERTX
+ // Nothing is written for a block that starts outside the frame.
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+
+ write_partition(cm, xd, hbs, mi_row, mi_col, partition, bsize, w);
+#if CONFIG_SUPERTX // supertx flag, coded only while supertx_enabled is still 0
+ mbmi = &cm->mi_grid_visible[mi_offset]->mbmi;
+ xd->mi = cm->mi_grid_visible + mi_offset;
+ set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[bsize], mi_col,
+ num_8x8_blocks_wide_lookup[bsize], cm->mi_rows, cm->mi_cols);
+ if (!supertx_enabled && !frame_is_intra_only(cm) &&
+ partition != PARTITION_NONE && bsize <= MAX_SUPERTX_BLOCK_SIZE &&
+ !xd->lossless[0]) {
+ vpx_prob prob;
+ supertx_size = max_txsize_lookup[bsize];
+ prob = cm->fc->supertx_prob[partition_supertx_context_lookup[partition]]
+ [supertx_size];
+ supertx_enabled = (xd->mi[0]->mbmi.tx_size == supertx_size);
+ vp10_write(w, supertx_enabled, prob);
+ }
+#endif // CONFIG_SUPERTX
+ if (subsize < BLOCK_8X8) {
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled, mi_row,
+ mi_col);
+ } else {
+ switch (partition) {
+ case PARTITION_NONE:
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
+ break;
+ case PARTITION_HORZ:
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
+ if (mi_row + hbs < cm->mi_rows)
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row + hbs, mi_col);
+ break;
+ case PARTITION_VERT:
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
+ if (mi_col + hbs < cm->mi_cols)
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col + hbs);
+ break;
+ case PARTITION_SPLIT:
+ write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col, subsize);
+ write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col + hbs, subsize);
+ write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row + hbs, mi_col, subsize);
+ write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row + hbs, mi_col + hbs, subsize);
+ break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col + hbs);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row + hbs, mi_col);
+ break;
+ case PARTITION_HORZ_B:
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row + hbs, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row + hbs, mi_col + hbs);
+ break;
+ case PARTITION_VERT_A:
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row + hbs, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col + hbs);
+ break;
+ case PARTITION_VERT_B:
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row, mi_col + hbs);
+ write_modes_b_wrapper(cpi, tile, w, tok, tok_end, supertx_enabled,
+ mi_row + hbs, mi_col + hbs);
+ break;
+#endif // CONFIG_EXT_PARTITION_TYPES
+ default: assert(0);
+ }
+ }
+#if CONFIG_SUPERTX // the level that enabled supertx codes skip, tx type and tokens
+ if (partition != PARTITION_NONE && supertx_enabled && pack_token) {
+ int skip;
+ xd->mi = cm->mi_grid_visible + mi_offset;
+ supertx_size = mbmi->tx_size;
+ set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[bsize], mi_col,
+ num_8x8_blocks_wide_lookup[bsize], cm->mi_rows, cm->mi_cols);
+
+ assert(IMPLIES(!cm->seg.enabled, mbmi->segment_id_supertx == 0));
+ assert(mbmi->segment_id_supertx < MAX_SEGMENTS);
+
+ skip = write_skip(cm, xd, mbmi->segment_id_supertx, xd->mi[0], w);
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(supertx_size, bsize, 1) > 1 && !skip) {
+ int eset = get_ext_tx_set(supertx_size, bsize, 1);
+ if (eset > 0) {
+ vp10_write_token(w, vp10_ext_tx_inter_tree[eset],
+ cm->fc->inter_ext_tx_prob[eset][supertx_size],
+ &ext_tx_inter_encodings[eset][mbmi->tx_type]);
+ }
+ }
+#else
+ if (supertx_size < TX_32X32 && !skip) {
+ vp10_write_token(w, vp10_ext_tx_tree,
+ cm->fc->inter_ext_tx_prob[supertx_size],
+ &ext_tx_encodings[mbmi->tx_type]);
+ }
+#endif // CONFIG_EXT_TX
+
+ if (!skip) {
+ assert(*tok < tok_end);
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const int mbmi_txb_size = txsize_to_bsize[mbmi->tx_size];
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi_txb_size];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[mbmi_txb_size];
+ int row, col;
+ TX_SIZE tx =
+ plane ? get_uv_tx_size(mbmi, &xd->plane[plane]) : mbmi->tx_size;
+ BLOCK_SIZE txb_size = txsize_to_bsize[tx];
+ int bw = num_4x4_blocks_wide_lookup[txb_size];
+
+ for (row = 0; row < num_4x4_h; row += bw)
+ for (col = 0; col < num_4x4_w; col += bw)
+ pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx);
+ assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
+ (*tok)++;
+ }
+ }
+ }
+#endif // CONFIG_SUPERTX
+
+// update partition context
+#if CONFIG_EXT_PARTITION_TYPES
+ update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
+#else
+ if (bsize >= BLOCK_8X8 &&
+ (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+// NOTE(review): nested in the #else above, so skipped with EXT_PARTITION_TYPES.
+#if DERING_REFINEMENT // TODO confirm the decoder reads the gain the same way
+ if (bsize == BLOCK_64X64 && cm->dering_level != 0 &&
+ !sb_all_skip(cm, mi_row, mi_col)) {
+ vp10_write_literal(
+ w,
+ cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.dering_gain,
+ DERING_REFINEMENT_BITS);
+ }
+#endif // DERING_REFINEMENT
+#endif // CONFIG_EXT_PARTITION_TYPES
+}
+
+// Writes all superblocks of a tile, row by row. The above context is reset
+// once for the tile, the left context at the start of every superblock row.
+static void write_modes(VP10_COMP *const cpi, const TileInfo *const tile,
+                        vp10_writer *const w, const TOKENEXTRA **tok,
+                        const TOKENEXTRA *const tok_end) {
+  VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+  const int first_col = tile->mi_col_start;
+  const int col_limit = tile->mi_col_end;
+  const int row_limit = tile->mi_row_end;
+  int sb_row = tile->mi_row_start;
+
+  vp10_zero_above_context(cm, first_col, col_limit);
+  while (sb_row < row_limit) {
+    int sb_col = first_col;
+    vp10_zero_left_context(xd);
+    for (; sb_col < col_limit; sb_col += cm->mib_size)
+      write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, 0, sb_row, sb_col,
+                             cm->sb_size);
+    sb_row += cm->mib_size;
+  }
+}
+
+// Fills coef_branch_ct and coef_probs for tx_size from the gathered counts.
+static void build_tree_distribution(VP10_COMP *cpi, TX_SIZE tx_size,
+                                    vp10_coeff_stats *coef_branch_ct,
+                                    vp10_coeff_probs_model *coef_probs) {
+  vp10_coeff_count *coef_counts = cpi->td.rd_counts.coef_counts[tx_size];
+  unsigned int(*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
+      cpi->common.counts.eob_branch[tx_size];
+  int plane, ref, band, ctx, node;
+
+  for (plane = 0; plane < PLANE_TYPES; ++plane)
+    for (ref = 0; ref < REF_TYPES; ++ref)
+      for (band = 0; band < COEF_BANDS; ++band)
+        for (ctx = 0; ctx < BAND_COEFF_CONTEXTS(band); ++ctx) {
+          vp10_tree_probs_from_distribution(
+              vp10_coef_tree, coef_branch_ct[plane][ref][band][ctx],
+              coef_counts[plane][ref][band][ctx]);
+          // Node 0's second count is rebuilt from the EOB branch count.
+          coef_branch_ct[plane][ref][band][ctx][0][1] =
+              eob_branch_ct[plane][ref][band][ctx] -
+              coef_branch_ct[plane][ref][band][ctx][0][0];
+          for (node = 0; node < UNCONSTRAINED_NODES; ++node)
+            coef_probs[plane][ref][band][ctx][node] = get_binary_prob(
+                coef_branch_ct[plane][ref][band][ctx][node][0],
+                coef_branch_ct[plane][ref][band][ctx][node][1]);
+        }
+}
+// Writes the coefficient probability updates for tx_size to bc and stores every accepted probability back into cpi->common.fc->coef_probs[tx_size]; the strategy is picked by cpi->sf.use_fast_coef_updates.
+static void update_coef_probs_common(vp10_writer *const bc, VP10_COMP *cpi,
+ TX_SIZE tx_size,
+ vp10_coeff_stats *frame_branch_ct,
+ vp10_coeff_probs_model *new_coef_probs) {
+ vp10_coeff_probs_model *old_coef_probs = cpi->common.fc->coef_probs[tx_size];
+ const vpx_prob upd = DIFF_UPDATE_PROB;
+ const int entropy_nodes_update = UNCONSTRAINED_NODES;
+ int i, j, k, l, t;
+ int stepsize = cpi->sf.coeff_prob_appx_step;
+
+ switch (cpi->sf.use_fast_coef_updates) {
+ case TWO_LOOP: {
+ /* Dry run: estimate the total savings and count how many probabilities would be updated. */
+ int savings = 0;
+ int update[2] = { 0, 0 };
+ for (i = 0; i < PLANE_TYPES; ++i) {
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
+ for (t = 0; t < entropy_nodes_update; ++t) {
+ vpx_prob newp = new_coef_probs[i][j][k][l][t];
+ const vpx_prob oldp = old_coef_probs[i][j][k][l][t];
+ int s;
+ int u = 0;
+ if (t == PIVOT_NODE)  // The pivot node uses the model search, which takes the whole probability row and stepsize.
+ s = vp10_prob_diff_update_savings_search_model(
+ frame_branch_ct[i][j][k][l][0],
+ old_coef_probs[i][j][k][l], &newp, upd, stepsize);
+ else
+ s = vp10_prob_diff_update_savings_search(
+ frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
+ if (s > 0 && newp != oldp) u = 1;
+ if (u)  // Both branches charge vp10_cost_zero(upd) for the flag; only an update adds the savings s.
+ savings += s - (int)(vp10_cost_zero(upd));
+ else
+ savings -= (int)(vp10_cost_zero(upd));
+ update[u]++;
+ }
+ }
+ }
+ }
+ }
+
+ /* Write a single 0 bit and stop if nothing would be updated or the net savings are negative. */
+ if (update[1] == 0 || savings < 0) {
+ vp10_write_bit(bc, 0);
+ return;
+ }
+ vp10_write_bit(bc, 1);
+ for (i = 0; i < PLANE_TYPES; ++i) {
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
+ // Second pass: repeat the search and write each update flag (plus the diff when set).
+ for (t = 0; t < entropy_nodes_update; ++t) {
+ vpx_prob newp = new_coef_probs[i][j][k][l][t];
+ vpx_prob *oldp = old_coef_probs[i][j][k][l] + t;
+ const vpx_prob upd = DIFF_UPDATE_PROB;  // Shadows the identical function-scope upd.
+ int s;
+ int u = 0;
+ if (t == PIVOT_NODE)
+ s = vp10_prob_diff_update_savings_search_model(
+ frame_branch_ct[i][j][k][l][0],
+ old_coef_probs[i][j][k][l], &newp, upd, stepsize);
+ else
+ s = vp10_prob_diff_update_savings_search(
+ frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd);
+ if (s > 0 && newp != *oldp) u = 1;
+ vp10_write(bc, u, upd);
+ if (u) {
+ /* send/use new probability */
+ vp10_write_prob_diff_update(bc, newp, *oldp);
+ *oldp = newp;
+ }
+ }
+ }
+ }
+ }
+ }
+ return;
+ }
+
+ case ONE_LOOP_REDUCED: {
+ int updates = 0;
+ int noupdates_before_first = 0;
+ for (i = 0; i < PLANE_TYPES; ++i) {
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
+ // Single pass: search each node and write its flag once the first update is known.
+ for (t = 0; t < entropy_nodes_update; ++t) {
+ vpx_prob newp = new_coef_probs[i][j][k][l][t];
+ vpx_prob *oldp = old_coef_probs[i][j][k][l] + t;
+ int s;
+ int u = 0;
+
+ if (t == PIVOT_NODE) {
+ s = vp10_prob_diff_update_savings_search_model(
+ frame_branch_ct[i][j][k][l][0],
+ old_coef_probs[i][j][k][l], &newp, upd, stepsize);
+ } else {
+ s = vp10_prob_diff_update_savings_search(
+ frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd);
+ }
+
+ if (s > 0 && newp != *oldp) u = 1;
+ updates += u;
+ if (u == 0 && updates == 0) {  // Withhold leading no-update flags until a first update shows up.
+ noupdates_before_first++;
+ continue;
+ }
+ if (u == 1 && updates == 1) {
+ int v;
+ // First update: write the "has updates" bit, then the withheld 0 flags.
+ vp10_write_bit(bc, 1);
+ for (v = 0; v < noupdates_before_first; ++v)
+ vp10_write(bc, 0, upd);
+ }
+ vp10_write(bc, u, upd);
+ if (u) {
+ /* send/use new probability */
+ vp10_write_prob_diff_update(bc, newp, *oldp);
+ *oldp = newp;
+ }
+ }
+ }
+ }
+ }
+ }
+ if (updates == 0) {
+ vp10_write_bit(bc, 0); // no updates
+ }
+ return;
+ }
+ default: assert(0);  // Any other use_fast_coef_updates value is not handled here.
+ }
+}
+
+#if CONFIG_ENTROPY
+// Calculate the token and EOB-branch counts between subsequent subframe updates: buffer index + 1 minus buffer index, or the current frame counts minus buffer max_idx when index == max_idx. Results are stored in coef_counts and eob_counts.
+static void get_coef_counts_diff(
+ VP10_COMP *cpi, int index,
+ vp10_coeff_count coef_counts[TX_SIZES][PLANE_TYPES],
+ unsigned int eob_counts[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS]
+ [COEFF_CONTEXTS]) {
+ int i, j, k, l, m, tx_size, val;
+ const int max_idx = cpi->common.coef_probs_update_idx;
+ const TX_MODE tx_mode = cpi->common.tx_mode;
+ const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
+ const SUBFRAME_STATS *subframe_stats = &cpi->subframe_stats;
+
+ assert(max_idx < COEF_PROBS_BUFS);
+
+ for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)  // Entries for larger transform sizes are left untouched.
+ for (i = 0; i < PLANE_TYPES; ++i)
+ for (j = 0; j < REF_TYPES; ++j)
+ for (k = 0; k < COEF_BANDS; ++k)
+ for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
+ if (index == max_idx) {  // Last interval: current frame counts minus the last buffered snapshot.
+ val =
+ cpi->common.counts.eob_branch[tx_size][i][j][k][l] -
+ subframe_stats->eob_counts_buf[max_idx][tx_size][i][j][k][l];
+ } else {
+ val = subframe_stats
+ ->eob_counts_buf[index + 1][tx_size][i][j][k][l] -
+ subframe_stats->eob_counts_buf[index][tx_size][i][j][k][l];
+ }
+ assert(val >= 0);
+ eob_counts[tx_size][i][j][k][l] = val;
+
+ for (m = 0; m < ENTROPY_TOKENS; ++m) {  // Same difference, per token.
+ if (index == max_idx) {
+ val = cpi->td.rd_counts.coef_counts[tx_size][i][j][k][l][m] -
+ subframe_stats
+ ->coef_counts_buf[max_idx][tx_size][i][j][k][l][m];
+ } else {
+ val = subframe_stats
+ ->coef_counts_buf[index + 1][tx_size][i][j][k][l][m] -
+ subframe_stats
+ ->coef_counts_buf[index][tx_size][i][j][k][l][m];
+ }
+ assert(val >= 0);
+ coef_counts[tx_size][i][j][k][l][m] = val;
+ }
+ }
+}
+// Subframe counterpart of update_coef_probs_common(): the searches receive the branch counts of buffers 0..max_idx (gathered into this_branch_ct) instead of one frame-level count.
+static void update_coef_probs_subframe(
+ vp10_writer *const bc, VP10_COMP *cpi, TX_SIZE tx_size,
+ vp10_coeff_stats branch_ct[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES],
+ vp10_coeff_probs_model *new_coef_probs) {
+ vp10_coeff_probs_model *old_coef_probs = cpi->common.fc->coef_probs[tx_size];
+ const vpx_prob upd = DIFF_UPDATE_PROB;
+ const int entropy_nodes_update = UNCONSTRAINED_NODES;
+ int i, j, k, l, t;
+ int stepsize = cpi->sf.coeff_prob_appx_step;
+ const int max_idx = cpi->common.coef_probs_update_idx;
+ int idx;
+ unsigned int this_branch_ct[ENTROPY_NODES][COEF_PROBS_BUFS][2];
+
+ switch (cpi->sf.use_fast_coef_updates) {
+ case TWO_LOOP: {
+ /* Dry run: estimate the total savings and count how many probabilities would be updated. */
+ int savings = 0;
+ int update[2] = { 0, 0 };
+ for (i = 0; i < PLANE_TYPES; ++i) {
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
+ for (t = 0; t < ENTROPY_NODES; ++t) {  // Gather this context's branch counts for buffers 0..max_idx.
+ for (idx = 0; idx <= max_idx; ++idx) {
+ memcpy(this_branch_ct[t][idx],
+ branch_ct[idx][tx_size][i][j][k][l][t],
+ 2 * sizeof(this_branch_ct[t][idx][0]));
+ }
+ }
+ for (t = 0; t < entropy_nodes_update; ++t) {
+ vpx_prob newp = new_coef_probs[i][j][k][l][t];
+ const vpx_prob oldp = old_coef_probs[i][j][k][l][t];
+ int s, u = 0;
+
+ if (t == PIVOT_NODE)  // The pivot node uses the model search over all gathered nodes.
+ s = vp10_prob_update_search_model_subframe(
+ this_branch_ct, old_coef_probs[i][j][k][l], &newp, upd,
+ stepsize, max_idx);
+ else
+ s = vp10_prob_update_search_subframe(this_branch_ct[t], oldp,
+ &newp, upd, max_idx);
+ if (s > 0 && newp != oldp) u = 1;
+ if (u)  // Both branches charge vp10_cost_zero(upd) for the flag; only an update adds the savings s.
+ savings += s - (int)(vp10_cost_zero(upd));
+ else
+ savings -= (int)(vp10_cost_zero(upd));
+ update[u]++;
+ }
+ }
+ }
+ }
+ }
+
+ /* Write a single 0 bit and stop if nothing would be updated or the net savings are negative. */
+ if (update[1] == 0 || savings < 0) {
+ vp10_write_bit(bc, 0);
+ return;
+ }
+ vp10_write_bit(bc, 1);
+ for (i = 0; i < PLANE_TYPES; ++i) {
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
+ for (t = 0; t < ENTROPY_NODES; ++t) {
+ for (idx = 0; idx <= max_idx; ++idx) {
+ memcpy(this_branch_ct[t][idx],
+ branch_ct[idx][tx_size][i][j][k][l][t],
+ 2 * sizeof(this_branch_ct[t][idx][0]));
+ }
+ }
+ for (t = 0; t < entropy_nodes_update; ++t) {
+ vpx_prob newp = new_coef_probs[i][j][k][l][t];
+ vpx_prob *oldp = old_coef_probs[i][j][k][l] + t;
+ const vpx_prob upd = DIFF_UPDATE_PROB;  // Shadows the identical function-scope upd.
+ int s;
+ int u = 0;
+
+ if (t == PIVOT_NODE)
+ s = vp10_prob_update_search_model_subframe(
+ this_branch_ct, old_coef_probs[i][j][k][l], &newp, upd,
+ stepsize, max_idx);
+ else
+ s = vp10_prob_update_search_subframe(this_branch_ct[t], *oldp,
+ &newp, upd, max_idx);
+ if (s > 0 && newp != *oldp) u = 1;
+ vp10_write(bc, u, upd);
+ if (u) {
+ /* send/use new probability */
+ vp10_write_prob_diff_update(bc, newp, *oldp);
+ *oldp = newp;
+ }
+ }
+ }
+ }
+ }
+ }
+ return;
+ }
+
+ case ONE_LOOP_REDUCED: {
+ int updates = 0;
+ int noupdates_before_first = 0;
+ for (i = 0; i < PLANE_TYPES; ++i) {
+ for (j = 0; j < REF_TYPES; ++j) {
+ for (k = 0; k < COEF_BANDS; ++k) {
+ for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
+ for (t = 0; t < ENTROPY_NODES; ++t) {
+ for (idx = 0; idx <= max_idx; ++idx) {
+ memcpy(this_branch_ct[t][idx],
+ branch_ct[idx][tx_size][i][j][k][l][t],
+ 2 * sizeof(this_branch_ct[t][idx][0]));
+ }
+ }
+ for (t = 0; t < entropy_nodes_update; ++t) {
+ vpx_prob newp = new_coef_probs[i][j][k][l][t];
+ vpx_prob *oldp = old_coef_probs[i][j][k][l] + t;
+ int s;
+ int u = 0;
+
+ if (t == PIVOT_NODE)
+ s = vp10_prob_update_search_model_subframe(
+ this_branch_ct, old_coef_probs[i][j][k][l], &newp, upd,
+ stepsize, max_idx);
+ else
+ s = vp10_prob_update_search_subframe(this_branch_ct[t], *oldp,
+ &newp, upd, max_idx);
+ if (s > 0 && newp != *oldp) u = 1;
+ updates += u;
+ if (u == 0 && updates == 0) {  // Withhold leading no-update flags until a first update shows up.
+ noupdates_before_first++;
+ continue;
+ }
+ if (u == 1 && updates == 1) {
+ int v;
+ // First update: write the "has updates" bit, then the withheld 0 flags.
+ vp10_write_bit(bc, 1);
+ for (v = 0; v < noupdates_before_first; ++v)
+ vp10_write(bc, 0, upd);
+ }
+ vp10_write(bc, u, upd);
+ if (u) {
+ /* send/use new probability */
+ vp10_write_prob_diff_update(bc, newp, *oldp);
+ *oldp = newp;
+ }
+ }
+ }
+ }
+ }
+ }
+ if (updates == 0) {
+ vp10_write_bit(bc, 0); // no updates
+ }
+ return;
+ }
+ default: assert(0);  // Any other use_fast_coef_updates value is not handled here.
+ }
+}
+#endif // CONFIG_ENTROPY
+// Writes coefficient probability updates to w for every transform size from TX_4X4 up to the largest one allowed by cpi->common.tx_mode.
+static void update_coef_probs(VP10_COMP *cpi, vp10_writer *w) {
+ const TX_MODE tx_mode = cpi->common.tx_mode;
+ const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode];
+ TX_SIZE tx_size;
+#if CONFIG_ANS
+ int update = 0;
+#endif // CONFIG_ANS
+#if CONFIG_ENTROPY
+ VP10_COMMON *cm = &cpi->common;
+ SUBFRAME_STATS *subframe_stats = &cpi->subframe_stats;
+ unsigned int eob_counts_copy[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS]
+ [COEFF_CONTEXTS];
+ int i;
+ vp10_coeff_probs_model dummy_frame_coef_probs[PLANE_TYPES];
+
+ if (cm->do_subframe_update &&
+ cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+ vp10_copy(cpi->common.fc->coef_probs,
+ subframe_stats->enc_starting_coef_probs);
+ for (i = 0; i <= cpi->common.coef_probs_update_idx; ++i) {  // Per-interval count differences go into cpi->wholeframe_stats.
+ get_coef_counts_diff(cpi, i, cpi->wholeframe_stats.coef_counts_buf[i],
+ cpi->wholeframe_stats.eob_counts_buf[i]);
+ }
+ }
+#endif // CONFIG_ENTROPY
+
+ for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) {
+ vp10_coeff_stats frame_branch_ct[PLANE_TYPES];
+ vp10_coeff_probs_model frame_coef_probs[PLANE_TYPES];
+ if (cpi->td.counts->tx_size_totals[tx_size] <= 20 ||  // Too few blocks of this size, or 16x16+ under USE_TX_8X8: signal "no update".
+ (tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8)) {
+ vp10_write_bit(w, 0);
+ } else {
+#if CONFIG_ENTROPY
+ if (cm->do_subframe_update &&
+ cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+ unsigned int  // NOTE: shadows the function-scope eob_counts_copy, which has an extra TX_SIZES dimension.
+ eob_counts_copy[PLANE_TYPES][REF_TYPES][COEF_BANDS][COEFF_CONTEXTS];
+ vp10_coeff_count coef_counts_copy[PLANE_TYPES];
+ vp10_copy(eob_counts_copy, cpi->common.counts.eob_branch[tx_size]);
+ vp10_copy(coef_counts_copy, cpi->td.rd_counts.coef_counts[tx_size]);
+ build_tree_distribution(cpi, tx_size, frame_branch_ct,
+ frame_coef_probs);
+ for (i = 0; i <= cpi->common.coef_probs_update_idx; ++i) {  // Swap each buffered count set into cpi so build_tree_distribution() fills branch_ct_buf[i].
+ vp10_copy(cpi->common.counts.eob_branch[tx_size],
+ cpi->wholeframe_stats.eob_counts_buf[i][tx_size]);
+ vp10_copy(cpi->td.rd_counts.coef_counts[tx_size],
+ cpi->wholeframe_stats.coef_counts_buf[i][tx_size]);
+ build_tree_distribution(cpi, tx_size, cpi->branch_ct_buf[i][tx_size],
+ dummy_frame_coef_probs);
+ }
+ vp10_copy(cpi->common.counts.eob_branch[tx_size], eob_counts_copy);  // Put back the counts saved before the loop.
+ vp10_copy(cpi->td.rd_counts.coef_counts[tx_size], coef_counts_copy);
+
+ update_coef_probs_subframe(w, cpi, tx_size, cpi->branch_ct_buf,
+ frame_coef_probs);
+#if CONFIG_ANS
+ update = 1;
+#endif // CONFIG_ANS
+ } else {
+#endif // CONFIG_ENTROPY
+ build_tree_distribution(cpi, tx_size, frame_branch_ct,
+ frame_coef_probs);
+ update_coef_probs_common(w, cpi, tx_size, frame_branch_ct,
+ frame_coef_probs);
+#if CONFIG_ANS
+ update = 1;
+#endif // CONFIG_ANS
+#if CONFIG_ENTROPY
+ }
+#endif // CONFIG_ENTROPY
+ }
+ }
+
+#if CONFIG_ENTROPY
+ vp10_copy(cm->starting_coef_probs, cm->fc->coef_probs);
+ vp10_copy(subframe_stats->coef_probs_buf[0], cm->fc->coef_probs);
+ if (cm->do_subframe_update &&
+ cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+ vp10_copy(eob_counts_copy, cm->counts.eob_branch);
+ for (i = 1; i <= cpi->common.coef_probs_update_idx; ++i) {  // Run the partial adaptation per buffered interval and record the resulting probabilities.
+ for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
+ vp10_full_to_model_counts(cm->counts.coef[tx_size],
+ subframe_stats->coef_counts_buf[i][tx_size]);
+ vp10_copy(cm->counts.eob_branch, subframe_stats->eob_counts_buf[i]);
+ vp10_partial_adapt_probs(cm, 0, 0);
+ vp10_copy(subframe_stats->coef_probs_buf[i], cm->fc->coef_probs);
+ }
+ vp10_copy(cm->fc->coef_probs, subframe_stats->coef_probs_buf[0]);  // Put back the probabilities and EOB counts saved before the loop.
+ vp10_copy(cm->counts.eob_branch, eob_counts_copy);
+ }
+#endif // CONFIG_ENTROPY
+#if CONFIG_ANS
+ if (update) vp10_coef_pareto_cdfs(cpi->common.fc);  // Only when at least one tx size went through an update function.
+#endif // CONFIG_ANS
+}
+
+#if CONFIG_LOOP_RESTORATION
+static void encode_restoration(VP10_COMMON *cm,
+ struct vpx_write_bit_buffer *wb) {  // Writes cm->rst_info to wb: an on/off bit, a bilateral-vs-Wiener bit, then per-tile parameters.
+ int i;
+ RestorationInfo *rst = &cm->rst_info;
+ vpx_wb_write_bit(wb, rst->restoration_type != RESTORE_NONE);
+ if (rst->restoration_type != RESTORE_NONE) {
+ if (rst->restoration_type == RESTORE_BILATERAL) {
+ vpx_wb_write_bit(wb, 1);  // 1 selects bilateral.
+ for (i = 0; i < cm->rst_internal.ntiles; ++i) {
+ if (rst->bilateral_level[i] >= 0) {  // A negative level is signalled as a single 0 bit.
+ vpx_wb_write_bit(wb, 1);
+ vpx_wb_write_literal(wb, rst->bilateral_level[i],
+ vp10_bilateral_level_bits(cm));
+ } else {
+ vpx_wb_write_bit(wb, 0);
+ }
+ }
+ } else {
+ vpx_wb_write_bit(wb, 0);  // 0 selects the Wiener branch.
+ for (i = 0; i < cm->rst_internal.ntiles; ++i) {
+ if (rst->wiener_level[i]) {  // Three vertical then three horizontal taps, each stored relative to its WIENER_FILT_TAPn_MINV.
+ vpx_wb_write_bit(wb, 1);
+ vpx_wb_write_literal(wb, rst->vfilter[i][0] - WIENER_FILT_TAP0_MINV,
+ WIENER_FILT_TAP0_BITS);
+ vpx_wb_write_literal(wb, rst->vfilter[i][1] - WIENER_FILT_TAP1_MINV,
+ WIENER_FILT_TAP1_BITS);
+ vpx_wb_write_literal(wb, rst->vfilter[i][2] - WIENER_FILT_TAP2_MINV,
+ WIENER_FILT_TAP2_BITS);
+ vpx_wb_write_literal(wb, rst->hfilter[i][0] - WIENER_FILT_TAP0_MINV,
+ WIENER_FILT_TAP0_BITS);
+ vpx_wb_write_literal(wb, rst->hfilter[i][1] - WIENER_FILT_TAP1_MINV,
+ WIENER_FILT_TAP1_BITS);
+ vpx_wb_write_literal(wb, rst->hfilter[i][2] - WIENER_FILT_TAP2_MINV,
+ WIENER_FILT_TAP2_BITS);
+ } else {
+ vpx_wb_write_bit(wb, 0);
+ }
+ }
+ }
+ }
+}
+#endif // CONFIG_LOOP_RESTORATION
+// Writes the loop filter level, sharpness and (when enabled and flagged for update) the changed ref/mode deltas to wb; each written delta is also recorded in last_ref_deltas/last_mode_deltas.
+static void encode_loopfilter(VP10_COMMON *cm,
+ struct vpx_write_bit_buffer *wb) {
+ int i;
+ struct loopfilter *lf = &cm->lf;
+
+ // Encode the loop filter level and type
+ vpx_wb_write_literal(wb, lf->filter_level, 6);
+ vpx_wb_write_literal(wb, lf->sharpness_level, 3);
+
+ // Write out loop filter deltas applied at the MB level based on mode or
+ // ref frame (if they are enabled).
+ vpx_wb_write_bit(wb, lf->mode_ref_delta_enabled);
+
+ if (lf->mode_ref_delta_enabled) {
+ vpx_wb_write_bit(wb, lf->mode_ref_delta_update);
+ if (lf->mode_ref_delta_update) {
+ for (i = 0; i < TOTAL_REFS_PER_FRAME; i++) {
+ const int delta = lf->ref_deltas[i];
+ const int changed = delta != lf->last_ref_deltas[i];  // Only deltas that differ from the last written value are sent.
+ vpx_wb_write_bit(wb, changed);
+ if (changed) {
+ lf->last_ref_deltas[i] = delta;
+ vpx_wb_write_inv_signed_literal(wb, delta, 6);
+ }
+ }
+
+ for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
+ const int delta = lf->mode_deltas[i];
+ const int changed = delta != lf->last_mode_deltas[i];
+ vpx_wb_write_bit(wb, changed);
+ if (changed) {
+ lf->last_mode_deltas[i] = delta;
+ vpx_wb_write_inv_signed_literal(wb, delta, 6);
+ }
+ }
+ }
+ }
+}
+
+#if CONFIG_CLPF
+static void encode_clpf(const VP10_COMMON *cm,
+ struct vpx_write_bit_buffer *wb) {  // Writes cm->clpf to wb as a 1-bit literal.
+ vpx_wb_write_literal(wb, cm->clpf, 1);
+}
+#endif
+
+#if CONFIG_DERING
+static void encode_dering(int level, struct vpx_write_bit_buffer *wb) {  // Writes level to wb as a DERING_LEVEL_BITS-wide literal.
+ vpx_wb_write_literal(wb, level, DERING_LEVEL_BITS);
+}
+#endif // CONFIG_DERING
+// Writes a "nonzero" flag bit and, when delta_q != 0, the value via vpx_wb_write_inv_signed_literal(wb, delta_q, 6).
+static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) {
+ if (delta_q != 0) {
+ vpx_wb_write_bit(wb, 1);
+ vpx_wb_write_inv_signed_literal(wb, delta_q, 6);
+ } else {
+ vpx_wb_write_bit(wb, 0);
+ }
+}
+// Writes cm->base_qindex and the y_dc/uv_dc/uv_ac delta-q values to wb; with CONFIG_AOM_QM also the using_qmatrix flag and, if set, min_qmlevel and max_qmlevel.
+static void encode_quantization(const VP10_COMMON *const cm,
+ struct vpx_write_bit_buffer *wb) {
+ vpx_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS);
+ write_delta_q(wb, cm->y_dc_delta_q);
+ write_delta_q(wb, cm->uv_dc_delta_q);
+ write_delta_q(wb, cm->uv_ac_delta_q);
+#if CONFIG_AOM_QM
+ vpx_wb_write_bit(wb, cm->using_qmatrix);
+ if (cm->using_qmatrix) {
+ vpx_wb_write_literal(wb, cm->min_qmlevel, QM_LEVEL_BITS);
+ vpx_wb_write_literal(wb, cm->max_qmlevel, QM_LEVEL_BITS);
+ }
+#endif
+}
+// Writes the segmentation syntax to wb: the enabled flag, the map-update and temporal-update flags (only signalled on frames that are neither intra-only nor error-resilient) and, when update_data is set, every active segment feature value.
+static void encode_segmentation(VP10_COMMON *cm, MACROBLOCKD *xd,
+ struct vpx_write_bit_buffer *wb) {
+ int i, j;
+ const struct segmentation *seg = &cm->seg;
+
+ vpx_wb_write_bit(wb, seg->enabled);
+ if (!seg->enabled) return;
+
+ // Segmentation map
+ if (!frame_is_intra_only(cm) && !cm->error_resilient_mode) {
+ vpx_wb_write_bit(wb, seg->update_map);
+ } else {
+ assert(seg->update_map == 1);  // Not signalled in this case; the value is expected to be 1.
+ }
+ if (seg->update_map) {
+ // Select the coding strategy (temporal or spatial)
+ vp10_choose_segmap_coding_method(cm, xd);
+
+ // Write out the chosen coding method.
+ if (!frame_is_intra_only(cm) && !cm->error_resilient_mode) {
+ vpx_wb_write_bit(wb, seg->temporal_update);
+ } else {
+ assert(seg->temporal_update == 0);  // Not signalled in this case; the value is expected to be 0.
+ }
+ }
+
+ // Segmentation data
+ vpx_wb_write_bit(wb, seg->update_data);
+ if (seg->update_data) {
+ vpx_wb_write_bit(wb, seg->abs_delta);
+
+ for (i = 0; i < MAX_SEGMENTS; i++) {
+ for (j = 0; j < SEG_LVL_MAX; j++) {
+ const int active = segfeature_active(seg, i, j);
+ vpx_wb_write_bit(wb, active);
+ if (active) {
+ const int data = get_segdata(seg, i, j);
+ const int data_max = vp10_seg_feature_data_max(j);
+
+ if (vp10_is_segfeature_signed(j)) {  // Signed features are sent as magnitude followed by a sign bit.
+ encode_unsigned_max(wb, abs(data), data_max);
+ vpx_wb_write_bit(wb, data < 0);
+ } else {
+ encode_unsigned_max(wb, data, data_max);
+ }
+ }
+ }
+ }
+ }
+}
+// Sends segmentation probability updates to w; returns without writing anything unless segmentation is enabled and the map is being updated.
+static void update_seg_probs(VP10_COMP *cpi, vp10_writer *w) {
+ VP10_COMMON *cm = &cpi->common;
+
+ if (!cm->seg.enabled || !cm->seg.update_map) return;
+
+ if (cm->seg.temporal_update) {  // Temporal coding: update the prediction probs, then the tree probs from the tree_mispred counts.
+ int i;
+
+ for (i = 0; i < PREDICTION_PROBS; i++)
+ vp10_cond_prob_diff_update(w, &cm->fc->seg.pred_probs[i],
+ cm->counts.seg.pred[i]);
+
+ prob_diff_update(vp10_segment_tree, cm->fc->seg.tree_probs,
+ cm->counts.seg.tree_mispred, MAX_SEGMENTS, w);
+ } else {  // Otherwise the tree probs are updated from the tree_total counts.
+ prob_diff_update(vp10_segment_tree, cm->fc->seg.tree_probs,
+ cm->counts.seg.tree_total, MAX_SEGMENTS, w);
+ }
+}
+// Writes a bit telling whether mode is TX_MODE_SELECT and, when it is not, the mode itself via vpx_wb_write_literal(wb, mode, 2).
+static void write_txfm_mode(TX_MODE mode, struct vpx_write_bit_buffer *wb) {
+ vpx_wb_write_bit(wb, mode == TX_MODE_SELECT);
+ if (mode != TX_MODE_SELECT) vpx_wb_write_literal(wb, mode, 2);
+}
+// When cm->tx_mode is TX_MODE_SELECT, calls prob_diff_update() on cm->fc->tx_size_probs for every tree index and context using counts->tx_size; otherwise does nothing.
+static void update_txfm_probs(VP10_COMMON *cm, vp10_writer *w,
+ FRAME_COUNTS *counts) {
+ if (cm->tx_mode == TX_MODE_SELECT) {
+ int i, j;
+ for (i = 0; i < TX_SIZES - 1; ++i)
+ for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
+ prob_diff_update(vp10_tx_size_tree[i], cm->fc->tx_size_probs[i][j],
+ counts->tx_size[i][j], i + 2, w);  // Tree i is passed with size i + 2.
+ }
+}
+// Writes a bit telling whether filter is SWITCHABLE and, when it is not, the filter as a literal of width 2 + CONFIG_EXT_INTERP.
+static void write_interp_filter(INTERP_FILTER filter,
+ struct vpx_write_bit_buffer *wb) {
+ vpx_wb_write_bit(wb, filter == SWITCHABLE);
+ if (filter != SWITCHABLE)
+ vpx_wb_write_literal(wb, filter, 2 + CONFIG_EXT_INTERP);
+}
+// If cm->interp_filter is SWITCHABLE but counts->switchable_interp shows that exactly one filter was used, sets cm->interp_filter to that filter.
+static void fix_interp_filter(VP10_COMMON *cm, FRAME_COUNTS *counts) {
+ if (cm->interp_filter == SWITCHABLE) {
+ // Check to see if only one of the filters is actually used
+ int count[SWITCHABLE_FILTERS];
+ int i, j, c = 0;
+ for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
+ count[i] = 0;
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)  // Total uses of filter i across all contexts.
+ count[i] += counts->switchable_interp[j][i];
+ c += (count[i] > 0);  // c counts the filters with a nonzero total.
+ }
+ if (c == 1) {
+ // Only one filter is used. So set the filter at frame level
+ for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
+ if (count[i]) {
+ cm->interp_filter = i;
+ break;
+ }
+ }
+ }
+ }
+}
+// Writes the tile layout to wb: with CONFIG_EXT_TILE the tile width/height in units of (1 << cm->mib_size_log2), otherwise the log2 tile column and row counts.
+static void write_tile_info(const VP10_COMMON *const cm,
+ struct vpx_write_bit_buffer *wb) {
+#if CONFIG_EXT_TILE
+ const int tile_width =
+ ALIGN_POWER_OF_TWO(cm->tile_width, cm->mib_size_log2) >>
+ cm->mib_size_log2;
+ const int tile_height =
+ ALIGN_POWER_OF_TWO(cm->tile_height, cm->mib_size_log2) >>
+ cm->mib_size_log2;
+
+ assert(tile_width > 0);
+ assert(tile_height > 0);
+
+// Write the tile sizes (minus 1): 5 bits each for 128x128 superblocks, 6 bits each otherwise.
+#if CONFIG_EXT_PARTITION
+ if (cm->sb_size == BLOCK_128X128) {
+ assert(tile_width <= 32);
+ assert(tile_height <= 32);
+ vpx_wb_write_literal(wb, tile_width - 1, 5);
+ vpx_wb_write_literal(wb, tile_height - 1, 5);
+ } else
+#endif // CONFIG_EXT_PARTITION
+ {
+ assert(tile_width <= 64);
+ assert(tile_height <= 64);
+ vpx_wb_write_literal(wb, tile_width - 1, 6);
+ vpx_wb_write_literal(wb, tile_height - 1, 6);
+ }
+#else
+ int min_log2_tile_cols, max_log2_tile_cols, ones;
+ vp10_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
+
+ // columns: one 1 bit per step above min_log2_tile_cols, then a 0 bit unless already at the maximum
+ ones = cm->log2_tile_cols - min_log2_tile_cols;
+ while (ones--) vpx_wb_write_bit(wb, 1);
+
+ if (cm->log2_tile_cols < max_log2_tile_cols) vpx_wb_write_bit(wb, 0);
+
+ // rows: a "nonzero" bit, then (if nonzero) a bit telling whether log2_tile_rows differs from 1
+ vpx_wb_write_bit(wb, cm->log2_tile_rows != 0);
+ if (cm->log2_tile_rows != 0) vpx_wb_write_bit(wb, cm->log2_tile_rows != 1);
+#endif // CONFIG_EXT_TILE
+}
+// Builds the frame buffer refresh bitmask: each cpi->refresh_* flag is shifted left by the index of the buffer slot it applies to and OR-ed into the result.
+static int get_refresh_mask(VP10_COMP *cpi) {
+ int refresh_mask = 0;
+
+#if CONFIG_EXT_REFS
+ // NOTE(zoeliu): When LAST_FRAME is to get refreshed, the decoder will be
+ // notified to get LAST3_FRAME refreshed and then the virtual indexes for all
+ // the 3 LAST reference frames will be updated accordingly, i.e.:
+ // (1) The original virtual index for LAST3_FRAME will become the new virtual
+ // index for LAST_FRAME; and
+ // (2) The original virtual indexes for LAST_FRAME and LAST2_FRAME will be
+ // shifted and become the new virtual indexes for LAST2_FRAME and
+ // LAST3_FRAME.
+ refresh_mask |=
+ (cpi->refresh_last_frame << cpi->lst_fb_idxes[LAST_REF_FRAMES - 1]);
+ if (cpi->rc.is_bwd_ref_frame && cpi->num_extra_arfs) {
+ // We have swapped the virtual indices
+ refresh_mask |= (cpi->refresh_bwd_ref_frame << cpi->arf_map[0]);
+ } else {
+ refresh_mask |= (cpi->refresh_bwd_ref_frame << cpi->bwd_fb_idx);
+ }
+#else
+ refresh_mask |= (cpi->refresh_last_frame << cpi->lst_fb_idx);
+#endif // CONFIG_EXT_REFS
+
+ if (vp10_preserve_existing_gf(cpi)) {
+ // We have decided to preserve the previously existing golden frame as our
+ // new ARF frame. However, in the short term we leave it in the GF slot and,
+ // if we're updating the GF with the current decoded frame, we save it
+ // instead to the ARF slot.
+ // Later, in the function vp10_encoder.c:vp10_update_reference_frames() we
+ // will swap gld_fb_idx and alt_fb_idx to achieve our objective. We do it
+ // there so that it can be done outside of the recode loop.
+ // Note: This is highly specific to the use of ARF as a forward reference,
+ // and this needs to be generalized as other uses are implemented
+ // (like RTC/temporal scalability).
+ return refresh_mask | (cpi->refresh_golden_frame << cpi->alt_fb_idx);
+ } else {
+ int arf_idx = cpi->alt_fb_idx;
+#if CONFIG_EXT_REFS
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ arf_idx = cpi->arf_map[gf_group->arf_update_idx[gf_group->index]];  // With EXT_REFS the ARF slot always comes from arf_map.
+#else
+ if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {  // Two-pass multi-ARF: take the slot from the GF group instead of alt_fb_idx.
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ arf_idx = gf_group->arf_update_idx[gf_group->index];
+ }
+#endif // CONFIG_EXT_REFS
+ return refresh_mask | (cpi->refresh_golden_frame << cpi->gld_fb_idx) |
+ (cpi->refresh_alt_ref_frame << arf_idx);
+ }
+}
+
+#if CONFIG_EXT_TILE
+static INLINE int find_identical_tile(  // Returns the row offset (> 0) to an identical tile above in the same column, or 0 when none is found.
+ const int tile_row, const int tile_col,
+ TileBufferEnc (*const tile_buffers)[MAX_TILE_COLS]) {
+ const MV32 candidate_offset[1] = { { 1, 0 } };
+ const uint8_t *const cur_tile_data =
+ tile_buffers[tile_row][tile_col].data + 4;  // Payload starts after the 4-byte tile header.
+ const unsigned int cur_tile_size = tile_buffers[tile_row][tile_col].size;
+
+ int i;
+
+ if (tile_row == 0) return 0;
+
+ // (TODO: yunqingwang) For now, only above tile is checked and used.
+ // More candidates such as left tile can be added later.
+ for (i = 0; i < 1; i++) {
+ int row_offset = candidate_offset[0].row;
+ int col_offset = candidate_offset[0].col;
+ int row = tile_row - row_offset;
+ int col = tile_col - col_offset;
+ uint8_t tile_hdr;
+ const uint8_t *tile_data;
+ TileBufferEnc *candidate;
+
+ if (row < 0 || col < 0) continue;
+
+ tile_hdr = tile_buffers[row][col].data[3];  // Header is a little-endian 32-bit word with the copy flag/offset in bits 31..24, i.e. byte 3 (byte 0 is the low byte of the size).
+
+ // Read out tcm (tile copy mode) bit
+ if ((tile_hdr >> 7) == 1) {
+ // The candidate is a copy tile itself; follow it to the tile it copies.
+ row_offset += tile_hdr & 0x7f;
+ row = tile_row - row_offset;
+ }
+
+ candidate = &tile_buffers[row][col];
+
+ if (row_offset >= 128 || candidate->size != cur_tile_size) continue;  // The offset must fit in the 7 bits below the copy flag.
+
+ tile_data = candidate->data + 4;
+
+ if (memcmp(tile_data, cur_tile_data, cur_tile_size) != 0) continue;
+
+ // Identical tile found
+ assert(row_offset > 0);
+ return row_offset;
+ }
+
+ // No identical tile found
+ return 0;
+}
+#endif // CONFIG_EXT_TILE
+// Encodes every tile into dst and returns the total number of bytes written; *max_tile_size and *max_tile_col_size receive the largest tile / tile-column sizes recorded along the way.
+static uint32_t write_tiles(VP10_COMP *const cpi, uint8_t *const dst,
+ unsigned int *max_tile_size,
+ unsigned int *max_tile_col_size) {
+ const VP10_COMMON *const cm = &cpi->common;
+#if CONFIG_ANS
+ struct AnsCoder token_ans;
+#else
+ vp10_writer mode_bc;
+#endif // CONFIG_ANS
+ int tile_row, tile_col;
+ TOKENEXTRA *(*const tok_buffers)[MAX_TILE_COLS] = cpi->tile_tok;
+ TileBufferEnc (*const tile_buffers)[MAX_TILE_COLS] = cpi->tile_buffers;
+ size_t total_size = 0;
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
+#if CONFIG_EXT_TILE
+ const int have_tiles = tile_cols * tile_rows > 1;
+#endif // CONFIG_EXT_TILE
+#if CONFIG_ANS
+ BufAnsCoder *buf_ans = &cpi->buf_ans;
+#endif // CONFIG_ANS
+
+ *max_tile_size = 0;
+ *max_tile_col_size = 0;
+
+// All tile size fields are output on 4 bytes. A call to remux_tiles will
+// later compact the data if smaller headers are adequate.
+
+#if CONFIG_EXT_TILE
+ for (tile_col = 0; tile_col < tile_cols; tile_col++) {  // EXT_TILE layout: tiles are written column by column.
+ TileInfo tile_info;
+ const int is_last_col = (tile_col == tile_cols - 1);
+ const size_t col_offset = total_size;
+
+ vp10_tile_set_col(&tile_info, cm, tile_col);
+
+ // The last column does not have a column header
+ if (!is_last_col) total_size += 4;
+
+ for (tile_row = 0; tile_row < tile_rows; tile_row++) {
+ TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
+ unsigned int tile_size;
+ const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
+ const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
+ const int data_offset = have_tiles ? 4 : 0;
+
+ vp10_tile_set_row(&tile_info, cm, tile_row);
+
+ buf->data = dst + total_size;
+
+ // If CONFIG_EXT_TILE = 1, every tile in the row has a header,
+ // even for the last one, unless no tiling is used at all.
+ total_size += data_offset;
+#if !CONFIG_ANS
+ vpx_start_encode(&mode_bc, buf->data + data_offset);
+ write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
+ assert(tok == tok_end);
+ vpx_stop_encode(&mode_bc);
+ tile_size = mode_bc.pos;
+#else
+ buf_ans_write_reset(buf_ans);
+ write_modes(cpi, &tile_info, buf_ans, &tok, tok_end);
+ assert(tok == tok_end);
+ ans_write_init(&token_ans, buf->data + data_offset);
+ buf_ans_flush(buf_ans, &token_ans);
+ tile_size = ans_write_end(&token_ans);
+#endif // !CONFIG_ANS
+
+ buf->size = tile_size;
+
+ // Record the maximum tile size we see, so we can compact headers later.
+ *max_tile_size = VPXMAX(*max_tile_size, tile_size);
+
+ if (have_tiles) {
+ // tile header: size of this tile, or copy offset
+ uint32_t tile_header = tile_size;
+
+ // Check if this tile is a copy tile.
+ // Very low chances to have copy tiles on the key frames, so don't
+ // search on key frames to reduce unnecessary search.
+ if (cm->frame_type != KEY_FRAME) {
+ const int idendical_tile_offset =
+ find_identical_tile(tile_row, tile_col, tile_buffers);
+
+ if (idendical_tile_offset > 0) {
+ tile_size = 0;  // A copy tile contributes only its header; total_size does not advance past the payload.
+ tile_header = idendical_tile_offset | 0x80;  // Copy flag (0x80) plus row offset, moved into the top byte below.
+ tile_header <<= 24;
+ }
+ }
+
+ mem_put_le32(buf->data, tile_header);
+ }
+
+ total_size += tile_size;
+ }
+
+ if (!is_last_col) {
+ size_t col_size = total_size - col_offset - 4;
+ mem_put_le32(dst + col_offset, col_size);
+
+ // Record the maximum tile column size we see (the column-level
+ // counterpart of *max_tile_size above).
+ *max_tile_col_size = VPXMAX(*max_tile_col_size, col_size);
+ }
+ }
+#else
+ for (tile_row = 0; tile_row < tile_rows; tile_row++) {  // Default layout: tiles are written row by row.
+ TileInfo tile_info;
+ const int is_last_row = (tile_row == tile_rows - 1);
+
+ vp10_tile_set_row(&tile_info, cm, tile_row);
+
+ for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+ TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
+ const int is_last_col = (tile_col == tile_cols - 1);
+ const int is_last_tile = is_last_col && is_last_row;
+ unsigned int tile_size;
+ const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
+ const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
+
+ vp10_tile_set_col(&tile_info, cm, tile_col);
+
+ buf->data = dst + total_size;
+
+ // The last tile does not have a header.
+ if (!is_last_tile) total_size += 4;
+
+#if !CONFIG_ANS
+ vpx_start_encode(&mode_bc, dst + total_size);
+ write_modes(cpi, &tile_info, &mode_bc, &tok, tok_end);
+ assert(tok == tok_end);
+ vpx_stop_encode(&mode_bc);
+ tile_size = mode_bc.pos;
+#else
+ buf_ans_write_reset(buf_ans);
+ write_modes(cpi, &tile_info, buf_ans, &tok, tok_end);
+ assert(tok == tok_end);
+ ans_write_init(&token_ans, dst + total_size);
+ buf_ans_flush(buf_ans, &token_ans);
+ tile_size = ans_write_end(&token_ans);
+#endif // !CONFIG_ANS
+
+ assert(tile_size > 0);
+
+ buf->size = tile_size;
+
+ if (!is_last_tile) {  // The last tile is excluded from *max_tile_size because it has no size field.
+ *max_tile_size = VPXMAX(*max_tile_size, tile_size);
+ // size of this tile
+ mem_put_le32(buf->data, tile_size);
+ }
+
+ total_size += tile_size;
+ }
+ }
+#endif // CONFIG_EXT_TILE
+ return (uint32_t)total_size;
+}
+// Writes a flag telling whether the render size differs from the coded size and, if it does, render_width - 1 and render_height - 1 as 16-bit literals.
+static void write_render_size(const VP10_COMMON *cm,
+ struct vpx_write_bit_buffer *wb) {
+ const int scaling_active =
+ cm->width != cm->render_width || cm->height != cm->render_height;
+ vpx_wb_write_bit(wb, scaling_active);
+ if (scaling_active) {
+ vpx_wb_write_literal(wb, cm->render_width - 1, 16);
+ vpx_wb_write_literal(wb, cm->render_height - 1, 16);
+ }
+}
+// Writes cm->width - 1 and cm->height - 1 as 16-bit literals, followed by the render size.
+static void write_frame_size(const VP10_COMMON *cm,
+ struct vpx_write_bit_buffer *wb) {
+ vpx_wb_write_literal(wb, cm->width - 1, 16);
+ vpx_wb_write_literal(wb, cm->height - 1, 16);
+
+ write_render_size(cm, wb);
+}
+// For each reference from LAST_FRAME to ALTREF_FRAME writes one bit telling whether its buffer matches the current coded and render size, stopping at the first match; when nothing matches, the size is written explicitly.
+static void write_frame_size_with_refs(VP10_COMP *cpi,
+ struct vpx_write_bit_buffer *wb) {
+ VP10_COMMON *const cm = &cpi->common;
+ int found = 0;
+
+ MV_REFERENCE_FRAME ref_frame;
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, ref_frame);
+
+ if (cfg != NULL) {  // A missing buffer leaves found at 0 (the loop exits as soon as it becomes 1).
+ found =
+ cm->width == cfg->y_crop_width && cm->height == cfg->y_crop_height;
+ found &= cm->render_width == cfg->render_width &&
+ cm->render_height == cfg->render_height;
+ }
+ vpx_wb_write_bit(wb, found);
+ if (found) {
+ break;
+ }
+ }
+
+ if (!found) {
+ vpx_wb_write_literal(wb, cm->width - 1, 16);
+ vpx_wb_write_literal(wb, cm->height - 1, 16);
+ write_render_size(cm, wb);
+ }
+}
+// Writes the three sync code values VP10_SYNC_CODE_0, _1 and _2 to wb as 8-bit literals.
+static void write_sync_code(struct vpx_write_bit_buffer *wb) {
+ vpx_wb_write_literal(wb, VP10_SYNC_CODE_0, 8);
+ vpx_wb_write_literal(wb, VP10_SYNC_CODE_1, 8);
+ vpx_wb_write_literal(wb, VP10_SYNC_CODE_2, 8);
+}
+// Writes the bitstream profile to wb: a 2-bit code for PROFILE_0..PROFILE_2 and a 3-bit code for PROFILE_3; any other value trips the assert.
+static void write_profile(BITSTREAM_PROFILE profile,
+ struct vpx_write_bit_buffer *wb) {
+ switch (profile) {
+ case PROFILE_0: vpx_wb_write_literal(wb, 0, 2); break;
+ case PROFILE_1: vpx_wb_write_literal(wb, 2, 2); break;  // Note: PROFILE_1 is coded as 2 and PROFILE_2 as 1.
+ case PROFILE_2: vpx_wb_write_literal(wb, 1, 2); break;
+ case PROFILE_3: vpx_wb_write_literal(wb, 6, 3); break;
+ default: assert(0);
+ }
+}
+// Writes the bit depth flag (profiles >= PROFILE_2 only), the color space and, for non-sRGB content, the color range plus (on PROFILE_1/PROFILE_3) the chroma subsampling flags.
+static void write_bitdepth_colorspace_sampling(
+ VP10_COMMON *const cm, struct vpx_write_bit_buffer *wb) {
+ if (cm->profile >= PROFILE_2) {
+ assert(cm->bit_depth > VPX_BITS_8);
+ vpx_wb_write_bit(wb, cm->bit_depth == VPX_BITS_10 ? 0 : 1);  // 0 for VPX_BITS_10, 1 for any other depth above 8 bits.
+ }
+ vpx_wb_write_literal(wb, cm->color_space, 3);
+ if (cm->color_space != VPX_CS_SRGB) {
+ // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
+ vpx_wb_write_bit(wb, cm->color_range);
+ if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
+ assert(cm->subsampling_x != 1 || cm->subsampling_y != 1);  // These profiles are expected not to be subsampled in both directions.
+ vpx_wb_write_bit(wb, cm->subsampling_x);
+ vpx_wb_write_bit(wb, cm->subsampling_y);
+ vpx_wb_write_bit(wb, 0); // unused
+ } else {
+ assert(cm->subsampling_x == 1 && cm->subsampling_y == 1);  // Nothing is written; both subsampling flags are expected to be 1.
+ }
+ } else {
+ assert(cm->profile == PROFILE_1 || cm->profile == PROFILE_3);  // sRGB is only expected with PROFILE_1 or PROFILE_3.
+ vpx_wb_write_bit(wb, 0); // unused
+ }
+}
+
+static void write_uncompressed_header(VP10_COMP *cpi,
+ struct vpx_write_bit_buffer *wb) { // Writes the uncompressed frame header into wb; also updates some cm/cpi state on the way.
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+
+ vpx_wb_write_literal(wb, VPX_FRAME_MARKER, 2);
+
+ write_profile(cm->profile, wb);
+
+#if CONFIG_EXT_REFS
+ // NOTE: By default all coded frames to be used as a reference
+ cm->is_reference_frame = 1;
+
+ if (cm->show_existing_frame) {
+ RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+ const int frame_to_show = cm->ref_frame_map[cpi->existing_fb_idx_to_show];
+
+ if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) {
+ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
+ "Buffer %d does not contain a reconstructed frame",
+ frame_to_show);
+ }
+ ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show);
+
+ vpx_wb_write_bit(wb, 1); // show_existing_frame
+ vpx_wb_write_literal(wb, cpi->existing_fb_idx_to_show, 3);
+
+ return; // a shown existing frame carries no further header fields
+ } else {
+#endif // CONFIG_EXT_REFS
+ vpx_wb_write_bit(wb, 0); // show_existing_frame
+#if CONFIG_EXT_REFS
+ }
+#endif // CONFIG_EXT_REFS
+
+ vpx_wb_write_bit(wb, cm->frame_type);
+ vpx_wb_write_bit(wb, cm->show_frame);
+ vpx_wb_write_bit(wb, cm->error_resilient_mode);
+
+ if (cm->frame_type == KEY_FRAME) {
+ write_sync_code(wb);
+ write_bitdepth_colorspace_sampling(cm, wb);
+ write_frame_size(cm, wb);
+ if (frame_is_intra_only(cm))
+ vpx_wb_write_bit(wb, cm->allow_screen_content_tools);
+ } else {
+ if (!cm->show_frame) vpx_wb_write_bit(wb, cm->intra_only); // intra_only is only written for frames that are not shown
+
+ if (!cm->error_resilient_mode) {
+ if (cm->intra_only) {
+ vpx_wb_write_bit(wb,
+ cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL);
+ } else {
+ vpx_wb_write_bit(wb,
+ cm->reset_frame_context != RESET_FRAME_CONTEXT_NONE);
+ if (cm->reset_frame_context != RESET_FRAME_CONTEXT_NONE)
+ vpx_wb_write_bit(wb,
+ cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL);
+ }
+ }
+
+#if CONFIG_EXT_REFS
+ cpi->refresh_frame_mask = get_refresh_mask(cpi); // cached so both branches below write the same mask
+#endif // CONFIG_EXT_REFS
+
+ if (cm->intra_only) {
+ write_sync_code(wb);
+ write_bitdepth_colorspace_sampling(cm, wb);
+
+#if CONFIG_EXT_REFS
+ vpx_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
+#else
+ vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+#endif // CONFIG_EXT_REFS
+ write_frame_size(cm, wb);
+ } else {
+ MV_REFERENCE_FRAME ref_frame;
+
+#if CONFIG_EXT_REFS
+ vpx_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES);
+#else
+ vpx_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES);
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+ if (!cpi->refresh_frame_mask) {
+ // NOTE: "cpi->refresh_frame_mask == 0" indicates that the coded frame
+ // will not be used as a reference
+ cm->is_reference_frame = 0;
+ }
+#endif // CONFIG_EXT_REFS
+
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { // per reference: buffer map index, then sign bias
+ assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX);
+ vpx_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame),
+ REF_FRAMES_LOG2);
+ vpx_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]);
+ }
+
+ write_frame_size_with_refs(cpi, wb);
+
+ vpx_wb_write_bit(wb, cm->allow_high_precision_mv);
+
+ fix_interp_filter(cm, cpi->td.counts);
+ write_interp_filter(cm->interp_filter, wb);
+ }
+ }
+
+ if (!cm->error_resilient_mode) {
+ vpx_wb_write_bit(
+ wb, cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_FORWARD);
+ }
+
+ vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);
+
+ assert(cm->mib_size == num_8x8_blocks_wide_lookup[cm->sb_size]);
+ assert(cm->mib_size == 1 << cm->mib_size_log2);
+#if CONFIG_EXT_PARTITION
+ assert(cm->sb_size == BLOCK_128X128 || cm->sb_size == BLOCK_64X64);
+ vpx_wb_write_bit(wb, cm->sb_size == BLOCK_128X128 ? 1 : 0);
+#else
+ assert(cm->sb_size == BLOCK_64X64); // no superblock-size bit is written in this configuration
+#endif // CONFIG_EXT_PARTITION
+
+ encode_loopfilter(cm, wb);
+#if CONFIG_CLPF
+ encode_clpf(cm, wb);
+#endif // CONFIG_CLPF
+#if CONFIG_DERING
+ encode_dering(cm->dering_level, wb);
+#endif // CONFIG_DERING
+#if CONFIG_LOOP_RESTORATION
+ encode_restoration(cm, wb);
+#endif // CONFIG_LOOP_RESTORATION
+ encode_quantization(cm, wb);
+ encode_segmentation(cm, xd, wb);
+ if (!cm->seg.enabled && xd->lossless[0])
+ cm->tx_mode = TX_4X4; // forced here; no tx mode is written in this case
+ else
+ write_txfm_mode(cm->tx_mode, wb);
+
+ if (cpi->allow_comp_inter_inter) {
+ const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
+ const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE;
+
+ vpx_wb_write_bit(wb, use_hybrid_pred);
+ if (!use_hybrid_pred) vpx_wb_write_bit(wb, use_compound_pred); // second bit only when the mode is not REFERENCE_MODE_SELECT
+ }
+
+ write_tile_info(cm, wb);
+}
+
+#if CONFIG_GLOBAL_MOTION
+static void write_global_motion_params(Global_Motion_Params *params,
+ vpx_prob *probs, vp10_writer *w) { // Writes the motion type token, then the wmmat entries that type uses.
+ GLOBAL_MOTION_TYPE gmtype = get_gmtype(params);
+ vp10_write_token(w, vp10_global_motion_types_tree, probs,
+ &global_motion_types_encodings[gmtype]);
+ switch (gmtype) {
+ case GLOBAL_ZERO: break; // no parameters follow the type token
+ case GLOBAL_AFFINE: // writes wmmat[4] and wmmat[5], then everything ROTZOOM writes
+ vp10_write_primitive_symmetric(
+ w, params->motion_params.wmmat[4] >> GM_ALPHA_PREC_DIFF,
+ GM_ABS_ALPHA_BITS);
+ vp10_write_primitive_symmetric(
+ w, (params->motion_params.wmmat[5] >> GM_ALPHA_PREC_DIFF) -
+ (1 << GM_ALPHA_PREC_BITS),
+ GM_ABS_ALPHA_BITS);
+ // fallthrough intended
+ case GLOBAL_ROTZOOM: // writes wmmat[2] and wmmat[3], then everything TRANSLATION writes
+ vp10_write_primitive_symmetric(
+ w, (params->motion_params.wmmat[2] >> GM_ALPHA_PREC_DIFF) -
+ (1 << GM_ALPHA_PREC_BITS),
+ GM_ABS_ALPHA_BITS);
+ vp10_write_primitive_symmetric(
+ w, params->motion_params.wmmat[3] >> GM_ALPHA_PREC_DIFF,
+ GM_ABS_ALPHA_BITS);
+ // fallthrough intended
+ case GLOBAL_TRANSLATION: // writes wmmat[0] and wmmat[1]
+ vp10_write_primitive_symmetric(
+ w, params->motion_params.wmmat[0] >> GM_TRANS_PREC_DIFF,
+ GM_ABS_TRANS_BITS);
+ vp10_write_primitive_symmetric(
+ w, params->motion_params.wmmat[1] >> GM_TRANS_PREC_DIFF,
+ GM_ABS_TRANS_BITS);
+ break;
+ default: assert(0); // unknown motion type is a programming error
+ }
+}
+
+static void write_global_motion(VP10_COMP *cpi, vp10_writer *w) { // Writes global motion parameters for LAST_FRAME..ALTREF_FRAME.
+ VP10_COMMON *const cm = &cpi->common;
+ int frame;
+ for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
+ if (!cpi->global_motion_used[frame]) {
+ memset(&cm->global_motion[frame], 0, sizeof(*cm->global_motion)); // zeroes cm state for unused references before writing
+ }
+ write_global_motion_params(&cm->global_motion[frame],
+ cm->fc->global_motion_types_prob, w);
+ }
+}
+#endif
+
+static uint32_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) { // Writes the compressed header at data and returns its size.
+ VP10_COMMON *const cm = &cpi->common;
+#if CONFIG_SUPERTX
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+#endif // CONFIG_SUPERTX
+ FRAME_CONTEXT *const fc = cm->fc;
+ FRAME_COUNTS *counts = cpi->td.counts;
+ vp10_writer *header_bc;
+ int i, j;
+
+#if CONFIG_ANS
+ struct AnsCoder header_ans;
+ int header_size;
+ header_bc = &cpi->buf_ans; // symbols are buffered here and flushed into data at the end
+ buf_ans_write_reset(header_bc);
+#else
+ vp10_writer real_header_bc;
+ header_bc = &real_header_bc;
+ vpx_start_encode(header_bc, data);
+#endif // CONFIG_ANS
+ update_txfm_probs(cm, header_bc, counts);
+ update_coef_probs(cpi, header_bc);
+
+#if CONFIG_VAR_TX
+ update_txfm_partition_probs(cm, header_bc, counts);
+#endif // CONFIG_VAR_TX
+
+ update_skip_probs(cm, header_bc, counts);
+ update_seg_probs(cpi, header_bc);
+
+ for (i = 0; i < INTRA_MODES; ++i)
+ prob_diff_update(vp10_intra_mode_tree, fc->uv_mode_prob[i],
+ counts->uv_mode[i], INTRA_MODES, header_bc);
+
+#if CONFIG_EXT_PARTITION_TYPES
+ prob_diff_update(vp10_partition_tree, fc->partition_prob[0],
+ counts->partition[0], PARTITION_TYPES, header_bc); // context 0 uses the basic partition tree
+ for (i = 1; i < PARTITION_CONTEXTS; ++i)
+ prob_diff_update(vp10_ext_partition_tree, fc->partition_prob[i],
+ counts->partition[i], EXT_PARTITION_TYPES, header_bc);
+#else
+ for (i = 0; i < PARTITION_CONTEXTS; ++i)
+ prob_diff_update(vp10_partition_tree, fc->partition_prob[i],
+ counts->partition[i], PARTITION_TYPES, header_bc);
+#endif // CONFIG_EXT_PARTITION_TYPES
+
+#if CONFIG_EXT_INTRA
+ for (i = 0; i < INTRA_FILTERS + 1; ++i)
+ prob_diff_update(vp10_intra_filter_tree, fc->intra_filter_probs[i],
+ counts->intra_filter[i], INTRA_FILTERS, header_bc);
+#endif // CONFIG_EXT_INTRA
+
+ if (frame_is_intra_only(cm)) {
+ vp10_copy(cm->kf_y_prob, vp10_kf_y_mode_prob); // reload kf_y_prob from vp10_kf_y_mode_prob before coding updates
+ for (i = 0; i < INTRA_MODES; ++i)
+ for (j = 0; j < INTRA_MODES; ++j)
+ prob_diff_update(vp10_intra_mode_tree, cm->kf_y_prob[i][j],
+ counts->kf_y_mode[i][j], INTRA_MODES, header_bc);
+ } else {
+#if CONFIG_REF_MV
+ update_inter_mode_probs(cm, header_bc, counts);
+#else
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+ prob_diff_update(vp10_inter_mode_tree, cm->fc->inter_mode_probs[i],
+ counts->inter_mode[i], INTER_MODES, header_bc);
+#endif // CONFIG_REF_MV
+
+#if CONFIG_EXT_INTER
+ update_inter_compound_mode_probs(cm, header_bc);
+
+ if (cm->reference_mode != COMPOUND_REFERENCE) {
+ for (i = 0; i < BLOCK_SIZE_GROUPS; i++) {
+ if (is_interintra_allowed_bsize_group(i)) {
+ vp10_cond_prob_diff_update(header_bc, &fc->interintra_prob[i],
+ cm->counts.interintra[i]); // NOTE(review): reads cm->counts, not the local counts (cpi->td.counts) - confirm intended
+ }
+ }
+ for (i = 0; i < BLOCK_SIZE_GROUPS; i++) {
+ prob_diff_update(
+ vp10_interintra_mode_tree, cm->fc->interintra_mode_prob[i],
+ counts->interintra_mode[i], INTERINTRA_MODES, header_bc);
+ }
+ for (i = 0; i < BLOCK_SIZES; i++) {
+ if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i))
+ vp10_cond_prob_diff_update(header_bc, &fc->wedge_interintra_prob[i],
+ cm->counts.wedge_interintra[i]);
+ }
+ }
+ if (cm->reference_mode != SINGLE_REFERENCE) {
+ for (i = 0; i < BLOCK_SIZES; i++)
+ if (is_interinter_wedge_used(i))
+ vp10_cond_prob_diff_update(header_bc, &fc->wedge_interinter_prob[i],
+ cm->counts.wedge_interinter[i]);
+ }
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ for (i = BLOCK_8X8; i < BLOCK_SIZES; ++i)
+ prob_diff_update(vp10_motvar_tree, fc->motvar_prob[i], counts->motvar[i],
+ MOTION_VARIATIONS, header_bc);
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+ if (cm->interp_filter == SWITCHABLE)
+ update_switchable_interp_probs(cm, header_bc, counts);
+
+ for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
+ vp10_cond_prob_diff_update(header_bc, &fc->intra_inter_prob[i],
+ counts->intra_inter[i]);
+
+ if (cpi->allow_comp_inter_inter) {
+ const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT;
+ if (use_hybrid_pred)
+ for (i = 0; i < COMP_INTER_CONTEXTS; i++)
+ vp10_cond_prob_diff_update(header_bc, &fc->comp_inter_prob[i],
+ counts->comp_inter[i]);
+ }
+
+ if (cm->reference_mode != COMPOUND_REFERENCE) { // single-reference probabilities
+ for (i = 0; i < REF_CONTEXTS; i++) {
+ for (j = 0; j < (SINGLE_REFS - 1); j++) {
+ vp10_cond_prob_diff_update(header_bc, &fc->single_ref_prob[i][j],
+ counts->single_ref[i][j]);
+ }
+ }
+ }
+
+ if (cm->reference_mode != SINGLE_REFERENCE) { // compound-reference probabilities
+ for (i = 0; i < REF_CONTEXTS; i++) {
+#if CONFIG_EXT_REFS
+ for (j = 0; j < (FWD_REFS - 1); j++) {
+ vp10_cond_prob_diff_update(header_bc, &fc->comp_ref_prob[i][j],
+ counts->comp_ref[i][j]);
+ }
+ for (j = 0; j < (BWD_REFS - 1); j++) {
+ vp10_cond_prob_diff_update(header_bc, &fc->comp_bwdref_prob[i][j],
+ counts->comp_bwdref[i][j]);
+ }
+#else
+ for (j = 0; j < (COMP_REFS - 1); j++) {
+ vp10_cond_prob_diff_update(header_bc, &fc->comp_ref_prob[i][j],
+ counts->comp_ref[i][j]);
+ }
+#endif // CONFIG_EXT_REFS
+ }
+ }
+
+ for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
+ prob_diff_update(vp10_intra_mode_tree, cm->fc->y_mode_prob[i],
+ counts->y_mode[i], INTRA_MODES, header_bc);
+
+ vp10_write_nmv_probs(cm, cm->allow_high_precision_mv, header_bc,
+#if CONFIG_REF_MV
+ counts->mv);
+#else
+ &counts->mv);
+#endif // CONFIG_REF_MV
+ update_ext_tx_probs(cm, header_bc);
+#if CONFIG_SUPERTX
+ if (!xd->lossless[0]) update_supertx_probs(cm, header_bc);
+#endif // CONFIG_SUPERTX
+ }
+#if CONFIG_GLOBAL_MOTION
+ write_global_motion(cpi, header_bc);
+#endif // CONFIG_GLOBAL_MOTION
+#if CONFIG_ANS
+ ans_write_init(&header_ans, data);
+ buf_ans_flush(header_bc, &header_ans);
+ header_size = ans_write_end(&header_ans);
+ assert(header_size <= 0xffff); // the caller stores this size in a 16-bit field
+ return header_size;
+#else
+ vpx_stop_encode(header_bc);
+ assert(header_bc->pos <= 0xffff); // the caller stores this size in a 16-bit field
+ return header_bc->pos;
+#endif // CONFIG_ANS
+}
+
+static int choose_size_bytes(uint32_t size, int spare_msbs) {
+ // Choose the number of bytes required to represent size, without
+ // using the 'spare_msbs' number of most significant bits.
+ // Returns 1..4, or -1 when size does not fit in 4 bytes with those bits kept free.
+ // Make sure we will fit in 4 bytes to start with..
+ if (spare_msbs > 0 && size >> (32 - spare_msbs) != 0) return -1;
+
+ // Normalise to 32 bits
+ size <<= spare_msbs;
+
+ if (size >> 24 != 0) // a set bit in the top byte needs all 4 bytes
+ return 4;
+ else if (size >> 16 != 0)
+ return 3;
+ else if (size >> 8 != 0)
+ return 2;
+ else
+ return 1; // also the answer for size == 0
+}
+
+static void mem_put_varsize(uint8_t *const dst, const int sz, const int val) { // Stores val at dst using sz bytes (1 to 4).
+ switch (sz) {
+ case 1: dst[0] = (uint8_t)(val & 0xff); break; // only the low 8 bits of val are kept
+ case 2: mem_put_le16(dst, val); break;
+ case 3: mem_put_le24(dst, val); break;
+ case 4: mem_put_le32(dst, val); break;
+ default: assert("Invalid size" && 0); break; // any other sz writes nothing
+ }
+}
+
+static int remux_tiles(const VP10_COMMON *const cm, uint8_t *dst,
+ const uint32_t data_size, const uint32_t max_tile_size,
+ const uint32_t max_tile_col_size,
+ int *const tile_size_bytes,
+ int *const tile_col_size_bytes) { // Shrinks the 4-byte size fields in dst in place; returns the new data size.
+// Choose the tile size bytes (tsb) and tile column size bytes (tcsb)
+#if CONFIG_EXT_TILE
+ // The top bit in the tile size field indicates tile copy mode, so we
+ // have 1 less bit to code the tile size
+ const int tsb = choose_size_bytes(max_tile_size, 1);
+ const int tcsb = choose_size_bytes(max_tile_col_size, 0);
+#else
+ const int tsb = choose_size_bytes(max_tile_size, 0);
+ const int tcsb = 4; // This is ignored
+ (void)max_tile_col_size;
+#endif // CONFIG_EXT_TILE
+
+ assert(tsb > 0);
+ assert(tcsb > 0);
+
+ *tile_size_bytes = tsb; // the chosen widths are reported even when nothing is rewritten
+ *tile_col_size_bytes = tcsb;
+
+ if (tsb == 4 && tcsb == 4) {
+ return data_size; // both fields keep their 4-byte width: nothing to rewrite
+ } else {
+ uint32_t wpos = 0; // write offset into dst (compacted stream)
+ uint32_t rpos = 0; // read offset into dst (original stream)
+
+#if CONFIG_EXT_TILE
+ int tile_row;
+ int tile_col;
+
+ for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) {
+ // All but the last column has a column header
+ if (tile_col < cm->tile_cols - 1) {
+ uint32_t tile_col_size = mem_get_le32(dst + rpos);
+ rpos += 4;
+
+ // Adjust the tile column size by the number of bytes removed
+ // from the tile size fields.
+ tile_col_size -= (4 - tsb) * cm->tile_rows;
+
+ mem_put_varsize(dst + wpos, tcsb, tile_col_size);
+ wpos += tcsb;
+ }
+
+ for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
+ // All, including the last row has a header
+ uint32_t tile_header = mem_get_le32(dst + rpos);
+ rpos += 4;
+
+ // If this is a copy tile, we need to shift the MSB to the
+ // top bit of the new width, and there is no data to copy.
+ if (tile_header >> 31 != 0) {
+ if (tsb < 4) tile_header >>= 32 - 8 * tsb;
+ mem_put_varsize(dst + wpos, tsb, tile_header);
+ wpos += tsb;
+ } else {
+ mem_put_varsize(dst + wpos, tsb, tile_header);
+ wpos += tsb;
+
+ memmove(dst + wpos, dst + rpos, tile_header); // memmove: source and destination ranges may overlap
+ rpos += tile_header;
+ wpos += tile_header;
+ }
+ }
+ }
+#else
+ const int n_tiles = cm->tile_cols * cm->tile_rows;
+ int n;
+
+ for (n = 0; n < n_tiles; n++) {
+ int tile_size;
+
+ if (n == n_tiles - 1) {
+ tile_size = data_size - rpos; // the last tile has no size field; it takes the remaining bytes
+ } else {
+ tile_size = mem_get_le32(dst + rpos);
+ rpos += 4;
+ mem_put_varsize(dst + wpos, tsb, tile_size);
+ wpos += tsb;
+ }
+
+ memmove(dst + wpos, dst + rpos, tile_size); // memmove: source and destination ranges may overlap
+
+ rpos += tile_size;
+ wpos += tile_size;
+ }
+#endif // CONFIG_EXT_TILE
+
+ assert(rpos > wpos); // at least one size field must have been narrowed
+ assert(rpos == data_size);
+
+ return wpos;
+ }
+}
+
+void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dst, size_t *size) { // Packs one frame into dst; *size receives the byte count.
+ uint8_t *data = dst;
+ uint32_t compressed_header_size;
+ uint32_t uncompressed_header_size;
+ uint32_t data_size;
+ struct vpx_write_bit_buffer wb = { data, 0 };
+ struct vpx_write_bit_buffer saved_wb;
+ unsigned int max_tile_size;
+ unsigned int max_tile_col_size;
+ int tile_size_bytes; // only set (by remux_tiles) and read when have_tiles
+ int tile_col_size_bytes; // only set (by remux_tiles) and read when have_tiles
+
+ VP10_COMMON *const cm = &cpi->common;
+ const int have_tiles = cm->tile_cols * cm->tile_rows > 1;
+
+#if CONFIG_BITSTREAM_DEBUG
+ bitstream_queue_reset_write();
+#endif // CONFIG_BITSTREAM_DEBUG
+
+ // Write the uncompressed header
+ write_uncompressed_header(cpi, &wb);
+
+#if CONFIG_EXT_REFS
+ if (cm->show_existing_frame) {
+ *size = vpx_wb_bytes_written(&wb); // only the uncompressed header is emitted for such frames
+ return;
+ }
+#endif // CONFIG_EXT_REFS
+
+ // We do not know these in advance. Output placeholder bit.
+ saved_wb = wb; // remembers where the placeholders start so they can be overwritten below
+ // Write tile size magnitudes
+ if (have_tiles) {
+// Note that the last item in the uncompressed header is the data
+// describing tile configuration.
+#if CONFIG_EXT_TILE
+ // Number of bytes in tile column size - 1
+ vpx_wb_write_literal(&wb, 0, 2);
+#endif // CONFIG_EXT_TILE
+ // Number of bytes in tile size - 1
+ vpx_wb_write_literal(&wb, 0, 2);
+ }
+ // Size of compressed header
+ vpx_wb_write_literal(&wb, 0, 16);
+
+ uncompressed_header_size = (uint32_t)vpx_wb_bytes_written(&wb);
+ data += uncompressed_header_size;
+
+ vpx_clear_system_state();
+
+ // Write the compressed header
+ compressed_header_size = write_compressed_header(cpi, data);
+ data += compressed_header_size;
+
+ // Write the encoded tile data
+ data_size = write_tiles(cpi, data, &max_tile_size, &max_tile_col_size);
+
+ if (have_tiles) {
+ data_size =
+ remux_tiles(cm, data, data_size, max_tile_size, max_tile_col_size,
+ &tile_size_bytes, &tile_col_size_bytes); // narrows the size fields in place and returns the new size
+ }
+
+ data += data_size;
+
+ // Now fill in the gaps in the uncompressed header.
+ if (have_tiles) {
+#if CONFIG_EXT_TILE
+ assert(tile_col_size_bytes >= 1 && tile_col_size_bytes <= 4);
+ vpx_wb_write_literal(&saved_wb, tile_col_size_bytes - 1, 2);
+#endif // CONFIG_EXT_TILE
+ assert(tile_size_bytes >= 1 && tile_size_bytes <= 4);
+ vpx_wb_write_literal(&saved_wb, tile_size_bytes - 1, 2);
+ }
+ // TODO(jbb): Figure out what to do if compressed_header_size > 16 bits.
+ assert(compressed_header_size <= 0xffff);
+ vpx_wb_write_literal(&saved_wb, compressed_header_size, 16);
+
+ *size = data - dst; // total bytes: both headers plus the (possibly compacted) tile data
+}
diff --git a/av1/encoder/bitstream.h b/av1/encoder/bitstream.h
new file mode 100644
index 0000000..01d2c8d
--- /dev/null
+++ b/av1/encoder/bitstream.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_BITSTREAM_H_
+#define VP10_ENCODER_BITSTREAM_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "av1/encoder/encoder.h"
+
+void vp10_pack_bitstream(VP10_COMP *const cpi, uint8_t *dest, size_t *size);
+
+void vp10_encode_token_init(void);
+
+static INLINE int vp10_preserve_existing_gf(VP10_COMP *cpi) { // Returns nonzero when the conditions below all hold, 0 otherwise.
+#if CONFIG_EXT_REFS
+ // Do not swap gf and arf indices for internal overlay frames
+ return !cpi->multi_arf_allowed && cpi->rc.is_src_frame_alt_ref &&
+ !cpi->rc.is_src_frame_ext_arf;
+#else
+ return !cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
+ cpi->rc.is_src_frame_alt_ref; // this variant additionally requires refresh_golden_frame
+#endif // CONFIG_EXT_REFS
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_BITSTREAM_H_
diff --git a/av1/encoder/bitwriter.h b/av1/encoder/bitwriter.h
new file mode 100644
index 0000000..8cc674b
--- /dev/null
+++ b/av1/encoder/bitwriter.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* The purpose of this header is to provide compile time pluggable bit writer
+ * implementations with a common interface. */
+
+#ifndef VPX10_ENCODER_BITWRITER_H_
+#define VPX10_ENCODER_BITWRITER_H_
+
+#include "./vpx_config.h"
+#include "aom_dsp/prob.h"
+
+#if CONFIG_ANS // map the vp10_write* names onto the buffered ANS writer
+typedef struct BufAnsCoder BufAnsCoder;
+#include "av1/encoder/buf_ans.h"
+#define vp10_writer BufAnsCoder
+#define vp10_write buf_uabs_write
+#define vp10_write_bit buf_uabs_write_bit
+#define vp10_write_literal buf_uabs_write_literal
+#else // map the vp10_write* names onto the vpx_writer from aom_dsp/bitwriter.h
+#include "aom_dsp/bitwriter.h"
+#define vp10_writer vpx_writer
+#define vp10_write vpx_write
+#define vp10_write_bit vpx_write_bit
+#define vp10_write_literal vpx_write_literal
+#endif // CONFIG_ANS
+
+#endif // VPX10_ENCODER_BITWRITER_H_
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
new file mode 100644
index 0000000..65bb1e2
--- /dev/null
+++ b/av1/encoder/block.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_BLOCK_H_
+#define VP10_ENCODER_BLOCK_H_
+
+#include "av1/common/entropymv.h"
+#include "av1/common/entropy.h"
+#if CONFIG_REF_MV
+#include "av1/common/mvref_common.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct { // NOTE(review): presumably per-block difference statistics - confirm against users
+ unsigned int sse;
+ int sum;
+ unsigned int var;
+} diff;
+
+typedef struct macroblock_plane { // Per-plane encoder state: source/residual buffers and quantizer tables.
+ DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
+ tran_low_t *qcoeff;
+ tran_low_t *coeff;
+ uint16_t *eobs;
+ struct buf_2d src;
+
+ // Quantizer settings
+ const int16_t *quant_fp;
+ const int16_t *round_fp;
+ const int16_t *quant;
+ const int16_t *quant_shift;
+ const int16_t *zbin;
+ const int16_t *round;
+#if CONFIG_NEW_QUANT
+ const cuml_bins_type_nuq *cuml_bins_nuq[QUANT_PROFILES];
+#endif // CONFIG_NEW_QUANT
+
+ int64_t quant_thred[2];
+} MACROBLOCK_PLANE;
+
+/* The [2] dimension is for whether we skip the EOB node (i.e. if previous
+ * coefficient in this block was zero) or not. */
+typedef unsigned int vp10_coeff_cost[PLANE_TYPES][REF_TYPES][COEF_BANDS][2]
+ [COEFF_CONTEXTS][ENTROPY_TOKENS];
+
+typedef struct { // Reference-MV candidates and mode contexts, indexed by reference frame.
+ int_mv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
+ int16_t mode_context[MODE_CTX_REF_FRAMES];
+#if CONFIG_REF_MV
+ uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
+ CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
+#if CONFIG_EXT_INTER
+ int16_t compound_mode_context[MODE_CTX_REF_FRAMES];
+#endif // CONFIG_EXT_INTER
+#endif // CONFIG_REF_MV
+} MB_MODE_INFO_EXT;
+
+typedef struct { // NOTE(review): looks like scratch storage for palette search - confirm against users
+ uint8_t best_palette_color_map[MAX_SB_SQUARE];
+ float kmeans_data_buf[2 * MAX_SB_SQUARE];
+} PALETTE_BUFFER;
+
+typedef struct macroblock MACROBLOCK;
+struct macroblock { // Encoder-side macroblock state; embeds the MACROBLOCKD as e_mbd.
+ struct macroblock_plane plane[MAX_MB_PLANE];
+
+ MACROBLOCKD e_mbd;
+ MB_MODE_INFO_EXT *mbmi_ext;
+ int skip_block;
+ int select_tx_size;
+ int skip_optimize;
+ int q_index;
+
+ // The equivalent error at the current rdmult of one whole bit (not one
+ // bitcost unit).
+ int errorperbit;
+ // The equivalent SAD error of one (whole) bit at the current quantizer
+ // for large blocks.
+ int sadperbit16;
+ // The equivalent SAD error of one (whole) bit at the current quantizer
+ // for sub-8x8 blocks.
+ int sadperbit4;
+ int rddiv;
+ int rdmult;
+ int mb_energy;
+ int *m_search_count_ptr;
+ int *ex_search_count_ptr;
+
+ // These are set to their default values at the beginning, and then adjusted
+ // further in the encoding process.
+ BLOCK_SIZE min_partition_size;
+ BLOCK_SIZE max_partition_size;
+
+ int mv_best_ref_index[TOTAL_REFS_PER_FRAME];
+ unsigned int max_mv_context[TOTAL_REFS_PER_FRAME];
+ unsigned int source_variance;
+ unsigned int recon_variance;
+ unsigned int pred_sse[TOTAL_REFS_PER_FRAME];
+ int pred_mv_sad[TOTAL_REFS_PER_FRAME];
+
+#if CONFIG_REF_MV
+ int *nmvjointcost; // a pointer in this configuration, an array in the #else branch
+ int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
+ int *nmvcost[NMV_CONTEXTS][2];
+ int *nmvcost_hp[NMV_CONTEXTS][2];
+ int **mv_cost_stack[NMV_CONTEXTS];
+ int *nmvjointsadcost;
+ int zero_rmv_cost[NMV_CONTEXTS][2];
+ int comp_rmv_cost[2];
+#else
+ int nmvjointcost[MV_JOINTS];
+ int *nmvcost[2];
+ int *nmvcost_hp[2];
+ int nmvjointsadcost[MV_JOINTS];
+#endif // CONFIG_REF_MV
+
+ int **mvcost;
+ int *nmvsadcost[2];
+ int *nmvsadcost_hp[2];
+ int **mvsadcost;
+
+ PALETTE_BUFFER *palette_buffer;
+
+ // These define limits to motion vector components to prevent them
+ // from extending outside the UMV borders
+ int mv_col_min;
+ int mv_col_max;
+ int mv_row_min;
+ int mv_row_max;
+
+#if CONFIG_VAR_TX
+ uint8_t blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
+#if CONFIG_REF_MV
+ uint8_t blk_skip_drl[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
+#endif // CONFIG_REF_MV
+#endif // CONFIG_VAR_TX
+
+ int skip;
+
+ int encode_breakout;
+
+ // note that token_costs is the cost when eob node is skipped
+ vp10_coeff_cost token_costs[TX_SIZES];
+
+ int optimize;
+
+ // indicate if it is in the rd search loop or encoding process
+ int use_lp32x32fdct;
+
+ // Used to store sub partition's choices.
+ MV pred_mv[TOTAL_REFS_PER_FRAME];
+
+ // Store the best motion vector during motion search
+ int_mv best_mv;
+ // Store the second best motion vector during full-pixel motion search
+ int_mv second_best_mv;
+
+ // Strong color activity detection. Used in RTC coding mode to enhance
+ // the visual quality at the boundary of moving color objects.
+ uint8_t color_sensitivity[2];
+
+ // use default transform and skip transform type search for intra modes
+ int use_default_intra_tx_type;
+ // use default transform and skip transform type search for inter modes
+ int use_default_inter_tx_type;
+};
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_BLOCK_H_
diff --git a/av1/encoder/blockiness.c b/av1/encoder/blockiness.c
new file mode 100644
index 0000000..97e201a
--- /dev/null
+++ b/av1/encoder/blockiness.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "av1/common/common.h"
+#include "av1/common/filter.h"
+#include "aom/vpx_integer.h"
+#include "aom_dsp/vpx_convolve.h"
+#include "aom_dsp/vpx_filter.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/system_state.h"
+
+static int horizontal_filter(const uint8_t *s) { // Weights -2, 6, -6, 2 applied to s[-2], s[-1], s[0], s[1].
+ return (s[1] - s[-2]) * 2 + (s[-1] - s[0]) * 6;
+}
+
+static int vertical_filter(const uint8_t *s, int p) { // Same weights as horizontal_filter, stepping by stride p between rows.
+ return (s[p] - s[-2 * p]) * 2 + (s[-p] - s[0]) * 6;
+}
+
+static int variance(int sum, int sum_squared, int size) { // Mean of squares minus squared mean; every division is integer (truncating).
+ return sum_squared / size - (sum / size) * (sum / size);
+}
+// Calculate a blockiness level for a vertical block edge.
+// This function returns a new blockiness metric that's defined as
+
+// p0 p1 p2 p3
+// q0 q1 q2 q3
+// block edge ->
+// r0 r1 r2 r3
+// s0 s1 s2 s3
+
+// blockiness = p0*-2+q0*6+r0*-6+s0*2 +
+// p1*-2+q1*6+r1*-6+s1*2 +
+// p2*-2+q2*6+r2*-6+s2*2 +
+// p3*-2+q3*6+r3*-6+s3*2 ;
+
+// reconstructed_blockiness = max(abs(blockiness from reconstructed buffer) -
+// abs(blockiness from source buffer), 0)
+//
+// I make the assumption that flat blocks are much more visible than high
+// contrast blocks. As such, I scale the result of the blockiness calc
+// by dividing the blockiness by the variance of the pixels on either side
+// of the edge as follows:
+// var_0 = (q0^2+q1^2+q2^2+q3^2) / 4 - ((q0 + q1 + q2 + q3) / 4 )^2
+// var_1 = (r0^2+r1^2+r2^2+r3^2) / 4 - ((r0 + r1 + r2 + r3) / 4 )^2
+// The returned blockiness is the scaled value
+// Reconstructed blockiness / ( 1 + var_0 + var_1 ) ;
+static int blockiness_vertical(const uint8_t *s, int sp, const uint8_t *r,
+ int rp, int size) { // s/sp: first buffer and stride, r/rp: second buffer and stride, size: rows to scan.
+ int s_blockiness = 0;
+ int r_blockiness = 0;
+ int sum_0 = 0;
+ int sum_sq_0 = 0;
+ int sum_1 = 0;
+ int sum_sq_1 = 0;
+ int i;
+ int var_0;
+ int var_1;
+ for (i = 0; i < size; ++i, s += sp, r += rp) { // walk down the edge one row at a time
+ s_blockiness += horizontal_filter(s);
+ r_blockiness += horizontal_filter(r);
+ sum_0 += s[0]; // pixel statistics are gathered from the s buffer only
+ sum_sq_0 += s[0] * s[0];
+ sum_1 += s[-1];
+ sum_sq_1 += s[-1] * s[-1];
+ }
+ var_0 = variance(sum_0, sum_sq_0, size);
+ var_1 = variance(sum_1, sum_sq_1, size);
+ r_blockiness = abs(r_blockiness);
+ s_blockiness = abs(s_blockiness);
+
+ if (r_blockiness > s_blockiness) // only blockiness that r has in excess of s is reported
+ return (r_blockiness - s_blockiness) / (1 + var_0 + var_1);
+ else
+ return 0;
+}
+
+// Calculate a blockiness level for a horizontal block edge
+// same as above.
+static int blockiness_horizontal(const uint8_t *s, int sp, const uint8_t *r,
+ int rp, int size) { // s/sp: first buffer and stride, r/rp: second buffer and stride, size: columns to scan.
+ int s_blockiness = 0;
+ int r_blockiness = 0;
+ int sum_0 = 0;
+ int sum_sq_0 = 0;
+ int sum_1 = 0;
+ int sum_sq_1 = 0;
+ int i;
+ int var_0;
+ int var_1;
+ for (i = 0; i < size; ++i, ++s, ++r) { // walk along the edge one column at a time
+ s_blockiness += vertical_filter(s, sp);
+ r_blockiness += vertical_filter(r, rp);
+ sum_0 += s[0]; // pixel statistics are gathered from the s buffer only
+ sum_sq_0 += s[0] * s[0];
+ sum_1 += s[-sp];
+ sum_sq_1 += s[-sp] * s[-sp];
+ }
+ var_0 = variance(sum_0, sum_sq_0, size);
+ var_1 = variance(sum_1, sum_sq_1, size);
+ r_blockiness = abs(r_blockiness);
+ s_blockiness = abs(s_blockiness);
+
+ if (r_blockiness > s_blockiness) // only blockiness that r has in excess of s is reported
+ return (r_blockiness - s_blockiness) / (1 + var_0 + var_1);
+ else
+ return 0;
+}
+
+// Returns the frame blockiness: the blockiness img2 shows in excess of img1 on
+// interior borders in steps of 4, averaged over the frame's 4x4 block count.
+double vp10_get_blockiness(const unsigned char *img1, int img1_pitch,
+                           const unsigned char *img2, int img2_pitch, int width,
+                           int height) {
+  double blockiness = 0;
+  int i, j;
+  vpx_clear_system_state();
+  for (i = 0; i < height;
+       i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) {
+    for (j = 0; j < width; j += 4) {
+      if (i > 0 && j > 0) {  // row 0 / column 0: filters would read above/left
+        blockiness +=
+            blockiness_vertical(img1 + j, img1_pitch, img2 + j, img2_pitch, 4);
+        blockiness += blockiness_horizontal(img1 + j, img1_pitch, img2 + j,
+                                            img2_pitch, 4);
+      }
+    }
+  }
+  // Under 16 pixels there is no 4x4 block: return 0 instead of 0 / 0 (NaN).
+  return width * height >= 16 ? blockiness / (width * height / 16) : 0.0;
+}
diff --git a/av1/encoder/buf_ans.c b/av1/encoder/buf_ans.c
new file mode 100644
index 0000000..f87c1e1
--- /dev/null
+++ b/av1/encoder/buf_ans.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string.h>
+
+#include "av1/common/common.h"
+#include "av1/encoder/buf_ans.h"
+#include "av1/encoder/encoder.h"
+#include "aom_mem/vpx_mem.h"
+
+void vp10_buf_ans_alloc(struct BufAnsCoder *c, struct VP10Common *cm,
+ int size_hint) { // Allocates room for size_hint symbols; cm is kept for memory-error reporting.
+ c->cm = cm;
+ c->size = size_hint;
+ CHECK_MEM_ERROR(cm, c->buf, vpx_malloc(c->size * sizeof(*c->buf)));
+ // Initialize to overfull to trigger the assert in write.
+ c->offset = c->size + 1; // callers must call buf_ans_write_reset() before the first write
+}
+
+void vp10_buf_ans_free(struct BufAnsCoder *c) { // Releases the symbol buffer and zeroes its size.
+ vpx_free(c->buf);
+ c->buf = NULL;
+ c->size = 0; // note: c->offset and c->cm are left as they were
+}
+
+void vp10_buf_ans_grow(struct BufAnsCoder *c) {  // Doubles the capacity, keeping the buffered symbols.
+  struct buffered_ans_symbol *new_buf = NULL;
+  const int new_size = c->size > 0 ? c->size * 2 : 1;  // 0 * 2 would never grow
+  CHECK_MEM_ERROR(c->cm, new_buf, vpx_malloc(new_size * sizeof(*new_buf)));
+  if (c->size > 0) memcpy(new_buf, c->buf, c->size * sizeof(*c->buf));  // buf may be NULL when empty
+  vpx_free(c->buf);
+  c->buf = new_buf;
+  c->size = new_size;
+}
diff --git a/av1/encoder/buf_ans.h b/av1/encoder/buf_ans.h
new file mode 100644
index 0000000..8a88c32
--- /dev/null
+++ b/av1/encoder/buf_ans.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_BUF_ANS_H_
+#define VP10_ENCODER_BUF_ANS_H_
+// Buffered forward ANS writer.
+// Symbols are written to the writer in forward (decode) order and serialized
+// backwards due to ANS's stack-like behavior.
+
+#include <assert.h>
+#include "./vpx_config.h"
+#include "aom/vpx_integer.h"
+#include "av1/common/ans.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+#define ANS_METHOD_UABS 0
+#define ANS_METHOD_RANS 1
+
+struct buffered_ans_symbol {
+ uint8_t method; // One of ANS_METHOD_UABS or ANS_METHOD_RANS.
+ // TODO(aconverse): Should be possible to write this in terms of start for ABS
+ AnsP10 val_start; // Boolean value for ABS, start in symbol cycle for Rans
+ AnsP10 prob; // Probability of this symbol
+};
+
+struct BufAnsCoder {
+ struct VP10Common *cm; // Passed to CHECK_MEM_ERROR when the buffer grows
+ struct buffered_ans_symbol *buf; // Symbol storage, filled in write order
+ int size; // Capacity of buf, in symbols
+ int offset; // Index of the next symbol slot to write
+};
+
+void vp10_buf_ans_alloc(struct BufAnsCoder *c, struct VP10Common *cm,
+ int size_hint);
+
+void vp10_buf_ans_free(struct BufAnsCoder *c);
+
+void vp10_buf_ans_grow(struct BufAnsCoder *c);
+
+static INLINE void buf_ans_write_reset(struct BufAnsCoder *const c) {
+ c->offset = 0; // Next write goes to slot 0; the allocation is kept.
+}
+
+static INLINE void buf_uabs_write(struct BufAnsCoder *const c, uint8_t val,
+                                  AnsP8 prob) {
+  struct buffered_ans_symbol *slot;
+  assert(c->offset <= c->size);
+  // Appends one uABS symbol. Grow before taking the slot: c->buf is replaced.
+  if (c->offset == c->size) vp10_buf_ans_grow(c);
+  slot = &c->buf[c->offset++];
+  slot->method = ANS_METHOD_UABS;
+  slot->val_start = val;
+  slot->prob = prob;
+}
+
+static INLINE void buf_rans_write(struct BufAnsCoder *const c,
+                                  const struct rans_sym *const sym) {
+  struct buffered_ans_symbol *slot;
+  assert(c->offset <= c->size);
+  // Appends one rANS symbol. Grow before taking the slot: c->buf is replaced.
+  if (c->offset == c->size) vp10_buf_ans_grow(c);
+  slot = &c->buf[c->offset++];
+  slot->method = ANS_METHOD_RANS;
+  slot->val_start = sym->cum_prob;
+  slot->prob = sym->prob;
+}
+
+static INLINE void buf_ans_flush(const struct BufAnsCoder *const c,
+                                 struct AnsCoder *ans) {
+  int remaining = c->offset;  // Replay the buffer into ans, last symbol first.
+  while (remaining-- > 0) {
+    const struct buffered_ans_symbol *const s = &c->buf[remaining];
+    if (s->method == ANS_METHOD_RANS) {
+      struct rans_sym sym;
+      sym.prob = s->prob;
+      sym.cum_prob = s->val_start;
+      rans_write(ans, &sym);
+    } else {
+      uabs_write(ans, (uint8_t)s->val_start, (AnsP8)s->prob);
+    }
+  }
+}
+
+static INLINE void buf_uabs_write_bit(struct BufAnsCoder *c, int bit) {
+ buf_uabs_write(c, bit, 128); // 128 is the 50% probability used for raw bits.
+}
+
+static INLINE void buf_uabs_write_literal(struct BufAnsCoder *c, int literal,
+                                          int bits) {
+  // Buffers the low `bits` bits of `literal`, most significant bit first,
+  // each one through buf_uabs_write_bit().
+  int shift = bits;
+  assert(bits < 31);
+  while (--shift >= 0) buf_uabs_write_bit(c, (literal >> shift) & 1);
+}
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+#endif // VP10_ENCODER_BUF_ANS_H_
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c
new file mode 100644
index 0000000..9346e1c
--- /dev/null
+++ b/av1/encoder/context_tree.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/encoder/context_tree.h"
+#include "av1/encoder/encoder.h"
+
+static const BLOCK_SIZE square[MAX_SB_SIZE_LOG2 - 2] = {
+ BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64,
+#if CONFIG_EXT_PARTITION
+ BLOCK_128X128,
+#endif // CONFIG_EXT_PARTITION
+};
+
+static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition,
+#endif
+ PICK_MODE_CONTEXT *ctx) {
+ const int num_blk = (num_4x4_blk < 4 ? 4 : num_4x4_blk); // never below 4
+ const int num_pix = num_blk << 4; // 16 entries per 4x4 block
+ int i, k;
+ ctx->num_4x4_blk = num_blk;
+#if CONFIG_EXT_PARTITION_TYPES
+ ctx->partition = partition;
+#endif
+ // Per plane: three sets of coefficient buffers (num_pix) and eobs (num_blk).
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+#if CONFIG_VAR_TX
+ CHECK_MEM_ERROR(cm, ctx->blk_skip[i], vpx_calloc(num_blk, sizeof(uint8_t)));
+#endif
+ for (k = 0; k < 3; ++k) {
+ CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
+ vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
+ CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
+ vpx_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k])));
+ CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
+ vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k])));
+ CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
+ vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k])));
+ }
+ }
+ // The color index maps are allocated only when screen content tools are on.
+ if (cm->allow_screen_content_tools) {
+ for (i = 0; i < 2; ++i) {
+ CHECK_MEM_ERROR(
+ cm, ctx->color_index_map[i],
+ vpx_memalign(32, num_pix * sizeof(*ctx->color_index_map[i])));
+ }
+ }
+}
+
+static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
+  int plane, n;
+  // Frees every buffer attached to ctx and resets each pointer to NULL.
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_VAR_TX
+    vpx_free(ctx->blk_skip[plane]);
+    ctx->blk_skip[plane] = NULL;
+#endif
+    for (n = 0; n < 3; ++n) {
+      vpx_free(ctx->coeff[plane][n]);
+      vpx_free(ctx->qcoeff[plane][n]);
+      vpx_free(ctx->dqcoeff[plane][n]);
+      vpx_free(ctx->eobs[plane][n]);
+      ctx->coeff[plane][n] = NULL;
+      ctx->qcoeff[plane][n] = NULL;
+      ctx->dqcoeff[plane][n] = NULL;
+      ctx->eobs[plane][n] = NULL;
+    }
+  }
+  for (n = 0; n < 2; ++n) {
+    vpx_free(ctx->color_index_map[n]);
+    ctx->color_index_map[n] = NULL;
+  }
+}
+
+static void alloc_tree_contexts(VP10_COMMON *cm, PC_TREE *tree,
+                                int num_4x4_blk) {
+#if CONFIG_EXT_PARTITION_TYPES
+  alloc_mode_context(cm, num_4x4_blk, PARTITION_NONE, &tree->none);
+  alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_HORZ, &tree->horizontal[0]);
+  alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT, &tree->vertical[0]);
+  alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_HORZ, &tree->horizontal[1]);
+  alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT, &tree->vertical[1]);
+  // Three-way partitions: two quarter-size contexts and one half-size context.
+  alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_A,
+                     &tree->horizontala[0]);
+  alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_A,
+                     &tree->horizontala[1]);
+  alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_HORZ_A,
+                     &tree->horizontala[2]);
+  alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_HORZ_B,
+                     &tree->horizontalb[0]);
+  alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_B,
+                     &tree->horizontalb[1]);
+  alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_HORZ_B,
+                     &tree->horizontalb[2]);
+  alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_A,
+                     &tree->verticala[0]);
+  alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_A,
+                     &tree->verticala[1]);
+  alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT_A,
+                     &tree->verticala[2]);
+  alloc_mode_context(cm, num_4x4_blk / 2, PARTITION_VERT_B,
+                     &tree->verticalb[0]);
+  alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_B,
+                     &tree->verticalb[1]);
+  alloc_mode_context(cm, num_4x4_blk / 4, PARTITION_VERT_B,
+                     &tree->verticalb[2]);
+#ifdef CONFIG_SUPERTX
+  alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ,
+                     &tree->horizontal_supertx);
+  alloc_mode_context(cm, num_4x4_blk, PARTITION_VERT, &tree->vertical_supertx);
+  alloc_mode_context(cm, num_4x4_blk, PARTITION_SPLIT, &tree->split_supertx);
+  alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ_A,
+                     &tree->horizontala_supertx);
+  alloc_mode_context(cm, num_4x4_blk, PARTITION_HORZ_B,
+                     &tree->horizontalb_supertx);
+  alloc_mode_context(cm, num_4x4_blk, PARTITION_VERT_A,
+                     &tree->verticala_supertx);
+  alloc_mode_context(cm, num_4x4_blk, PARTITION_VERT_B,
+                     &tree->verticalb_supertx);
+#endif  // CONFIG_SUPERTX
+#else
+  alloc_mode_context(cm, num_4x4_blk, &tree->none);
+  alloc_mode_context(cm, num_4x4_blk / 2, &tree->horizontal[0]);
+  alloc_mode_context(cm, num_4x4_blk / 2, &tree->vertical[0]);
+#ifdef CONFIG_SUPERTX
+  alloc_mode_context(cm, num_4x4_blk, &tree->horizontal_supertx);
+  alloc_mode_context(cm, num_4x4_blk, &tree->vertical_supertx);
+  alloc_mode_context(cm, num_4x4_blk, &tree->split_supertx);
+#endif
+  // With num_4x4_blk <= 4 the second-half contexts are zeroed, not allocated.
+  if (num_4x4_blk > 4) {
+    alloc_mode_context(cm, num_4x4_blk / 2, &tree->horizontal[1]);
+    alloc_mode_context(cm, num_4x4_blk / 2, &tree->vertical[1]);
+  } else {
+    memset(&tree->horizontal[1], 0, sizeof(tree->horizontal[1]));
+    memset(&tree->vertical[1], 0, sizeof(tree->vertical[1]));
+  }
+#endif  // CONFIG_EXT_PARTITION_TYPES
+}
+
+static void free_tree_contexts(PC_TREE *tree) {
+  int i;  // Frees the buffers of every mode context embedded in this node.
+  free_mode_context(&tree->none);
+  for (i = 0; i < 2; i++) {
+    free_mode_context(&tree->horizontal[i]);
+    free_mode_context(&tree->vertical[i]);
+  }
+#if CONFIG_EXT_PARTITION_TYPES
+  for (i = 0; i < 3; i++) {
+    free_mode_context(&tree->horizontala[i]);
+    free_mode_context(&tree->horizontalb[i]);
+    free_mode_context(&tree->verticala[i]);
+    free_mode_context(&tree->verticalb[i]);
+  }
+#endif  // CONFIG_EXT_PARTITION_TYPES
+#ifdef CONFIG_SUPERTX
+  free_mode_context(&tree->horizontal_supertx);
+  free_mode_context(&tree->vertical_supertx);
+  free_mode_context(&tree->split_supertx);
+#if CONFIG_EXT_PARTITION_TYPES
+  free_mode_context(&tree->horizontala_supertx);
+  free_mode_context(&tree->horizontalb_supertx);
+  free_mode_context(&tree->verticala_supertx);
+  free_mode_context(&tree->verticalb_supertx);
+#endif  // CONFIG_EXT_PARTITION_TYPES
+#endif  // CONFIG_SUPERTX
+}
+
+// Allocates and links the partition-search context tree for one thread.
+// Each square block size, from 8x8 upwards, gets PC_TREE nodes holding the
+// mode contexts for that size; split[] links each node to its four children.
+// td->pc_root[] is then pointed at the root node for each superblock size.
+void vp10_setup_pc_tree(VP10_COMMON *cm, ThreadData *td) {
+ int i, j;
+#if CONFIG_EXT_PARTITION
+ const int leaf_nodes = 256;
+ const int tree_nodes = 256 + 64 + 16 + 4 + 1;
+#else
+ const int leaf_nodes = 64;
+ const int tree_nodes = 64 + 16 + 4 + 1;
+#endif // CONFIG_EXT_PARTITION
+ int pc_tree_index = 0;
+ PC_TREE *this_pc;
+ PICK_MODE_CONTEXT *this_leaf;
+ int square_index = 1;
+ int nodes;
+
+ vpx_free(td->leaf_tree);
+ CHECK_MEM_ERROR(cm, td->leaf_tree,
+ vpx_calloc(leaf_nodes, sizeof(*td->leaf_tree)));
+ vpx_free(td->pc_tree);
+ CHECK_MEM_ERROR(cm, td->pc_tree,
+ vpx_calloc(tree_nodes, sizeof(*td->pc_tree)));
+
+ this_pc = &td->pc_tree[0];
+ this_leaf = &td->leaf_tree[0];
+
+ // 4x4 blocks smaller than 8x8 but in the same 8x8 block share the same
+ // context so we only need to allocate 1 for each 8x8 block.
+ for (i = 0; i < leaf_nodes; ++i) {
+#if CONFIG_EXT_PARTITION_TYPES
+ alloc_mode_context(cm, 1, PARTITION_NONE, &td->leaf_tree[i]);
+#else
+ alloc_mode_context(cm, 1, &td->leaf_tree[i]);
+#endif
+ }
+
+ // Sets up all the leaf nodes in the tree: the first leaf_nodes entries of
+ // pc_tree are 8x8 nodes whose four leaf_split[] slots share one leaf context.
+ for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) {
+ PC_TREE *const tree = &td->pc_tree[pc_tree_index];
+ tree->block_size = square[0];
+ alloc_tree_contexts(cm, tree, 4);
+ tree->leaf_split[0] = this_leaf++;
+ for (j = 1; j < 4; j++) tree->leaf_split[j] = tree->leaf_split[0];
+ }
+
+ // Each node has 4 children; fill each block_size level of the tree from
+ // the leaves to the root (this_pc hands out the children in array order).
+ for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) {
+ for (i = 0; i < nodes; ++i) {
+ PC_TREE *const tree = &td->pc_tree[pc_tree_index];
+ alloc_tree_contexts(cm, tree, 4 << (2 * square_index));
+ tree->block_size = square[square_index];
+ for (j = 0; j < 4; j++) tree->split[j] = this_pc++;
+ ++pc_tree_index;
+ }
+ ++square_index;
+ }
+
+ // Set up the root node for the largest superblock size
+ i = MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2;
+ td->pc_root[i] = &td->pc_tree[tree_nodes - 1];
+ td->pc_root[i]->none.best_mode_index = 2;
+ // Set up the root nodes for the rest of the possible superblock sizes
+ while (--i >= 0) {
+ td->pc_root[i] = td->pc_root[i + 1]->split[0];
+ td->pc_root[i]->none.best_mode_index = 2;
+ }
+}
+
+void vp10_free_pc_tree(ThreadData *td) {
+#if CONFIG_EXT_PARTITION
+ const int leaf_nodes = 256;
+ const int tree_nodes = 256 + 64 + 16 + 4 + 1;
+#else
+ const int leaf_nodes = 64;
+ const int tree_nodes = 64 + 16 + 4 + 1;
+#endif // CONFIG_EXT_PARTITION
+ int i;
+
+ // Free the buffers of all the leaf (sub-8x8) mode contexts.
+ for (i = 0; i < leaf_nodes; ++i) free_mode_context(&td->leaf_tree[i]);
+
+ // Free the mode contexts held by every node of the tree.
+ for (i = 0; i < tree_nodes; ++i) free_tree_contexts(&td->pc_tree[i]);
+
+ vpx_free(td->pc_tree);
+ td->pc_tree = NULL;
+ vpx_free(td->leaf_tree);
+ td->leaf_tree = NULL;
+}
diff --git a/av1/encoder/context_tree.h b/av1/encoder/context_tree.h
new file mode 100644
index 0000000..18f00bb
--- /dev/null
+++ b/av1/encoder/context_tree.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_CONTEXT_TREE_H_
+#define VP10_ENCODER_CONTEXT_TREE_H_
+
+#include "av1/common/blockd.h"
+#include "av1/encoder/block.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10_COMP;
+struct VP10Common;
+struct ThreadData;
+
+// Structure to hold snapshot of coding context during the mode picking process
+typedef struct {
+ MODE_INFO mic;
+ MB_MODE_INFO_EXT mbmi_ext;
+ uint8_t *color_index_map[2];
+#if CONFIG_VAR_TX
+ uint8_t *blk_skip[MAX_MB_PLANE];
+#endif
+
+ // dual buffer pointers, 0: in use, 1: best in store
+ tran_low_t *coeff[MAX_MB_PLANE][3];
+ tran_low_t *qcoeff[MAX_MB_PLANE][3];
+ tran_low_t *dqcoeff[MAX_MB_PLANE][3];
+ uint16_t *eobs[MAX_MB_PLANE][3];
+
+ int is_coded;
+ int num_4x4_blk;
+ int skip;
+ int pred_pixel_ready;
+ // For current partition, only if all Y, U, and V transform blocks'
+ // coefficients are quantized to 0, skippable is set to 0.
+ int skippable;
+ int best_mode_index;
+ int hybrid_pred_diff;
+ int comp_pred_diff;
+ int single_pred_diff;
+
+ // TODO(jingning) Use RD_COST struct here instead. This involves a broader
+ // scope of refactoring.
+ int rate;
+ int64_t dist;
+
+ // motion vector cache for adaptive motion search control in partition
+ // search loop
+ MV pred_mv[TOTAL_REFS_PER_FRAME];
+ INTERP_FILTER pred_interp_filter;
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition;
+#endif
+} PICK_MODE_CONTEXT;
+
+typedef struct PC_TREE {
+ int index;
+ PARTITION_TYPE partitioning;
+ BLOCK_SIZE block_size;
+ PICK_MODE_CONTEXT none;
+ PICK_MODE_CONTEXT horizontal[2];
+ PICK_MODE_CONTEXT vertical[2];
+#if CONFIG_EXT_PARTITION_TYPES
+ PICK_MODE_CONTEXT horizontala[3];
+ PICK_MODE_CONTEXT horizontalb[3];
+ PICK_MODE_CONTEXT verticala[3];
+ PICK_MODE_CONTEXT verticalb[3];
+#endif
+ union {
+ struct PC_TREE *split[4];
+ PICK_MODE_CONTEXT *leaf_split[4];
+ };
+#ifdef CONFIG_SUPERTX
+ PICK_MODE_CONTEXT horizontal_supertx;
+ PICK_MODE_CONTEXT vertical_supertx;
+ PICK_MODE_CONTEXT split_supertx;
+#if CONFIG_EXT_PARTITION_TYPES
+ PICK_MODE_CONTEXT horizontala_supertx;
+ PICK_MODE_CONTEXT horizontalb_supertx;
+ PICK_MODE_CONTEXT verticala_supertx;
+ PICK_MODE_CONTEXT verticalb_supertx;
+#endif
+#endif
+} PC_TREE;
+
+void vp10_setup_pc_tree(struct VP10Common *cm, struct ThreadData *td);
+void vp10_free_pc_tree(struct ThreadData *td);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* VP10_ENCODER_CONTEXT_TREE_H_ */
diff --git a/av1/encoder/corner_detect.c b/av1/encoder/corner_detect.c
new file mode 100644
index 0000000..2b2d82d
--- /dev/null
+++ b/av1/encoder/corner_detect.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <math.h>
+#include <assert.h>
+
+#include "vp10/encoder/corner_detect.h"
+#include "third_party/fastfeat/fast.h"
+
+// Fast_9 wrapper
+#define FAST_BARRIER 40
+int FastCornerDetect(unsigned char *buf, int width, int height, int stride,
+                     int *points, int max_points) {
+  // Copies up to max_points detected corners into points; returns the count.
+  int num_points = 0;
+  xy *const frm_corners_xy = fast9_detect_nonmax(buf, width, height, stride,
+                                                 FAST_BARRIER, &num_points);
+  if (num_points > max_points) num_points = max_points;
+  if (num_points > 0 && frm_corners_xy)
+    memcpy(points, frm_corners_xy, sizeof(*frm_corners_xy) * num_points);
+  else
+    num_points = 0;
+  free(frm_corners_xy);  // Free on every path; the empty case used to leak.
+  return num_points;
+}
diff --git a/av1/encoder/corner_detect.h b/av1/encoder/corner_detect.h
new file mode 100644
index 0000000..8db713e
--- /dev/null
+++ b/av1/encoder/corner_detect.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_CORNER_DETECT_H_
+#define VP10_ENCODER_CORNER_DETECT_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+
+int FastCornerDetect(unsigned char *buf, int width, int height, int stride,
+ int *points, int max_points);
+
+#endif // VP10_ENCODER_CORNER_DETECT_H
diff --git a/av1/encoder/corner_match.c b/av1/encoder/corner_match.c
new file mode 100644
index 0000000..6b19d5b
--- /dev/null
+++ b/av1/encoder/corner_match.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+
+#include "vp10/encoder/corner_match.h"
+
+#define MATCH_SZ 15
+#define MATCH_SZ_BY2 ((MATCH_SZ - 1) / 2)
+#define MATCH_SZ_SQ (MATCH_SZ * MATCH_SZ)
+#define SEARCH_SZ 9
+#define SEARCH_SZ_BY2 ((SEARCH_SZ - 1) / 2)
+
+#define THRESHOLD_NCC 0.80
+
+static double compute_variance(unsigned char *im, int stride, int x, int y,
+                               double *mean) {
+  // Variance of the MATCH_SZ x MATCH_SZ window centred on (x, y); the window
+  // mean is also stored through `mean` when it is non-NULL.
+  double sum = 0.0, sumsq = 0.0;
+  int r, col;
+  for (r = y - MATCH_SZ_BY2; r <= y + MATCH_SZ_BY2; ++r) {
+    for (col = x - MATCH_SZ_BY2; col <= x + MATCH_SZ_BY2; ++col) {
+      const int px = im[r * stride + col];
+      sum += px;
+      sumsq += px * px;
+    }
+  }
+  if (mean) *mean = sum / MATCH_SZ_SQ;
+  return (sumsq * MATCH_SZ_SQ - sum * sum) / (MATCH_SZ_SQ * MATCH_SZ_SQ);
+}
+
+static double compute_cross_correlation(unsigned char *im1, int stride1, int x1,
+                                        int y1, unsigned char *im2, int stride2,
+                                        int x2, int y2) {
+  // Covariance of the MATCH_SZ x MATCH_SZ windows centred on (x1, y1) in im1
+  // and (x2, y2) in im2. Callers in this file divide the result by
+  // sqrt(var1 * var2) to get the normalized cross-correlation.
+  double sum1 = 0, sum2 = 0, cross = 0;
+  int dy, dx;
+  for (dy = -MATCH_SZ_BY2; dy <= MATCH_SZ_BY2; ++dy) {
+    for (dx = -MATCH_SZ_BY2; dx <= MATCH_SZ_BY2; ++dx) {
+      const int a = im1[(y1 + dy) * stride1 + (x1 + dx)];
+      const int b = im2[(y2 + dy) * stride2 + (x2 + dx)];
+      sum1 += a;
+      sum2 += b;
+      cross += a * b;
+    }
+  }
+  return (cross * MATCH_SZ_SQ - sum1 * sum2) / (MATCH_SZ_SQ * MATCH_SZ_SQ);
+}
+
+static int is_eligible_point(double pointx, double pointy, int width,
+                             int height) {  // 1 if the match window fits
+  return (pointx >= MATCH_SZ_BY2 && pointx + MATCH_SZ_BY2 < width &&
+          pointy >= MATCH_SZ_BY2 && pointy + MATCH_SZ_BY2 < height);
+}
+
+static int is_eligible_distance(double point1x, double point1y, double point2x,
+                                double point2y, int width, int height) {
+  const int limit = (height > width ? height : width) >> 4;  // max dim / 16
+  const double dx = point1x - point2x, dy = point1y - point2y;
+  return dx * dx + dy * dy <= limit * limit;
+}
+
+static void improve_correspondence(unsigned char *frm, unsigned char *ref,
+ int width, int height, int frm_stride,
+ int ref_stride,
+ correspondence *correspondences,
+ int num_correspondences) {
+ int i;
+ for (i = 0; i < num_correspondences; ++i) { // Pass 1: refine (rx, ry) in ref.
+ double template_norm =
+ compute_variance(frm, frm_stride, (int)correspondences[i].x,
+ (int)correspondences[i].y, NULL);
+ int x, y, best_x = 0, best_y = 0;
+ double best_match_ncc = 0.0;
+ for (y = -SEARCH_SZ_BY2; y <= SEARCH_SZ_BY2; ++y) {
+ for (x = -SEARCH_SZ_BY2; x <= SEARCH_SZ_BY2; ++x) {
+ double match_ncc;
+ double subimage_norm;
+ if (!is_eligible_point((int)correspondences[i].rx + x,
+ (int)correspondences[i].ry + y, width, height))
+ continue;
+ if (!is_eligible_distance(
+ (int)correspondences[i].x, (int)correspondences[i].y,
+ (int)correspondences[i].rx + x, (int)correspondences[i].ry + y,
+ width, height))
+ continue;
+ subimage_norm =
+ compute_variance(ref, ref_stride, (int)correspondences[i].rx + x,
+ (int)correspondences[i].ry + y, NULL);
+ match_ncc = compute_cross_correlation(
+ frm, frm_stride, (int)correspondences[i].x,
+ (int)correspondences[i].y, ref, ref_stride,
+ (int)correspondences[i].rx + x,
+ (int)correspondences[i].ry + y) /
+ sqrt(template_norm * subimage_norm);
+ if (match_ncc > best_match_ncc) {
+ best_match_ncc = match_ncc;
+ best_y = y;
+ best_x = x;
+ }
+ }
+ }
+ correspondences[i].rx += (double)best_x; // stays put if nothing scored > 0
+ correspondences[i].ry += (double)best_y;
+ }
+ for (i = 0; i < num_correspondences; ++i) { // Pass 2: refine (x, y) in frm.
+ double template_norm =
+ compute_variance(ref, ref_stride, (int)correspondences[i].rx,
+ (int)correspondences[i].ry, NULL);
+ int x, y, best_x = 0, best_y = 0;
+ double best_match_ncc = 0.0;
+ for (y = -SEARCH_SZ_BY2; y <= SEARCH_SZ_BY2; ++y)
+ for (x = -SEARCH_SZ_BY2; x <= SEARCH_SZ_BY2; ++x) {
+ double match_ncc;
+ double subimage_norm;
+ if (!is_eligible_point((int)correspondences[i].x + x,
+ (int)correspondences[i].y + y, width, height))
+ continue;
+ if (!is_eligible_distance((int)correspondences[i].x + x,
+ (int)correspondences[i].y + y,
+ (int)correspondences[i].rx,
+ (int)correspondences[i].ry, width, height))
+ continue;
+ subimage_norm =
+ compute_variance(frm, frm_stride, (int)correspondences[i].x + x,
+ (int)correspondences[i].y + y, NULL);
+ match_ncc =
+ compute_cross_correlation(
+ frm, frm_stride, (int)correspondences[i].x + x,
+ (int)correspondences[i].y + y, ref, ref_stride,
+ (int)correspondences[i].rx, (int)correspondences[i].ry) /
+ sqrt(template_norm * subimage_norm);
+ if (match_ncc > best_match_ncc) {
+ best_match_ncc = match_ncc;
+ best_y = y;
+ best_x = x;
+ }
+ }
+ correspondences[i].x += best_x; // uses the (rx, ry) updated by pass 1
+ correspondences[i].y += best_y;
+ }
+}
+
+int determine_correspondence(unsigned char *frm, int *frm_corners,
+ int num_frm_corners, unsigned char *ref,
+ int *ref_corners, int num_ref_corners, int width,
+ int height, int frm_stride, int ref_stride,
+ double *correspondence_pts) {
+ // TODO(sarahparker) Improve this to include 2-way match
+ int i, j;
+ correspondence *correspondences = (correspondence *)correspondence_pts;
+ int num_correspondences = 0; // also the return value
+ for (i = 0; i < num_frm_corners; ++i) { // corners are stored as x, y pairs
+ double best_match_ncc = 0.0;
+ double template_norm;
+ int best_match_j = -1;
+ if (!is_eligible_point(frm_corners[2 * i], frm_corners[2 * i + 1], width,
+ height))
+ continue;
+ template_norm = compute_variance(frm, frm_stride, frm_corners[2 * i],
+ frm_corners[2 * i + 1], NULL);
+ for (j = 0; j < num_ref_corners; ++j) { // find the best-scoring ref corner
+ double match_ncc;
+ double subimage_norm;
+ if (!is_eligible_point(ref_corners[2 * j], ref_corners[2 * j + 1], width,
+ height))
+ continue;
+ if (!is_eligible_distance(frm_corners[2 * i], frm_corners[2 * i + 1],
+ ref_corners[2 * j], ref_corners[2 * j + 1],
+ width, height))
+ continue;
+ subimage_norm = compute_variance(ref, ref_stride, ref_corners[2 * j],
+ ref_corners[2 * j + 1], NULL);
+ match_ncc = compute_cross_correlation(frm, frm_stride, frm_corners[2 * i],
+ frm_corners[2 * i + 1], ref,
+ ref_stride, ref_corners[2 * j],
+ ref_corners[2 * j + 1]) /
+ sqrt(template_norm * subimage_norm);
+ if (match_ncc > best_match_ncc) {
+ best_match_ncc = match_ncc;
+ best_match_j = j;
+ }
+ }
+ if (best_match_ncc > THRESHOLD_NCC) { // keep only matches above threshold
+ correspondences[num_correspondences].x = (double)frm_corners[2 * i];
+ correspondences[num_correspondences].y = (double)frm_corners[2 * i + 1];
+ correspondences[num_correspondences].rx =
+ (double)ref_corners[2 * best_match_j];
+ correspondences[num_correspondences].ry =
+ (double)ref_corners[2 * best_match_j + 1];
+ num_correspondences++;
+ }
+ }
+ improve_correspondence(frm, ref, width, height, frm_stride, ref_stride,
+ correspondences, num_correspondences);
+ return num_correspondences;
+}
diff --git a/av1/encoder/corner_match.h b/av1/encoder/corner_match.h
new file mode 100644
index 0000000..3bc8cb9
--- /dev/null
+++ b/av1/encoder/corner_match.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_CORNER_MATCH_H_
+#define VP10_ENCODER_CORNER_MATCH_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+
+typedef struct {
+ double x, y; // Point in the current frame (frm)
+ double rx, ry; // Matching point in the reference frame (ref)
+} correspondence;
+
+int determine_correspondence(unsigned char *frm, int *frm_corners,
+ int num_frm_corners, unsigned char *ref,
+ int *ref_corners, int num_ref_corners, int width,
+ int height, int frm_stride, int ref_stride,
+ double *correspondence_pts);
+
+#endif // VP10_ENCODER_CORNER_MATCH_H
diff --git a/av1/encoder/cost.c b/av1/encoder/cost.c
new file mode 100644
index 0000000..4542638
--- /dev/null
+++ b/av1/encoder/cost.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+
+#include "av1/encoder/cost.h"
+#if CONFIG_ANS
+#include "av1/common/ans.h"
+#endif // CONFIG_ANS
+#include "av1/common/entropy.h"
+
+/* round(-log2(i/256.) * (1 << VP10_PROB_COST_SHIFT))
+ Begins with a bogus entry for simpler addressing. */
+const uint16_t vp10_prob_cost[256] = {
+ 4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325, 2260,
+ 2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780, 1748, 1718,
+ 1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470, 1449, 1429, 1409,
+ 1390, 1371, 1353, 1335, 1318, 1301, 1284, 1268, 1252, 1236, 1221, 1206, 1192,
+ 1177, 1163, 1149, 1136, 1123, 1110, 1097, 1084, 1072, 1059, 1047, 1036, 1024,
+ 1013, 1001, 990, 979, 968, 958, 947, 937, 927, 917, 907, 897, 887,
+ 878, 868, 859, 850, 841, 832, 823, 814, 806, 797, 789, 780, 772,
+ 764, 756, 748, 740, 732, 724, 717, 709, 702, 694, 687, 680, 673,
+ 665, 658, 651, 644, 637, 631, 624, 617, 611, 604, 598, 591, 585,
+ 578, 572, 566, 560, 554, 547, 541, 535, 530, 524, 518, 512, 506,
+ 501, 495, 489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435,
+ 430, 425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371,
+ 366, 361, 356, 352, 347, 343, 338, 333, 329, 324, 320, 316, 311,
+ 307, 302, 298, 294, 289, 285, 281, 277, 273, 268, 264, 260, 256,
+ 252, 248, 244, 240, 236, 232, 228, 224, 220, 216, 212, 209, 205,
+ 201, 197, 194, 190, 186, 182, 179, 175, 171, 168, 164, 161, 157,
+ 153, 150, 146, 143, 139, 136, 132, 129, 125, 122, 119, 115, 112,
+ 109, 105, 102, 99, 95, 92, 89, 86, 82, 79, 76, 73, 70,
+ 66, 63, 60, 57, 54, 51, 48, 45, 42, 38, 35, 32, 29,
+ 26, 23, 20, 18, 15, 12, 9, 6, 3
+};
+
+#if CONFIG_ANS
+// round(-log2(i/1024.) * (1 << VP10_PROB_COST_SHIFT))
+static const uint16_t vp10_prob_cost10[1024] = {
+ 5120, 5120, 4608, 4308, 4096, 3931, 3796, 3683, 3584, 3497, 3419, 3349, 3284,
+ 3225, 3171, 3120, 3072, 3027, 2985, 2945, 2907, 2871, 2837, 2804, 2772, 2742,
+ 2713, 2685, 2659, 2633, 2608, 2583, 2560, 2537, 2515, 2494, 2473, 2453, 2433,
+ 2414, 2395, 2377, 2359, 2342, 2325, 2308, 2292, 2276, 2260, 2245, 2230, 2216,
+ 2201, 2187, 2173, 2160, 2147, 2134, 2121, 2108, 2096, 2083, 2071, 2060, 2048,
+ 2037, 2025, 2014, 2003, 1992, 1982, 1971, 1961, 1951, 1941, 1931, 1921, 1911,
+ 1902, 1892, 1883, 1874, 1865, 1856, 1847, 1838, 1830, 1821, 1813, 1804, 1796,
+ 1788, 1780, 1772, 1764, 1756, 1748, 1741, 1733, 1726, 1718, 1711, 1704, 1697,
+ 1689, 1682, 1675, 1668, 1661, 1655, 1648, 1641, 1635, 1628, 1622, 1615, 1609,
+ 1602, 1596, 1590, 1584, 1578, 1571, 1565, 1559, 1554, 1548, 1542, 1536, 1530,
+ 1525, 1519, 1513, 1508, 1502, 1497, 1491, 1486, 1480, 1475, 1470, 1465, 1459,
+ 1454, 1449, 1444, 1439, 1434, 1429, 1424, 1419, 1414, 1409, 1404, 1399, 1395,
+ 1390, 1385, 1380, 1376, 1371, 1367, 1362, 1357, 1353, 1348, 1344, 1340, 1335,
+ 1331, 1326, 1322, 1318, 1313, 1309, 1305, 1301, 1297, 1292, 1288, 1284, 1280,
+ 1276, 1272, 1268, 1264, 1260, 1256, 1252, 1248, 1244, 1240, 1236, 1233, 1229,
+ 1225, 1221, 1218, 1214, 1210, 1206, 1203, 1199, 1195, 1192, 1188, 1185, 1181,
+ 1177, 1174, 1170, 1167, 1163, 1160, 1156, 1153, 1149, 1146, 1143, 1139, 1136,
+ 1133, 1129, 1126, 1123, 1119, 1116, 1113, 1110, 1106, 1103, 1100, 1097, 1094,
+ 1090, 1087, 1084, 1081, 1078, 1075, 1072, 1069, 1066, 1062, 1059, 1056, 1053,
+ 1050, 1047, 1044, 1042, 1039, 1036, 1033, 1030, 1027, 1024, 1021, 1018, 1015,
+ 1013, 1010, 1007, 1004, 1001, 998, 996, 993, 990, 987, 985, 982, 979,
+ 977, 974, 971, 968, 966, 963, 960, 958, 955, 953, 950, 947, 945,
+ 942, 940, 937, 934, 932, 929, 927, 924, 922, 919, 917, 914, 912,
+ 909, 907, 904, 902, 899, 897, 895, 892, 890, 887, 885, 883, 880,
+ 878, 876, 873, 871, 868, 866, 864, 861, 859, 857, 855, 852, 850,
+ 848, 845, 843, 841, 839, 836, 834, 832, 830, 828, 825, 823, 821,
+ 819, 817, 814, 812, 810, 808, 806, 804, 801, 799, 797, 795, 793,
+ 791, 789, 787, 785, 783, 780, 778, 776, 774, 772, 770, 768, 766,
+ 764, 762, 760, 758, 756, 754, 752, 750, 748, 746, 744, 742, 740,
+ 738, 736, 734, 732, 730, 728, 726, 724, 723, 721, 719, 717, 715,
+ 713, 711, 709, 707, 706, 704, 702, 700, 698, 696, 694, 693, 691,
+ 689, 687, 685, 683, 682, 680, 678, 676, 674, 673, 671, 669, 667,
+ 665, 664, 662, 660, 658, 657, 655, 653, 651, 650, 648, 646, 644,
+ 643, 641, 639, 637, 636, 634, 632, 631, 629, 627, 626, 624, 622,
+ 621, 619, 617, 616, 614, 612, 611, 609, 607, 606, 604, 602, 601,
+ 599, 598, 596, 594, 593, 591, 590, 588, 586, 585, 583, 582, 580,
+ 578, 577, 575, 574, 572, 571, 569, 567, 566, 564, 563, 561, 560,
+ 558, 557, 555, 554, 552, 550, 549, 547, 546, 544, 543, 541, 540,
+ 538, 537, 535, 534, 532, 531, 530, 528, 527, 525, 524, 522, 521,
+ 519, 518, 516, 515, 513, 512, 511, 509, 508, 506, 505, 503, 502,
+ 501, 499, 498, 496, 495, 493, 492, 491, 489, 488, 486, 485, 484,
+ 482, 481, 480, 478, 477, 475, 474, 473, 471, 470, 469, 467, 466,
+ 465, 463, 462, 460, 459, 458, 456, 455, 454, 452, 451, 450, 448,
+ 447, 446, 444, 443, 442, 441, 439, 438, 437, 435, 434, 433, 431,
+ 430, 429, 428, 426, 425, 424, 422, 421, 420, 419, 417, 416, 415,
+ 414, 412, 411, 410, 409, 407, 406, 405, 404, 402, 401, 400, 399,
+ 397, 396, 395, 394, 392, 391, 390, 389, 387, 386, 385, 384, 383,
+ 381, 380, 379, 378, 377, 375, 374, 373, 372, 371, 369, 368, 367,
+ 366, 365, 364, 362, 361, 360, 359, 358, 356, 355, 354, 353, 352,
+ 351, 349, 348, 347, 346, 345, 344, 343, 341, 340, 339, 338, 337,
+ 336, 335, 333, 332, 331, 330, 329, 328, 327, 326, 324, 323, 322,
+ 321, 320, 319, 318, 317, 316, 314, 313, 312, 311, 310, 309, 308,
+ 307, 306, 305, 303, 302, 301, 300, 299, 298, 297, 296, 295, 294,
+ 293, 292, 291, 289, 288, 287, 286, 285, 284, 283, 282, 281, 280,
+ 279, 278, 277, 276, 275, 274, 273, 272, 271, 269, 268, 267, 266,
+ 265, 264, 263, 262, 261, 260, 259, 258, 257, 256, 255, 254, 253,
+ 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240,
+ 239, 238, 237, 236, 235, 234, 233, 232, 231, 230, 229, 228, 227,
+ 226, 225, 224, 223, 222, 221, 220, 219, 218, 217, 216, 215, 214,
+ 213, 212, 212, 211, 210, 209, 208, 207, 206, 205, 204, 203, 202,
+ 201, 200, 199, 198, 197, 196, 195, 194, 194, 193, 192, 191, 190,
+ 189, 188, 187, 186, 185, 184, 183, 182, 181, 181, 180, 179, 178,
+ 177, 176, 175, 174, 173, 172, 171, 170, 170, 169, 168, 167, 166,
+ 165, 164, 163, 162, 161, 161, 160, 159, 158, 157, 156, 155, 154,
+ 153, 152, 152, 151, 150, 149, 148, 147, 146, 145, 145, 144, 143,
+ 142, 141, 140, 139, 138, 138, 137, 136, 135, 134, 133, 132, 132,
+ 131, 130, 129, 128, 127, 126, 125, 125, 124, 123, 122, 121, 120,
+ 120, 119, 118, 117, 116, 115, 114, 114, 113, 112, 111, 110, 109,
+ 109, 108, 107, 106, 105, 104, 104, 103, 102, 101, 100, 99, 99,
+ 98, 97, 96, 95, 95, 94, 93, 92, 91, 90, 90, 89, 88,
+ 87, 86, 86, 85, 84, 83, 82, 82, 81, 80, 79, 78, 78,
+ 77, 76, 75, 74, 74, 73, 72, 71, 70, 70, 69, 68, 67,
+ 66, 66, 65, 64, 63, 62, 62, 61, 60, 59, 59, 58, 57,
+ 56, 55, 55, 54, 53, 52, 52, 51, 50, 49, 48, 48, 47,
+ 46, 45, 45, 44, 43, 42, 42, 41, 40, 39, 38, 38, 37,
+ 36, 35, 35, 34, 33, 32, 32, 31, 30, 29, 29, 28, 27,
+ 26, 26, 25, 24, 23, 23, 22, 21, 20, 20, 19, 18, 18,
+ 17, 16, 15, 15, 14, 13, 12, 12, 11, 10, 9, 9, 8,
+ 7, 7, 6, 5, 4, 4, 3, 2, 1, 1
+};
+#endif // CONFIG_ANS
+
+static void cost(int *costs, vpx_tree tree, const vpx_prob *probs, int i,
+                 int c) {
+  // Walks the tree from node i with accumulated cost c. A child value <= 0
+  // is a leaf: its total is stored in costs[-value]; otherwise recurse.
+  const vpx_prob prob = probs[i / 2];
+  int bit;
+  assert(prob != 0);
+  for (bit = 0; bit < 2; ++bit) {
+    const vpx_tree_index next = tree[i + bit];
+    const int total = c + vp10_cost_bit(prob, bit);
+    if (next > 0)
+      cost(costs, tree, probs, next, total);
+    else
+      costs[-next] = total;
+  }
+}
+
+#if CONFIG_ANS
+void vp10_cost_tokens_ans(int *costs, const vpx_prob *tree_probs,
+                          const rans_dec_lut token_cdf, int skip_eob) {
+  // Cost of bit 1 at the first tree node, added to each token unless skip_eob.
+  const int c_tree = skip_eob ? 0 : vp10_cost_bit(tree_probs[0], 1);
+  int tok;
+  costs[EOB_TOKEN] = vp10_cost_bit(tree_probs[0], 0);
+  for (tok = ZERO_TOKEN; tok <= CATEGORY6_TOKEN; ++tok) {
+    // The width of the token's CDF interval indexes the 1024-entry cost table.
+    costs[tok] = c_tree + vp10_prob_cost10[token_cdf[tok + 1] - token_cdf[tok]];
+  }
+}
+#endif // CONFIG_ANS
+
+void vp10_cost_tokens(int *costs, const vpx_prob *probs, vpx_tree tree) {
+ cost(costs, tree, probs, 0, 0); // Start at node 0 with zero accumulated cost.
+}
+
+void vp10_cost_tokens_skip(int *costs, const vpx_prob *probs, vpx_tree tree) {
+ assert(tree[0] <= 0 && tree[1] > 0);
+ // Cost the root's 0-branch leaf, then node 2 onward without the root bit.
+ costs[-tree[0]] = vp10_cost_bit(probs[0], 0);
+ cost(costs, tree, probs, 2, 0);
+}
diff --git a/av1/encoder/cost.h b/av1/encoder/cost.h
new file mode 100644
index 0000000..5ae2a79
--- /dev/null
+++ b/av1/encoder/cost.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_COST_H_
+#define VP10_ENCODER_COST_H_
+
+#include "aom_dsp/prob.h"
+#include "aom/vpx_integer.h"
+#if CONFIG_ANS
+#include "av1/common/ans.h"
+#endif // CONFIG_ANS
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern const uint16_t vp10_prob_cost[256];  // cost table; valid indices are 0..255
+
+// The factor to scale from cost in bits to cost in vp10_prob_cost units.
+#define VP10_PROB_COST_SHIFT 9
+// Table lookup at index prob; going by the name, the cost of coding a 0 bit.
+#define vp10_cost_zero(prob) (vp10_prob_cost[prob])
+// Looks up the complementary index 256 - prob; prob == 0 would index past the table.
+#define vp10_cost_one(prob) vp10_cost_zero(256 - (prob))
+// Picks the 0-bit or 1-bit lookup from the run-time value of bit.
+#define vp10_cost_bit(prob, bit) vp10_cost_zero((bit) ? 256 - (prob) : (prob))
+
+// Cost of coding an n bit literal, using 128 (i.e. 50%) probability
+// for each bit.
+#define vp10_cost_literal(n) ((n) * (1 << VP10_PROB_COST_SHIFT))
+
+static INLINE unsigned int cost_branch256(const unsigned int ct[2],
+ vpx_prob p) {
+ return ct[0] * vp10_cost_zero(p) + ct[1] * vp10_cost_one(p);  // ct[0] weights the 0-bit cost, ct[1] the 1-bit cost
+}
+
+static INLINE int treed_cost(vpx_tree tree, const vpx_prob *probs, int bits,
+ int len) {
+ int cost = 0;
+ vpx_tree_index i = 0;
+ // Sums the bit costs along one tree path; the low len bits of bits are consumed from the highest down.
+ do {
+ const int bit = (bits >> --len) & 1;  // body runs before the test, so len must be >= 1 on entry
+ cost += vp10_cost_bit(probs[i >> 1], bit);
+ i = tree[i + bit];  // follow the branch selected by this bit
+ } while (len);
+
+ return cost;
+}
+
+void vp10_cost_tokens(int *costs, const vpx_prob *probs, vpx_tree tree);  // writes a cost into costs[] for each leaf of tree
+void vp10_cost_tokens_skip(int *costs, const vpx_prob *probs, vpx_tree tree);  // root leaf costed separately, rest walked from index 2
+// Only declared when CONFIG_ANS is set: costs derived from differences of token_cdf entries.
+#if CONFIG_ANS
+void vp10_cost_tokens_ans(int *costs, const vpx_prob *tree_probs,
+ const rans_dec_lut token_cdf, int skip_eob);
+#endif  // CONFIG_ANS
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_COST_H_
diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c
new file mode 100644
index 0000000..8f7812e
--- /dev/null
+++ b/av1/encoder/dct.c
@@ -0,0 +1,1896 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <math.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "av1/common/blockd.h"
+#include "av1/common/idct.h"
+#include "aom_dsp/fwd_txfm.h"
+#include "aom_ports/mem.h"
+
+static INLINE void range_check(const tran_low_t *input, const int size,
+ const int bit) {
+#if 0 // CONFIG_COEFFICIENT_RANGE_CHECKING
+// TODO(angiebird): the range_check is not used because the bit range
+// in fdct# is not correct. Since we are going to merge in a new version
+// of fdct# from nextgenv2, we won't fix the incorrect bit range now.
+ int i;
+ for (i = 0; i < size; ++i) {
+ assert(abs(input[i]) < (1 << bit));  // each magnitude must stay below 2^bit
+ }
+#else
+ (void)input;  // check is compiled out: just mark the parameters as used
+ (void)size;
+ (void)bit;
+#endif
+}
+
+static void fdct4(const tran_low_t *input, tran_low_t *output) {
+ tran_high_t temp;  // holds each multiply-accumulate before fdct_round_shift
+ tran_low_t step[4];
+ // output doubles as scratch between stages (stage 1 writes it while still reading input), so it must not alias input.
+ // stage 0: range_check only (its body is under #if 0 in this file, so it does nothing)
+ range_check(input, 4, 14);
+
+ // stage 1: sums of mirrored inputs go to output[0..1], differences to output[2..3]
+ output[0] = input[0] + input[3];
+ output[1] = input[1] + input[2];
+ output[2] = input[1] - input[2];
+ output[3] = input[0] - input[3];
+
+ range_check(output, 4, 15);
+
+ // stage 2: weight each pair with cospi_* constants and round via fdct_round_shift
+ temp = output[0] * cospi_16_64 + output[1] * cospi_16_64;
+ step[0] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[1] * -cospi_16_64 + output[0] * cospi_16_64;
+ step[1] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[2] * cospi_24_64 + output[3] * cospi_8_64;
+ step[2] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[3] * cospi_24_64 + output[2] * -cospi_8_64;
+ step[3] = (tran_low_t)fdct_round_shift(temp);
+
+ range_check(step, 4, 16);
+
+ // stage 3: copy to the final order (the two middle entries swap places)
+ output[0] = step[0];
+ output[1] = step[2];
+ output[2] = step[1];
+ output[3] = step[3];
+
+ range_check(output, 4, 16);
+}
+
+static void fdct8(const tran_low_t *input, tran_low_t *output) {
+ tran_high_t temp;  // holds each multiply-accumulate before fdct_round_shift
+ tran_low_t step[8];
+ // output and step alternate as scratch from stage to stage, so output must not alias input.
+ // stage 0: range_check only (its body is under #if 0 in this file, so it does nothing)
+ range_check(input, 8, 13);
+
+ // stage 1: sums of mirrored inputs go to output[0..3], differences to output[4..7]
+ output[0] = input[0] + input[7];
+ output[1] = input[1] + input[6];
+ output[2] = input[2] + input[5];
+ output[3] = input[3] + input[4];
+ output[4] = input[3] - input[4];
+ output[5] = input[2] - input[5];
+ output[6] = input[1] - input[6];
+ output[7] = input[0] - input[7];
+
+ range_check(output, 8, 14);
+
+ // stage 2: add/subtract within 0..3; 4 and 7 pass through; 5 and 6 are weighted by cospi_16_64
+ step[0] = output[0] + output[3];
+ step[1] = output[1] + output[2];
+ step[2] = output[1] - output[2];
+ step[3] = output[0] - output[3];
+ step[4] = output[4];
+ temp = output[5] * -cospi_16_64 + output[6] * cospi_16_64;
+ step[5] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[6] * cospi_16_64 + output[5] * cospi_16_64;
+ step[6] = (tran_low_t)fdct_round_shift(temp);
+ step[7] = output[7];
+
+ range_check(step, 8, 15);
+
+ // stage 3: 0..3 get their cospi weighting and rounding; 4..7 are add/subtract pairs
+ temp = step[0] * cospi_16_64 + step[1] * cospi_16_64;
+ output[0] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[1] * -cospi_16_64 + step[0] * cospi_16_64;
+ output[1] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[2] * cospi_24_64 + step[3] * cospi_8_64;
+ output[2] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[3] * cospi_24_64 + step[2] * -cospi_8_64;
+ output[3] = (tran_low_t)fdct_round_shift(temp);
+ output[4] = step[4] + step[5];
+ output[5] = step[4] - step[5];
+ output[6] = step[7] - step[6];
+ output[7] = step[7] + step[6];
+
+ range_check(output, 8, 16);
+
+ // stage 4: 0..3 pass through; 4..7 are weighted with the cospi_28/4 and cospi_12/20 pairs
+ step[0] = output[0];
+ step[1] = output[1];
+ step[2] = output[2];
+ step[3] = output[3];
+ temp = output[4] * cospi_28_64 + output[7] * cospi_4_64;
+ step[4] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[5] * cospi_12_64 + output[6] * cospi_20_64;
+ step[5] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[6] * cospi_12_64 + output[5] * -cospi_20_64;
+ step[6] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[7] * cospi_28_64 + output[4] * -cospi_4_64;
+ step[7] = (tran_low_t)fdct_round_shift(temp);
+
+ range_check(step, 8, 16);
+
+ // stage 5: pure permutation of step[] into the final output order
+ output[0] = step[0];
+ output[1] = step[4];
+ output[2] = step[2];
+ output[3] = step[6];
+ output[4] = step[1];
+ output[5] = step[5];
+ output[6] = step[3];
+ output[7] = step[7];
+
+ range_check(output, 8, 16);
+}
+
+static void fdct16(const tran_low_t *input, tran_low_t *output) {
+ tran_high_t temp;  // holds each multiply-accumulate before fdct_round_shift
+ tran_low_t step[16];
+ // output and step alternate as scratch from stage to stage, so output must not alias input.
+ // stage 0: range_check only (its body is under #if 0 in this file, so it does nothing)
+ range_check(input, 16, 13);
+
+ // stage 1: sums of mirrored inputs go to output[0..7], differences to output[8..15]
+ output[0] = input[0] + input[15];
+ output[1] = input[1] + input[14];
+ output[2] = input[2] + input[13];
+ output[3] = input[3] + input[12];
+ output[4] = input[4] + input[11];
+ output[5] = input[5] + input[10];
+ output[6] = input[6] + input[9];
+ output[7] = input[7] + input[8];
+ output[8] = input[7] - input[8];
+ output[9] = input[6] - input[9];
+ output[10] = input[5] - input[10];
+ output[11] = input[4] - input[11];
+ output[12] = input[3] - input[12];
+ output[13] = input[2] - input[13];
+ output[14] = input[1] - input[14];
+ output[15] = input[0] - input[15];
+
+ range_check(output, 16, 14);
+
+ // stage 2: add/subtract within 0..7; 8, 9, 14, 15 pass through; 10..13 are weighted by cospi_16_64
+ step[0] = output[0] + output[7];
+ step[1] = output[1] + output[6];
+ step[2] = output[2] + output[5];
+ step[3] = output[3] + output[4];
+ step[4] = output[3] - output[4];
+ step[5] = output[2] - output[5];
+ step[6] = output[1] - output[6];
+ step[7] = output[0] - output[7];
+ step[8] = output[8];
+ step[9] = output[9];
+ temp = output[10] * -cospi_16_64 + output[13] * cospi_16_64;
+ step[10] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[11] * -cospi_16_64 + output[12] * cospi_16_64;
+ step[11] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[12] * cospi_16_64 + output[11] * cospi_16_64;
+ step[12] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[13] * cospi_16_64 + output[10] * cospi_16_64;
+ step[13] = (tran_low_t)fdct_round_shift(temp);
+ step[14] = output[14];
+ step[15] = output[15];
+
+ range_check(step, 16, 15);
+
+ // stage 3: add/subtract within 0..3 and within 8..15; 4 and 7 pass through; 5 and 6 are weighted by cospi_16_64
+ output[0] = step[0] + step[3];
+ output[1] = step[1] + step[2];
+ output[2] = step[1] - step[2];
+ output[3] = step[0] - step[3];
+ output[4] = step[4];
+ temp = step[5] * -cospi_16_64 + step[6] * cospi_16_64;
+ output[5] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[6] * cospi_16_64 + step[5] * cospi_16_64;
+ output[6] = (tran_low_t)fdct_round_shift(temp);
+ output[7] = step[7];
+ output[8] = step[8] + step[11];
+ output[9] = step[9] + step[10];
+ output[10] = step[9] - step[10];
+ output[11] = step[8] - step[11];
+ output[12] = step[15] - step[12];
+ output[13] = step[14] - step[13];
+ output[14] = step[14] + step[13];
+ output[15] = step[15] + step[12];
+
+ range_check(output, 16, 16);
+
+ // stage 4: 0..3 weighted and rounded; 4..7 add/subtract; 8, 11, 12, 15 pass through; 9, 10, 13, 14 use cospi_8/24
+ temp = output[0] * cospi_16_64 + output[1] * cospi_16_64;
+ step[0] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[1] * -cospi_16_64 + output[0] * cospi_16_64;
+ step[1] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[2] * cospi_24_64 + output[3] * cospi_8_64;
+ step[2] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[3] * cospi_24_64 + output[2] * -cospi_8_64;
+ step[3] = (tran_low_t)fdct_round_shift(temp);
+ step[4] = output[4] + output[5];
+ step[5] = output[4] - output[5];
+ step[6] = output[7] - output[6];
+ step[7] = output[7] + output[6];
+ step[8] = output[8];
+ temp = output[9] * -cospi_8_64 + output[14] * cospi_24_64;
+ step[9] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[10] * -cospi_24_64 + output[13] * -cospi_8_64;
+ step[10] = (tran_low_t)fdct_round_shift(temp);
+ step[11] = output[11];
+ step[12] = output[12];
+ temp = output[13] * cospi_24_64 + output[10] * -cospi_8_64;
+ step[13] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[14] * cospi_8_64 + output[9] * cospi_24_64;
+ step[14] = (tran_low_t)fdct_round_shift(temp);
+ step[15] = output[15];
+
+ range_check(step, 16, 16);
+
+ // stage 5: 0..3 pass through; 4..7 use the cospi_28/4 and cospi_12/20 pairs; 8..15 add/subtract
+ output[0] = step[0];
+ output[1] = step[1];
+ output[2] = step[2];
+ output[3] = step[3];
+ temp = step[4] * cospi_28_64 + step[7] * cospi_4_64;
+ output[4] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[5] * cospi_12_64 + step[6] * cospi_20_64;
+ output[5] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[6] * cospi_12_64 + step[5] * -cospi_20_64;
+ output[6] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[7] * cospi_28_64 + step[4] * -cospi_4_64;
+ output[7] = (tran_low_t)fdct_round_shift(temp);
+ output[8] = step[8] + step[9];
+ output[9] = step[8] - step[9];
+ output[10] = step[11] - step[10];
+ output[11] = step[11] + step[10];
+ output[12] = step[12] + step[13];
+ output[13] = step[12] - step[13];
+ output[14] = step[15] - step[14];
+ output[15] = step[15] + step[14];
+
+ range_check(output, 16, 16);
+
+ // stage 6: 0..7 pass through; 8..15 are weighted with the cospi_30/2, 14/18, 22/10 and 6/26 pairs
+ step[0] = output[0];
+ step[1] = output[1];
+ step[2] = output[2];
+ step[3] = output[3];
+ step[4] = output[4];
+ step[5] = output[5];
+ step[6] = output[6];
+ step[7] = output[7];
+ temp = output[8] * cospi_30_64 + output[15] * cospi_2_64;
+ step[8] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[9] * cospi_14_64 + output[14] * cospi_18_64;
+ step[9] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[10] * cospi_22_64 + output[13] * cospi_10_64;
+ step[10] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[11] * cospi_6_64 + output[12] * cospi_26_64;
+ step[11] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[12] * cospi_6_64 + output[11] * -cospi_26_64;
+ step[12] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[13] * cospi_22_64 + output[10] * -cospi_10_64;
+ step[13] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[14] * cospi_14_64 + output[9] * -cospi_18_64;
+ step[14] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[15] * cospi_30_64 + output[8] * -cospi_2_64;
+ step[15] = (tran_low_t)fdct_round_shift(temp);
+
+ range_check(step, 16, 16);
+
+ // stage 7: pure permutation of step[] into the final output order
+ output[0] = step[0];
+ output[1] = step[8];
+ output[2] = step[4];
+ output[3] = step[12];
+ output[4] = step[2];
+ output[5] = step[10];
+ output[6] = step[6];
+ output[7] = step[14];
+ output[8] = step[1];
+ output[9] = step[9];
+ output[10] = step[5];
+ output[11] = step[13];
+ output[12] = step[3];
+ output[13] = step[11];
+ output[14] = step[7];
+ output[15] = step[15];
+
+ range_check(output, 16, 16);
+}
+
+#if CONFIG_EXT_TX
+static void fdct32(const tran_low_t *input, tran_low_t *output) {
+ tran_high_t temp;  // holds each multiply-accumulate before fdct_round_shift
+ tran_low_t step[32];
+ // output and step alternate as scratch from stage to stage, so output must not alias input.
+ // stage 0: range_check only (its body is under #if 0 in this file, so it does nothing)
+ range_check(input, 32, 14);
+
+ // stage 1: sums of mirrored inputs go to output[0..15], differences to output[16..31]
+ output[0] = input[0] + input[31];
+ output[1] = input[1] + input[30];
+ output[2] = input[2] + input[29];
+ output[3] = input[3] + input[28];
+ output[4] = input[4] + input[27];
+ output[5] = input[5] + input[26];
+ output[6] = input[6] + input[25];
+ output[7] = input[7] + input[24];
+ output[8] = input[8] + input[23];
+ output[9] = input[9] + input[22];
+ output[10] = input[10] + input[21];
+ output[11] = input[11] + input[20];
+ output[12] = input[12] + input[19];
+ output[13] = input[13] + input[18];
+ output[14] = input[14] + input[17];
+ output[15] = input[15] + input[16];
+ output[16] = input[15] - input[16];
+ output[17] = input[14] - input[17];
+ output[18] = input[13] - input[18];
+ output[19] = input[12] - input[19];
+ output[20] = input[11] - input[20];
+ output[21] = input[10] - input[21];
+ output[22] = input[9] - input[22];
+ output[23] = input[8] - input[23];
+ output[24] = input[7] - input[24];
+ output[25] = input[6] - input[25];
+ output[26] = input[5] - input[26];
+ output[27] = input[4] - input[27];
+ output[28] = input[3] - input[28];
+ output[29] = input[2] - input[29];
+ output[30] = input[1] - input[30];
+ output[31] = input[0] - input[31];
+
+ range_check(output, 32, 15);
+
+ // stage 2: add/subtract within 0..15; 16..19 and 28..31 pass through; 20..27 are weighted by cospi_16_64
+ step[0] = output[0] + output[15];
+ step[1] = output[1] + output[14];
+ step[2] = output[2] + output[13];
+ step[3] = output[3] + output[12];
+ step[4] = output[4] + output[11];
+ step[5] = output[5] + output[10];
+ step[6] = output[6] + output[9];
+ step[7] = output[7] + output[8];
+ step[8] = output[7] - output[8];
+ step[9] = output[6] - output[9];
+ step[10] = output[5] - output[10];
+ step[11] = output[4] - output[11];
+ step[12] = output[3] - output[12];
+ step[13] = output[2] - output[13];
+ step[14] = output[1] - output[14];
+ step[15] = output[0] - output[15];
+ step[16] = output[16];
+ step[17] = output[17];
+ step[18] = output[18];
+ step[19] = output[19];
+ temp = output[20] * -cospi_16_64 + output[27] * cospi_16_64;
+ step[20] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[21] * -cospi_16_64 + output[26] * cospi_16_64;
+ step[21] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[22] * -cospi_16_64 + output[25] * cospi_16_64;
+ step[22] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[23] * -cospi_16_64 + output[24] * cospi_16_64;
+ step[23] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[24] * cospi_16_64 + output[23] * cospi_16_64;
+ step[24] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[25] * cospi_16_64 + output[22] * cospi_16_64;
+ step[25] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[26] * cospi_16_64 + output[21] * cospi_16_64;
+ step[26] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[27] * cospi_16_64 + output[20] * cospi_16_64;
+ step[27] = (tran_low_t)fdct_round_shift(temp);
+ step[28] = output[28];
+ step[29] = output[29];
+ step[30] = output[30];
+ step[31] = output[31];
+
+ range_check(step, 32, 16);
+
+ // stage 3: add/subtract within 0..7 and within 16..31; 8, 9, 14, 15 pass through; 10..13 are weighted by cospi_16_64
+ output[0] = step[0] + step[7];
+ output[1] = step[1] + step[6];
+ output[2] = step[2] + step[5];
+ output[3] = step[3] + step[4];
+ output[4] = step[3] - step[4];
+ output[5] = step[2] - step[5];
+ output[6] = step[1] - step[6];
+ output[7] = step[0] - step[7];
+ output[8] = step[8];
+ output[9] = step[9];
+ temp = step[10] * -cospi_16_64 + step[13] * cospi_16_64;
+ output[10] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[11] * -cospi_16_64 + step[12] * cospi_16_64;
+ output[11] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[12] * cospi_16_64 + step[11] * cospi_16_64;
+ output[12] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[13] * cospi_16_64 + step[10] * cospi_16_64;
+ output[13] = (tran_low_t)fdct_round_shift(temp);
+ output[14] = step[14];
+ output[15] = step[15];
+ output[16] = step[16] + step[23];
+ output[17] = step[17] + step[22];
+ output[18] = step[18] + step[21];
+ output[19] = step[19] + step[20];
+ output[20] = step[19] - step[20];
+ output[21] = step[18] - step[21];
+ output[22] = step[17] - step[22];
+ output[23] = step[16] - step[23];
+ output[24] = step[31] - step[24];
+ output[25] = step[30] - step[25];
+ output[26] = step[29] - step[26];
+ output[27] = step[28] - step[27];
+ output[28] = step[28] + step[27];
+ output[29] = step[29] + step[26];
+ output[30] = step[30] + step[25];
+ output[31] = step[31] + step[24];
+
+ range_check(output, 32, 17);
+
+ // stage 4: add/subtract within 0..3 and 8..15; 5, 6 use cospi_16_64; 18..21 and 26..29 use cospi_8/24; the rest pass through
+ step[0] = output[0] + output[3];
+ step[1] = output[1] + output[2];
+ step[2] = output[1] - output[2];
+ step[3] = output[0] - output[3];
+ step[4] = output[4];
+ temp = output[5] * -cospi_16_64 + output[6] * cospi_16_64;
+ step[5] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[6] * cospi_16_64 + output[5] * cospi_16_64;
+ step[6] = (tran_low_t)fdct_round_shift(temp);
+ step[7] = output[7];
+ step[8] = output[8] + output[11];
+ step[9] = output[9] + output[10];
+ step[10] = output[9] - output[10];
+ step[11] = output[8] - output[11];
+ step[12] = output[15] - output[12];
+ step[13] = output[14] - output[13];
+ step[14] = output[14] + output[13];
+ step[15] = output[15] + output[12];
+ step[16] = output[16];
+ step[17] = output[17];
+ temp = output[18] * -cospi_8_64 + output[29] * cospi_24_64;
+ step[18] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[19] * -cospi_8_64 + output[28] * cospi_24_64;
+ step[19] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[20] * -cospi_24_64 + output[27] * -cospi_8_64;
+ step[20] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[21] * -cospi_24_64 + output[26] * -cospi_8_64;
+ step[21] = (tran_low_t)fdct_round_shift(temp);
+ step[22] = output[22];
+ step[23] = output[23];
+ step[24] = output[24];
+ step[25] = output[25];
+ temp = output[26] * cospi_24_64 + output[21] * -cospi_8_64;
+ step[26] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[27] * cospi_24_64 + output[20] * -cospi_8_64;
+ step[27] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[28] * cospi_8_64 + output[19] * cospi_24_64;
+ step[28] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[29] * cospi_8_64 + output[18] * cospi_24_64;
+ step[29] = (tran_low_t)fdct_round_shift(temp);
+ step[30] = output[30];
+ step[31] = output[31];
+
+ range_check(step, 32, 18);
+
+ // stage 5: 0..3 weighted and rounded; 4..7 and 16..31 add/subtract; 9, 10, 13, 14 use cospi_8/24; 8, 11, 12, 15 pass through
+ temp = step[0] * cospi_16_64 + step[1] * cospi_16_64;
+ output[0] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[1] * -cospi_16_64 + step[0] * cospi_16_64;
+ output[1] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[2] * cospi_24_64 + step[3] * cospi_8_64;
+ output[2] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[3] * cospi_24_64 + step[2] * -cospi_8_64;
+ output[3] = (tran_low_t)fdct_round_shift(temp);
+ output[4] = step[4] + step[5];
+ output[5] = step[4] - step[5];
+ output[6] = step[7] - step[6];
+ output[7] = step[7] + step[6];
+ output[8] = step[8];
+ temp = step[9] * -cospi_8_64 + step[14] * cospi_24_64;
+ output[9] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[10] * -cospi_24_64 + step[13] * -cospi_8_64;
+ output[10] = (tran_low_t)fdct_round_shift(temp);
+ output[11] = step[11];
+ output[12] = step[12];
+ temp = step[13] * cospi_24_64 + step[10] * -cospi_8_64;
+ output[13] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[14] * cospi_8_64 + step[9] * cospi_24_64;
+ output[14] = (tran_low_t)fdct_round_shift(temp);
+ output[15] = step[15];
+ output[16] = step[16] + step[19];
+ output[17] = step[17] + step[18];
+ output[18] = step[17] - step[18];
+ output[19] = step[16] - step[19];
+ output[20] = step[23] - step[20];
+ output[21] = step[22] - step[21];
+ output[22] = step[22] + step[21];
+ output[23] = step[23] + step[20];
+ output[24] = step[24] + step[27];
+ output[25] = step[25] + step[26];
+ output[26] = step[25] - step[26];
+ output[27] = step[24] - step[27];
+ output[28] = step[31] - step[28];
+ output[29] = step[30] - step[29];
+ output[30] = step[30] + step[29];
+ output[31] = step[31] + step[28];
+
+ range_check(output, 32, 18);
+
+ // stage 6: 0..3 pass through; 4..7 use cospi_28/4 and 12/20; 8..15 add/subtract; in 16..31 the pairs (17,30) (18,29) (21,26) (22,25) are weighted
+ step[0] = output[0];
+ step[1] = output[1];
+ step[2] = output[2];
+ step[3] = output[3];
+ temp = output[4] * cospi_28_64 + output[7] * cospi_4_64;
+ step[4] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[5] * cospi_12_64 + output[6] * cospi_20_64;
+ step[5] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[6] * cospi_12_64 + output[5] * -cospi_20_64;
+ step[6] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[7] * cospi_28_64 + output[4] * -cospi_4_64;
+ step[7] = (tran_low_t)fdct_round_shift(temp);
+ step[8] = output[8] + output[9];
+ step[9] = output[8] - output[9];
+ step[10] = output[11] - output[10];
+ step[11] = output[11] + output[10];
+ step[12] = output[12] + output[13];
+ step[13] = output[12] - output[13];
+ step[14] = output[15] - output[14];
+ step[15] = output[15] + output[14];
+ step[16] = output[16];
+ temp = output[17] * -cospi_4_64 + output[30] * cospi_28_64;
+ step[17] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[18] * -cospi_28_64 + output[29] * -cospi_4_64;
+ step[18] = (tran_low_t)fdct_round_shift(temp);
+ step[19] = output[19];
+ step[20] = output[20];
+ temp = output[21] * -cospi_20_64 + output[26] * cospi_12_64;
+ step[21] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[22] * -cospi_12_64 + output[25] * -cospi_20_64;
+ step[22] = (tran_low_t)fdct_round_shift(temp);
+ step[23] = output[23];
+ step[24] = output[24];
+ temp = output[25] * cospi_12_64 + output[22] * -cospi_20_64;
+ step[25] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[26] * cospi_20_64 + output[21] * cospi_12_64;
+ step[26] = (tran_low_t)fdct_round_shift(temp);
+ step[27] = output[27];
+ step[28] = output[28];
+ temp = output[29] * cospi_28_64 + output[18] * -cospi_4_64;
+ step[29] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[30] * cospi_4_64 + output[17] * cospi_28_64;
+ step[30] = (tran_low_t)fdct_round_shift(temp);
+ step[31] = output[31];
+
+ range_check(step, 32, 18);
+
+ // stage 7: 0..7 pass through; 8..15 use the cospi_30/2, 14/18, 22/10 and 6/26 pairs; 16..31 add/subtract
+ output[0] = step[0];
+ output[1] = step[1];
+ output[2] = step[2];
+ output[3] = step[3];
+ output[4] = step[4];
+ output[5] = step[5];
+ output[6] = step[6];
+ output[7] = step[7];
+ temp = step[8] * cospi_30_64 + step[15] * cospi_2_64;
+ output[8] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[9] * cospi_14_64 + step[14] * cospi_18_64;
+ output[9] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[10] * cospi_22_64 + step[13] * cospi_10_64;
+ output[10] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[11] * cospi_6_64 + step[12] * cospi_26_64;
+ output[11] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[12] * cospi_6_64 + step[11] * -cospi_26_64;
+ output[12] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[13] * cospi_22_64 + step[10] * -cospi_10_64;
+ output[13] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[14] * cospi_14_64 + step[9] * -cospi_18_64;
+ output[14] = (tran_low_t)fdct_round_shift(temp);
+ temp = step[15] * cospi_30_64 + step[8] * -cospi_2_64;
+ output[15] = (tran_low_t)fdct_round_shift(temp);
+ output[16] = step[16] + step[17];
+ output[17] = step[16] - step[17];
+ output[18] = step[19] - step[18];
+ output[19] = step[19] + step[18];
+ output[20] = step[20] + step[21];
+ output[21] = step[20] - step[21];
+ output[22] = step[23] - step[22];
+ output[23] = step[23] + step[22];
+ output[24] = step[24] + step[25];
+ output[25] = step[24] - step[25];
+ output[26] = step[27] - step[26];
+ output[27] = step[27] + step[26];
+ output[28] = step[28] + step[29];
+ output[29] = step[28] - step[29];
+ output[30] = step[31] - step[30];
+ output[31] = step[31] + step[30];
+
+ range_check(output, 32, 18);
+
+ // stage 8: 0..15 pass through; each mirrored pair (k, 47 - k) in 16..31 is weighted with odd-numbered cospi constants
+ step[0] = output[0];
+ step[1] = output[1];
+ step[2] = output[2];
+ step[3] = output[3];
+ step[4] = output[4];
+ step[5] = output[5];
+ step[6] = output[6];
+ step[7] = output[7];
+ step[8] = output[8];
+ step[9] = output[9];
+ step[10] = output[10];
+ step[11] = output[11];
+ step[12] = output[12];
+ step[13] = output[13];
+ step[14] = output[14];
+ step[15] = output[15];
+ temp = output[16] * cospi_31_64 + output[31] * cospi_1_64;
+ step[16] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[17] * cospi_15_64 + output[30] * cospi_17_64;
+ step[17] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[18] * cospi_23_64 + output[29] * cospi_9_64;
+ step[18] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[19] * cospi_7_64 + output[28] * cospi_25_64;
+ step[19] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[20] * cospi_27_64 + output[27] * cospi_5_64;
+ step[20] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[21] * cospi_11_64 + output[26] * cospi_21_64;
+ step[21] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[22] * cospi_19_64 + output[25] * cospi_13_64;
+ step[22] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[23] * cospi_3_64 + output[24] * cospi_29_64;
+ step[23] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[24] * cospi_3_64 + output[23] * -cospi_29_64;
+ step[24] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[25] * cospi_19_64 + output[22] * -cospi_13_64;
+ step[25] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[26] * cospi_11_64 + output[21] * -cospi_21_64;
+ step[26] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[27] * cospi_27_64 + output[20] * -cospi_5_64;
+ step[27] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[28] * cospi_7_64 + output[19] * -cospi_25_64;
+ step[28] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[29] * cospi_23_64 + output[18] * -cospi_9_64;
+ step[29] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[30] * cospi_15_64 + output[17] * -cospi_17_64;
+ step[30] = (tran_low_t)fdct_round_shift(temp);
+ temp = output[31] * cospi_31_64 + output[16] * -cospi_1_64;
+ step[31] = (tran_low_t)fdct_round_shift(temp);
+
+ range_check(step, 32, 18);
+
+ // stage 9: pure permutation of step[] into the final output order
+ output[0] = step[0];
+ output[1] = step[16];
+ output[2] = step[8];
+ output[3] = step[24];
+ output[4] = step[4];
+ output[5] = step[20];
+ output[6] = step[12];
+ output[7] = step[28];
+ output[8] = step[2];
+ output[9] = step[18];
+ output[10] = step[10];
+ output[11] = step[26];
+ output[12] = step[6];
+ output[13] = step[22];
+ output[14] = step[14];
+ output[15] = step[30];
+ output[16] = step[1];
+ output[17] = step[17];
+ output[18] = step[9];
+ output[19] = step[25];
+ output[20] = step[5];
+ output[21] = step[21];
+ output[22] = step[13];
+ output[23] = step[29];
+ output[24] = step[3];
+ output[25] = step[19];
+ output[26] = step[11];
+ output[27] = step[27];
+ output[28] = step[7];
+ output[29] = step[23];
+ output[30] = step[15];
+ output[31] = step[31];
+
+ range_check(output, 32, 18);
+}
+#endif // CONFIG_EXT_TX
+
+static void fadst4(const tran_low_t *input, tran_low_t *output) {
+ tran_high_t x0, x1, x2, x3;
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
+ // All four inputs are read into locals before output is written.
+ x0 = input[0];
+ x1 = input[1];
+ x2 = input[2];
+ x3 = input[3];
+ // Shortcut: if every input is zero (their bitwise OR is 0), the output is all zeros.
+ if (!(x0 | x1 | x2 | x3)) {
+ output[0] = output[1] = output[2] = output[3] = 0;
+ return;
+ }
+ // Products of the inputs with the sinpi_*_9 constants; s7 is a plain sum/difference scaled later.
+ s0 = sinpi_1_9 * x0;
+ s1 = sinpi_4_9 * x0;
+ s2 = sinpi_2_9 * x1;
+ s3 = sinpi_1_9 * x1;
+ s4 = sinpi_3_9 * x2;
+ s5 = sinpi_4_9 * x3;
+ s6 = sinpi_2_9 * x3;
+ s7 = x0 + x1 - x3;
+ // Combine the products into four intermediate terms (x0..x3 are reused as temporaries).
+ x0 = s0 + s2 + s5;
+ x1 = sinpi_3_9 * s7;
+ x2 = s1 - s3 + s6;
+ x3 = s4;
+ // Final sums, still unrounded.
+ s0 = x0 + x3;
+ s1 = x1;
+ s2 = x2 - x3;
+ s3 = x2 - x0 + x3;
+
+ // 1-D transform scaling factor is sqrt(2).
+ output[0] = (tran_low_t)fdct_round_shift(s0);
+ output[1] = (tran_low_t)fdct_round_shift(s1);
+ output[2] = (tran_low_t)fdct_round_shift(s2);
+ output[3] = (tran_low_t)fdct_round_shift(s3);
+}
+
+static void fadst8(const tran_low_t *input, tran_low_t *output) {
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
+ // Inputs are loaded in a permuted order (7, 0, 5, 2, 3, 4, 1, 6) before any output is written.
+ tran_high_t x0 = input[7];
+ tran_high_t x1 = input[0];
+ tran_high_t x2 = input[5];
+ tran_high_t x3 = input[2];
+ tran_high_t x4 = input[3];
+ tran_high_t x5 = input[4];
+ tran_high_t x6 = input[1];
+ tran_high_t x7 = input[6];
+
+ // stage 1: weight each adjacent pair with cospi constants, then add/subtract pairs four apart and round
+ s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
+ s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
+ s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
+ s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
+ s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
+ s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
+ s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
+ s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
+
+ x0 = fdct_round_shift(s0 + s4);
+ x1 = fdct_round_shift(s1 + s5);
+ x2 = fdct_round_shift(s2 + s6);
+ x3 = fdct_round_shift(s3 + s7);
+ x4 = fdct_round_shift(s0 - s4);
+ x5 = fdct_round_shift(s1 - s5);
+ x6 = fdct_round_shift(s2 - s6);
+ x7 = fdct_round_shift(s3 - s7);
+
+ // stage 2: 0..3 are only added/subtracted (no rounding); 4..7 are weighted with cospi_8/24 and rounded
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
+ s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
+ s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
+ s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
+
+ x0 = s0 + s2;
+ x1 = s1 + s3;
+ x2 = s0 - s2;
+ x3 = s1 - s3;
+ x4 = fdct_round_shift(s4 + s6);
+ x5 = fdct_round_shift(s5 + s7);
+ x6 = fdct_round_shift(s4 - s6);
+ x7 = fdct_round_shift(s5 - s7);
+
+ // stage 3: only x2, x3, x6, x7 change - sum and difference scaled by cospi_16_64, then rounded
+ s2 = cospi_16_64 * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (x6 - x7);
+
+ x2 = fdct_round_shift(s2);
+ x3 = fdct_round_shift(s3);
+ x6 = fdct_round_shift(s6);
+ x7 = fdct_round_shift(s7);
+ // Results are written in permuted order; the odd-indexed outputs are negated.
+ output[0] = (tran_low_t)x0;
+ output[1] = (tran_low_t)-x4;
+ output[2] = (tran_low_t)x6;
+ output[3] = (tran_low_t)-x2;
+ output[4] = (tran_low_t)x3;
+ output[5] = (tran_low_t)-x7;
+ output[6] = (tran_low_t)x5;
+ output[7] = (tran_low_t)-x1;
+}
+
+static void fadst16(const tran_low_t *input, tran_low_t *output) {  // 16-point forward ADST kernel: reads input[0..15], writes output[0..15]
+ tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;  // per-stage intermediates
+ tran_high_t s9, s10, s11, s12, s13, s14, s15;
+
+ tran_high_t x0 = input[15];  // even-numbered x take inputs 15, 13, ..., 1 (descending)
+ tran_high_t x1 = input[0];  // odd-numbered x take inputs 0, 2, ..., 14 (ascending)
+ tran_high_t x2 = input[13];
+ tran_high_t x3 = input[2];
+ tran_high_t x4 = input[11];
+ tran_high_t x5 = input[4];
+ tran_high_t x6 = input[9];
+ tran_high_t x7 = input[6];
+ tran_high_t x8 = input[7];
+ tran_high_t x9 = input[8];
+ tran_high_t x10 = input[5];
+ tran_high_t x11 = input[10];
+ tran_high_t x12 = input[3];
+ tran_high_t x13 = input[12];
+ tran_high_t x14 = input[1];
+ tran_high_t x15 = input[14];
+
+ // stage 1: cospi-weighted sum/difference of each (x[2k], x[2k+1]) pair
+ s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
+ s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
+ s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
+ s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
+ s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
+ s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
+ s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
+ s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
+ s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
+ s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
+ s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
+ s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
+ s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
+ s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
+ s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
+ s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
+
+ x0 = fdct_round_shift(s0 + s8);  // combine s[k] with s[k + 8]; every product goes through fdct_round_shift
+ x1 = fdct_round_shift(s1 + s9);
+ x2 = fdct_round_shift(s2 + s10);
+ x3 = fdct_round_shift(s3 + s11);
+ x4 = fdct_round_shift(s4 + s12);
+ x5 = fdct_round_shift(s5 + s13);
+ x6 = fdct_round_shift(s6 + s14);
+ x7 = fdct_round_shift(s7 + s15);
+ x8 = fdct_round_shift(s0 - s8);
+ x9 = fdct_round_shift(s1 - s9);
+ x10 = fdct_round_shift(s2 - s10);
+ x11 = fdct_round_shift(s3 - s11);
+ x12 = fdct_round_shift(s4 - s12);
+ x13 = fdct_round_shift(s5 - s13);
+ x14 = fdct_round_shift(s6 - s14);
+ x15 = fdct_round_shift(s7 - s15);
+
+ // stage 2: x0..x7 pass through unchanged; x8..x15 are weighted pairwise
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4;
+ s5 = x5;
+ s6 = x6;
+ s7 = x7;
+ s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
+ s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
+ s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
+ s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
+ s12 = -x12 * cospi_28_64 + x13 * cospi_4_64;
+ s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
+ s14 = -x14 * cospi_12_64 + x15 * cospi_20_64;
+ s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
+
+ x0 = s0 + s4;  // s0..s7 were not multiplied in this stage, so no round-shift here
+ x1 = s1 + s5;
+ x2 = s2 + s6;
+ x3 = s3 + s7;
+ x4 = s0 - s4;
+ x5 = s1 - s5;
+ x6 = s2 - s6;
+ x7 = s3 - s7;
+ x8 = fdct_round_shift(s8 + s12);
+ x9 = fdct_round_shift(s9 + s13);
+ x10 = fdct_round_shift(s10 + s14);
+ x11 = fdct_round_shift(s11 + s15);
+ x12 = fdct_round_shift(s8 - s12);
+ x13 = fdct_round_shift(s9 - s13);
+ x14 = fdct_round_shift(s10 - s14);
+ x15 = fdct_round_shift(s11 - s15);
+
+ // stage 3: only x4..x7 and x12..x15 are weighted (cospi_8_64 / cospi_24_64)
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
+ s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
+ s6 = -x6 * cospi_24_64 + x7 * cospi_8_64;
+ s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
+ s8 = x8;
+ s9 = x9;
+ s10 = x10;
+ s11 = x11;
+ s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
+ s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
+ s14 = -x14 * cospi_24_64 + x15 * cospi_8_64;
+ s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
+
+ x0 = s0 + s2;  // combine s[k] with s[k + 2] inside each group of four
+ x1 = s1 + s3;
+ x2 = s0 - s2;
+ x3 = s1 - s3;
+ x4 = fdct_round_shift(s4 + s6);
+ x5 = fdct_round_shift(s5 + s7);
+ x6 = fdct_round_shift(s4 - s6);
+ x7 = fdct_round_shift(s5 - s7);
+ x8 = s8 + s10;
+ x9 = s9 + s11;
+ x10 = s8 - s10;
+ x11 = s9 - s11;
+ x12 = fdct_round_shift(s12 + s14);
+ x13 = fdct_round_shift(s13 + s15);
+ x14 = fdct_round_shift(s12 - s14);
+ x15 = fdct_round_shift(s13 - s15);
+
+ // stage 4: only x2, x3, x6, x7, x10, x11, x14, x15 change (scaled by cospi_16_64)
+ s2 = (-cospi_16_64) * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (-x6 + x7);
+ s10 = cospi_16_64 * (x10 + x11);
+ s11 = cospi_16_64 * (-x10 + x11);
+ s14 = (-cospi_16_64) * (x14 + x15);
+ s15 = cospi_16_64 * (x14 - x15);
+
+ x2 = fdct_round_shift(s2);
+ x3 = fdct_round_shift(s3);
+ x6 = fdct_round_shift(s6);
+ x7 = fdct_round_shift(s7);
+ x10 = fdct_round_shift(s10);
+ x11 = fdct_round_shift(s11);
+ x14 = fdct_round_shift(s14);
+ x15 = fdct_round_shift(s15);
+
+ output[0] = (tran_low_t)x0;  // results are stored in permuted order; x8, x4, x13 and x1 are negated
+ output[1] = (tran_low_t)-x8;
+ output[2] = (tran_low_t)x12;
+ output[3] = (tran_low_t)-x4;
+ output[4] = (tran_low_t)x6;
+ output[5] = (tran_low_t)x14;
+ output[6] = (tran_low_t)x10;
+ output[7] = (tran_low_t)x2;
+ output[8] = (tran_low_t)x3;
+ output[9] = (tran_low_t)x11;
+ output[10] = (tran_low_t)x15;
+ output[11] = (tran_low_t)x7;
+ output[12] = (tran_low_t)x5;
+ output[13] = (tran_low_t)-x13;
+ output[14] = (tran_low_t)x9;
+ output[15] = (tran_low_t)-x1;
+}
+
+#if CONFIG_EXT_TX
+static void fidtx4(const tran_low_t *input, tran_low_t *output) {
+  int k = 4;  // 4-point identity kernel: every sample is scaled by Sqrt2.
+  while (k-- > 0)
+    output[k] = (tran_low_t)fdct_round_shift(input[k] * Sqrt2);
+}
+
+static void fidtx8(const tran_low_t *input, tran_low_t *output) {
+  int k = 8;  // 8-point identity kernel: every sample is multiplied by 2.
+  while (k-- > 0) output[k] = input[k] * 2;
+}
+
+static void fidtx16(const tran_low_t *input, tran_low_t *output) {
+  int k;  // 16-point identity kernel: every sample is scaled by 2 * Sqrt2.
+  for (k = 15; k >= 0; --k)
+    output[k] = (tran_low_t)fdct_round_shift(input[k] * 2 * Sqrt2);
+}
+
+static void fidtx32(const tran_low_t *input, tran_low_t *output) {
+  int k = 32;  // 32-point identity kernel: every sample is multiplied by 4.
+  while (k-- > 0) output[k] = input[k] * 4;
+}
+
+// 32-point kernel used in lieu of an ADST (half pass-through, half fdct16).
+static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
+  tran_low_t scaled_hi[16];
+  int k;
+  // Inputs 0..15 are copied, multiplied by 4, into outputs 16..31.
+  for (k = 0; k < 16; ++k) output[k + 16] = input[k] * 4;
+  // Inputs 16..31 are multiplied by sqrt(2) before being handed to fdct16.
+  for (k = 0; k < 16; ++k) {
+    const tran_low_t *const hi = input + 16;
+    scaled_hi[k] = (tran_low_t)fdct_round_shift(hi[k] * Sqrt2);
+  }
+  fdct16(scaled_hi, output);
+  // Note overall scaling factor is 4 times orthogonal
+}
+
+static void copy_block(const int16_t *src, int src_stride, int l, int w,
+                       int16_t *dest, int dest_stride) {
+  int row = l;  // Copies l rows of w samples; strides are counted in samples.
+  const size_t row_bytes = w * sizeof(int16_t);
+  while (row-- > 0)
+    memcpy(dest + dest_stride * row, src + src_stride * row, row_bytes);
+}
+
+static void fliplr(int16_t *dest, int stride, int l, int w) {
+  int row, a, b;  // Mirror each of the l rows left-to-right, in place.
+  for (row = 0; row < l; ++row) {
+    for (a = 0, b = w - 1; a < b; ++a, --b) {
+      const int16_t keep = dest[row * stride + a];
+      dest[row * stride + a] = dest[row * stride + b];
+      dest[row * stride + b] = keep;
+    }
+  }
+}
+
+static void flipud(int16_t *dest, int stride, int l, int w) {
+  int top, bot, col;  // Reverse the order of the l rows, in place.
+  for (top = 0, bot = l - 1; top < bot; ++top, --bot) {
+    for (col = 0; col < w; ++col) {
+      const int16_t keep = dest[top * stride + col];
+      dest[top * stride + col] = dest[bot * stride + col];
+      dest[bot * stride + col] = keep;
+    }
+  }
+}
+
+static void fliplrud(int16_t *dest, int stride, int l, int w) {
+  int k;  // 180-degree in-place rotation of the l x w block (stride in samples).
+  for (k = 0; k < l * w / 2; ++k) {  // first half of the samples, raster order
+    // Swapping per sample (not per row pair) also mirrors the middle row
+    // when l is odd; for even l the result is the same as pairing rows.
+    int16_t *const a = dest + (k / w) * stride + k % w;
+    int16_t *const b = dest + (l - 1 - k / w) * stride + (w - 1 - k % w);
+    const int16_t tmp = *a; *a = *b; *b = tmp;
+  }
+}
+
+static void copy_fliplr(const int16_t *src, int src_stride, int l, int w,
+ int16_t *dest, int dest_stride) {
+ copy_block(src, src_stride, l, w, dest, dest_stride);  // copy the l x w block into dest first; src is only read
+ fliplr(dest, dest_stride, l, w);  // then mirror the copy left-to-right, in place
+}
+
+static void copy_flipud(const int16_t *src, int src_stride, int l, int w,
+ int16_t *dest, int dest_stride) {
+ copy_block(src, src_stride, l, w, dest, dest_stride);  // copy the l x w block into dest first; src is only read
+ flipud(dest, dest_stride, l, w);  // then reverse the row order of the copy, in place
+}
+
+static void copy_fliplrud(const int16_t *src, int src_stride, int l, int w,
+ int16_t *dest, int dest_stride) {
+ copy_block(src, src_stride, l, w, dest, dest_stride);  // copy the l x w block into dest first; src is only read
+ fliplrud(dest, dest_stride, l, w);  // then flip the copy both vertically and horizontally, in place
+}
+
+static void maybe_flip_input(const int16_t **src, int *src_stride, int l, int w,
+                             int16_t *buff, int tx_type) {
+  // For tx_types with a FLIPADST component, writes a flipped copy of the l x w
+  // block into buff (row stride w) and redirects *src / *src_stride to it.
+  // For every other tx_type, *src and *src_stride are left untouched.
+  void (*flip_copy)(const int16_t *, int, int, int, int16_t *, int) = NULL;
+
+  switch (tx_type) {
+    case FLIPADST_DCT:
+    case FLIPADST_ADST:
+    case V_FLIPADST: flip_copy = copy_flipud; break;  // rows reversed
+    case DCT_FLIPADST:
+    case ADST_FLIPADST:
+    case H_FLIPADST: flip_copy = copy_fliplr; break;  // columns reversed
+    case FLIPADST_FLIPADST: flip_copy = copy_fliplrud; break;  // both
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    case IDTX:
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST: break;  // no flip required
+    default: assert(0); break;  // unrecognised tx_type
+  }
+
+  // Shared tail: run the selected helper and point the caller at the copy.
+  if (flip_copy != NULL) {
+    flip_copy(*src, *src_stride, l, w, buff, w);
+    *src = buff;
+    *src_stride = w;
+  }
+}
+#endif // CONFIG_EXT_TX
+
+static const transform_2d FHT_4[] = {  // 4x4 kernel pairs, indexed by tx_type; presumably { cols, rows } — confirm against transform_2d
+ { fdct4, fdct4 }, // DCT_DCT
+ { fadst4, fdct4 }, // ADST_DCT
+ { fdct4, fadst4 }, // DCT_ADST
+ { fadst4, fadst4 }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { fadst4, fdct4 }, // FLIPADST_DCT (same kernels as ADST_DCT; the flip is applied to the input by maybe_flip_input)
+ { fdct4, fadst4 }, // DCT_FLIPADST
+ { fadst4, fadst4 }, // FLIPADST_FLIPADST
+ { fadst4, fadst4 }, // ADST_FLIPADST
+ { fadst4, fadst4 }, // FLIPADST_ADST
+ { fidtx4, fidtx4 }, // IDTX
+ { fdct4, fidtx4 }, // V_DCT
+ { fidtx4, fdct4 }, // H_DCT
+ { fadst4, fidtx4 }, // V_ADST
+ { fidtx4, fadst4 }, // H_ADST
+ { fadst4, fidtx4 }, // V_FLIPADST
+ { fidtx4, fadst4 }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+};
+
+static const transform_2d FHT_8[] = {  // 8x8 kernel pairs, indexed by tx_type; presumably { cols, rows } — confirm against transform_2d
+ { fdct8, fdct8 }, // DCT_DCT
+ { fadst8, fdct8 }, // ADST_DCT
+ { fdct8, fadst8 }, // DCT_ADST
+ { fadst8, fadst8 }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { fadst8, fdct8 }, // FLIPADST_DCT (same kernels as ADST_DCT; the flip is applied to the input by maybe_flip_input)
+ { fdct8, fadst8 }, // DCT_FLIPADST
+ { fadst8, fadst8 }, // FLIPADST_FLIPADST
+ { fadst8, fadst8 }, // ADST_FLIPADST
+ { fadst8, fadst8 }, // FLIPADST_ADST
+ { fidtx8, fidtx8 }, // IDTX
+ { fdct8, fidtx8 }, // V_DCT
+ { fidtx8, fdct8 }, // H_DCT
+ { fadst8, fidtx8 }, // V_ADST
+ { fidtx8, fadst8 }, // H_ADST
+ { fadst8, fidtx8 }, // V_FLIPADST
+ { fidtx8, fadst8 }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+};
+
+static const transform_2d FHT_16[] = {  // 16x16 kernel pairs, indexed by tx_type; presumably { cols, rows } — confirm against transform_2d
+ { fdct16, fdct16 }, // DCT_DCT
+ { fadst16, fdct16 }, // ADST_DCT
+ { fdct16, fadst16 }, // DCT_ADST
+ { fadst16, fadst16 }, // ADST_ADST
+#if CONFIG_EXT_TX
+ { fadst16, fdct16 }, // FLIPADST_DCT (same kernels as ADST_DCT; the flip is applied to the input by maybe_flip_input)
+ { fdct16, fadst16 }, // DCT_FLIPADST
+ { fadst16, fadst16 }, // FLIPADST_FLIPADST
+ { fadst16, fadst16 }, // ADST_FLIPADST
+ { fadst16, fadst16 }, // FLIPADST_ADST
+ { fidtx16, fidtx16 }, // IDTX
+ { fdct16, fidtx16 }, // V_DCT
+ { fidtx16, fdct16 }, // H_DCT
+ { fadst16, fidtx16 }, // V_ADST
+ { fidtx16, fadst16 }, // H_ADST
+ { fadst16, fidtx16 }, // V_FLIPADST
+ { fidtx16, fadst16 }, // H_FLIPADST
+#endif // CONFIG_EXT_TX
+};
+
+#if CONFIG_EXT_TX
+static const transform_2d FHT_32[] = {  // 32x32 kernel pairs, indexed by tx_type; fhalfright32 fills every ADST / FLIPADST slot
+ { fdct32, fdct32 }, // DCT_DCT
+ { fhalfright32, fdct32 }, // ADST_DCT
+ { fdct32, fhalfright32 }, // DCT_ADST
+ { fhalfright32, fhalfright32 }, // ADST_ADST
+ { fhalfright32, fdct32 }, // FLIPADST_DCT
+ { fdct32, fhalfright32 }, // DCT_FLIPADST
+ { fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST
+ { fhalfright32, fhalfright32 }, // ADST_FLIPADST
+ { fhalfright32, fhalfright32 }, // FLIPADST_ADST
+ { fidtx32, fidtx32 }, // IDTX
+ { fdct32, fidtx32 }, // V_DCT
+ { fidtx32, fdct32 }, // H_DCT
+ { fhalfright32, fidtx32 }, // V_ADST
+ { fidtx32, fhalfright32 }, // H_ADST
+ { fhalfright32, fidtx32 }, // V_FLIPADST
+ { fidtx32, fhalfright32 }, // H_FLIPADST
+};
+
+static const transform_2d FHT_4x8[] = {  // indexed by tx_type; each entry pairs an 8-point kernel with a 4-point kernel
+ { fdct8, fdct4 }, // DCT_DCT
+ { fadst8, fdct4 }, // ADST_DCT
+ { fdct8, fadst4 }, // DCT_ADST
+ { fadst8, fadst4 }, // ADST_ADST
+ { fadst8, fdct4 }, // FLIPADST_DCT
+ { fdct8, fadst4 }, // DCT_FLIPADST
+ { fadst8, fadst4 }, // FLIPADST_FLIPADST
+ { fadst8, fadst4 }, // ADST_FLIPADST
+ { fadst8, fadst4 }, // FLIPADST_ADST
+ { fidtx8, fidtx4 }, // IDTX
+ { fdct8, fidtx4 }, // V_DCT
+ { fidtx8, fdct4 }, // H_DCT
+ { fadst8, fidtx4 }, // V_ADST
+ { fidtx8, fadst4 }, // H_ADST
+ { fadst8, fidtx4 }, // V_FLIPADST
+ { fidtx8, fadst4 }, // H_FLIPADST
+};
+
+static const transform_2d FHT_8x4[] = {  // indexed by tx_type; each entry pairs a 4-point kernel with an 8-point kernel
+ { fdct4, fdct8 }, // DCT_DCT
+ { fadst4, fdct8 }, // ADST_DCT
+ { fdct4, fadst8 }, // DCT_ADST
+ { fadst4, fadst8 }, // ADST_ADST
+ { fadst4, fdct8 }, // FLIPADST_DCT
+ { fdct4, fadst8 }, // DCT_FLIPADST
+ { fadst4, fadst8 }, // FLIPADST_FLIPADST
+ { fadst4, fadst8 }, // ADST_FLIPADST
+ { fadst4, fadst8 }, // FLIPADST_ADST
+ { fidtx4, fidtx8 }, // IDTX
+ { fdct4, fidtx8 }, // V_DCT
+ { fidtx4, fdct8 }, // H_DCT
+ { fadst4, fidtx8 }, // V_ADST
+ { fidtx4, fadst8 }, // H_ADST
+ { fadst4, fidtx8 }, // V_FLIPADST
+ { fidtx4, fadst8 }, // H_FLIPADST
+};
+
+static const transform_2d FHT_8x16[] = {  // indexed by tx_type; each entry pairs a 16-point kernel with an 8-point kernel
+ { fdct16, fdct8 }, // DCT_DCT
+ { fadst16, fdct8 }, // ADST_DCT
+ { fdct16, fadst8 }, // DCT_ADST
+ { fadst16, fadst8 }, // ADST_ADST
+ { fadst16, fdct8 }, // FLIPADST_DCT
+ { fdct16, fadst8 }, // DCT_FLIPADST
+ { fadst16, fadst8 }, // FLIPADST_FLIPADST
+ { fadst16, fadst8 }, // ADST_FLIPADST
+ { fadst16, fadst8 }, // FLIPADST_ADST
+ { fidtx16, fidtx8 }, // IDTX
+ { fdct16, fidtx8 }, // V_DCT
+ { fidtx16, fdct8 }, // H_DCT
+ { fadst16, fidtx8 }, // V_ADST
+ { fidtx16, fadst8 }, // H_ADST
+ { fadst16, fidtx8 }, // V_FLIPADST
+ { fidtx16, fadst8 }, // H_FLIPADST
+};
+
+static const transform_2d FHT_16x8[] = {  // indexed by tx_type; each entry pairs an 8-point kernel with a 16-point kernel
+ { fdct8, fdct16 }, // DCT_DCT
+ { fadst8, fdct16 }, // ADST_DCT
+ { fdct8, fadst16 }, // DCT_ADST
+ { fadst8, fadst16 }, // ADST_ADST
+ { fadst8, fdct16 }, // FLIPADST_DCT
+ { fdct8, fadst16 }, // DCT_FLIPADST
+ { fadst8, fadst16 }, // FLIPADST_FLIPADST
+ { fadst8, fadst16 }, // ADST_FLIPADST
+ { fadst8, fadst16 }, // FLIPADST_ADST
+ { fidtx8, fidtx16 }, // IDTX
+ { fdct8, fidtx16 }, // V_DCT
+ { fidtx8, fdct16 }, // H_DCT
+ { fadst8, fidtx16 }, // V_ADST
+ { fidtx8, fadst16 }, // H_ADST
+ { fadst8, fidtx16 }, // V_FLIPADST
+ { fidtx8, fadst16 }, // H_FLIPADST
+};
+
+static const transform_2d FHT_16x32[] = {  // indexed by tx_type; 32-point kernel first (fhalfright32 in the ADST slots), 16-point kernel second
+ { fdct32, fdct16 }, // DCT_DCT
+ { fhalfright32, fdct16 }, // ADST_DCT
+ { fdct32, fadst16 }, // DCT_ADST
+ { fhalfright32, fadst16 }, // ADST_ADST
+ { fhalfright32, fdct16 }, // FLIPADST_DCT
+ { fdct32, fadst16 }, // DCT_FLIPADST
+ { fhalfright32, fadst16 }, // FLIPADST_FLIPADST
+ { fhalfright32, fadst16 }, // ADST_FLIPADST
+ { fhalfright32, fadst16 }, // FLIPADST_ADST
+ { fidtx32, fidtx16 }, // IDTX
+ { fdct32, fidtx16 }, // V_DCT
+ { fidtx32, fdct16 }, // H_DCT
+ { fhalfright32, fidtx16 }, // V_ADST
+ { fidtx32, fadst16 }, // H_ADST
+ { fhalfright32, fidtx16 }, // V_FLIPADST
+ { fidtx32, fadst16 }, // H_FLIPADST
+};
+
+static const transform_2d FHT_32x16[] = {  // indexed by tx_type; 16-point kernel first, 32-point kernel second (fhalfright32 in the ADST slots)
+ { fdct16, fdct32 }, // DCT_DCT
+ { fadst16, fdct32 }, // ADST_DCT
+ { fdct16, fhalfright32 }, // DCT_ADST
+ { fadst16, fhalfright32 }, // ADST_ADST
+ { fadst16, fdct32 }, // FLIPADST_DCT
+ { fdct16, fhalfright32 }, // DCT_FLIPADST
+ { fadst16, fhalfright32 }, // FLIPADST_FLIPADST
+ { fadst16, fhalfright32 }, // ADST_FLIPADST
+ { fadst16, fhalfright32 }, // FLIPADST_ADST
+ { fidtx16, fidtx32 }, // IDTX
+ { fdct16, fidtx32 }, // V_DCT
+ { fidtx16, fdct32 }, // H_DCT
+ { fadst16, fidtx32 }, // V_ADST
+ { fidtx16, fhalfright32 }, // H_ADST
+ { fadst16, fidtx32 }, // V_FLIPADST
+ { fidtx16, fhalfright32 }, // H_FLIPADST
+};
+#endif // CONFIG_EXT_TX
+
+void vp10_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
+                   int tx_type) {
+  // 4x4 forward hybrid transform. DCT_DCT is delegated to vpx_fdct4x4_c; any
+  // other tx_type runs the FHT_4 kernel pair: columns first, then rows.
+  const transform_2d ht = FHT_4[tx_type];
+  tran_low_t vec_in[4], vec_out[4], col_pass[4 * 4];
+  int r, c;
+#if CONFIG_EXT_TX
+  int16_t flip_buf[4 * 4];
+#endif
+  if (tx_type == DCT_DCT) {
+    vpx_fdct4x4_c(input, output, stride);
+    return;
+  }
+#if CONFIG_EXT_TX
+  maybe_flip_input(&input, &stride, 4, 4, flip_buf, tx_type);
+#endif
+  // Vertical pass on residuals scaled by 16; a nonzero top-left sample gets +1.
+  for (c = 0; c < 4; ++c) {
+    for (r = 0; r < 4; ++r) vec_in[r] = input[r * stride + c] * 16;
+    if (c == 0 && vec_in[0] != 0) vec_in[0] += 1;
+    ht.cols(vec_in, vec_out);
+    for (r = 0; r < 4; ++r) col_pass[r * 4 + c] = vec_out[r];
+  }
+  // Horizontal pass; results are stored in raster order as (x + 1) >> 2.
+  for (r = 0; r < 4; ++r) {
+    for (c = 0; c < 4; ++c) vec_in[c] = col_pass[r * 4 + c];
+    ht.rows(vec_in, vec_out);
+    for (c = 0; c < 4; ++c) output[r * 4 + c] = (vec_out[c] + 1) >> 2;
+  }
+}
+
+#if CONFIG_EXT_TX
+void vp10_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
+                   int tx_type) {
+  // Forward hybrid transform of a 4-wide, 8-tall block: an 8-sample pass down
+  // each column, then a 4-sample pass along each row (kernels from FHT_4x8).
+  enum { kWide = 4, kTall = 8 };
+  const transform_2d ht = FHT_4x8[tx_type];
+  tran_low_t vec_in[8], vec_out[8], col_pass[kTall * kWide];
+  int16_t flip_buf[kTall * kWide];
+  int r, c;
+  maybe_flip_input(&input, &stride, kTall, kWide, flip_buf, tx_type);
+
+  // Vertical pass; every residual is first scaled by 8 * Sqrt2.
+  for (c = 0; c < kWide; ++c) {
+    for (r = 0; r < kTall; ++r) {
+      const int16_t px = input[r * stride + c];
+      vec_in[r] = (tran_low_t)fdct_round_shift(px * 8 * Sqrt2);
+    }
+    ht.cols(vec_in, vec_out);
+    for (r = 0; r < kTall; ++r) col_pass[r * kWide + c] = vec_out[r];
+  }
+
+  // Horizontal pass; results are stored in raster order as (x + 1) >> 2.
+  for (r = 0; r < kTall; ++r) {
+    for (c = 0; c < kWide; ++c) vec_in[c] = col_pass[r * kWide + c];
+    ht.rows(vec_in, vec_out);
+    for (c = 0; c < kWide; ++c) output[r * kWide + c] = (vec_out[c] + 1) >> 2;
+  }  // Note: overall scale factor of transform is 8 times unitary
+}
+
+void vp10_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
+                   int tx_type) {
+  // Forward hybrid transform of an 8-wide, 4-tall block: a 4-sample pass down
+  // each column, then an 8-sample pass along each row (kernels from FHT_8x4).
+  enum { kWide = 8, kTall = 4 };
+  const transform_2d ht = FHT_8x4[tx_type];
+  tran_low_t vec_in[8], vec_out[8], col_pass[kTall * kWide];
+  int16_t flip_buf[kTall * kWide];
+  int r, c;
+  maybe_flip_input(&input, &stride, kTall, kWide, flip_buf, tx_type);
+
+  // Vertical pass; every residual is first scaled by 8 * Sqrt2.
+  for (c = 0; c < kWide; ++c) {
+    for (r = 0; r < kTall; ++r) {
+      const int16_t px = input[r * stride + c];
+      vec_in[r] = (tran_low_t)fdct_round_shift(px * 8 * Sqrt2);
+    }
+    ht.cols(vec_in, vec_out);
+    for (r = 0; r < kTall; ++r) col_pass[r * kWide + c] = vec_out[r];
+  }
+
+  // Horizontal pass; results are stored in raster order as (x + 1) >> 2.
+  for (r = 0; r < kTall; ++r) {
+    for (c = 0; c < kWide; ++c) vec_in[c] = col_pass[r * kWide + c];
+    ht.rows(vec_in, vec_out);
+    for (c = 0; c < kWide; ++c) output[r * kWide + c] = (vec_out[c] + 1) >> 2;
+  }  // Note: overall scale factor of transform is 8 times unitary
+}
+
+void vp10_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
+                    int tx_type) {
+  // Forward hybrid transform of an 8-wide, 16-tall block: a 16-sample pass down
+  // each column, then an 8-sample pass along each row (kernels from FHT_8x16).
+  enum { kWide = 8, kTall = 16 };
+  const transform_2d ht = FHT_8x16[tx_type];
+  tran_low_t vec_in[16], vec_out[16], col_pass[kTall * kWide];
+  int16_t flip_buf[kTall * kWide];
+  int r, c;
+  maybe_flip_input(&input, &stride, kTall, kWide, flip_buf, tx_type);
+
+  // Vertical pass; every residual is first scaled by 4 * Sqrt2.
+  for (c = 0; c < kWide; ++c) {
+    for (r = 0; r < kTall; ++r) {
+      const int16_t px = input[r * stride + c];
+      vec_in[r] = (tran_low_t)fdct_round_shift(px * 4 * Sqrt2);
+    }
+    ht.cols(vec_in, vec_out);
+    for (r = 0; r < kTall; ++r) col_pass[r * kWide + c] = vec_out[r];
+  }
+
+  // Horizontal pass; results are stored in raster order as (x + 1) >> 2.
+  for (r = 0; r < kTall; ++r) {
+    for (c = 0; c < kWide; ++c) vec_in[c] = col_pass[r * kWide + c];
+    ht.rows(vec_in, vec_out);
+    for (c = 0; c < kWide; ++c) output[r * kWide + c] = (vec_out[c] + 1) >> 2;
+  }  // Note: overall scale factor of transform is 8 times unitary
+}
+
+void vp10_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
+                    int tx_type) {
+  // Forward hybrid transform of a 16-wide, 8-tall block: an 8-sample pass down
+  // each column, then a 16-sample pass along each row (kernels from FHT_16x8).
+  enum { kWide = 16, kTall = 8 };
+  const transform_2d ht = FHT_16x8[tx_type];
+  tran_low_t vec_in[16], vec_out[16], col_pass[kTall * kWide];
+  int16_t flip_buf[kTall * kWide];
+  int r, c;
+  maybe_flip_input(&input, &stride, kTall, kWide, flip_buf, tx_type);
+
+  // Vertical pass; every residual is first scaled by 4 * Sqrt2.
+  for (c = 0; c < kWide; ++c) {
+    for (r = 0; r < kTall; ++r) {
+      const int16_t px = input[r * stride + c];
+      vec_in[r] = (tran_low_t)fdct_round_shift(px * 4 * Sqrt2);
+    }
+    ht.cols(vec_in, vec_out);
+    for (r = 0; r < kTall; ++r) col_pass[r * kWide + c] = vec_out[r];
+  }
+
+  // Horizontal pass; results are stored in raster order as (x + 1) >> 2.
+  for (r = 0; r < kTall; ++r) {
+    for (c = 0; c < kWide; ++c) vec_in[c] = col_pass[r * kWide + c];
+    ht.rows(vec_in, vec_out);
+    for (c = 0; c < kWide; ++c) output[r * kWide + c] = (vec_out[c] + 1) >> 2;
+  }  // Note: overall scale factor of transform is 8 times unitary
+}
+
+void vp10_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
+                     int tx_type) {
+  // Forward hybrid transform of a 16-wide, 32-tall block: a 32-sample pass down
+  // each column, then a 16-sample pass along each row (kernels from FHT_16x32).
+  enum { kWide = 16, kTall = 32 };
+  const transform_2d ht = FHT_16x32[tx_type];
+  tran_low_t vec_in[32], vec_out[32], col_pass[kTall * kWide];
+  int16_t flip_buf[kTall * kWide];
+  int r, c;
+  maybe_flip_input(&input, &stride, kTall, kWide, flip_buf, tx_type);
+
+  // Vertical pass; every residual is first scaled by Sqrt2.
+  for (c = 0; c < kWide; ++c) {
+    for (r = 0; r < kTall; ++r)
+      vec_in[r] = (tran_low_t)fdct_round_shift(input[r * stride + c] * Sqrt2);
+    ht.cols(vec_in, vec_out);
+    for (r = 0; r < kTall; ++r) col_pass[r * kWide + c] = vec_out[r];
+  }
+
+  // Horizontal pass; stored as (x + 1 + (x < 0)) >> 2, in raster order.
+  for (r = 0; r < kTall; ++r) {
+    for (c = 0; c < kWide; ++c) vec_in[c] = col_pass[r * kWide + c];
+    ht.rows(vec_in, vec_out);
+    for (c = 0; c < kWide; ++c) {
+      const tran_low_t v = vec_out[c];
+      output[r * kWide + c] = (tran_low_t)((v + 1 + (v < 0)) >> 2);
+    }
+  }  // Note: overall scale factor of transform is 4 times unitary
+}
+
+void vp10_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
+                     int tx_type) {
+  // Forward hybrid transform of a 32-wide, 16-tall block: a 16-sample pass down
+  // each column, then a 32-sample pass along each row (kernels from FHT_32x16).
+  enum { kWide = 32, kTall = 16 };
+  const transform_2d ht = FHT_32x16[tx_type];
+  tran_low_t vec_in[32], vec_out[32], col_pass[kTall * kWide];
+  int16_t flip_buf[kTall * kWide];
+  int r, c;
+  maybe_flip_input(&input, &stride, kTall, kWide, flip_buf, tx_type);
+
+  // Vertical pass; every residual is first scaled by Sqrt2.
+  for (c = 0; c < kWide; ++c) {
+    for (r = 0; r < kTall; ++r)
+      vec_in[r] = (tran_low_t)fdct_round_shift(input[r * stride + c] * Sqrt2);
+    ht.cols(vec_in, vec_out);
+    for (r = 0; r < kTall; ++r) col_pass[r * kWide + c] = vec_out[r];
+  }
+
+  // Horizontal pass; stored as (x + 1 + (x < 0)) >> 2, in raster order.
+  for (r = 0; r < kTall; ++r) {
+    for (c = 0; c < kWide; ++c) vec_in[c] = col_pass[r * kWide + c];
+    ht.rows(vec_in, vec_out);
+    for (c = 0; c < kWide; ++c) {
+      const tran_low_t v = vec_out[c];
+      output[r * kWide + c] = (tran_low_t)((v + 1 + (v < 0)) >> 2);
+    }
+  }  // Note: overall scale factor of transform is 4 times unitary
+}
+
+#endif // CONFIG_EXT_TX
+
+void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
+                          tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                          int skip_block, const int16_t *zbin_ptr,
+                          const int16_t *round_ptr, const int16_t *quant_ptr,
+                          const int16_t *quant_shift_ptr,
+                          tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                          const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                          const int16_t *scan, const int16_t *iscan
+#if CONFIG_AOM_QM
+                          ,
+                          const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
+#endif
+                          ) {  // 8x8 forward DCT into coeff_ptr, then quantization
+  int eob = -1;  // scan position of the last nonzero quantized value (-1: none)
+
+  int i, j;
+  tran_low_t intermediate[64];  // column-pass results, 8 values per row
+
+  // Transform columns
+  {
+    tran_low_t *output = intermediate;
+    tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;  // canbe16
+    tran_high_t t0, t1, t2, t3;                  // needs32
+    tran_high_t x0, x1, x2, x3;                  // canbe16
+
+    // One column per pass; uses the function-level i (no shadowing local).
+    for (i = 0; i < 8; i++) {
+      // stage 1
+      s0 = (input[0 * stride] + input[7 * stride]) * 4;
+      s1 = (input[1 * stride] + input[6 * stride]) * 4;
+      s2 = (input[2 * stride] + input[5 * stride]) * 4;
+      s3 = (input[3 * stride] + input[4 * stride]) * 4;
+      s4 = (input[3 * stride] - input[4 * stride]) * 4;
+      s5 = (input[2 * stride] - input[5 * stride]) * 4;
+      s6 = (input[1 * stride] - input[6 * stride]) * 4;
+      s7 = (input[0 * stride] - input[7 * stride]) * 4;
+
+      // fdct4(step, step);
+      x0 = s0 + s3;
+      x1 = s1 + s2;
+      x2 = s1 - s2;
+      x3 = s0 - s3;
+      t0 = (x0 + x1) * cospi_16_64;
+      t1 = (x0 - x1) * cospi_16_64;
+      t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
+      t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
+      output[0 * 8] = (tran_low_t)fdct_round_shift(t0);
+      output[2 * 8] = (tran_low_t)fdct_round_shift(t2);
+      output[4 * 8] = (tran_low_t)fdct_round_shift(t1);
+      output[6 * 8] = (tran_low_t)fdct_round_shift(t3);
+
+      // stage 2
+      t0 = (s6 - s5) * cospi_16_64;
+      t1 = (s6 + s5) * cospi_16_64;
+      t2 = fdct_round_shift(t0);
+      t3 = fdct_round_shift(t1);
+
+      // stage 3
+      x0 = s4 + t2;
+      x1 = s4 - t2;
+      x2 = s7 - t3;
+      x3 = s7 + t3;
+
+      // stage 4
+      t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
+      t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
+      t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
+      t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
+      output[1 * 8] = (tran_low_t)fdct_round_shift(t0);
+      output[3 * 8] = (tran_low_t)fdct_round_shift(t2);
+      output[5 * 8] = (tran_low_t)fdct_round_shift(t1);
+      output[7 * 8] = (tran_low_t)fdct_round_shift(t3);
+      input++;
+      output++;
+    }
+  }
+
+  // Rows
+  for (i = 0; i < 8; ++i) {
+    fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
+    for (j = 0; j < 8; ++j) coeff_ptr[j + i * 8] /= 2;  // halve every coeff
+  }
+
+  // TODO(jingning) Decide the need of these arguments after the
+  // quantization process is completed.
+  (void)zbin_ptr;
+  (void)quant_shift_ptr;
+  (void)iscan;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));  // outputs start zeroed
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  if (!skip_block) {
+    // Quantization pass: visit the first n_coeffs positions in scan order;
+    // eob ends up as the last scan position with a nonzero quantized value.
+    for (i = 0; i < n_coeffs; i++) {
+      const int rc = scan[i];
+      const int coeff = coeff_ptr[rc];
+#if CONFIG_AOM_QM
+      const qm_val_t wt = qm_ptr[rc];
+      const qm_val_t iwt = iqm_ptr[rc];
+      const int dequant =
+          (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+          AOM_QM_BITS;
+#endif
+      const int coeff_sign = (coeff >> 31);  // sign mask taken from bit 31
+      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+      int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+      int tmp32;
+#if CONFIG_AOM_QM
+      tmp32 = (int)((tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS));
+      qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
+#else
+      tmp32 = (int)((tmp * quant_ptr[rc != 0]) >> 16);
+      qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+#endif
+
+      if (tmp32) eob = i;
+    }
+  }
+  *eob_ptr = eob + 1;  // number of scan positions up to the last nonzero one
+}
+
+void vp10_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
+                   int tx_type) {
+  // 8x8 forward hybrid transform. DCT_DCT is delegated to vpx_fdct8x8_c; any
+  // other tx_type runs the FHT_8 kernel pair: columns first, then rows.
+  const transform_2d ht = FHT_8[tx_type];
+  tran_low_t vec_in[8], vec_out[8], col_pass[64];
+  int r, c;
+#if CONFIG_EXT_TX
+  int16_t flip_buf[8 * 8];
+#endif
+  if (tx_type == DCT_DCT) {
+    vpx_fdct8x8_c(input, output, stride);
+    return;
+  }
+#if CONFIG_EXT_TX
+  maybe_flip_input(&input, &stride, 8, 8, flip_buf, tx_type);
+#endif
+  // Vertical pass on residuals scaled by 4.
+  for (c = 0; c < 8; ++c) {
+    for (r = 0; r < 8; ++r) vec_in[r] = input[r * stride + c] * 4;
+    ht.cols(vec_in, vec_out);
+    for (r = 0; r < 8; ++r) col_pass[r * 8 + c] = vec_out[r];
+  }
+  // Horizontal pass; stored as (x + (x < 0)) >> 1, in raster order.
+  for (r = 0; r < 8; ++r) {
+    for (c = 0; c < 8; ++c) vec_in[c] = col_pass[r * 8 + c];
+    ht.rows(vec_in, vec_out);
+    for (c = 0; c < 8; ++c)
+      output[r * 8 + c] = (vec_out[c] + (vec_out[c] < 0)) >> 1;
+  }
+}
+
+/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
+ pixel. */
+void vp10_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
+ int i;
+ tran_high_t a1, b1, c1, d1, e1;
+ const int16_t *ip_pass0 = input;
+ const tran_low_t *ip = NULL;  // set to output before the second pass
+ tran_low_t *op = output;
+
+ for (i = 0; i < 4; i++) {  // pass 1: one input column per iteration (samples are stride apart)
+ a1 = ip_pass0[0 * stride];
+ b1 = ip_pass0[1 * stride];
+ c1 = ip_pass0[2 * stride];
+ d1 = ip_pass0[3 * stride];
+
+ a1 += b1;
+ d1 = d1 - c1;
+ e1 = (a1 - d1) >> 1;
+ b1 = e1 - b1;
+ c1 = e1 - c1;
+ a1 -= c1;
+ d1 += b1;
+ op[0] = (tran_low_t)a1;  // results go down one column of the 4x4 output, in the order a1, c1, d1, b1
+ op[4] = (tran_low_t)c1;
+ op[8] = (tran_low_t)d1;
+ op[12] = (tran_low_t)b1;
+
+ ip_pass0++;
+ op++;
+ }
+ ip = output;  // pass 2 reads the pass-1 results and overwrites them in place
+ op = output;
+
+ for (i = 0; i < 4; i++) {  // pass 2: one row of four values per iteration
+ a1 = ip[0];
+ b1 = ip[1];
+ c1 = ip[2];
+ d1 = ip[3];
+
+ a1 += b1;
+ d1 -= c1;
+ e1 = (a1 - d1) >> 1;
+ b1 = e1 - b1;
+ c1 = e1 - c1;
+ a1 -= c1;
+ d1 += b1;
+ op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR);  // only this pass multiplies by UNIT_QUANT_FACTOR
+ op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR);
+ op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR);
+ op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR);
+
+ ip += 4;
+ op += 4;
+ }
+}
+
+void vp10_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
+                     int tx_type) {
+  // 16x16 forward hybrid transform. DCT_DCT is delegated to vpx_fdct16x16_c;
+  // any other tx_type runs the FHT_16 kernel pair: columns first, then rows.
+  const transform_2d ht = FHT_16[tx_type];
+  tran_low_t vec_in[16], vec_out[16], col_pass[256];
+  int r, c;
+#if CONFIG_EXT_TX
+  int16_t flip_buf[16 * 16];
+#endif
+  if (tx_type == DCT_DCT) {
+    vpx_fdct16x16_c(input, output, stride);
+    return;
+  }
+#if CONFIG_EXT_TX
+  maybe_flip_input(&input, &stride, 16, 16, flip_buf, tx_type);
+#endif
+  // Vertical pass on residuals scaled by 4; kept as (x + 1 + (x < 0)) >> 2.
+  for (c = 0; c < 16; ++c) {
+    for (r = 0; r < 16; ++r) vec_in[r] = input[r * stride + c] * 4;
+    ht.cols(vec_in, vec_out);
+    for (r = 0; r < 16; ++r)
+      col_pass[r * 16 + c] = (vec_out[r] + 1 + (vec_out[r] < 0)) >> 2;
+  }
+  // Horizontal pass; kernel results are stored unmodified, in raster order.
+  for (r = 0; r < 16; ++r) {
+    for (c = 0; c < 16; ++c) vec_in[c] = col_pass[r * 16 + c];
+    ht.rows(vec_in, vec_out);
+    for (c = 0; c < 16; ++c) output[r * 16 + c] = vec_out[c];
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
+ int tx_type) {
+ vp10_fht4x4_c(input, output, stride, tx_type);  // high-bitdepth entry point: forwards all arguments unchanged to vp10_fht4x4_c
+}
+
+#if CONFIG_EXT_TX
+void vp10_highbd_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
+ int tx_type) {
+ vp10_fht4x8_c(input, output, stride, tx_type);  // high-bitdepth entry point: forwards all arguments unchanged to vp10_fht4x8_c
+}
+
+void vp10_highbd_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
+ int tx_type) {
+ vp10_fht8x4_c(input, output, stride, tx_type);  // high-bitdepth entry point: forwards all arguments unchanged to vp10_fht8x4_c
+}
+
+void vp10_highbd_fht8x16_c(const int16_t *input, tran_low_t *output, int stride,
+ int tx_type) {
+ vp10_fht8x16_c(input, output, stride, tx_type);  // high-bitdepth entry point: forwards all arguments unchanged to vp10_fht8x16_c
+}
+
+void vp10_highbd_fht16x8_c(const int16_t *input, tran_low_t *output, int stride,
+ int tx_type) {
+ vp10_fht16x8_c(input, output, stride, tx_type);  // high-bitdepth entry point: forwards all arguments unchanged to vp10_fht16x8_c
+}
+
+void vp10_highbd_fht16x32_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ vp10_fht16x32_c(input, output, stride, tx_type);  // high-bitdepth entry point: forwards all arguments unchanged to vp10_fht16x32_c
+}
+
+void vp10_highbd_fht32x16_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ vp10_fht32x16_c(input, output, stride, tx_type);  // high-bitdepth entry point: forwards all arguments unchanged to vp10_fht32x16_c
+}
+#endif // CONFIG_EXT_TX
+
+void vp10_highbd_fht8x8_c(const int16_t *input, tran_low_t *output, int stride,
+ int tx_type) {
+ vp10_fht8x8_c(input, output, stride, tx_type);  // high-bitdepth entry point: forwards all arguments unchanged to vp10_fht8x8_c
+}
+
+void vp10_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
+ int stride) {
+ vp10_fwht4x4_c(input, output, stride);  // high-bitdepth entry point: forwards all arguments unchanged to vp10_fwht4x4_c
+}
+
+void vp10_highbd_fht16x16_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ vp10_fht16x16_c(input, output, stride, tx_type);  // high-bitdepth entry point: forwards all arguments unchanged to vp10_fht16x16_c
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_EXT_TX
+void vp10_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
+                     int tx_type) {
+  // 32x32 forward hybrid transform. DCT_DCT is delegated to vpx_fdct32x32_c;
+  // any other tx_type runs the FHT_32 kernel pair: columns first, then rows.
+  const transform_2d ht = FHT_32[tx_type];
+  tran_low_t vec_in[32], vec_out[32], col_pass[1024];
+  int16_t flip_buf[32 * 32];
+  int r, c;
+  if (tx_type == DCT_DCT) {
+    vpx_fdct32x32_c(input, output, stride);
+    return;
+  }
+  maybe_flip_input(&input, &stride, 32, 32, flip_buf, tx_type);
+
+  // Vertical pass on residuals scaled by 4; kept as (x + 1 + (x > 0)) >> 2.
+  for (c = 0; c < 32; ++c) {
+    for (r = 0; r < 32; ++r) vec_in[r] = input[r * stride + c] * 4;
+    ht.cols(vec_in, vec_out);
+    for (r = 0; r < 32; ++r)
+      col_pass[r * 32 + c] = (vec_out[r] + 1 + (vec_out[r] > 0)) >> 2;
+  }
+  // Horizontal pass; note the extra bias here is for negative values (x < 0).
+  for (r = 0; r < 32; ++r) {
+    for (c = 0; c < 32; ++c) vec_in[c] = col_pass[r * 32 + c];
+    ht.rows(vec_in, vec_out);
+    for (c = 0; c < 32; ++c) {
+      const tran_low_t v = vec_out[c];
+      output[r * 32 + c] = (tran_low_t)((v + 1 + (v < 0)) >> 2);
+    }
+  }
+}
+
+// Forward identity transform: coeff = src_diff * 8 (bs < 32) or * 4 (otherwise).
+void vp10_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
+                     int bs, int tx_type) {
+  int r, c;
+  const int scale = 1 << (bs < 32 ? 3 : 2);  // multiply, not <<: shifting a negative value left is undefined
+  if (tx_type == IDTX) {  // any other tx_type leaves coeff untouched
+    for (r = 0; r < bs; ++r) {  // bs x bs block; coeff rows are packed (stride bs)
+      for (c = 0; c < bs; ++c) coeff[c] = src_diff[c] * scale;
+      src_diff += stride;
+      coeff += bs;
+    }
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_fht32x32_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ vp10_fht32x32_c(input, output, stride, tx_type);  // high-bitdepth entry point: forwards all arguments unchanged to vp10_fht32x32_c
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_TX
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
new file mode 100644
index 0000000..b2635b4
--- /dev/null
+++ b/av1/encoder/encodeframe.c
@@ -0,0 +1,6109 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "./vpx_config.h"
+
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/vpx_timer.h"
+#include "aom_ports/system_state.h"
+
+#include "av1/common/common.h"
+#include "av1/common/entropy.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/idct.h"
+#include "av1/common/mvref_common.h"
+#include "av1/common/pred_common.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/reconintra.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/seg_common.h"
+#include "av1/common/tile_common.h"
+
+#include "av1/encoder/aq_complexity.h"
+#include "av1/encoder/aq_cyclicrefresh.h"
+#include "av1/encoder/aq_variance.h"
+#if CONFIG_SUPERTX
+#include "av1/encoder/cost.h"
+#endif
+#if CONFIG_GLOBAL_MOTION
+#include "av1/encoder/global_motion.h"
+#endif
+#include "av1/encoder/encodeframe.h"
+#include "av1/encoder/encodemb.h"
+#include "av1/encoder/encodemv.h"
+#include "av1/encoder/ethread.h"
+#include "av1/encoder/extend.h"
+#include "av1/encoder/rd.h"
+#include "av1/encoder/rdopt.h"
+#include "av1/encoder/segmentation.h"
+#include "av1/encoder/tokenize.h"
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#define IF_HBD(...) __VA_ARGS__  // high-bitdepth build: keep the wrapped tokens
+#else
+#define IF_HBD(...)  // otherwise the wrapped tokens are dropped
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static void encode_superblock(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+ int output_enabled, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
+
+#if CONFIG_SUPERTX
+static int check_intra_b(PICK_MODE_CONTEXT *ctx);
+
+static int check_intra_sb(VP10_COMP *cpi, const TileInfo *const tile,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ PC_TREE *pc_tree);
+static void predict_superblock(VP10_COMP *cpi, ThreadData *td,
+#if CONFIG_EXT_INTER
+ int mi_row_ori, int mi_col_ori,
+#endif // CONFIG_EXT_INTER
+ int mi_row_pred, int mi_col_pred,
+ BLOCK_SIZE bsize_pred, int b_sub8x8, int block);
+static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size,
+ PC_TREE *pc_tree);
+static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile, int mi_row,
+ int mi_col, int mi_row_ori, int mi_col_ori,
+ int output_enabled, BLOCK_SIZE bsize,
+ BLOCK_SIZE top_bsize, uint8_t *dst_buf[3],
+ int dst_stride[3], PC_TREE *pc_tree);
+static void update_state_sb_supertx(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile, int mi_row,
+ int mi_col, BLOCK_SIZE bsize,
+ int output_enabled, PC_TREE *pc_tree);
+static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int *tmp_rate, int64_t *tmp_dist,
+ TX_TYPE *best_tx, PC_TREE *pc_tree);
+#endif // CONFIG_SUPERTX
+
+// Flat reference row (every entry is 128) used when computing the source
+// variance for the purposes of activity masking; it is passed to the
+// variance functions with a stride of 0. Eventually this should be replaced
+// by custom no-reference routines, which will be faster.
+static const uint8_t VP10_VAR_OFFS[MAX_SB_SIZE] = {
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+#if CONFIG_EXT_PARTITION
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
+#endif // CONFIG_EXT_PARTITION
+};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static const uint16_t VP10_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {  // flat row of 128 held as uint16_t
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+#if CONFIG_EXT_PARTITION
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
+#endif // CONFIG_EXT_PARTITION
+};
+
+static const uint16_t VP10_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {  // flat row of 128 * 4, selected for bd == 10
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+#if CONFIG_EXT_PARTITION
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
+ 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
+#endif // CONFIG_EXT_PARTITION
+};
+
+static const uint16_t VP10_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {  // flat row of 128 * 16, selected for bd == 12
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16,
+#if CONFIG_EXT_PARTITION
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
+ 128 * 16
+#endif // CONFIG_EXT_PARTITION
+};
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// Rounded per-pixel variance of ref against the flat VP10_VAR_OFFS row.
+unsigned int vp10_get_sby_perpixel_variance(VP10_COMP *cpi,
+                                            const struct buf_2d *ref,
+                                            BLOCK_SIZE bs) {
+  unsigned int total, sse;
+  total = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP10_VAR_OFFS, 0, &sse);
+  return ROUND_POWER_OF_TWO(total, num_pels_log2_lookup[bs]);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High-bitdepth counterpart of vp10_get_sby_perpixel_variance(). The flat
+// reference row is chosen by bit depth: VP10_HIGH_VAR_OFFS_10 for bd == 10,
+// VP10_HIGH_VAR_OFFS_12 for bd == 12 and VP10_HIGH_VAR_OFFS_8 for 8 or any
+// other value. The row is passed with a stride of 0, and the result is
+// rounded with ROUND_POWER_OF_TWO(..., num_pels_log2_lookup[bs]).
+unsigned int vp10_high_get_sby_perpixel_variance(VP10_COMP *cpi,
+                                                 const struct buf_2d *ref,
+                                                 BLOCK_SIZE bs, int bd) {
+  const uint16_t *flat_row;
+  unsigned int total, sse_unused;
+
+  switch (bd) {
+    case 10: flat_row = VP10_HIGH_VAR_OFFS_10; break;
+    case 12: flat_row = VP10_HIGH_VAR_OFFS_12; break;
+    case 8:
+    default: flat_row = VP10_HIGH_VAR_OFFS_8; break;
+  }
+
+  // One variance call for all depths; only the reference row differs.
+  total = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
+                             CONVERT_TO_BYTEPTR(flat_row), 0, &sse_unused);
+
+  return ROUND_POWER_OF_TWO(total, num_pels_log2_lookup[bs]);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static unsigned int get_sby_perpixel_diff_variance(VP10_COMP *cpi,
+                                                   const struct buf_2d *ref,
+                                                   int mi_row, int mi_col,
+                                                   BLOCK_SIZE bs) {
+  // Compare ref with the block at the same mi position in LAST_FRAME.
+  const YV12_BUFFER_CONFIG *const prev = get_ref_frame_buffer(cpi, LAST_FRAME);
+  const uint8_t *prev_y;
+  unsigned int res, sse;
+  assert(prev != NULL);
+  prev_y = prev->y_buffer +
+           (mi_row * MI_SIZE * prev->y_stride + mi_col * MI_SIZE);
+  res = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, prev_y, prev->y_stride, &sse);
+  return ROUND_POWER_OF_TWO(res, num_pels_log2_lookup[bs]);
+}
+
+// Fixed partition size from the BLOCK_64X64 diff variance at this position:
+// < 8 -> 64X64, < 128 -> 32X32, < 2048 -> 16X16, otherwise 8X8.
+static BLOCK_SIZE get_rd_var_based_fixed_partition(VP10_COMP *cpi,
+                                                   MACROBLOCK *x, int mi_row,
+                                                   int mi_col) {
+  const unsigned int diff_var = get_sby_perpixel_diff_variance(
+      cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
+
+  // Test the largest threshold first.
+  if (diff_var >= 2048) return BLOCK_8X8;
+  if (diff_var >= 128) return BLOCK_16X16;
+  if (diff_var >= 8) return BLOCK_32X32;
+  return BLOCK_64X64;
+}
+
+// Lighter version of set_offsets that only repositions the mode info
+// pointers (xd->mi, xd->mi[0]) and x->mbmi_ext for (mi_row, mi_col).
+static void set_mode_info_offsets(VP10_COMP *const cpi, MACROBLOCK *const x,
+                                  MACROBLOCKD *const xd, int mi_row,
+                                  int mi_col) {
+  VP10_COMMON *const cm = &cpi->common;
+  const int grid_pos = xd->mi_stride * mi_row + mi_col;
+  xd->mi = &cm->mi_grid_visible[grid_pos];
+  xd->mi[0] = &cm->mi[grid_pos];
+  x->mbmi_ext = &cpi->mbmi_ext_base[mi_row * cm->mi_cols + mi_col];
+}
+
+static void set_offsets_without_segment_id(VP10_COMP *cpi,
+ const TileInfo *const tile,
+ MACROBLOCK *const x, int mi_row,
+ int mi_col, BLOCK_SIZE bsize) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ // Positions x / xd on the bsize block at (mi_row, mi_col); no segment id is assigned in this function.
+ set_skip_context(xd, mi_row, mi_col);
+
+ set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+ xd->max_tx_size = max_txsize_lookup[bsize];
+#endif
+
+ // Set up destination pointers.
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+
+ // Set up limit values for MV components.
+ // MVs beyond this range do not produce a new/different prediction block.
+ x->mv_row_min = -(((mi_row + mi_height) * MI_SIZE) + VPX_INTERP_EXTEND);
+ x->mv_col_min = -(((mi_col + mi_width) * MI_SIZE) + VPX_INTERP_EXTEND);
+ x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VPX_INTERP_EXTEND;
+ x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VPX_INTERP_EXTEND;
+
+ // Set up distance of MB to edge of frame in 1/8th pel units.
+ assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));  // block must be aligned to its own size
+ set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
+ cm->mi_cols);
+
+ // Set up source buffers.
+ vp10_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+
+ // R/D setup.
+ x->rddiv = cpi->rd.RDDIV;
+ x->rdmult = cpi->rd.RDMULT;
+
+ // required by vp10_append_sub8x8_mvs_for_idx() and vp10_find_best_ref_mvs()
+ xd->tile = *tile;
+}
+
+static void set_offsets(VP10_COMP *cpi, const TileInfo *const tile,
+ MACROBLOCK *const x, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi;
+ const struct segmentation *const seg = &cm->seg;
+ // Common per-block setup first, then segment id, plane quantizers and encode_breakout.
+ set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
+
+ mbmi = &xd->mi[0]->mbmi;  // xd->mi was just repositioned by the call above
+
+ // Setup segment ID.
+ if (seg->enabled) {
+ if (!cpi->vaq_refresh) {  // when vaq_refresh is set, the stored segment_id is left as is
+ const uint8_t *const map =
+ seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
+ mbmi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
+ }
+ vp10_init_plane_quantizers(cpi, x, mbmi->segment_id);
+
+ x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
+ } else {
+ mbmi->segment_id = 0;
+ x->encode_breakout = cpi->encode_breakout;
+ }
+
+#if CONFIG_SUPERTX
+ mbmi->segment_id_supertx = MAX_SEGMENTS;  // same initial value that set_segment_id_supertx() starts from
+#endif // CONFIG_SUPERTX
+}
+
+#if CONFIG_SUPERTX
+static void set_offsets_supertx(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile, int mi_row,
+ int mi_col, BLOCK_SIZE bsize) {
+ MACROBLOCK *const x = &td->mb;
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ // Supertx variant: only the mode info pointers and the edge distances are updated.
+ set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+
+ // Set up distance of MB to edge of frame in 1/8th pel units.
+ assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));  // block must be aligned to its own size
+ set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
+ cm->mi_cols);
+}
+
+static void set_offsets_extend(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile, int mi_row_pred,
+ int mi_col_pred, int mi_row_ori, int mi_col_ori,
+ BLOCK_SIZE bsize_pred) {
+ // Used in supertx
+ // (mi_row_ori, mi_col_ori, bsize_ori): region for mv
+ // (mi_row_pred, mi_col_pred, bsize_pred): region to predict
+ MACROBLOCK *const x = &td->mb;
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize_pred];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize_pred];
+ // Mode info comes from the *_ori position; MV limits and edge distances use the *_pred block.
+ set_mode_info_offsets(cpi, x, xd, mi_row_ori, mi_col_ori);
+
+ // Set up limit values for MV components.
+ // MVs beyond this range do not produce a new/different prediction block.
+ x->mv_row_min = -(((mi_row_pred + mi_height) * MI_SIZE) + VPX_INTERP_EXTEND);
+ x->mv_col_min = -(((mi_col_pred + mi_width) * MI_SIZE) + VPX_INTERP_EXTEND);
+ x->mv_row_max = (cm->mi_rows - mi_row_pred) * MI_SIZE + VPX_INTERP_EXTEND;
+ x->mv_col_max = (cm->mi_cols - mi_col_pred) * MI_SIZE + VPX_INTERP_EXTEND;
+
+ // Set up distance of MB to edge of frame in 1/8th pel units.
+ assert(!(mi_col_pred & (mi_width - 1)) && !(mi_row_pred & (mi_height - 1)));
+ set_mi_row_col(xd, tile, mi_row_pred, mi_height, mi_col_pred, mi_width,
+ cm->mi_rows, cm->mi_cols);
+ xd->up_available = (mi_row_ori > tile->mi_row_start);  // overrides set_mi_row_col's value using the *_ori row
+ xd->left_available = (mi_col_ori > tile->mi_col_start);  // likewise for the *_ori column
+
+ // R/D setup.
+ x->rddiv = cpi->rd.RDDIV;
+ x->rdmult = cpi->rd.RDMULT;
+}
+
+static void set_segment_id_supertx(const VP10_COMP *const cpi,
+ MACROBLOCK *const x, const int mi_row,
+ const int mi_col, const BLOCK_SIZE bsize) {
+ const VP10_COMMON *cm = &cpi->common;
+ const struct segmentation *seg = &cm->seg;
+ const int miw =
+ VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col);  // clipped at the frame's last column
+ const int mih =
+ VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row);  // clipped at the frame's last row
+ const int mi_offset = mi_row * cm->mi_stride + mi_col;
+ MODE_INFO **const mip = cm->mi_grid_visible + mi_offset;
+ int r, c;
+ int seg_id_supertx = MAX_SEGMENTS;
+ // Gives every mode info covered by the block one shared segment_id_supertx and sets up x to match.
+ if (!seg->enabled) {
+ seg_id_supertx = 0;
+ x->encode_breakout = cpi->encode_breakout;
+ } else {
+ // Find the minimum segment_id
+ for (r = 0; r < mih; r++)
+ for (c = 0; c < miw; c++)
+ seg_id_supertx =
+ VPXMIN(mip[r * cm->mi_stride + c]->mbmi.segment_id, seg_id_supertx);
+ assert(0 <= seg_id_supertx && seg_id_supertx < MAX_SEGMENTS);
+
+ // Initialize plane quantisers
+ vp10_init_plane_quantizers(cpi, x, seg_id_supertx);
+ x->encode_breakout = cpi->segment_encode_breakout[seg_id_supertx];
+ }
+
+ // Assign the chosen segment id back to segment_id_supertx
+ for (r = 0; r < mih; r++)
+ for (c = 0; c < miw; c++)
+ mip[r * cm->mi_stride + c]->mbmi.segment_id_supertx = seg_id_supertx;
+}
+#endif // CONFIG_SUPERTX
+
+// Records bsize at (mi_row, mi_col); positions outside the frame are ignored.
+static void set_block_size(VP10_COMP *const cpi, MACROBLOCK *const x,
+                           MACROBLOCKD *const xd, int mi_row, int mi_col,
+                           BLOCK_SIZE bsize) {
+  if (mi_col >= cpi->common.mi_cols || mi_row >= cpi->common.mi_rows) return;
+  set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+  xd->mi[0]->mbmi.sb_type = bsize;
+}
+
+static void set_vt_partitioning(VP10_COMP *cpi, MACROBLOCK *const x,
+ MACROBLOCKD *const xd, VAR_TREE *vt, int mi_row,
+ int mi_col, const int64_t *const threshold,
+ const BLOCK_SIZE *const bsize_min) {
+ VP10_COMMON *const cm = &cpi->common;
+ const int hbw = num_8x8_blocks_wide_lookup[vt->bsize] / 2;  // half the node width, from the 8x8-block lookup
+ const int hbh = num_8x8_blocks_high_lookup[vt->bsize] / 2;  // half the node height
+ const int has_cols = mi_col + hbw < cm->mi_cols;  // the right half starts inside the frame
+ const int has_rows = mi_row + hbh < cm->mi_rows;  // the bottom half starts inside the frame
+ // Picks block sizes for the vt node at (mi_row, mi_col) from its variances, recursing into vt->split[] when it splits.
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+
+ assert(vt->bsize >= BLOCK_8X8);
+
+ assert(hbh == hbw);
+
+ if (vt->bsize == BLOCK_8X8 && cm->frame_type != KEY_FRAME) {
+ set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_8X8);
+ return;
+ }
+
+ if (vt->force_split || (!has_cols && !has_rows)) goto split;
+
+ // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
+ // variance is below threshold, otherwise split will be selected.
+ // No check for vert/horiz split as too few samples for variance.
+ if (vt->bsize == bsize_min[0]) {
+ if (has_cols && has_rows && vt->variances.none.variance < threshold[0]) {
+ set_block_size(cpi, x, xd, mi_row, mi_col, vt->bsize);
+ return;
+ } else {
+ BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_SPLIT);
+ set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
+ if (vt->bsize > BLOCK_8X8) {
+ set_block_size(cpi, x, xd, mi_row, mi_col + hbw, subsize);
+ set_block_size(cpi, x, xd, mi_row + hbh, mi_col, subsize);
+ set_block_size(cpi, x, xd, mi_row + hbh, mi_col + hbw, subsize);
+ }
+ return;
+ }
+ } else if (vt->bsize > bsize_min[0]) {
+ // For key frame: take split for bsize above 32X32 or very high variance.
+ if (cm->frame_type == KEY_FRAME &&
+ (vt->bsize > BLOCK_32X32 ||
+ vt->variances.none.variance > (threshold[0] << 4))) {
+ goto split;
+ }
+ // If variance is low, take the bsize (no split).
+ if (has_cols && has_rows && vt->variances.none.variance < threshold[0]) {
+ set_block_size(cpi, x, xd, mi_row, mi_col, vt->bsize);
+ return;
+ }
+
+ // Check vertical split.
+ if (has_rows) {  // both vertical halves span the full height, so the bottom half must be in frame
+ BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_VERT);
+ if (vt->variances.vert[0].variance < threshold[0] &&
+ vt->variances.vert[1].variance < threshold[0] &&
+ get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
+ set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
+ set_block_size(cpi, x, xd, mi_row, mi_col + hbw, subsize);
+ return;
+ }
+ }
+ // Check horizontal split.
+ if (has_cols) {  // both horizontal halves span the full width, so the right half must be in frame
+ BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_HORZ);
+ if (vt->variances.horz[0].variance < threshold[0] &&
+ vt->variances.horz[1].variance < threshold[0] &&
+ get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
+ set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
+ set_block_size(cpi, x, xd, mi_row + hbh, mi_col, subsize);
+ return;
+ }
+ }
+ }
+ // Nothing above settled the node: recurse into the four quadrants with the next threshold / bsize_min entry.
+split : {
+ set_vt_partitioning(cpi, x, xd, vt->split[0], mi_row, mi_col, threshold + 1,
+ bsize_min + 1);
+ set_vt_partitioning(cpi, x, xd, vt->split[1], mi_row, mi_col + hbw,
+ threshold + 1, bsize_min + 1);
+ set_vt_partitioning(cpi, x, xd, vt->split[2], mi_row + hbh, mi_col,
+ threshold + 1, bsize_min + 1);
+ set_vt_partitioning(cpi, x, xd, vt->split[3], mi_row + hbh, mi_col + hbw,
+ threshold + 1, bsize_min + 1);
+ return;
+}
+}
+
+// Set the variance split thresholds for the following block sizes:
+// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
+// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
+// currently only used on key frame.
+static void set_vbp_thresholds(VP10_COMP *cpi, int64_t thresholds[], int q) {
+ VP10_COMMON *const cm = &cpi->common;
+ const int is_key_frame = (cm->frame_type == KEY_FRAME);
+ const int threshold_multiplier = is_key_frame ? 20 : 1;
+ const int64_t threshold_base =
+ (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);
+ if (is_key_frame) {
+ thresholds[1] = threshold_base;
+ thresholds[2] = threshold_base >> 2;
+ thresholds[3] = threshold_base >> 2;
+ thresholds[4] = threshold_base << 2;  // entry 4 is only written on key frames
+ } else {
+ thresholds[2] = threshold_base;  // replaced below unless the frame is at most 352x288
+ if (cm->width <= 352 && cm->height <= 288) {
+ thresholds[1] = threshold_base >> 2;
+ thresholds[3] = threshold_base << 3;
+ } else {
+ thresholds[1] = threshold_base;
+ thresholds[2] = (5 * threshold_base) >> 2;
+ if (cm->width >= 1920 && cm->height >= 1080)
+ thresholds[2] = (7 * threshold_base) >> 2;
+ thresholds[3] = threshold_base << cpi->oxcf.speed;
+ }
+ }
+ thresholds[0] = INT64_MIN;  // NOTE(review): code fills [1]..[4] and pins [0]; the index list in the header looks off by one - confirm
+}
+
+// Refreshes cpi->vbp_thresholds and the SAD / minimum-block-size / minmax
+// limits for q; does nothing for other partition search types.
+void vp10_set_variance_partition_thresholds(VP10_COMP *cpi, int q) {
+  VP10_COMMON *const cm = &cpi->common;
+  const SPEED_FEATURES *const sf = &cpi->sf;
+  const int low_res = (cm->width <= 352 && cm->height <= 288);
+
+  if (sf->partition_search_type != VAR_BASED_PARTITION &&
+      sf->partition_search_type != REFERENCE_PARTITION)
+    return;
+
+  set_vbp_thresholds(cpi, cpi->vbp_thresholds, q);
+  // The thresholds below are not changed locally.
+  if (cm->frame_type == KEY_FRAME) {
+    cpi->vbp_bsize_min = BLOCK_8X8;
+    cpi->vbp_threshold_sad = 0;
+  } else if (low_res) {
+    cpi->vbp_bsize_min = BLOCK_16X16;
+    cpi->vbp_threshold_sad = 100;
+  } else {
+    cpi->vbp_bsize_min = BLOCK_16X16;
+    cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000
+                                 ? (cpi->y_dequant[q][1] << 1) : 1000;
+  }
+  cpi->vbp_threshold_minmax = 15 + (q >> 3);
+}
+
+// Compute the spread of the (max - min) values of the 8x8 sub-blocks of a 16x16.
+static int compute_minmax_8x8(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+#if CONFIG_VP9_HIGHBITDEPTH
+ int highbd,
+#endif
+ int pixels_wide, int pixels_high) {
+ int k;
+ int minmax_max = 0;
+ int minmax_min = 255;  // NOTE(review): start value looks sized for 8-bit ranges - confirm for highbd
+ // Loop over the 4 8x8 subblocks.
+ for (k = 0; k < 4; k++) {
+ const int x8_idx = ((k & 1) << 3);  // 0 or 8: column offset of sub-block k
+ const int y8_idx = ((k >> 1) << 3);  // 0 or 8: row offset of sub-block k
+ int min = 0;
+ int max = 0;
+ if (x8_idx < pixels_wide && y8_idx < pixels_high) {  // skip sub-blocks that start outside pixels_wide x pixels_high
+ const int src_offset = y8_idx * src_stride + x8_idx;
+ const int ref_offset = y8_idx * ref_stride + x8_idx;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (highbd) {
+ vpx_highbd_minmax_8x8(src + src_offset, src_stride, ref + ref_offset,
+ ref_stride, &min, &max);
+ } else {
+ vpx_minmax_8x8(src + src_offset, src_stride, ref + ref_offset,
+ ref_stride, &min, &max);
+ }
+#else
+ vpx_minmax_8x8(src + src_offset, src_stride, ref + ref_offset, ref_stride,
+ &min, &max);
+#endif
+ if ((max - min) > minmax_max) minmax_max = (max - min);
+ if ((max - min) < minmax_min) minmax_min = (max - min);
+ }
+ }
+ return (minmax_max - minmax_min);  // stays 0 - 255 when no sub-block was examined
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Returns vpx_avg_4x4() for the block at src, or vpx_highbd_avg_4x4() when
+// highbd is non-zero.
+static INLINE int avg_4x4(const uint8_t *const src, const int stride,
+                          const int highbd) {
+  if (highbd) return vpx_highbd_avg_4x4(src, stride);
+  return vpx_avg_4x4(src, stride);
+}
+#else
+// Returns vpx_avg_4x4() for the block at src.
+static INLINE int avg_4x4(const uint8_t *const src, const int stride) {
+  return vpx_avg_4x4(src, stride);
+}
+#endif
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Returns vpx_avg_8x8() for the block at src, or vpx_highbd_avg_8x8() when
+// highbd is non-zero.
+static INLINE int avg_8x8(const uint8_t *const src, const int stride,
+                          const int highbd) {
+  if (highbd) return vpx_highbd_avg_8x8(src, stride);
+  return vpx_avg_8x8(src, stride);
+}
+#else
+// Returns vpx_avg_8x8() for the block at src.
+static INLINE int avg_8x8(const uint8_t *const src, const int stride) {
+  return vpx_avg_8x8(src, stride);
+}
+#endif
+
+// Recursively initialises the variance tree rooted at vt for a bsize block
+// whose top-left sample is at src / ref and of which width x height pixels
+// are available. Each node records its size, buffers and strides and gets
+// force_split cleared. While bsize > leaf_size, the children vt->split[0..3]
+// are set up for the top-left, top-right, bottom-left and bottom-right
+// quadrants. The right / bottom quadrants receive width - px / height - px
+// without clamping, so those can be zero or negative when the block reaches
+// past the available area.
+static void init_variance_tree(VAR_TREE *const vt,
+#if CONFIG_VP9_HIGHBITDEPTH
+                               const int highbd,
+#endif
+                               BLOCK_SIZE bsize, BLOCK_SIZE leaf_size,
+                               const int width, const int height,
+                               const uint8_t *const src, const int src_stride,
+                               const uint8_t *const ref, const int ref_stride) {
+  assert(bsize >= leaf_size);
+
+  vt->bsize = bsize;
+  vt->force_split = 0;
+  vt->width = width;
+  vt->height = height;
+  vt->src = src;
+  vt->src_stride = src_stride;
+  vt->ref = ref;
+  vt->ref_stride = ref_stride;
+#if CONFIG_VP9_HIGHBITDEPTH
+  vt->highbd = highbd;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  if (bsize > leaf_size) {
+    const BLOCK_SIZE quad_size = get_subsize(bsize, PARTITION_SPLIT);
+    // Edge length of one quadrant in pixels.
+    const int px = num_4x4_blocks_wide_lookup[quad_size] * 4;
+    const int left_w = VPXMIN(px, width);
+    const int top_h = VPXMIN(px, height);
+    const int right_w = width - px;
+    const int bottom_h = height - px;
+    const int src_down = px * src_stride;
+    const int ref_down = px * ref_stride;
+
+    // Top-left quadrant.
+    init_variance_tree(vt->split[0], IF_HBD(highbd, ) quad_size, leaf_size,
+                       left_w, top_h, src, src_stride, ref, ref_stride);
+    // Top-right quadrant.
+    init_variance_tree(vt->split[1], IF_HBD(highbd, ) quad_size, leaf_size,
+                       right_w, top_h, src + px, src_stride, ref + px,
+                       ref_stride);
+    // Bottom-left quadrant.
+    init_variance_tree(vt->split[2], IF_HBD(highbd, ) quad_size, leaf_size,
+                       left_w, bottom_h, src + src_down, src_stride,
+                       ref + ref_down, ref_stride);
+    // Bottom-right quadrant.
+    init_variance_tree(vt->split[3], IF_HBD(highbd, ) quad_size, leaf_size,
+                       right_w, bottom_h, src + src_down + px, src_stride,
+                       ref + ref_down + px, ref_stride);
+  }
+}
+
+// Fill the variance tree based on averaging pixel values (sub-sampling), at
+// the leaf node size. Nodes at the leaf size are seeded from the difference
+// of the averaged source and reference blocks (zeros when the node has no
+// available pixels); fill_variance_node() is then called on each larger node.
+static void fill_variance_tree(VAR_TREE *const vt, const BLOCK_SIZE leaf_size) {
+  if (vt->bsize > leaf_size) {
+    int quad;
+    for (quad = 0; quad < 4; ++quad)
+      fill_variance_tree(vt->split[quad], leaf_size);
+    fill_variance_node(vt);
+  } else if (vt->width <= 0 || vt->height <= 0) {
+    fill_variance(0, 0, 0, &vt->variances.none);
+  } else {
+    int src_avg, ref_avg, diff;
+    unsigned int diff_sq;
+    assert(leaf_size == BLOCK_4X4 || leaf_size == BLOCK_8X8);
+    if (leaf_size == BLOCK_4X4) {
+      src_avg = avg_4x4(vt->src, vt->src_stride IF_HBD(, vt->highbd));
+      ref_avg = avg_4x4(vt->ref, vt->ref_stride IF_HBD(, vt->highbd));
+    } else {
+      src_avg = avg_8x8(vt->src, vt->src_stride IF_HBD(, vt->highbd));
+      ref_avg = avg_8x8(vt->ref, vt->ref_stride IF_HBD(, vt->highbd));
+    }
+    // The leaf passes the difference as the sum and its square as the sse.
+    diff = src_avg - ref_avg;
+    diff_sq = diff * diff;
+    fill_variance(diff_sq, diff, 0, &vt->variances.none);
+  }
+}
+
+static void refine_variance_tree(VAR_TREE *const vt, const int64_t threshold) {
+ if (vt->bsize >= BLOCK_8X8) {
+ if (vt->bsize == BLOCK_16X16) {
+ if (vt->variances.none.variance <= threshold)
+ return;  // 16x16 variance does not exceed the threshold: subtree left as it is
+ else
+ vt->force_split = 0;  // NOTE(review): cleared, not set, before refining - confirm intent
+ }
+ // Refine the four children; 4x4 leaves are re-seeded from avg_4x4() in the last branch.
+ refine_variance_tree(vt->split[0], threshold);
+ refine_variance_tree(vt->split[1], threshold);
+ refine_variance_tree(vt->split[2], threshold);
+ refine_variance_tree(vt->split[3], threshold);
+
+ if (vt->bsize <= BLOCK_16X16) fill_variance_node(vt);  // nodes above 16x16 are not passed to fill_variance_node() here
+ } else if (vt->width <= 0 || vt->height <= 0) {
+ fill_variance(0, 0, 0, &vt->variances.none);  // leaf has no available pixels
+ } else {
+ const int src_avg = avg_4x4(vt->src, vt->src_stride IF_HBD(, vt->highbd));
+ const int ref_avg = avg_4x4(vt->ref, vt->ref_stride IF_HBD(, vt->highbd));
+ const int sum = src_avg - ref_avg;
+ const unsigned int sse = sum * sum;
+ assert(vt->bsize == BLOCK_4X4);
+ fill_variance(sse, sum, 0, &vt->variances.none);
+ }
+}
+
+// 32x32: split if variance > threshold. Other sizes: OR in the four children.
+static int check_split_key_frame(VAR_TREE *const vt, const int64_t threshold) {
+  if (vt->bsize == BLOCK_32X32) {
+    vt->force_split = vt->variances.none.variance > threshold;
+  } else {
+    int quad;
+    for (quad = 0; quad < 4; ++quad)
+      vt->force_split |= check_split_key_frame(vt->split[quad], threshold);
+  }
+  return vt->force_split;
+}
+
+// Sets force_split on vt and its descendants and returns vt->force_split.
+// thresholds points at the entry for vt's own level; every recursion step
+// passes thresholds + 1 to the children, and a 16x16 node also reads the
+// previous entry, thresholds[-1], as the floor for the 8x8 minmax test.
+static int check_split(VP10_COMP *const cpi, VAR_TREE *const vt,
+                       const int segment_id, const int64_t *const thresholds) {
+  if (vt->bsize != BLOCK_16X16) {
+    int quad;
+
+    // OR the children's decisions into this node.
+    for (quad = 0; quad < 4; ++quad)
+      vt->force_split |=
+          check_split(cpi, vt->split[quad], segment_id, thresholds + 1);
+
+    // 32x32 nodes are additionally tested against their own threshold.
+    if (vt->bsize == BLOCK_32X32 && !vt->force_split)
+      vt->force_split = vt->variances.none.variance > thresholds[0];
+    return vt->force_split;
+  }
+
+  vt->force_split = vt->variances.none.variance > thresholds[0];
+  if (!vt->force_split && vt->variances.none.variance > thresholds[-1] &&
+      !cyclic_refresh_segment_id_boosted(segment_id)) {
+    // We have some nominal amount of 16x16 variance (based on average),
+    // compute the minmax over the 8x8 sub-blocks, and if above threshold,
+    // force split to 8x8 block for this 16x16 block.
+    const int minmax =
+        compute_minmax_8x8(vt->src, vt->src_stride, vt->ref, vt->ref_stride,
+                           IF_HBD(vt->highbd, ) vt->width, vt->height);
+    vt->force_split = minmax > cpi->vbp_threshold_minmax;
+  }
+
+  return vt->force_split;
+}
+
+// This function chooses partitioning based on the variance between source and
+// reconstructed last (or golden), where variance is computed for down-sampled
+// inputs.
+static void choose_partitioning(VP10_COMP *const cpi, ThreadData *const td,
+ const TileInfo *const tile, MACROBLOCK *const x,
+ const int mi_row, const int mi_col) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ VAR_TREE *const vt = td->var_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2];
+ int i;
+ const uint8_t *src;
+ const uint8_t *ref;
+ int src_stride;
+ int ref_stride;
+ int pixels_wide = 8 * num_8x8_blocks_wide_lookup[cm->sb_size];
+ int pixels_high = 8 * num_8x8_blocks_high_lookup[cm->sb_size];
+ int64_t thresholds[5] = {
+ cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], cpi->vbp_thresholds[2],
+ cpi->vbp_thresholds[3], cpi->vbp_thresholds[4],
+ };
+ BLOCK_SIZE bsize_min[5] = { BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
+ cpi->vbp_bsize_min, BLOCK_8X8 };
+ const int start_level = cm->sb_size == BLOCK_64X64 ? 1 : 0;
+ const int64_t *const thre = thresholds + start_level;
+ const BLOCK_SIZE *const bmin = bsize_min + start_level;
+
+ const int is_key_frame = (cm->frame_type == KEY_FRAME);
+ const int low_res = (cm->width <= 352 && cm->height <= 288);
+
+ int segment_id = CR_SEGMENT_ID_BASE;
+
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
+ const uint8_t *const map =
+ cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
+ segment_id = get_segment_id(cm, map, cm->sb_size, mi_row, mi_col);
+
+ if (cyclic_refresh_segment_id_boosted(segment_id)) {
+ int q = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+ set_vbp_thresholds(cpi, thresholds, q);
+ }
+ }
+
+ set_offsets(cpi, tile, x, mi_row, mi_col, cm->sb_size);
+
+ if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
+ if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
+
+ src = x->plane[0].src.buf;
+ src_stride = x->plane[0].src.stride;
+
+ if (!is_key_frame) {
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ unsigned int uv_sad;
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+ const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ unsigned int y_sad, y_sad_g;
+
+ const int hbs = cm->mib_size / 2;
+ const int split_vert = mi_col + hbs >= cm->mi_cols;
+ const int split_horz = mi_row + hbs >= cm->mi_rows;
+ BLOCK_SIZE bsize;
+
+ if (split_vert && split_horz)
+ bsize = get_subsize(cm->sb_size, PARTITION_SPLIT);
+ else if (split_vert)
+ bsize = get_subsize(cm->sb_size, PARTITION_VERT);
+ else if (split_horz)
+ bsize = get_subsize(cm->sb_size, PARTITION_HORZ);
+ else
+ bsize = cm->sb_size;
+
+ assert(yv12 != NULL);
+
+ if (yv12_g && yv12_g != yv12) {
+ vp10_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+ &cm->frame_refs[GOLDEN_FRAME - 1].sf);
+ y_sad_g = cpi->fn_ptr[bsize].sdf(
+ x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
+ xd->plane[0].pre[0].stride);
+ } else {
+ y_sad_g = UINT_MAX;
+ }
+
+ vp10_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+ &cm->frame_refs[LAST_FRAME - 1].sf);
+ mbmi->ref_frame[0] = LAST_FRAME;
+ mbmi->ref_frame[1] = NONE;
+ mbmi->sb_type = cm->sb_size;
+ mbmi->mv[0].as_int = 0;
+#if CONFIG_DUAL_FILTER
+ for (i = 0; i < 4; ++i) mbmi->interp_filter[i] = BILINEAR;
+#else
+ mbmi->interp_filter = BILINEAR;
+#endif
+
+ y_sad = vp10_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
+
+ if (y_sad_g < y_sad) {
+ vp10_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+ &cm->frame_refs[GOLDEN_FRAME - 1].sf);
+ mbmi->ref_frame[0] = GOLDEN_FRAME;
+ mbmi->mv[0].as_int = 0;
+ y_sad = y_sad_g;
+ } else {
+ x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv;
+ }
+
+ vp10_build_inter_predictors_sb(xd, mi_row, mi_col, cm->sb_size);
+
+ for (i = 1; i < MAX_MB_PLANE; ++i) {
+ struct macroblock_plane *p = &x->plane[i];
+ struct macroblockd_plane *pd = &xd->plane[i];
+ const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
+
+ if (bs == BLOCK_INVALID)
+ uv_sad = UINT_MAX;
+ else
+ uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf,
+ pd->dst.stride);
+
+ x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
+ }
+
+ ref = xd->plane[0].dst.buf;
+ ref_stride = xd->plane[0].dst.stride;
+
+ // If the y_sad is very small, take the largest partition and exit.
+ // Don't check on boosted segment for now, as largest is suppressed there.
+ if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
+ if (!split_vert && !split_horz) {
+ set_block_size(cpi, x, xd, mi_row, mi_col, cm->sb_size);
+ return;
+ }
+ }
+ } else {
+ ref = VP10_VAR_OFFS;
+ ref_stride = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ switch (xd->bd) {
+ case 10: ref = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_10); break;
+ case 12: ref = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_12); break;
+ case 8:
+ default: ref = CONVERT_TO_BYTEPTR(VP10_HIGH_VAR_OFFS_8); break;
+ }
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+
+ init_variance_tree(
+ vt,
+#if CONFIG_VP9_HIGHBITDEPTH
+ xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH,
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ cm->sb_size, (is_key_frame || low_res) ? BLOCK_4X4 : BLOCK_8X8,
+ pixels_wide, pixels_high, src, src_stride, ref, ref_stride);
+
+ // Fill in the entire tree of variances and compute splits.
+ if (is_key_frame) {
+ fill_variance_tree(vt, BLOCK_4X4);
+ check_split_key_frame(vt, thre[1]);
+ } else {
+ fill_variance_tree(vt, BLOCK_8X8);
+ check_split(cpi, vt, segment_id, thre);
+ if (low_res) {
+ refine_variance_tree(vt, thre[1] << 1);
+ }
+ }
+
+ vt->force_split |= mi_col + cm->mib_size > cm->mi_cols ||
+ mi_row + cm->mib_size > cm->mi_rows;
+
+ // Now go through the entire structure, splitting every block size until
+ // we get to one that's got a variance lower than our threshold.
+ set_vt_partitioning(cpi, x, xd, vt, mi_row, mi_col, thre, bmin);
+}
+
+#if CONFIG_DUAL_FILTER
+static void reset_intmv_filter_type(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                    MB_MODE_INFO *mbmi) {
+  const int forced =  // filter for an index with no sub-pel MV component
+      (cm->interp_filter == SWITCHABLE) ? EIGHTTAP_REGULAR : cm->interp_filter;
+  int d;
+  for (d = 0; d < 2; ++d) {
+    const int subpel = has_subpel_mv_component(xd->mi[0], xd, d) ||
+                       (mbmi->ref_frame[1] != NONE &&
+                        has_subpel_mv_component(xd->mi[0], xd, d + 2));
+    if (!subpel) mbmi->interp_filter[d] = forced;
+    mbmi->interp_filter[d + 2] = mbmi->interp_filter[d];  // entry d+2 copies d
+  }
+}
+
+static void update_filter_type_count(FRAME_COUNTS *counts,
+                                     const MACROBLOCKD *xd,
+                                     const MB_MODE_INFO *mbmi) {
+  int d;
+  for (d = 0; d < 2; ++d) {
+    if (!has_subpel_mv_component(xd->mi[0], xd, d) &&
+        !(mbmi->ref_frame[1] > INTRA_FRAME &&
+          has_subpel_mv_component(xd->mi[0], xd, d + 2)))
+      continue;  // no sub-pel MV component for this index: nothing to count
+    // Tally the filter chosen for index d under its prediction context.
+    ++counts->switchable_interp[vp10_get_pred_context_switchable_interp(
+        xd, d)][mbmi->interp_filter[d]];
+  }
+}
+#endif
+#if CONFIG_GLOBAL_MOTION
+static void update_global_motion_used(PREDICTION_MODE mode,
+                                      const MB_MODE_INFO *mbmi,
+                                      VP10_COMP *cpi) {
+  if (mode != ZEROMV) return;  // only ZEROMV is tallied
+  // One tally per reference frame the block uses.
+  ++cpi->global_motion_used[mbmi->ref_frame[0]];
+  if (has_second_ref(mbmi)) ++cpi->global_motion_used[mbmi->ref_frame[1]];
+}
+#endif // CONFIG_GLOBAL_MOTION
+
+// Commits ctx's mode decision to the block; updates counts if output_enabled.
+static void update_state(VP10_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ int output_enabled) {
+ int i, x_idx, y, w, h, max_plane;
+ VP10_COMMON *const cm = &cpi->common;
+ RD_COUNTS *const rdc = &td->rd_counts;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = xd->plane;
+ MODE_INFO *mi = &ctx->mic;  // mode info saved when the mode was picked
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *mi_addr = xd->mi[0];  // live mode info that gets overwritten
+ const struct segmentation *const seg = &cm->seg;
+ const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
+ const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
+ const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);  // clipped to the frame
+ const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);  // clipped to the frame
+ MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
+
+ const int mis = cm->mi_stride;
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+
+#if CONFIG_REF_MV
+ int8_t rf_type;
+#endif
+
+#if !CONFIG_SUPERTX
+ assert(mi->mbmi.sb_type == bsize);
+#endif
+
+ *mi_addr = *mi;
+ *x->mbmi_ext = ctx->mbmi_ext;
+
+#if CONFIG_DUAL_FILTER
+ reset_intmv_filter_type(cm, xd, mbmi);
+#endif
+
+#if CONFIG_REF_MV
+ rf_type = vp10_ref_frame_type(mbmi->ref_frame);
+ if (x->mbmi_ext->ref_mv_count[rf_type] > 1 && mbmi->sb_type >= BLOCK_8X8 &&
+ mbmi->mode == NEWMV) {
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+ int_mv this_mv =
+ (i == 0)
+ ? x->mbmi_ext->ref_mv_stack[rf_type][mbmi->ref_mv_idx].this_mv
+ : x->mbmi_ext->ref_mv_stack[rf_type][mbmi->ref_mv_idx].comp_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+ // Apply the frame's MV precision limit, as update_state_supertx() does.
+ lower_mv_precision(&this_mv.as_mv, cm->allow_high_precision_mv);
+ x->mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0] = this_mv;
+ mbmi->pred_mv[i] = this_mv;
+ }
+ }
+#endif
+
+ // If segmentation in use
+ if (seg->enabled) {
+ // For in frame complexity AQ copy the segment id from the segment map.
+ if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
+ const uint8_t *const map =
+ seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
+ mi_addr->mbmi.segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
+ }
+ // Else for cyclic refresh mode update the segment map, set the segment id
+ // and then update the quantizer.
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+ vp10_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row, mi_col,
+ bsize, ctx->rate, ctx->dist, x->skip);
+ }
+ }
+
+ max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
+ for (i = 0; i < max_plane; ++i) {  // these planes use slot 1 of ctx's buffers
+ p[i].coeff = ctx->coeff[i][1];
+ p[i].qcoeff = ctx->qcoeff[i][1];
+ pd[i].dqcoeff = ctx->dqcoeff[i][1];
+ p[i].eobs = ctx->eobs[i][1];
+ }
+
+ for (i = max_plane; i < MAX_MB_PLANE; ++i) {  // remaining planes use slot 2
+ p[i].coeff = ctx->coeff[i][2];
+ p[i].qcoeff = ctx->qcoeff[i][2];
+ pd[i].dqcoeff = ctx->dqcoeff[i][2];
+ p[i].eobs = ctx->eobs[i][2];
+ }
+
+ for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
+
+ // Point every in-frame mi cell of the block at the committed MODE_INFO.
+ for (y = 0; y < mi_height; y++)
+ for (x_idx = 0; x_idx < mi_width; x_idx++)
+ if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
+ (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
+ xd->mi[x_idx + y * mis] = mi_addr;
+ }
+
+ if (cpi->oxcf.aq_mode)
+ vp10_init_plane_quantizers(cpi, x, xd->mi[0]->mbmi.segment_id);
+
+ if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) {
+ mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;  // block MV := bmi[3]'s MV
+ mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
+ }
+
+ x->skip = ctx->skip;
+
+#if CONFIG_VAR_TX
+ for (i = 0; i < 1; ++i)
+ memcpy(x->blk_skip[i], ctx->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif
+
+ if (!output_enabled) return;  // statistics and frame MVs below are skipped
+
+#if CONFIG_INTERNAL_STATS
+ if (frame_is_intra_only(cm)) {
+ static const int kf_mode_index[] = {
+ THR_DC /*DC_PRED*/, THR_V_PRED /*V_PRED*/,
+ THR_H_PRED /*H_PRED*/, THR_D45_PRED /*D45_PRED*/,
+ THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/,
+ THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/,
+ THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/,
+ };
+ ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]];
+ } else {
+ // Note how often each mode chosen as best
+ ++cpi->mode_chosen_counts[ctx->best_mode_index];
+ }
+#endif
+ if (!frame_is_intra_only(cm)) {
+ if (is_inter_block(mbmi)) {
+ vp10_update_mv_count(td);
+#if CONFIG_GLOBAL_MOTION
+ if (bsize >= BLOCK_8X8) {
+ update_global_motion_used(mbmi->mode, mbmi, cpi);
+ } else {
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
+ const int j = idy * 2 + idx;
+ update_global_motion_used(mi->bmi[j].as_mode, mbmi, cpi);
+ }
+ }
+ }
+#endif // CONFIG_GLOBAL_MOTION
+ if (cm->interp_filter == SWITCHABLE
+#if CONFIG_EXT_INTERP
+ && vp10_is_interp_needed(xd)
+#endif
+ ) {
+#if CONFIG_DUAL_FILTER
+ update_filter_type_count(td->counts, xd, mbmi);
+#else
+ const int ctx = vp10_get_pred_context_switchable_interp(xd);
+ ++td->counts->switchable_interp[ctx][mbmi->interp_filter];
+#endif
+ }
+ }
+
+ rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
+ rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
+ rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
+ }
+
+ for (h = 0; h < y_mis; ++h) {  // store refs/MVs for the in-frame mi cells
+ MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
+ for (w = 0; w < x_mis; ++w) {
+ MV_REF *const mv = frame_mv + w;
+ mv->ref_frame[0] = mi->mbmi.ref_frame[0];
+ mv->ref_frame[1] = mi->mbmi.ref_frame[1];
+ mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
+ mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
+ }
+ }
+}
+
+#if CONFIG_SUPERTX
+static void update_state_supertx(VP10_COMP *cpi, ThreadData *td,
+ PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int output_enabled) {  // supertx: commit ctx to the block
+ int y, x_idx;
+#if CONFIG_VAR_TX || CONFIG_REF_MV
+ int i;
+#endif
+ VP10_COMMON *const cm = &cpi->common;
+ RD_COUNTS *const rdc = &td->rd_counts;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *mi = &ctx->mic;  // mode info saved in the context
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ MODE_INFO *mi_addr = xd->mi[0];  // live mode info that gets overwritten
+ const struct segmentation *const seg = &cm->seg;
+ const int mis = cm->mi_stride;
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int x_mis = VPXMIN(mi_width, cm->mi_cols - mi_col);  // clipped to frame
+ const int y_mis = VPXMIN(mi_height, cm->mi_rows - mi_row);  // clipped to frame
+ MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
+ int w, h;
+
+#if CONFIG_REF_MV
+ int8_t rf_type;
+#endif
+
+ *mi_addr = *mi;
+ *x->mbmi_ext = ctx->mbmi_ext;
+ assert(is_inter_block(mbmi));  // this path expects an inter block
+ assert(mbmi->tx_size == ctx->mic.mbmi.tx_size);
+
+#if CONFIG_DUAL_FILTER
+ reset_intmv_filter_type(cm, xd, mbmi);
+#endif
+
+#if CONFIG_REF_MV
+ rf_type = vp10_ref_frame_type(mbmi->ref_frame);
+ if (x->mbmi_ext->ref_mv_count[rf_type] > 1 && mbmi->sb_type >= BLOCK_8X8 &&
+ mbmi->mode == NEWMV) {  // NEWMV with more than one candidate in the stack
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+ int_mv this_mv =
+ (i == 0)
+ ? x->mbmi_ext->ref_mv_stack[rf_type][mbmi->ref_mv_idx].this_mv
+ : x->mbmi_ext->ref_mv_stack[rf_type][mbmi->ref_mv_idx].comp_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+ lower_mv_precision(&this_mv.as_mv, cm->allow_high_precision_mv);
+ x->mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0] = this_mv;  // becomes entry 0
+ mbmi->pred_mv[i] = this_mv;
+ }
+ }
+#endif
+
+ // If segmentation in use
+ if (seg->enabled) {
+ if (cpi->vaq_refresh) {
+ const int energy = bsize <= BLOCK_16X16
+ ? x->mb_energy
+ : vp10_block_energy(cpi, x, bsize);
+ mi_addr->mbmi.segment_id = vp10_vaq_segment_id(energy);
+ } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+ // For cyclic refresh mode, now update the segment map
+ // and set the segment id.
+ vp10_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row, mi_col,
+ bsize, ctx->rate, ctx->dist, 1);  // final argument is hard-coded to 1 here
+ } else {
+ // Otherwise just set the segment id based on the current segment map
+ const uint8_t *const map =
+ seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
+ mi_addr->mbmi.segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
+ }
+ mi_addr->mbmi.segment_id_supertx = MAX_SEGMENTS;
+ }
+
+ // Restore the coding context of the MB to the one that was in place
+ // when the mode was picked for it
+ for (y = 0; y < mi_height; y++)
+ for (x_idx = 0; x_idx < mi_width; x_idx++)
+ if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
+ (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
+ xd->mi[x_idx + y * mis] = mi_addr;  // all in-frame cells share one entry
+ }
+
+ if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) {
+ mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;  // block MV := bmi[3]'s MV
+ mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
+ }
+
+ x->skip = ctx->skip;
+
+#if CONFIG_VAR_TX
+ for (i = 0; i < 1; ++i)
+ memcpy(x->blk_skip[i], ctx->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif // CONFIG_VAR_TX
+
+#if CONFIG_VAR_TX
+ {
+ const TX_SIZE mtx = mbmi->tx_size;
+ int idy, idx;
+ for (idy = 0; idy < (1 << mtx) / 2; ++idy)
+ for (idx = 0; idx < (1 << mtx) / 2; ++idx)
+ mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;  // uniform tx size
+ }
+#endif // CONFIG_VAR_TX
+ // Turn motion variation off for supertx
+ mbmi->motion_variation = SIMPLE_TRANSLATION;
+
+ if (!output_enabled) return;  // statistics and frame MVs below are skipped
+
+ if (!frame_is_intra_only(cm)) {
+ vp10_update_mv_count(td);
+
+ if (cm->interp_filter == SWITCHABLE
+#if CONFIG_EXT_INTERP
+ && vp10_is_interp_needed(xd)
+#endif
+ ) {
+#if CONFIG_DUAL_FILTER
+ update_filter_type_count(td->counts, xd, mbmi);
+#else
+ const int ctx = vp10_get_pred_context_switchable_interp(xd);
+ ++td->counts->switchable_interp[ctx][mbmi->interp_filter];
+#endif
+ }
+
+ rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
+ rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
+ rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
+ }
+
+ for (h = 0; h < y_mis; ++h) {  // store refs/MVs for the in-frame mi cells
+ MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
+ for (w = 0; w < x_mis; ++w) {
+ MV_REF *const mv = frame_mv + w;
+ mv->ref_frame[0] = mi->mbmi.ref_frame[0];
+ mv->ref_frame[1] = mi->mbmi.ref_frame[1];
+ mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
+ mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
+ }
+ }
+}
+
+static void update_state_sb_supertx(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile, int mi_row,
+ int mi_col, BLOCK_SIZE bsize,
+ int output_enabled, PC_TREE *pc_tree) {  // commits every leaf of pc_tree
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = xd->plane;
+ int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;  // hbs: half-block offset added to mi_row/mi_col
+ PARTITION_TYPE partition = pc_tree->partitioning;
+ BLOCK_SIZE subsize = get_subsize(bsize, partition);
+ int i;
+#if CONFIG_EXT_PARTITION_TYPES
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
+ PICK_MODE_CONTEXT *pmc = NULL;  // supertx context of this partition, if any
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;  // starts outside the frame
+
+ if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
+ x->mb_energy = vp10_block_energy(cpi, x, bsize);
+
+ switch (partition) {
+ case PARTITION_NONE:
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->none, mi_row, mi_col, subsize,
+ output_enabled);
+ break;  // pmc is left NULL for this case
+ case PARTITION_VERT:
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->vertical[0], mi_row, mi_col,
+ subsize, output_enabled);
+ if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {  // second half starts inside the frame
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize);
+ update_state_supertx(cpi, td, &pc_tree->vertical[1], mi_row,
+ mi_col + hbs, subsize, output_enabled);
+ }
+ pmc = &pc_tree->vertical_supertx;
+ break;
+ case PARTITION_HORZ:
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->horizontal[0], mi_row, mi_col,
+ subsize, output_enabled);
+ if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {  // second half starts inside the frame
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->horizontal[1], mi_row + hbs,
+ mi_col, subsize, output_enabled);
+ }
+ pmc = &pc_tree->horizontal_supertx;
+ break;
+ case PARTITION_SPLIT:
+ if (bsize == BLOCK_8X8) {  // only leaf_split[0] is committed for an 8x8 split
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_supertx(cpi, td, pc_tree->leaf_split[0], mi_row, mi_col,
+ subsize, output_enabled);
+ } else {  // recurse into the four sub-trees, offset by hbs
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, subsize,
+ output_enabled, pc_tree->split[0]);
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize);
+ update_state_sb_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize,
+ output_enabled, pc_tree->split[1]);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize);
+ update_state_sb_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize,
+ output_enabled, pc_tree->split[2]);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col + hbs, subsize);
+ update_state_sb_supertx(cpi, td, tile, mi_row + hbs, mi_col + hbs,
+ subsize, output_enabled, pc_tree->split[3]);
+ }
+ pmc = &pc_tree->split_supertx;
+ break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:  // two bsize2 blocks in the first row, one subsize block below
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->horizontala[0], mi_row, mi_col,
+ bsize2, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->horizontala[1], mi_row,
+ mi_col + hbs, bsize2, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->horizontala[2], mi_row + hbs,
+ mi_col, subsize, output_enabled);
+ pmc = &pc_tree->horizontala_supertx;
+ break;
+ case PARTITION_HORZ_B:  // one subsize block first, two bsize2 blocks in the row below
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->horizontalb[0], mi_row, mi_col,
+ subsize, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->horizontalb[1], mi_row + hbs,
+ mi_col, bsize2, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col + hbs, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->horizontalb[2], mi_row + hbs,
+ mi_col + hbs, bsize2, output_enabled);
+ pmc = &pc_tree->horizontalb_supertx;
+ break;
+ case PARTITION_VERT_A:  // two bsize2 blocks in the first column, one subsize block beside
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->verticala[0], mi_row, mi_col,
+ bsize2, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->verticala[1], mi_row + hbs,
+ mi_col, bsize2, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, subsize);
+ update_state_supertx(cpi, td, &pc_tree->verticala[2], mi_row,
+ mi_col + hbs, subsize, output_enabled);
+ pmc = &pc_tree->verticala_supertx;
+ break;
+ case PARTITION_VERT_B:  // one subsize block first, two bsize2 blocks in the column beside
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col, subsize);
+ update_state_supertx(cpi, td, &pc_tree->verticalb[0], mi_row, mi_col,
+ subsize, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row, mi_col + hbs, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->verticalb[1], mi_row,
+ mi_col + hbs, bsize2, output_enabled);
+ set_offsets_supertx(cpi, td, tile, mi_row + hbs, mi_col + hbs, bsize2);
+ update_state_supertx(cpi, td, &pc_tree->verticalb[2], mi_row + hbs,
+ mi_col + hbs, bsize2, output_enabled);
+ pmc = &pc_tree->verticalb_supertx;
+ break;
+#endif // CONFIG_EXT_PARTITION_TYPES
+ default: assert(0);
+ }
+
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ if (pmc != NULL) {  // point the plane buffers at slot 1 of the supertx context
+ p[i].coeff = pmc->coeff[i][1];
+ p[i].qcoeff = pmc->qcoeff[i][1];
+ pd[i].dqcoeff = pmc->dqcoeff[i][1];
+ p[i].eobs = pmc->eobs[i][1];
+ } else {  // PARTITION_NONE (or the asserting default) left pmc NULL
+ // These should never be used
+ p[i].coeff = NULL;
+ p[i].qcoeff = NULL;
+ pd[i].dqcoeff = NULL;
+ p[i].eobs = NULL;
+ }
+ }
+}
+
+static void update_supertx_param(ThreadData *td, PICK_MODE_CONTEXT *ctx,
+                                 int best_tx, TX_SIZE supertx_size) {
+  const MACROBLOCK *const mb = &td->mb;
+  MB_MODE_INFO *const ctx_mbmi = &ctx->mic.mbmi;
+#if CONFIG_VAR_TX
+  // Only blk_skip[0] is saved into the context.
+  memcpy(ctx->blk_skip[0], mb->blk_skip[0],
+         sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif  // CONFIG_VAR_TX
+  // Record the supertx transform choice and the skip flag in the context.
+  ctx_mbmi->tx_size = supertx_size;
+  ctx_mbmi->tx_type = best_tx;
+  ctx->skip = mb->skip;
+}
+
+static void update_supertx_param_sb(VP10_COMP *cpi, ThreadData *td, int mi_row,
+                                    int mi_col, BLOCK_SIZE bsize, int best_tx,
+                                    TX_SIZE supertx_size, PC_TREE *pc_tree) {
+  const VP10_COMMON *const cm = &cpi->common;
+  // Offset, in mi_row/mi_col units, from the block origin to its second half.
+  const int half = (1 << b_width_log2_lookup[bsize]) / 4;
+  const PARTITION_TYPE partition = pc_tree->partitioning;
+  const BLOCK_SIZE subsize = get_subsize(bsize, partition);
+  int k;
+
+  // Nothing to update for a block that starts outside the frame.
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+
+  switch (partition) {
+    case PARTITION_NONE:
+      update_supertx_param(td, &pc_tree->none, best_tx, supertx_size);
+      break;
+    case PARTITION_VERT:
+      update_supertx_param(td, &pc_tree->vertical[0], best_tx, supertx_size);
+      // Second half: only when bsize > BLOCK_8X8 and it starts in the frame.
+      if (bsize > BLOCK_8X8 && mi_col + half < cm->mi_cols)
+        update_supertx_param(td, &pc_tree->vertical[1], best_tx, supertx_size);
+      break;
+    case PARTITION_HORZ:
+      update_supertx_param(td, &pc_tree->horizontal[0], best_tx, supertx_size);
+      // Same rule for the second half in the row direction.
+      if (bsize > BLOCK_8X8 && mi_row + half < cm->mi_rows)
+        update_supertx_param(td, &pc_tree->horizontal[1], best_tx,
+                             supertx_size);
+      break;
+    case PARTITION_SPLIT:
+      if (bsize == BLOCK_8X8) {
+        // An 8x8 split updates leaf_split[0] only.
+        update_supertx_param(td, pc_tree->leaf_split[0], best_tx, supertx_size);
+        break;
+      }
+      // Recurse into split[0..3]: k & 1 picks the column half, k >> 1 the row.
+      for (k = 0; k < 4; ++k)
+        update_supertx_param_sb(cpi, td, mi_row + (k >> 1) * half,
+                                mi_col + (k & 1) * half, subsize, best_tx,
+                                supertx_size, pc_tree->split[k]);
+      break;
+#if CONFIG_EXT_PARTITION_TYPES
+    case PARTITION_HORZ_A:
+      for (k = 0; k < 3; ++k)
+        update_supertx_param(td, &pc_tree->horizontala[k], best_tx,
+                             supertx_size);
+      break;
+    case PARTITION_HORZ_B:
+      for (k = 0; k < 3; ++k)
+        update_supertx_param(td, &pc_tree->horizontalb[k], best_tx,
+                             supertx_size);
+      break;
+    case PARTITION_VERT_A:
+      for (k = 0; k < 3; ++k)
+        update_supertx_param(td, &pc_tree->verticala[k], best_tx, supertx_size);
+      break;
+    case PARTITION_VERT_B:
+      for (k = 0; k < 3; ++k)
+        update_supertx_param(td, &pc_tree->verticalb[k], best_tx, supertx_size);
+      break;
+#endif  // CONFIG_EXT_PARTITION_TYPES
+    default: assert(0);
+  }
+}
+#endif // CONFIG_SUPERTX
+
+void vp10_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
+                           int mi_row, int mi_col) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  uint8_t *const bufs[3] = { src->y_buffer, src->u_buffer, src->v_buffer };
+  int plane;
+
+  // Set current frame pointer.
+  xd->cur_buf = src;
+  // Plane 0 uses the y_* geometry; the other planes share the uv_* one.
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const int uv = plane > 0;
+    setup_pred_plane(&x->plane[plane].src, bufs[plane],
+                     uv ? src->uv_crop_width : src->y_crop_width,
+                     uv ? src->uv_crop_height : src->y_crop_height,
+                     uv ? src->uv_stride : src->y_stride, mi_row, mi_col, NULL,
+                     xd->plane[plane].subsampling_x,
+                     xd->plane[plane].subsampling_y);
+  }
+}
+
+// Returns the RD multiplier for segment_id; re-inits x's plane quantizers.
+static int set_segment_rdmult(VP10_COMP *const cpi, MACROBLOCK *const x,
+                              int8_t segment_id) {
+  VP10_COMMON *const cm = &cpi->common;
+  vp10_init_plane_quantizers(cpi, x, segment_id);
+  vpx_clear_system_state();
+  return vp10_compute_rd_mult(cpi, cm->y_dc_delta_q + vp10_get_qindex(
+                                       &cm->seg, segment_id, cm->base_qindex));
+}
+
+static void rd_pick_sb_modes(VP10_COMP *cpi, TileDataEnc *tile_data,
+ MACROBLOCK *const x, int mi_row, int mi_col,
+ RD_COST *rd_cost,
+#if CONFIG_SUPERTX
+ int *totalrate_nocoef,
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition,
+#endif
+ BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd) {  // RD mode search for one block; result in rd_cost and ctx
+ VP10_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = xd->plane;
+ const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
+ int i, orig_rdmult;
+
+ vpx_clear_system_state();
+
+ // Use the lower precision, but faster, 32x32 fdct for mode selection.
+ x->use_lp32x32fdct = 1;
+
+ set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
+ mbmi = &xd->mi[0]->mbmi;  // read only after set_offsets() has run
+ mbmi->sb_type = bsize;
+#if CONFIG_SUPERTX
+ // We set tx_size here as skip blocks would otherwise not set it.
+ // tx_size needs to be set at this point as supertx_enable in
+ // write_modes_sb is computed based on this, and if the garbage in memory
+ // just happens to be the supertx_size, then the packer will code this
+ // block as a supertx block, even if rdopt did not pick it as such.
+ mbmi->tx_size = max_txsize_lookup[bsize];
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ mbmi->partition = partition;
+#endif
+
+ for (i = 0; i < MAX_MB_PLANE; ++i) {  // the search works in slot 0 of ctx's buffers
+ p[i].coeff = ctx->coeff[i][0];
+ p[i].qcoeff = ctx->qcoeff[i][0];
+ pd[i].dqcoeff = ctx->dqcoeff[i][0];
+ p[i].eobs = ctx->eobs[i][0];
+ }
+
+ for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
+
+ ctx->is_coded = 0;
+ ctx->skippable = 0;
+ ctx->pred_pixel_ready = 0;
+
+ // Set to zero to make sure we do not use the previous encoded frame stats
+ mbmi->skip = 0;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ x->source_variance = vp10_high_get_sby_perpixel_variance(
+ cpi, &x->plane[0].src, bsize, xd->bd);
+ } else {
+ x->source_variance =
+ vp10_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+ }
+#else
+ x->source_variance =
+ vp10_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // Save rdmult before it might be changed, so it can be restored later.
+ orig_rdmult = x->rdmult;
+
+ if (aq_mode == VARIANCE_AQ) {
+ if (cpi->vaq_refresh) {
+ const int energy = bsize <= BLOCK_16X16
+ ? x->mb_energy
+ : vp10_block_energy(cpi, x, bsize);
+ mbmi->segment_id = vp10_vaq_segment_id(energy);
+ // Re-initialise quantiser
+ vp10_init_plane_quantizers(cpi, x, mbmi->segment_id);
+ x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
+ }
+ x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
+ } else if (aq_mode == COMPLEXITY_AQ) {
+ x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
+ } else if (aq_mode == CYCLIC_REFRESH_AQ) {
+ // If segment is boosted, use rdmult for that segment.
+ if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
+ x->rdmult = vp10_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
+ }
+
+ // Find best coding mode & reconstruct the MB so it is available
+ // as a predictor for MBs that follow in the SB
+ if (frame_is_intra_only(cm)) {
+ vp10_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
+#if CONFIG_SUPERTX
+ *totalrate_nocoef = 0;
+#endif // CONFIG_SUPERTX
+ } else {
+ if (bsize >= BLOCK_8X8) {
+ if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ vp10_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
+ ctx, best_rd);
+#if CONFIG_SUPERTX
+ *totalrate_nocoef = rd_cost->rate;
+#endif // CONFIG_SUPERTX
+ } else {
+ vp10_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
+#if CONFIG_SUPERTX
+ totalrate_nocoef,
+#endif // CONFIG_SUPERTX
+ bsize, ctx, best_rd);
+#if CONFIG_SUPERTX
+ assert(*totalrate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
+ }
+ } else {
+ if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ // The decoder rejects sub8x8 partitions when SEG_LVL_SKIP is set.
+ rd_cost->rate = INT_MAX;  // NOTE(review): *totalrate_nocoef is not written on this path
+ } else {
+ vp10_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col,
+ rd_cost,
+#if CONFIG_SUPERTX
+ totalrate_nocoef,
+#endif // CONFIG_SUPERTX
+ bsize, ctx, best_rd);
+#if CONFIG_SUPERTX
+ assert(*totalrate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
+ }
+ }
+ }
+
+ // Examine the resulting rate and, for COMPLEXITY_AQ, make a segment choice.
+ if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
+ (bsize >= BLOCK_16X16) &&
+ (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
+ (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
+ vp10_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
+ }
+
+ x->rdmult = orig_rdmult;  // undo any AQ-specific rdmult set above
+
+ // TODO(jingning) The rate-distortion optimization flow needs to be
+ // refactored to provide proper exit/return handle.
+ if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;
+
+ ctx->rate = rd_cost->rate;  // kept in ctx alongside the chosen mode
+ ctx->dist = rd_cost->dist;
+}
+
+#if CONFIG_REF_MV
+static void update_inter_mode_stats(FRAME_COUNTS *counts, PREDICTION_MODE mode,
+#if CONFIG_EXT_INTER
+ int is_compound,
+#endif // CONFIG_EXT_INTER
+ int16_t mode_context) {  // tallies the newmv/zeromv/refmv decisions for mode
+ int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;  // context of the first decision
+#if CONFIG_EXT_INTER
+ if (mode == NEWMV || mode == NEWFROMNEARMV) {
+ if (!is_compound) ++counts->new2mv_mode[mode == NEWFROMNEARMV];
+#else
+ if (mode == NEWMV) {
+#endif // CONFIG_EXT_INTER
+ ++counts->newmv_mode[mode_ctx][0];  // bin 0: a new-MV mode was chosen
+ return;
+ } else {
+ ++counts->newmv_mode[mode_ctx][1];  // bin 1: some other mode
+
+ if (mode_context & (1 << ALL_ZERO_FLAG_OFFSET)) {  // flag set: nothing further is counted
+ return;
+ }
+
+ mode_ctx = (mode_context >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
+ if (mode == ZEROMV) {
+ ++counts->zeromv_mode[mode_ctx][0];  // bin 0: ZEROMV
+ return;
+ } else {
+ ++counts->zeromv_mode[mode_ctx][1];  // bin 1: not ZEROMV
+ mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
+
+ if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) mode_ctx = 6;  // skip flags override the context;
+ if (mode_context & (1 << SKIP_NEARMV_OFFSET)) mode_ctx = 7;  // when several are set,
+ if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) mode_ctx = 8;  // the last one tested wins
+
+ ++counts->refmv_mode[mode_ctx][mode != NEARESTMV];  // bin 0: NEARESTMV, bin 1: otherwise
+ }
+ }
+}
+#endif
+
+static void update_stats(VP10_COMMON *cm, ThreadData *td
+#if CONFIG_SUPERTX
+ ,
+ int supertx_enabled  // nonzero: skip the intra_inter, interintra and motvar counts
+#endif
+ ) {
+ const MACROBLOCK *x = &td->mb;
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MODE_INFO *const mi = xd->mi[0];
+ const MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ // Tally the mode and reference choices of the block at xd->mi[0] in td->counts.
+ if (!frame_is_intra_only(cm)) {  // intra-only frames update none of these counts
+ FRAME_COUNTS *const counts = td->counts;
+ const int inter_block = is_inter_block(mbmi);
+ const int seg_ref_active =
+ segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
+ if (!seg_ref_active) {
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif
+ counts->intra_inter[vp10_get_intra_inter_context(xd)][inter_block]++;
+ // If the segment reference feature is enabled we have only a single
+ // reference frame allowed for the segment so exclude it from
+ // the reference frame counts used to work out probabilities.
+ if (inter_block) {
+ const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
+#if CONFIG_EXT_REFS
+ const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];
+#endif // CONFIG_EXT_REFS
+
+ if (cm->reference_mode == REFERENCE_MODE_SELECT)
+ counts->comp_inter[vp10_get_reference_mode_context(
+ cm, xd)][has_second_ref(mbmi)]++;
+
+ if (has_second_ref(mbmi)) {  // compound prediction: count the comp_ref decisions
+#if CONFIG_EXT_REFS
+ const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);  // first comp_ref decision
+
+ counts->comp_ref[vp10_get_pred_context_comp_ref_p(cm, xd)][0][bit]++;
+ if (!bit) {
+ counts->comp_ref[vp10_get_pred_context_comp_ref_p1(
+ cm, xd)][1][ref0 == LAST_FRAME]++;
+ } else {
+ counts->comp_ref[vp10_get_pred_context_comp_ref_p2(
+ cm, xd)][2][ref0 == GOLDEN_FRAME]++;
+ }
+
+ counts->comp_bwdref[vp10_get_pred_context_comp_bwdref_p(
+ cm, xd)][0][ref1 == ALTREF_FRAME]++;
+#else
+ counts->comp_ref[vp10_get_pred_context_comp_ref_p(
+ cm, xd)][0][ref0 == GOLDEN_FRAME]++;
+#endif // CONFIG_EXT_REFS
+ } else {  // single reference: count the single_ref decisions
+#if CONFIG_EXT_REFS
+ const int bit = (ref0 == ALTREF_FRAME || ref0 == BWDREF_FRAME);  // first single_ref decision
+
+ counts->single_ref[vp10_get_pred_context_single_ref_p1(xd)][0][bit]++;
+ if (bit) {
+ counts->single_ref[vp10_get_pred_context_single_ref_p2(
+ xd)][1][ref0 != BWDREF_FRAME]++;
+ } else {
+ const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
+ counts->single_ref[vp10_get_pred_context_single_ref_p3(
+ xd)][2][bit1]++;
+ if (!bit1) {
+ counts->single_ref[vp10_get_pred_context_single_ref_p4(
+ xd)][3][ref0 != LAST_FRAME]++;
+ } else {
+ counts->single_ref[vp10_get_pred_context_single_ref_p5(
+ xd)][4][ref0 != LAST3_FRAME]++;
+ }
+ }
+#else
+ counts->single_ref[vp10_get_pred_context_single_ref_p1(
+ xd)][0][ref0 != LAST_FRAME]++;
+ if (ref0 != LAST_FRAME) {
+ counts->single_ref[vp10_get_pred_context_single_ref_p2(
+ xd)][1][ref0 != GOLDEN_FRAME]++;
+ }
+#endif // CONFIG_EXT_REFS
+ }
+
+#if CONFIG_EXT_INTER
+ if (cm->reference_mode != COMPOUND_REFERENCE &&
+#if CONFIG_SUPERTX
+ !supertx_enabled &&
+#endif
+ is_interintra_allowed(mbmi)) {
+ const int bsize_group = size_group_lookup[bsize];
+ if (mbmi->ref_frame[1] == INTRA_FRAME) {  // block uses inter-intra prediction
+ counts->interintra[bsize_group][1]++;
+ counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
+ if (is_interintra_wedge_used(bsize))
+ counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
+ } else {
+ counts->interintra[bsize_group][0]++;
+ }
+ }
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+#if CONFIG_SUPERTX
+ if (!supertx_enabled)
+#endif // CONFIG_SUPERTX
+#if CONFIG_EXT_INTER
+ if (mbmi->ref_frame[1] != INTRA_FRAME)
+#endif // CONFIG_EXT_INTER
+ if (is_motvar_allowed(mbmi))  // the optional guards above chain onto this statement
+ counts->motvar[mbmi->sb_type][mbmi->motion_variation]++;
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+#if CONFIG_EXT_INTER
+ if (cm->reference_mode != SINGLE_REFERENCE &&
+ is_inter_compound_mode(mbmi->mode) &&
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ !(is_motvar_allowed(mbmi) &&
+ mbmi->motion_variation != SIMPLE_TRANSLATION) &&
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ is_interinter_wedge_used(bsize)) {
+ counts->wedge_interinter[bsize][mbmi->use_wedge_interinter]++;
+ }
+#endif // CONFIG_EXT_INTER
+ }
+ }
+
+ if (inter_block &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ int16_t mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];  // recomputed below when CONFIG_REF_MV
+ if (bsize >= BLOCK_8X8) {  // a single prediction mode covers the whole block
+ const PREDICTION_MODE mode = mbmi->mode;
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (has_second_ref(mbmi)) {
+ mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
+ ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
+ } else {
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, bsize, -1);
+ update_inter_mode_stats(counts, mode,
+#if CONFIG_EXT_INTER
+ has_second_ref(mbmi),
+#endif // CONFIG_EXT_INTER
+ mode_ctx);
+
+ if (mode == NEWMV) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+ int idx;
+
+ for (idx = 0; idx < 2; ++idx) {  // one drl_mode count per decision, stopping at ref_mv_idx
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
+
+ if (mbmi->ref_mv_idx == idx) break;
+ }
+ }
+ }
+
+ if (mode == NEARMV) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+ int idx;
+
+ for (idx = 1; idx < 3; ++idx) {  // same walk as NEWMV, but over stack positions 1 and 2
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
+ ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
+
+ if (mbmi->ref_mv_idx == idx - 1) break;
+ }
+ }
+ }
+#if CONFIG_EXT_INTER
+ }
+#endif // CONFIG_EXT_INTER
+#else
+#if CONFIG_EXT_INTER
+ if (is_inter_compound_mode(mode))
+ ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
+ else
+#endif // CONFIG_EXT_INTER
+ ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)];
+#endif
+ } else {  // below 8x8: each mi->bmi[] entry visited carries its own mode
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ for (idy = 0; idy < 2; idy += num_4x4_h) {  // step over a 2x2 grid by the block's 4x4 extent
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
+ const int j = idy * 2 + idx;
+ const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (has_second_ref(mbmi)) {
+ mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
+ ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(
+ b_mode)];
+ } else {
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, bsize, j);
+ update_inter_mode_stats(counts, b_mode,
+#if CONFIG_EXT_INTER
+ has_second_ref(mbmi),
+#endif // CONFIG_EXT_INTER
+ mode_ctx);
+#if CONFIG_EXT_INTER
+ }
+#endif // CONFIG_EXT_INTER
+#else
+#if CONFIG_EXT_INTER
+ if (is_inter_compound_mode(b_mode))
+ ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(
+ b_mode)];
+ else
+#endif // CONFIG_EXT_INTER
+ ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
+#endif
+ }
+ }
+ }
+ }
+ }
+}
+
+typedef struct {  // snapshot filled by save_context() and replayed by restore_context()
+ ENTROPY_CONTEXT a[2 * MAX_MIB_SIZE * MAX_MB_PLANE];  // copied from xd->above_context[], one slice per plane
+ ENTROPY_CONTEXT l[2 * MAX_MIB_SIZE * MAX_MB_PLANE];  // copied from xd->left_context[], one slice per plane
+ PARTITION_CONTEXT sa[MAX_MIB_SIZE];  // copied from xd->above_seg_context
+ PARTITION_CONTEXT sl[MAX_MIB_SIZE];  // copied from xd->left_seg_context
+#if CONFIG_VAR_TX
+ TXFM_CONTEXT *p_ta;  // value of xd->above_txfm_context at save time
+ TXFM_CONTEXT *p_tl;  // value of xd->left_txfm_context at save time
+ TXFM_CONTEXT ta[MAX_MIB_SIZE];  // entries copied from where p_ta points
+ TXFM_CONTEXT tl[MAX_MIB_SIZE];  // entries copied from where p_tl points
+#endif
+} RD_SEARCH_MACROBLOCK_CONTEXT;
+
+static void restore_context(MACROBLOCK *x,
+                            const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row,
+                            int mi_col, BLOCK_SIZE bsize) {
+  // Inverse of save_context(): copy the snapshot held in ctx back into x->e_mbd.
+  MACROBLOCKD *const mbd = &x->e_mbd;
+  const int cols_4x4 = num_4x4_blocks_wide_lookup[bsize];
+  const int rows_4x4 = num_4x4_blocks_high_lookup[bsize];
+  const int cols_mi = num_8x8_blocks_wide_lookup[bsize];
+  const int rows_mi = num_8x8_blocks_high_lookup[bsize];
+  const int sb_row = mi_row & MAX_MIB_MASK;  // row index used by the left contexts
+  int plane;
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const int ss_x = mbd->plane[plane].subsampling_x;
+    const int ss_y = mbd->plane[plane].subsampling_y;
+    // Entropy contexts: two entries per mi unit, scaled down by subsampling.
+    memcpy(mbd->above_context[plane] + ((mi_col * 2) >> ss_x),
+           ctx->a + cols_4x4 * plane,
+           (sizeof(ENTROPY_CONTEXT) * cols_4x4) >> ss_x);
+    memcpy(mbd->left_context[plane] + ((sb_row * 2) >> ss_y),
+           ctx->l + rows_4x4 * plane,
+           (sizeof(ENTROPY_CONTEXT) * rows_4x4) >> ss_y);
+  }
+  memcpy(mbd->above_seg_context + mi_col, ctx->sa,
+         sizeof(mbd->above_seg_context[0]) * cols_mi);
+  memcpy(mbd->left_seg_context + sb_row, ctx->sl,
+         sizeof(mbd->left_seg_context[0]) * rows_mi);
+#if CONFIG_VAR_TX
+  mbd->above_txfm_context = ctx->p_ta;  // reinstate the pointers before copying
+  mbd->left_txfm_context = ctx->p_tl;
+  memcpy(mbd->above_txfm_context, ctx->ta, sizeof(ctx->ta[0]) * cols_mi);
+  memcpy(mbd->left_txfm_context, ctx->tl, sizeof(ctx->tl[0]) * rows_mi);
+#endif
+}
+
+static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
+                         int mi_row, int mi_col, BLOCK_SIZE bsize) {
+  // Buffer the above/left context information of the block in search so that
+  // restore_context() can put it back afterwards.
+  const MACROBLOCKD *const mbd = &x->e_mbd;
+  const int cols_4x4 = num_4x4_blocks_wide_lookup[bsize];
+  const int rows_4x4 = num_4x4_blocks_high_lookup[bsize];
+  const int cols_mi = num_8x8_blocks_wide_lookup[bsize];
+  const int rows_mi = num_8x8_blocks_high_lookup[bsize];
+  const int sb_row = mi_row & MAX_MIB_MASK;  // row index used by the left contexts
+  int plane;
+
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const int ss_x = mbd->plane[plane].subsampling_x;
+    const int ss_y = mbd->plane[plane].subsampling_y;
+    // Plane p's slice starts at offset (block size in 4x4 units) * p.
+    memcpy(ctx->a + cols_4x4 * plane,
+           mbd->above_context[plane] + ((mi_col * 2) >> ss_x),
+           (sizeof(ENTROPY_CONTEXT) * cols_4x4) >> ss_x);
+    memcpy(ctx->l + rows_4x4 * plane,
+           mbd->left_context[plane] + ((sb_row * 2) >> ss_y),
+           (sizeof(ENTROPY_CONTEXT) * rows_4x4) >> ss_y);
+  }
+  memcpy(ctx->sa, mbd->above_seg_context + mi_col,
+         sizeof(mbd->above_seg_context[0]) * cols_mi);
+  memcpy(ctx->sl, mbd->left_seg_context + sb_row,
+         sizeof(mbd->left_seg_context[0]) * rows_mi);
+#if CONFIG_VAR_TX
+  ctx->p_ta = mbd->above_txfm_context;  // remember where the entries live
+  ctx->p_tl = mbd->left_txfm_context;
+  memcpy(ctx->ta, mbd->above_txfm_context, sizeof(ctx->ta[0]) * cols_mi);
+  memcpy(ctx->tl, mbd->left_txfm_context, sizeof(ctx->tl[0]) * rows_mi);
+#endif
+}
+
+static void encode_b(VP10_COMP *cpi, const TileInfo *const tile, ThreadData *td,
+                     TOKENEXTRA **tp, int mi_row, int mi_col,
+                     int output_enabled, BLOCK_SIZE bsize,
+#if CONFIG_EXT_PARTITION_TYPES
+                     PARTITION_TYPE partition,
+#endif
+                     PICK_MODE_CONTEXT *ctx) {
+  // Commit the mode decision held in ctx for one block, then encode that block.
+  MACROBLOCK *const mb = &td->mb;
+  set_offsets(cpi, tile, mb, mi_row, mi_col, bsize);
+#if CONFIG_EXT_PARTITION_TYPES
+  mb->e_mbd.mi[0]->mbmi.partition = partition;
+#endif
+  update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
+  encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
+  // Symbol counts are gathered only when output is enabled.
+  if (!output_enabled) return;
+#if CONFIG_SUPERTX
+  update_stats(&cpi->common, td, 0);
+#else
+  update_stats(&cpi->common, td);
+#endif
+}
+
+static void encode_sb(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile, TOKENEXTRA **tp, int mi_row,
+ int mi_col, int output_enabled, BLOCK_SIZE bsize,
+ PC_TREE *pc_tree) {
+ const VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ // Encode the partition tree rooted at pc_tree for the bsize block at (mi_row, mi_col).
+ const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
+ const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;  // half the block width, used as an mi offset
+ const PARTITION_TYPE partition = pc_tree->partitioning;
+ const BLOCK_SIZE subsize = get_subsize(bsize, partition);
+#if CONFIG_EXT_PARTITION_TYPES
+ const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);  // size of the small blocks in the A/B types
+#endif
+
+ assert(bsize >= BLOCK_8X8);
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;  // block starts outside the frame
+
+ if (output_enabled) td->counts->partition[ctx][partition]++;
+
+#if CONFIG_SUPERTX
+ if (!frame_is_intra_only(cm) && bsize <= MAX_SUPERTX_BLOCK_SIZE &&
+ partition != PARTITION_NONE && !xd->lossless[0]) {
+ int supertx_enabled;
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ supertx_enabled = check_supertx_sb(bsize, supertx_size, pc_tree);
+ if (supertx_enabled) {  // this path handles the whole block itself and returns early
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ int x_idx, y_idx, i;
+ uint8_t *dst_buf[3];
+ int dst_stride[3];
+ set_skip_context(xd, mi_row, mi_col);
+ set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+ update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, bsize,
+ output_enabled, pc_tree);
+
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row,
+ mi_col);
+ for (i = 0; i < MAX_MB_PLANE; i++) {  // hand the destination planes to the predictor
+ dst_buf[i] = xd->plane[i].dst.buf;
+ dst_stride[i] = xd->plane[i].dst.stride;
+ }
+ predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row, mi_col,
+ output_enabled, bsize, bsize, dst_buf, dst_stride,
+ pc_tree);
+
+ set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
+ set_segment_id_supertx(cpi, x, mi_row, mi_col, bsize);
+
+ if (!x->skip) {
+ x->skip_optimize = 0;
+ x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
+
+ vp10_encode_sb_supertx(x, bsize);
+ vp10_tokenize_sb_supertx(cpi, td, tp, !output_enabled, bsize);
+ } else {
+ xd->mi[0]->mbmi.skip = 1;
+ if (output_enabled) td->counts->skip[vp10_get_skip_context(xd)][1]++;
+ reset_skip_context(xd, bsize);
+ }
+ if (output_enabled) {
+ for (y_idx = 0; y_idx < mi_height; y_idx++)  // copy the skip flag to every mi inside the frame edges
+ for (x_idx = 0; x_idx < mi_width; x_idx++) {
+ if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width >
+ x_idx &&
+ (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height >
+ y_idx) {
+ xd->mi[x_idx + y_idx * cm->mi_stride]->mbmi.skip =
+ xd->mi[0]->mbmi.skip;
+ }
+ }
+ td->counts->supertx[partition_supertx_context_lookup[partition]]
+ [supertx_size][1]++;
+ td->counts->supertx_size[supertx_size]++;
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(supertx_size, bsize, 1) > 1 &&
+ !xd->mi[0]->mbmi.skip) {
+ int eset = get_ext_tx_set(supertx_size, bsize, 1);
+ if (eset > 0) {
+ ++td->counts
+ ->inter_ext_tx[eset][supertx_size][xd->mi[0]->mbmi.tx_type];
+ }
+ }
+#else
+ if (supertx_size < TX_32X32 && !xd->mi[0]->mbmi.skip) {
+ ++td->counts->inter_ext_tx[supertx_size][xd->mi[0]->mbmi.tx_type];
+ }
+#endif // CONFIG_EXT_TX
+ }
+#if CONFIG_EXT_PARTITION_TYPES
+ update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize,
+ partition);
+#else
+ if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+#endif
+#if CONFIG_VAR_TX
+ set_txfm_ctxs(supertx_size, mi_width, mi_height, xd);
+#endif // CONFIG_VAR_TX
+ return;
+ } else {
+ if (output_enabled) {  // record that supertx was considered but not used
+ td->counts->supertx[partition_supertx_context_lookup[partition]]
+ [supertx_size][0]++;
+ }
+ }
+ }
+#endif // CONFIG_SUPERTX
+
+ switch (partition) {
+ case PARTITION_NONE:
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ &pc_tree->none);
+ break;
+ case PARTITION_VERT:
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ &pc_tree->vertical[0]);
+ if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {  // second half only if it starts inside the frame
+ encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled,
+ subsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ &pc_tree->vertical[1]);
+ }
+ break;
+ case PARTITION_HORZ:
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ &pc_tree->horizontal[0]);
+ if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {  // second half only if it starts inside the frame
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled,
+ subsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ &pc_tree->horizontal[1]);
+ }
+ break;
+ case PARTITION_SPLIT:
+ if (bsize == BLOCK_8X8) {  // an 8x8 split is encoded as one leaf block
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ pc_tree->leaf_split[0]);
+ } else {  // larger splits recurse into the four quadrants
+ encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
+ pc_tree->split[0]);
+ encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
+ subsize, pc_tree->split[1]);
+ encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
+ subsize, pc_tree->split[2]);
+ encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
+ subsize, pc_tree->split[3]);
+ }
+ break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:  // two bsize2 blocks on the top row, one subsize block below
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, bsize2,
+ partition, &pc_tree->horizontala[0]);
+ encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled, bsize2,
+ partition, &pc_tree->horizontala[1]);
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled, subsize,
+ partition, &pc_tree->horizontala[2]);
+ break;
+ case PARTITION_HORZ_B:  // one subsize block on top, two bsize2 blocks on the bottom row
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
+ partition, &pc_tree->horizontalb[0]);
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled, bsize2,
+ partition, &pc_tree->horizontalb[1]);
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col + hbs, output_enabled,
+ bsize2, partition, &pc_tree->horizontalb[2]);
+ break;
+ case PARTITION_VERT_A:  // two bsize2 blocks in the left column, one subsize block on the right
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, bsize2,
+ partition, &pc_tree->verticala[0]);
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled, bsize2,
+ partition, &pc_tree->verticala[1]);
+ encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled, subsize,
+ partition, &pc_tree->verticala[2]);
+
+ break;
+ case PARTITION_VERT_B:  // one subsize block on the left, two bsize2 blocks in the right column
+ encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
+ partition, &pc_tree->verticalb[0]);
+ encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled, bsize2,
+ partition, &pc_tree->verticalb[1]);
+ encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col + hbs, output_enabled,
+ bsize2, partition, &pc_tree->verticalb[2]);
+ break;
+#endif // CONFIG_EXT_PARTITION_TYPES
+ default: assert(0 && "Invalid partition type."); break;
+ }
+
+#if CONFIG_EXT_PARTITION_TYPES
+ update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
+#else
+ if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)  // for larger splits each recursive call runs this for its own sub-block
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+#endif // CONFIG_EXT_PARTITION_TYPES
+}
+
+// Returns the first block size, starting at bsize and stepping down three
+// BLOCK_SIZE values at a time, whose height and width (in 8x8 units) fit in
+// rows_left x cols_left. *bh and *bw receive the dimensions last examined.
+static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left,
+                                      int cols_left, int *bh, int *bw) {
+  // No room at all: cap the size at 8x8 and leave *bh / *bw untouched.
+  if (rows_left <= 0 || cols_left <= 0) return VPXMIN(bsize, BLOCK_8X8);
+  // Otherwise keep shrinking until the block fits in what is left.
+  while (bsize > 0) {
+    const int high = num_8x8_blocks_high_lookup[bsize];
+    const int wide = num_8x8_blocks_wide_lookup[bsize];
+    *bh = high;
+    *bw = wide;
+    if (high <= rows_left && wide <= cols_left) return bsize;
+    bsize -= 3;
+  }
+  return bsize;
+}
+
+static void set_partial_sb_partition(const VP10_COMMON *const cm, MODE_INFO *mi,
+                                     int bh_in, int bw_in,
+                                     int mi_rows_remaining,
+                                     int mi_cols_remaining, BLOCK_SIZE bsize,
+                                     MODE_INFO **mib) {
+  int rows_step = bh_in;  // find_partition_size() may rewrite both step sizes
+  int row, col;
+  for (row = 0; row < cm->mib_size; row += rows_step) {
+    int cols_step = bw_in;  // every row starts again from the requested width
+    for (col = 0; col < cm->mib_size; col += cols_step) {
+      MODE_INFO *const entry = mi + row * cm->mi_stride + col;
+      mib[row * cm->mi_stride + col] = entry;
+      entry->mbmi.sb_type = find_partition_size(
+          bsize, mi_rows_remaining - row, mi_cols_remaining - col, &rows_step, &cols_step);
+    }
+  }
+}
+
+// Assigns one block size to the mode info entries of a superblock.
+// A superblock that lies wholly inside the tile gets the requested size
+// as-is. One that crosses the bottom or right tile boundary is handed to
+// set_partial_sb_partition(), which fits a size to the rows and columns
+// still available at each position.
+static void set_fixed_partitioning(VP10_COMP *cpi, const TileInfo *const tile,
+                                   MODE_INFO **mib, int mi_row, int mi_col,
+                                   BLOCK_SIZE bsize) {
+  VP10_COMMON *const cm = &cpi->common;
+  MODE_INFO *const sb_origin = cm->mi + mi_row * cm->mi_stride + mi_col;
+  const int mi_rows_remaining = tile->mi_row_end - mi_row;
+  const int mi_cols_remaining = tile->mi_col_end - mi_col;
+  const int rows_step = num_8x8_blocks_high_lookup[bsize];
+  const int cols_step = num_8x8_blocks_wide_lookup[bsize];
+  int row, col;
+
+  assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0));
+
+  // Partial superblock: let the helper fit a size at each position.
+  if (mi_cols_remaining < cm->mib_size || mi_rows_remaining < cm->mib_size) {
+    set_partial_sb_partition(cm, sb_origin, rows_step, cols_step,
+                             mi_rows_remaining, mi_cols_remaining, bsize, mib);
+    return;
+  }
+
+  // Full superblock: stamp bsize on the top-left entry of every block.
+  for (row = 0; row < cm->mib_size; row += rows_step) {
+    for (col = 0; col < cm->mib_size; col += cols_step) {
+      const int offset = row * cm->mi_stride + col;
+      mib[offset] = sb_origin + offset;
+      mib[offset]->mbmi.sb_type = bsize;
+    }
+  }
+}
+
+static void rd_use_partition(VP10_COMP *cpi, ThreadData *td,
+ TileDataEnc *tile_data, MODE_INFO **mib,
+ TOKENEXTRA **tp, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int *rate, int64_t *dist,  // *rate / *dist: cost of the chosen partitioning
+#if CONFIG_SUPERTX
+ int *rate_nocoef,  // companion rate tracked as chosen_rate_nocoef
+#endif
+ int do_recon, PC_TREE *pc_tree) {  // do_recon: encode the chosen partitioning before returning
+ VP10_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const int bs = num_8x8_blocks_wide_lookup[bsize];
+ const int hbs = bs / 2;
+ int i;
+ const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+ const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
+ const BLOCK_SIZE subsize = get_subsize(bsize, partition);
+ RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
+ RD_COST last_part_rdc, none_rdc, chosen_rdc;  // existing partition / NONE trial / SPLIT trial, then the winner
+ BLOCK_SIZE sub_subsize = BLOCK_4X4;
+ int splits_below = 0;
+ BLOCK_SIZE bs_type = mib[0]->mbmi.sb_type;
+ int do_partition_search = 1;
+ PICK_MODE_CONTEXT *ctx = &pc_tree->none;
+#if CONFIG_SUPERTX
+ int last_part_rate_nocoef = INT_MAX;
+ int none_rate_nocoef = INT_MAX;
+ int chosen_rate_nocoef = INT_MAX;
+#endif
+ // Cost the partitioning returned by get_partition(), optionally NONE and a one-level SPLIT; keep the cheapest.
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;  // outside the frame: outputs are left unwritten
+
+ assert(num_4x4_blocks_wide_lookup[bsize] ==
+ num_4x4_blocks_high_lookup[bsize]);
+
+ vp10_rd_cost_reset(&last_part_rdc);
+ vp10_rd_cost_reset(&none_rdc);
+ vp10_rd_cost_reset(&chosen_rdc);
+
+ pc_tree->partitioning = partition;
+
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+#endif
+
+ save_context(x, &x_ctx, mi_row, mi_col, bsize);  // snapshot so the trials below can be rolled back
+
+ if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
+ set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
+ x->mb_energy = vp10_block_energy(cpi, x, bsize);
+ }
+
+ if (do_partition_search &&
+ cpi->sf.partition_search_type == SEARCH_PARTITION &&
+ cpi->sf.adjust_partitioning_from_last_frame) {
+ // Check if any of the sub blocks are further split.
+ if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
+ sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
+ splits_below = 1;
+ for (i = 0; i < 4; i++) {
+ int jj = i >> 1, ii = i & 0x01;
+ MODE_INFO *this_mi = mib[jj * hbs * cm->mi_stride + ii * hbs];
+ if (this_mi && this_mi->mbmi.sb_type >= sub_subsize) {
+ splits_below = 0;
+ }
+ }
+ }
+
+ // If partition is not none try none unless each of the 4 splits are split
+ // even further.
+ if (partition != PARTITION_NONE && !splits_below &&
+ mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
+ pc_tree->partitioning = PARTITION_NONE;
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
+#if CONFIG_SUPERTX
+ &none_rate_nocoef,
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_NONE,
+#endif
+ bsize, ctx, INT64_MAX);
+
+ if (none_rdc.rate < INT_MAX) {
+ none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
+ none_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist);
+#if CONFIG_SUPERTX
+ none_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE];
+#endif
+ }
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
+ mib[0]->mbmi.sb_type = bs_type;  // put back the block size recorded on entry
+ pc_tree->partitioning = partition;
+ }
+ }
+
+ switch (partition) {  // cost the existing partitioning into last_part_rdc
+ case PARTITION_NONE:
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+#if CONFIG_SUPERTX
+ &last_part_rate_nocoef,
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_NONE,
+#endif
+ bsize, ctx, INT64_MAX);
+ break;
+ case PARTITION_HORZ:
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+#if CONFIG_SUPERTX
+ &last_part_rate_nocoef,
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_HORZ,
+#endif
+ subsize, &pc_tree->horizontal[0], INT64_MAX);
+ if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
+ mi_row + hbs < cm->mi_rows) {
+ RD_COST tmp_rdc;
+#if CONFIG_SUPERTX
+ int rt_nocoef = 0;
+#endif
+ PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
+ vp10_rd_cost_init(&tmp_rdc);
+ update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);  // encode the first half before the second is searched
+ encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
+#if CONFIG_SUPERTX
+ &rt_nocoef,
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_HORZ,
+#endif
+ subsize, &pc_tree->horizontal[1], INT64_MAX);
+ if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
+ vp10_rd_cost_reset(&last_part_rdc);
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef = INT_MAX;
+#endif
+ break;
+ }
+ last_part_rdc.rate += tmp_rdc.rate;
+ last_part_rdc.dist += tmp_rdc.dist;
+ last_part_rdc.rdcost += tmp_rdc.rdcost;
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef += rt_nocoef;
+#endif
+ }
+ break;
+ case PARTITION_VERT:
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+#if CONFIG_SUPERTX
+ &last_part_rate_nocoef,
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_VERT,
+#endif
+ subsize, &pc_tree->vertical[0], INT64_MAX);
+ if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
+ mi_col + hbs < cm->mi_cols) {
+ RD_COST tmp_rdc;
+#if CONFIG_SUPERTX
+ int rt_nocoef = 0;
+#endif
+ PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
+ vp10_rd_cost_init(&tmp_rdc);
+ update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);  // encode the first half before the second is searched
+ encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
+#if CONFIG_SUPERTX
+ &rt_nocoef,
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_VERT,
+#endif
+ subsize, &pc_tree->vertical[bsize > BLOCK_8X8],
+ INT64_MAX);
+ if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
+ vp10_rd_cost_reset(&last_part_rdc);
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef = INT_MAX;
+#endif
+ break;
+ }
+ last_part_rdc.rate += tmp_rdc.rate;
+ last_part_rdc.dist += tmp_rdc.dist;
+ last_part_rdc.rdcost += tmp_rdc.rdcost;
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef += rt_nocoef;
+#endif
+ }
+ break;
+ case PARTITION_SPLIT:
+ if (bsize == BLOCK_8X8) {
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
+#if CONFIG_SUPERTX
+ &last_part_rate_nocoef,
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_SPLIT,
+#endif
+ subsize, pc_tree->leaf_split[0], INT64_MAX);
+ break;
+ }
+ last_part_rdc.rate = 0;  // larger splits: sum the four quadrants recursively
+ last_part_rdc.dist = 0;
+ last_part_rdc.rdcost = 0;
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef = 0;
+#endif
+ for (i = 0; i < 4; i++) {
+ int x_idx = (i & 1) * hbs;
+ int y_idx = (i >> 1) * hbs;
+ int jj = i >> 1, ii = i & 0x01;
+ RD_COST tmp_rdc;
+#if CONFIG_SUPERTX
+ int rt_nocoef;
+#endif
+ if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
+ continue;
+
+ vp10_rd_cost_init(&tmp_rdc);
+ rd_use_partition(cpi, td, tile_data,
+ mib + jj * hbs * cm->mi_stride + ii * hbs, tp,
+ mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
+ &tmp_rdc.dist,
+#if CONFIG_SUPERTX
+ &rt_nocoef,
+#endif
+ i != 3, pc_tree->split[i]);  // do_recon is off for the last quadrant
+ if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
+ vp10_rd_cost_reset(&last_part_rdc);
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef = INT_MAX;
+#endif
+ break;
+ }
+ last_part_rdc.rate += tmp_rdc.rate;
+ last_part_rdc.dist += tmp_rdc.dist;
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef += rt_nocoef;
+#endif
+ }
+ break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_VERT_A:
+ case PARTITION_VERT_B:
+ case PARTITION_HORZ_A:
+ case PARTITION_HORZ_B: assert(0 && "Cannot handle extended partiton types");
+#endif // CONFIG_EXT_PARTITION_TYPES
+ default: assert(0); break;
+ }
+
+ if (last_part_rdc.rate < INT_MAX) {
+ last_part_rdc.rate += cpi->partition_cost[pl][partition];
+ last_part_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist);
+#if CONFIG_SUPERTX
+ last_part_rate_nocoef += cpi->partition_cost[pl][partition];
+#endif
+ }
+
+ if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame &&
+ cpi->sf.partition_search_type == SEARCH_PARTITION &&
+ partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
+ (mi_row + bs < cm->mi_rows || mi_row + hbs == cm->mi_rows) &&
+ (mi_col + bs < cm->mi_cols || mi_col + hbs == cm->mi_cols)) {
+ BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
+ chosen_rdc.rate = 0;
+ chosen_rdc.dist = 0;
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef = 0;
+#endif
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
+ pc_tree->partitioning = PARTITION_SPLIT;
+
+ // Split partition.
+ for (i = 0; i < 4; i++) {
+ int x_idx = (i & 1) * hbs;
+ int y_idx = (i >> 1) * hbs;
+ RD_COST tmp_rdc;
+#if CONFIG_SUPERTX
+ int rt_nocoef = 0;
+#endif
+ RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;  // NOTE(review): shadows the function-level x_ctx inside this loop
+
+ if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
+ continue;
+
+ save_context(x, &x_ctx, mi_row, mi_col, bsize);
+ pc_tree->split[i]->partitioning = PARTITION_NONE;
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
+ &tmp_rdc,
+#if CONFIG_SUPERTX
+ &rt_nocoef,
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_SPLIT,
+#endif
+ split_subsize, &pc_tree->split[i]->none, INT64_MAX);
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
+ if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
+ vp10_rd_cost_reset(&chosen_rdc);
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef = INT_MAX;
+#endif
+ break;
+ }
+
+ chosen_rdc.rate += tmp_rdc.rate;
+ chosen_rdc.dist += tmp_rdc.dist;
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef += rt_nocoef;
+#endif
+
+ if (i != 3)
+ encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0,
+ split_subsize, pc_tree->split[i]);
+
+ chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];  // each quadrant is costed as an unsplit block
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE];  // same partition cost as added to chosen_rdc.rate
+#endif
+ }
+ if (chosen_rdc.rate < INT_MAX) {
+ chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];  // plus one SPLIT at this level
+ chosen_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist);
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT];  // same partition cost as added to chosen_rdc.rate
+#endif
+ }
+ }
+
+ // If last_part is better set the partitioning to that.
+ if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
+ mib[0]->mbmi.sb_type = bsize;
+ if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
+ chosen_rdc = last_part_rdc;
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef = last_part_rate_nocoef;
+#endif
+ }
+ // If none was better set the partitioning to that.
+ if (none_rdc.rdcost < chosen_rdc.rdcost) {
+ if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
+ chosen_rdc = none_rdc;
+#if CONFIG_SUPERTX
+ chosen_rate_nocoef = none_rate_nocoef;
+#endif
+ }
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
+ // We must have chosen a partitioning and encoding or we'll fail later on.
+ // No other opportunities for success.
+ if (bsize == cm->sb_size)
+ assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
+
+ if (do_recon) {
+ int output_enabled = (bsize == cm->sb_size);  // output is enabled only for a whole superblock
+ encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
+ pc_tree);
+ }
+
+ *rate = chosen_rdc.rate;
+ *dist = chosen_rdc.dist;
+#if CONFIG_SUPERTX
+ *rate_nocoef = chosen_rate_nocoef;
+#endif
+}
+
+/* clang-format off */
+static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {  // one entry per BLOCK_SIZE; row comments name the index sizes
+ BLOCK_4X4, // 4x4
+ BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, // 4x8, 8x4, 8x8
+ BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 8x16, 16x8, 16x16
+ BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 16x32, 32x16, 32x32
+ BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 32x64, 64x32, 64x64
+#if CONFIG_EXT_PARTITION
+ BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 // 64x128, 128x64, 128x128
+#endif // CONFIG_EXT_PARTITION
+};  // NOTE(review): presumably the smallest partition size to try for each observed size - confirm at the use site
+
+static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {  // one entry per BLOCK_SIZE; row comments name the index sizes
+ BLOCK_8X8, // 4x4
+ BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 4x8, 8x4, 8x8
+ BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, // 8x16, 16x8, 16x16
+ BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 16x32, 32x16, 32x32
+ BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 32x64, 64x32, 64x64
+#if CONFIG_EXT_PARTITION
+ BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST // 64x128, 128x64, 128x128
+#endif // CONFIG_EXT_PARTITION
+};  // NOTE(review): presumably the largest partition size to try for each observed size - confirm at the use site
+
+// Next square block size less than or equal to the current block size.
+static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {  // one entry per BLOCK_SIZE; row comments name the index sizes
+ BLOCK_4X4, // 4x4
+ BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x8, 8x4, 8x8
+ BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 8x16, 16x8, 16x16
+ BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 16x32, 32x16, 32x32
+ BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, // 32x64, 64x32, 64x64
+#if CONFIG_EXT_PARTITION
+ BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 // 64x128, 128x64, 128x128
+#endif // CONFIG_EXT_PARTITION
+};
+/* clang-format on */
+
+// Scans the sb_type of every mode_info entry in the mib_size x mib_size
+// region that starts at mib and folds each one into a running minimum and
+// maximum. Entries that are NULL are counted as BLOCK_4X4.
+//
+// *min_block_size and *max_block_size are never reset here, so the caller
+// must initialize them before the first call. Calling this repeatedly
+// accumulates the range over more than one region; the region size is
+// taken from cm->mib_size rather than from a parameter.
+static void get_sb_partition_size_range(const VP10_COMMON *const cm,
+                                        MACROBLOCKD *xd, MODE_INFO **mib,
+                                        BLOCK_SIZE *min_block_size,
+                                        BLOCK_SIZE *max_block_size) {
+  int row, col;
+
+  for (row = 0; row < cm->mib_size; ++row) {
+    // Consecutive rows are xd->mi_stride entries apart.
+    MODE_INFO **const row_mi = mib + row * xd->mi_stride;
+    for (col = 0; col < cm->mib_size; ++col) {
+      const MODE_INFO *const entry = row_mi[col];
+      // A missing entry counts as the smallest size.
+      const BLOCK_SIZE sb_type = entry ? entry->mbmi.sb_type : BLOCK_4X4;
+      *min_block_size = VPXMIN(*min_block_size, sb_type);
+      *max_block_size = VPXMAX(*max_block_size, sb_type);
+    }
+  }
+}
+
+// Chooses the partition size search range for the block at (mi_row, mi_col)
+// from the sizes used by the previous frame and the left/above neighbors.
+static void rd_auto_partition_range(VP10_COMP *cpi, const TileInfo *const tile,
+                                    MACROBLOCKD *const xd, int mi_row,
+                                    int mi_col, BLOCK_SIZE *min_block_size,
+                                    BLOCK_SIZE *max_block_size) {
+  VP10_COMMON *const cm = &cpi->common;
+  MODE_INFO **const cur_mi = xd->mi;
+  const int stride = xd->mi_stride;
+  // A neighbor only counts when it is flagged available and has mode info.
+  const int have_left = xd->left_available && cur_mi[-1];
+  const int have_above = xd->up_available && cur_mi[-stride];
+  const int not_key_frame = cm->frame_type != KEY_FRAME;
+  const int rows_left = tile->mi_row_end - mi_row;
+  const int cols_left = tile->mi_col_end - mi_col;
+  int bh, bw;
+  // Without any prediction source the widest possible range is kept.
+  BLOCK_SIZE lo = BLOCK_4X4;
+  BLOCK_SIZE hi = BLOCK_LARGEST;
+
+  if (have_left || have_above || not_key_frame) {
+    // Start from an inverted range; each get_sb_partition_size_range() call
+    // below extends the range it is handed, so the results accumulate.
+    lo = BLOCK_LARGEST;
+    hi = BLOCK_4X4;
+
+    // Same location in the previous frame.
+    if (not_key_frame) {
+      MODE_INFO **const prev_sb_mi =
+          &cm->prev_mi_grid_visible[mi_row * stride + mi_col];
+      get_sb_partition_size_range(cm, xd, prev_sb_mi, &lo, &hi);
+    }
+    // Superblock to the left.
+    if (have_left) {
+      MODE_INFO **const left_sb_mi = &cur_mi[-cm->mib_size];
+      get_sb_partition_size_range(cm, xd, left_sb_mi, &lo, &hi);
+    }
+    // Superblock above.
+    if (have_above) {
+      MODE_INFO **const above_sb_mi = &cur_mi[-stride * cm->mib_size];
+      get_sb_partition_size_range(cm, xd, above_sb_mi, &lo, &hi);
+    }
+    // "Relaxed" mode widens the observed range through the lookup tables.
+    if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
+      lo = min_partition_size[lo];
+      hi = max_partition_size[hi];
+    }
+  }
+
+  // Border cases: the max (and with it the min) taken from the neighbors may
+  // not be legal for the rows and columns remaining in the tile.
+  hi = find_partition_size(hi, rows_left, cols_left, &bh, &bw);
+  if (hi < lo) lo = hi;
+
+  // At the edge of the active image (the real image edge or formatting bars)
+  // the minimum drops to 4x4; elsewhere it is capped by the speed feature.
+  if (vp10_active_edge_sb(cpi, mi_row, mi_col)) {
+    lo = BLOCK_4X4;
+  } else if (cpi->sf.rd_auto_partition_min_limit < lo) {
+    lo = cpi->sf.rd_auto_partition_min_limit;
+  }
+
+  // With use_square_partition_only, make sure at least one square partition
+  // is allowed by capping the minimum at the square size for the maximum.
+  if (cpi->sf.use_square_partition_only && next_square_size[hi] < lo) {
+    lo = next_square_size[hi];
+  }
+
+  // Neither bound may exceed the superblock size.
+  *min_block_size = (lo < cm->sb_size) ? lo : cm->sb_size;
+  *max_block_size = (hi < cm->sb_size) ? hi : cm->sb_size;
+}
+
+// Extends the running [*lo, *hi] block size range with the sb_type of mi;
+// a NULL mi contributes fallback instead.
+static INLINE void fold_mi_block_size(const MODE_INFO *mi, BLOCK_SIZE fallback,
+                                      BLOCK_SIZE *lo, BLOCK_SIZE *hi) {
+  const BLOCK_SIZE bs = mi ? mi->mbmi.sb_type : fallback;
+  if (bs < *lo) *lo = bs;
+  if (bs > *hi) *hi = bs;
+}
+
+// TODO(jingning) refactor functions setting partition search range
+// Sets [*min_bs, *max_bs] from the block sizes found at the same location in
+// the previous frame, in the column to the left and in the row above.
+// Entries without mode info count as bsize.
+static void set_partition_range(VP10_COMMON *cm, MACROBLOCKD *xd, int mi_row,
+                                int mi_col, BLOCK_SIZE bsize,
+                                BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) {
+  const int cols = num_8x8_blocks_wide_lookup[bsize];
+  const int rows = num_8x8_blocks_high_lookup[bsize];
+  const int stride = cm->mi_stride;
+  MODE_INFO **const prev_mi =
+      &cm->prev_mi_grid_visible[stride * mi_row + mi_col];
+  // Start inverted so that the observed sizes define both bounds.
+  BLOCK_SIZE lo = BLOCK_LARGEST;
+  BLOCK_SIZE hi = BLOCK_4X4;
+  int r, c;
+
+  // NOTE(review): prev_mi is the address of an element, so this test looks
+  // always true -- confirm whether the grid base pointer was meant.
+  if (prev_mi) {
+    for (r = 0; r < rows; ++r) {
+      for (c = 0; c < cols; ++c)
+        fold_mi_block_size(prev_mi[r * stride + c], bsize, &lo, &hi);
+    }
+  }
+  // Column immediately left of the block.
+  if (xd->left_available) {
+    for (r = 0; r < rows; ++r)
+      fold_mi_block_size(xd->mi[r * stride - 1], bsize, &lo, &hi);
+  }
+  // Row immediately above the block.
+  if (xd->up_available) {
+    for (c = 0; c < cols; ++c)
+      fold_mi_block_size(xd->mi[c - stride], bsize, &lo, &hi);
+  }
+
+  // When everything seen has one size, widen the range via the tables.
+  if (lo == hi) {
+    lo = min_partition_size[lo];
+    hi = max_partition_size[hi];
+  }
+  *min_bs = (lo < cm->sb_size) ? lo : cm->sb_size;
+  *max_bs = (hi < cm->sb_size) ? hi : cm->sb_size;
+}
+
+static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
+  memcpy(ctx->pred_mv, x->pred_mv, sizeof x->pred_mv);  // Save x's pred_mv in ctx.
+}
+
+static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
+  memcpy(x->pred_mv, ctx->pred_mv, sizeof x->pred_mv);  // Restore x's pred_mv from ctx.
+}
+
+#if CONFIG_FP_MB_STATS
+const int qindex_skip_threshold_lookup[BLOCK_SIZES] = {  // Per block size: base_qindex must exceed this before rd_pick_partition() runs its first-pass-stats split-skip check.
+ 0, // 4x4
+ 10, // 4x8
+ 10, // 8x4
+ 30, // 8x8
+ 40, // 8x16
+ 40, // 16x8
+ 60, // 16x16
+ 80, // 16x32
+ 80, // 32x16
+ 90, // 32x32
+ 100, // 32x64
+ 100, // 64x32
+ 120, // 64x64
+#if CONFIG_EXT_PARTITION
+ // TODO(debargha): What are the correct numbers here?
+ 130, // 64x128
+ 130, // 128x64
+ 150 // 128x128
+#endif // CONFIG_EXT_PARTITION
+};
+const int qindex_split_threshold_lookup[BLOCK_SIZES] = {  // Per block size: base_qindex must be below this before rd_pick_partition() considers dropping PARTITION_NONE from first-pass stats.
+ 0, // 4x4
+ 3, // 4x8
+ 3, // 8x4
+ 7, // 8x8
+ 15, // 8x16
+ 15, // 16x8
+ 30, // 16x16
+ 40, // 16x32
+ 40, // 32x16
+ 60, // 32x32
+ 80, // 32x64
+ 80, // 64x32
+ 120, // 64x64
+#if CONFIG_EXT_PARTITION
+ // TODO(debargha): What are the correct numbers here?
+ 160, // 64x128
+ 160, // 128x64
+ 240 // 128x128
+#endif // CONFIG_EXT_PARTITION
+};
+const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = {  // Per block size: motion-inconsistency score above which rd_pick_partition() disallows PARTITION_NONE.
+  1,  // 4x4
+  1,  // 4x8
+  1,  // 8x4
+  1,  // 8x8
+  1,  // 8x16
+  1,  // 16x8
+  1,  // 16x16
+  1,  // 16x32
+  1,  // 32x16
+  1,  // 32x32
+  4,  // 32x64
+  4,  // 64x32
+  6,  // 64x64; the comma is required because more entries follow below.
+#if CONFIG_EXT_PARTITION
+  // TODO(debargha): What are the correct numbers here?
+  8,  // 64x128
+  8,  // 128x64
+  10  // 128x128
+#endif  // CONFIG_EXT_PARTITION
+};
+
+typedef enum {  // Motion direction decoded from a first-pass stats byte; get_motion_inconsistency() depends on these numeric values.
+ MV_ZERO = 0,
+ MV_LEFT = 1, // Differs from MV_RIGHT by 2.
+ MV_UP = 2, // Differs from MV_DOWN by 2.
+ MV_RIGHT = 3,
+ MV_DOWN = 4,
+ MV_INVALID // Never returned by get_motion_direction_fp().
+} MOTION_DIRECTION;
+
+// Decodes the motion direction recorded in a first-pass stats byte (e.g. an
+// entry of cpi->twopass.this_frame_mb_stats).
+// Returns MV_ZERO, MV_LEFT, MV_RIGHT or MV_UP for the first matching mask,
+// and MV_DOWN when none matches.
+static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
+  // Masks are tested in priority order; the first one that is set decides.
+  if (fp_byte & FPMB_MOTION_ZERO_MASK) return MV_ZERO;
+  if (fp_byte & FPMB_MOTION_LEFT_MASK) return MV_LEFT;
+  if (fp_byte & FPMB_MOTION_RIGHT_MASK) return MV_RIGHT;
+  if (fp_byte & FPMB_MOTION_UP_MASK) return MV_UP;
+  // No mask matched.
+  return MV_DOWN;
+}
+
+// Scores the disagreement between two motion directions: 0 when equal,
+// 2 when their enum values differ by exactly 2 (e.g. left/right, up/down),
+// and 1 otherwise.
+static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
+                                           MOTION_DIRECTION that_mv) {
+  if (this_mv == that_mv) return 0;
+  return (abs(this_mv - that_mv) == 2) ? 2 : 1;
+}
+#endif
+// Searches the 3-sub-block "partition"; if it beats *best_rdc, records it.
+#if CONFIG_EXT_PARTITION_TYPES
+static void rd_test_partition3(
+ VP10_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, TOKENEXTRA **tp,
+ PC_TREE *pc_tree, RD_COST *best_rdc, PICK_MODE_CONTEXT ctxs[3],
+ PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, BLOCK_SIZE bsize,
+ PARTITION_TYPE partition,
+#if CONFIG_SUPERTX
+ int64_t best_rd, int *best_rate_nocoef, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
+#endif
+ int mi_row0, int mi_col0, BLOCK_SIZE subsize0, int mi_row1, int mi_col1, // Position and size of sub-blocks 0, 1 and 2.
+ BLOCK_SIZE subsize1, int mi_row2, int mi_col2, BLOCK_SIZE subsize2) {
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ RD_COST this_rdc, sum_rdc; // Cost of the current sub-block / running total over sub-blocks.
+#if CONFIG_SUPERTX
+ VP10_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
+ int this_rate_nocoef, sum_rate_nocoef;
+ int abort_flag; // Set when sub-block 0 alone already costs >= best_rd; disables the supertx trial.
+ const int supertx_allowed = !frame_is_intra_only(cm) &&
+ bsize <= MAX_SUPERTX_BLOCK_SIZE &&
+ !xd->lossless[0];
+#endif
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); // Here ctx is still the function parameter.
+
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row0, mi_col0, &sum_rdc, // Sub-block 0; its result seeds sum_rdc.
+#if CONFIG_SUPERTX
+ &sum_rate_nocoef,
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ subsize0, &ctxs[0], best_rdc->rdcost);
+#if CONFIG_SUPERTX
+ abort_flag = sum_rdc.rdcost >= best_rd;
+#endif
+
+#if CONFIG_SUPERTX
+ if (sum_rdc.rdcost < INT64_MAX) {
+#else
+ if (sum_rdc.rdcost < best_rdc->rdcost) {
+#endif
+ PICK_MODE_CONTEXT *ctx = &ctxs[0]; // Shadows the ctx parameter within this block.
+ update_state(cpi, td, ctx, mi_row0, mi_col0, subsize0, 0);
+ encode_superblock(cpi, td, tp, 0, mi_row0, mi_col0, subsize0, ctx);
+
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+
+#if CONFIG_SUPERTX
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row1, mi_col1, &this_rdc, // Sub-block 1.
+ &this_rate_nocoef,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ subsize1, &ctxs[1], INT64_MAX - sum_rdc.rdcost);
+#else
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row1, mi_col1, &this_rdc, // Sub-block 1, limited to the budget left under the best cost.
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ subsize1, &ctxs[1], best_rdc->rdcost - sum_rdc.rdcost);
+#endif // CONFIG_SUPERTX
+
+ if (this_rdc.rate == INT_MAX) { // An INT_MAX rate invalidates the running total.
+ sum_rdc.rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef = INT_MAX;
+#endif
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += this_rate_nocoef;
+#endif
+ }
+
+#if CONFIG_SUPERTX
+ if (sum_rdc.rdcost < INT64_MAX) {
+#else
+ if (sum_rdc.rdcost < best_rdc->rdcost) {
+#endif
+ PICK_MODE_CONTEXT *ctx = &ctxs[1]; // Shadows the outer ctx again.
+ update_state(cpi, td, ctx, mi_row1, mi_col1, subsize1, 0);
+ encode_superblock(cpi, td, tp, 0, mi_row1, mi_col1, subsize1, ctx);
+
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+
+#if CONFIG_SUPERTX
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row2, mi_col2, &this_rdc, // Sub-block 2.
+ &this_rate_nocoef,
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ subsize2, &ctxs[2], INT64_MAX - sum_rdc.rdcost);
+#else
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row2, mi_col2, &this_rdc, // Sub-block 2, limited to the budget left under the best cost.
+#if CONFIG_EXT_PARTITION_TYPES
+ partition,
+#endif
+ subsize2, &ctxs[2], best_rdc->rdcost - sum_rdc.rdcost);
+#endif // CONFIG_SUPERTX
+
+ if (this_rdc.rate == INT_MAX) { // An INT_MAX rate invalidates the running total.
+ sum_rdc.rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef = INT_MAX;
+#endif
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += this_rate_nocoef;
+#endif
+ }
+
+#if CONFIG_SUPERTX
+ if (supertx_allowed && !abort_flag && sum_rdc.rdcost < INT64_MAX) {
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ const PARTITION_TYPE best_partition = pc_tree->partitioning; // Restored once the supertx trial is done.
+ pc_tree->partitioning = partition; // Temporary; the calls below receive pc_tree.
+ sum_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob[partition_supertx_context_lookup[partition]]
+ [supertx_size],
+ 0);
+ sum_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+
+ if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
+ TX_TYPE best_tx = DCT_DCT;
+ RD_COST tmp_rdc = { sum_rate_nocoef, 0, 0 };
+
+ restore_context(x, x_ctx, mi_row, mi_col, bsize);
+
+ rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
+ &tmp_rdc.rate, &tmp_rdc.dist, &best_tx, pc_tree);
+
+ tmp_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob[partition_supertx_context_lookup[partition]]
+ [supertx_size],
+ 1);
+ tmp_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist);
+ if (tmp_rdc.rdcost < sum_rdc.rdcost) { // The supertx variant is cheaper: adopt it.
+ sum_rdc = tmp_rdc;
+ update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
+ supertx_size, pc_tree);
+ }
+ }
+
+ pc_tree->partitioning = best_partition;
+ }
+#endif // CONFIG_SUPERTX
+
+ if (sum_rdc.rdcost < best_rdc->rdcost) { // Cheaper before the partition cost is added?
+ int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+ sum_rdc.rate += cpi->partition_cost[pl][partition];
+ sum_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += cpi->partition_cost[pl][partition];
+#endif
+ if (sum_rdc.rdcost < best_rdc->rdcost) { // Still cheaper with the partition cost included: new best.
+#if CONFIG_SUPERTX
+ *best_rate_nocoef = sum_rate_nocoef;
+ assert(*best_rate_nocoef >= 0);
+#endif
+ *best_rdc = sum_rdc;
+ pc_tree->partitioning = partition;
+ }
+ }
+ }
+ }
+}
+#endif // CONFIG_EXT_PARTITION_TYPES
+
+// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
+// unlikely to be selected depending on previous rate-distortion optimization
+// results, for encoding speed-up.
+static void rd_pick_partition(VP10_COMP *cpi, ThreadData *td,
+ TileDataEnc *tile_data, TOKENEXTRA **tp,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ RD_COST *rd_cost,
+#if CONFIG_SUPERTX
+ int *rate_nocoef,
+#endif
+ int64_t best_rd, PC_TREE *pc_tree) {
+ VP10_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
+ RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
+ TOKENEXTRA *tp_orig = *tp;
+ PICK_MODE_CONTEXT *ctx = &pc_tree->none;
+ int i;
+ const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+ int *partition_cost = cpi->partition_cost[pl];
+ int tmp_partition_cost[PARTITION_TYPES];
+ BLOCK_SIZE subsize;
+ RD_COST this_rdc, sum_rdc, best_rdc;
+#if CONFIG_SUPERTX
+ int this_rate_nocoef, sum_rate_nocoef = 0, best_rate_nocoef = INT_MAX;
+ int abort_flag;
+ const int supertx_allowed = !frame_is_intra_only(cm) &&
+ bsize <= MAX_SUPERTX_BLOCK_SIZE &&
+ !xd->lossless[0];
+#endif // CONFIG_SUPERTX
+ int do_split = bsize >= BLOCK_8X8;
+ int do_rect = 1;
+#if CONFIG_EXT_PARTITION_TYPES
+ BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
+
+ // Override skipping rectangular partition operations for edge blocks
+ const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
+ const int force_vert_split = (mi_col + mi_step >= cm->mi_cols);
+ const int xss = x->e_mbd.plane[1].subsampling_x;
+ const int yss = x->e_mbd.plane[1].subsampling_y;
+
+ BLOCK_SIZE min_size = x->min_partition_size;
+ BLOCK_SIZE max_size = x->max_partition_size;
+
+#if CONFIG_FP_MB_STATS
+ unsigned int src_diff_var = UINT_MAX;
+ int none_complexity = 0;
+#endif
+
+ int partition_none_allowed = !force_horz_split && !force_vert_split;
+ int partition_horz_allowed =
+ !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
+ int partition_vert_allowed =
+ !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
+ (void)*tp_orig;
+
+ if (force_horz_split || force_vert_split) {
+ tmp_partition_cost[PARTITION_NONE] = INT_MAX;
+
+ if (!force_vert_split) { // force_horz_split only
+ tmp_partition_cost[PARTITION_VERT] = INT_MAX;
+ tmp_partition_cost[PARTITION_HORZ] =
+ vp10_cost_bit(cm->fc->partition_prob[pl][PARTITION_HORZ], 0);
+ tmp_partition_cost[PARTITION_SPLIT] =
+ vp10_cost_bit(cm->fc->partition_prob[pl][PARTITION_HORZ], 1);
+ } else if (!force_horz_split) { // force_vert_split only
+ tmp_partition_cost[PARTITION_HORZ] = INT_MAX;
+ tmp_partition_cost[PARTITION_VERT] =
+ vp10_cost_bit(cm->fc->partition_prob[pl][PARTITION_VERT], 0);
+ tmp_partition_cost[PARTITION_SPLIT] =
+ vp10_cost_bit(cm->fc->partition_prob[pl][PARTITION_VERT], 1);
+ } else { // force_ horz_split && force_vert_split horz_split
+ tmp_partition_cost[PARTITION_HORZ] = INT_MAX;
+ tmp_partition_cost[PARTITION_VERT] = INT_MAX;
+ tmp_partition_cost[PARTITION_SPLIT] = 0;
+ }
+
+ partition_cost = tmp_partition_cost;
+ }
+
+#if CONFIG_VAR_TX
+#ifndef NDEBUG
+ // Nothing should rely on the default value of this array (which is just
+ // leftover from encoding the previous block. Setting it to magic number
+ // when debugging.
+ memset(x->blk_skip[0], 234, sizeof(x->blk_skip[0]));
+#endif // NDEBUG
+#endif // CONFIG_VAR_TX
+
+ assert(num_8x8_blocks_wide_lookup[bsize] ==
+ num_8x8_blocks_high_lookup[bsize]);
+
+ vp10_rd_cost_init(&this_rdc);
+ vp10_rd_cost_init(&sum_rdc);
+ vp10_rd_cost_reset(&best_rdc);
+ best_rdc.rdcost = best_rd;
+
+ set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
+
+ if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
+ x->mb_energy = vp10_block_energy(cpi, x, bsize);
+
+ if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
+ int cb_partition_search_ctrl =
+ ((pc_tree->index == 0 || pc_tree->index == 3) +
+ get_chessboard_index(cm->current_video_frame)) &
+ 0x1;
+
+ if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size)
+ set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size);
+ }
+
+ // Determine partition types in search according to the speed features.
+ // The threshold set here has to be of square block size.
+ if (cpi->sf.auto_min_max_partition_size) {
+ partition_none_allowed &= (bsize <= max_size && bsize >= min_size);
+ partition_horz_allowed &=
+ ((bsize <= max_size && bsize > min_size) || force_horz_split);
+ partition_vert_allowed &=
+ ((bsize <= max_size && bsize > min_size) || force_vert_split);
+ do_split &= bsize > min_size;
+ }
+ if (cpi->sf.use_square_partition_only) {
+ partition_horz_allowed &= force_horz_split;
+ partition_vert_allowed &= force_vert_split;
+ }
+
+#if CONFIG_VAR_TX
+ xd->above_txfm_context = cm->above_txfm_context + mi_col;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+#endif
+
+ save_context(x, &x_ctx, mi_row, mi_col, bsize);
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
+ src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row,
+ mi_col, bsize);
+ }
+#endif
+
+#if CONFIG_FP_MB_STATS
+ // Decide whether we shall split directly and skip searching NONE by using
+ // the first pass block statistics
+ if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
+ partition_none_allowed && src_diff_var > 4 &&
+ cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
+ int mb_row = mi_row >> 1;
+ int mb_col = mi_col >> 1;
+ int mb_row_end =
+ VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
+ int mb_col_end =
+ VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
+ int r, c;
+
+ // compute a complexity measure, basically measure inconsistency of motion
+ // vectors obtained from the first pass in the current block
+ for (r = mb_row; r < mb_row_end; r++) {
+ for (c = mb_col; c < mb_col_end; c++) {
+ const int mb_index = r * cm->mb_cols + c;
+
+ MOTION_DIRECTION this_mv;
+ MOTION_DIRECTION right_mv;
+ MOTION_DIRECTION bottom_mv;
+
+ this_mv =
+ get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);
+
+ // to its right
+ if (c != mb_col_end - 1) {
+ right_mv = get_motion_direction_fp(
+ cpi->twopass.this_frame_mb_stats[mb_index + 1]);
+ none_complexity += get_motion_inconsistency(this_mv, right_mv);
+ }
+
+ // to its bottom
+ if (r != mb_row_end - 1) {
+ bottom_mv = get_motion_direction_fp(
+ cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
+ none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
+ }
+
+ // do not count its left and top neighbors to avoid double counting
+ }
+ }
+
+ if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
+ partition_none_allowed = 0;
+ }
+ }
+#endif
+
+ // PARTITION_NONE
+ if (partition_none_allowed) {
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
+#if CONFIG_SUPERTX
+ &this_rate_nocoef,
+#endif
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_NONE,
+#endif
+ bsize, ctx, best_rdc.rdcost);
+ if (this_rdc.rate != INT_MAX) {
+ if (bsize >= BLOCK_8X8) {
+ this_rdc.rate += partition_cost[PARTITION_NONE];
+ this_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
+#if CONFIG_SUPERTX
+ this_rate_nocoef += partition_cost[PARTITION_NONE];
+#endif
+ }
+
+ if (this_rdc.rdcost < best_rdc.rdcost) {
+ int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr;
+ int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;
+
+ best_rdc = this_rdc;
+#if CONFIG_SUPERTX
+ best_rate_nocoef = this_rate_nocoef;
+ assert(best_rate_nocoef >= 0);
+#endif
+ if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
+
+ // Adjust dist breakout threshold according to the partition size.
+ dist_breakout_thr >>=
+ (2 * (MAX_SB_SIZE_LOG2 - 2)) -
+ (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
+
+ rate_breakout_thr *= num_pels_log2_lookup[bsize];
+
+ // If all y, u, v transform blocks in this partition are skippable, and
+ // the dist & rate are within the thresholds, the partition search is
+ // terminated for current branch of the partition search tree.
+ // The dist & rate thresholds are set to 0 at speed 0 to disable the
+ // early termination at that speed.
+ if (!x->e_mbd.lossless[xd->mi[0]->mbmi.segment_id] &&
+ (ctx->skippable && best_rdc.dist < dist_breakout_thr &&
+ best_rdc.rate < rate_breakout_thr)) {
+ do_split = 0;
+ do_rect = 0;
+ }
+
+#if CONFIG_FP_MB_STATS
+ // Check if every 16x16 first pass block statistics has zero
+ // motion and the corresponding first pass residue is small enough.
+ // If that is the case, check the difference variance between the
+ // current frame and the last frame. If the variance is small enough,
+ // stop further splitting in RD optimization
+ if (cpi->use_fp_mb_stats && do_split != 0 &&
+ cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
+ int mb_row = mi_row >> 1;
+ int mb_col = mi_col >> 1;
+ int mb_row_end =
+ VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
+ int mb_col_end =
+ VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
+ int r, c;
+
+ int skip = 1;
+ for (r = mb_row; r < mb_row_end; r++) {
+ for (c = mb_col; c < mb_col_end; c++) {
+ const int mb_index = r * cm->mb_cols + c;
+ if (!(cpi->twopass.this_frame_mb_stats[mb_index] &
+ FPMB_MOTION_ZERO_MASK) ||
+ !(cpi->twopass.this_frame_mb_stats[mb_index] &
+ FPMB_ERROR_SMALL_MASK)) {
+ skip = 0;
+ break;
+ }
+ }
+ if (skip == 0) {
+ break;
+ }
+ }
+ if (skip) {
+ if (src_diff_var == UINT_MAX) {
+ set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
+ src_diff_var = get_sby_perpixel_diff_variance(
+ cpi, &x->plane[0].src, mi_row, mi_col, bsize);
+ }
+ if (src_diff_var < 8) {
+ do_split = 0;
+ do_rect = 0;
+ }
+ }
+ }
+#endif
+ }
+ }
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+ }
+
+ // store estimated motion vector
+ if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx);
+
+ // PARTITION_SPLIT
+ // TODO(jingning): use the motion vectors given by the above search as
+ // the starting point of motion search in the following partition type check.
+ if (do_split) {
+ subsize = get_subsize(bsize, PARTITION_SPLIT);
+ if (bsize == BLOCK_8X8) {
+ i = 4;
+#if CONFIG_DUAL_FILTER
+ if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
+ pc_tree->leaf_split[0]->pred_interp_filter =
+ ctx->mic.mbmi.interp_filter[0];
+#else
+ if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
+ pc_tree->leaf_split[0]->pred_interp_filter =
+ ctx->mic.mbmi.interp_filter;
+#endif
+#if CONFIG_SUPERTX
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
+ &sum_rate_nocoef,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_SPLIT,
+#endif
+ subsize, pc_tree->leaf_split[0], INT64_MAX);
+#else
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_SPLIT,
+#endif
+ subsize, pc_tree->leaf_split[0], best_rdc.rdcost);
+#endif // CONFIG_SUPERTX
+ if (sum_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef = INT_MAX;
+#endif
+ }
+#if CONFIG_SUPERTX
+ if (supertx_allowed && sum_rdc.rdcost < INT64_MAX) {
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ const PARTITION_TYPE best_partition = pc_tree->partitioning;
+
+ pc_tree->partitioning = PARTITION_SPLIT;
+
+ sum_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob[partition_supertx_context_lookup
+ [PARTITION_SPLIT]][supertx_size],
+ 0);
+ sum_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+
+ if (is_inter_mode(pc_tree->leaf_split[0]->mic.mbmi.mode)) {
+ TX_TYPE best_tx = DCT_DCT;
+ RD_COST tmp_rdc = { sum_rate_nocoef, 0, 0 };
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
+ rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
+ &tmp_rdc.rate, &tmp_rdc.dist, &best_tx, pc_tree);
+
+ tmp_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob[partition_supertx_context_lookup
+ [PARTITION_SPLIT]][supertx_size],
+ 1);
+ tmp_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist);
+ if (tmp_rdc.rdcost < sum_rdc.rdcost) {
+ sum_rdc = tmp_rdc;
+ update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
+ supertx_size, pc_tree);
+ }
+ }
+
+ pc_tree->partitioning = best_partition;
+ }
+#endif // CONFIG_SUPERTX
+ } else {
+#if CONFIG_SUPERTX
+ for (i = 0; i < 4 && sum_rdc.rdcost < INT64_MAX; ++i) {
+#else
+ for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
+#endif // CONFIG_SUPERTX
+ const int x_idx = (i & 1) * mi_step;
+ const int y_idx = (i >> 1) * mi_step;
+
+ if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
+ continue;
+
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+
+ pc_tree->split[i]->index = i;
+#if CONFIG_SUPERTX
+ rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
+ mi_col + x_idx, subsize, &this_rdc, &this_rate_nocoef,
+ INT64_MAX - sum_rdc.rdcost, pc_tree->split[i]);
+#else
+ rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
+ mi_col + x_idx, subsize, &this_rdc,
+ best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
+#endif // CONFIG_SUPERTX
+
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef = INT_MAX;
+#endif // CONFIG_SUPERTX
+ break;
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += this_rate_nocoef;
+#endif // CONFIG_SUPERTX
+ }
+ }
+#if CONFIG_SUPERTX
+ if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && i == 4) {
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ const PARTITION_TYPE best_partition = pc_tree->partitioning;
+
+ pc_tree->partitioning = PARTITION_SPLIT;
+
+ sum_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob[partition_supertx_context_lookup
+ [PARTITION_SPLIT]][supertx_size],
+ 0);
+ sum_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+
+ if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
+ TX_TYPE best_tx = DCT_DCT;
+ RD_COST tmp_rdc = { sum_rate_nocoef, 0, 0 };
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
+ rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize,
+ &tmp_rdc.rate, &tmp_rdc.dist, &best_tx, pc_tree);
+
+ tmp_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob[partition_supertx_context_lookup
+ [PARTITION_SPLIT]][supertx_size],
+ 1);
+ tmp_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist);
+ if (tmp_rdc.rdcost < sum_rdc.rdcost) {
+ sum_rdc = tmp_rdc;
+ update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
+ supertx_size, pc_tree);
+ }
+ }
+
+ pc_tree->partitioning = best_partition;
+ }
+#endif // CONFIG_SUPERTX
+ }
+
+ if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
+ sum_rdc.rate += partition_cost[PARTITION_SPLIT];
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += partition_cost[PARTITION_SPLIT];
+#endif // CONFIG_SUPERTX
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
+#if CONFIG_SUPERTX
+ best_rate_nocoef = sum_rate_nocoef;
+ assert(best_rate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
+ pc_tree->partitioning = PARTITION_SPLIT;
+ }
+ } else {
+ // skip rectangular partition test when larger block size
+ // gives better rd cost
+ if (cpi->sf.less_rectangular_check) do_rect &= !partition_none_allowed;
+ }
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+ } // if (do_split)
+
+ // PARTITION_HORZ
+ if (partition_horz_allowed &&
+ (do_rect || vp10_active_h_edge(cpi, mi_row, mi_step))) {
+ subsize = get_subsize(bsize, PARTITION_HORZ);
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+#if CONFIG_DUAL_FILTER
+ if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
+ partition_none_allowed)
+ pc_tree->horizontal[0].pred_interp_filter =
+ ctx->mic.mbmi.interp_filter[0];
+#else
+ if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
+ partition_none_allowed)
+ pc_tree->horizontal[0].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+#endif
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
+#if CONFIG_SUPERTX
+ &sum_rate_nocoef,
+#endif // CONFIG_SUPERTX
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_HORZ,
+#endif
+ subsize, &pc_tree->horizontal[0], best_rdc.rdcost);
+
+#if CONFIG_SUPERTX
+ abort_flag = (sum_rdc.rdcost >= best_rd && bsize > BLOCK_8X8) ||
+ (sum_rdc.rate == INT_MAX && bsize == BLOCK_8X8);
+ if (sum_rdc.rdcost < INT64_MAX &&
+#else
+ if (sum_rdc.rdcost < best_rdc.rdcost &&
+#endif // CONFIG_SUPERTX
+ mi_row + mi_step < cm->mi_rows && bsize > BLOCK_8X8) {
+ PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
+ update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
+ encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
+
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+
+#if CONFIG_DUAL_FILTER
+ if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
+ partition_none_allowed)
+ pc_tree->horizontal[1].pred_interp_filter =
+ ctx->mic.mbmi.interp_filter[0];
+#else
+ if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
+ partition_none_allowed)
+ pc_tree->horizontal[1].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+#endif
+#if CONFIG_SUPERTX
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
+ &this_rate_nocoef,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_HORZ,
+#endif
+ subsize, &pc_tree->horizontal[1], INT64_MAX);
+#else
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_HORZ,
+#endif
+ subsize, &pc_tree->horizontal[1],
+ best_rdc.rdcost - sum_rdc.rdcost);
+#endif // CONFIG_SUPERTX
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef = INT_MAX;
+#endif // CONFIG_SUPERTX
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += this_rate_nocoef;
+#endif // CONFIG_SUPERTX
+ }
+ }
+
+#if CONFIG_SUPERTX
+ if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && !abort_flag) {
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ const PARTITION_TYPE best_partition = pc_tree->partitioning;
+
+ pc_tree->partitioning = PARTITION_HORZ;
+
+ sum_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_HORZ]]
+ [supertx_size],
+ 0);
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+
+ if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
+ TX_TYPE best_tx = DCT_DCT;
+ RD_COST tmp_rdc = { sum_rate_nocoef, 0, 0 };
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
+ rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize, &tmp_rdc.rate,
+ &tmp_rdc.dist, &best_tx, pc_tree);
+
+ tmp_rdc.rate += vp10_cost_bit(
+ cm->fc
+ ->supertx_prob[partition_supertx_context_lookup[PARTITION_HORZ]]
+ [supertx_size],
+ 1);
+ tmp_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist);
+ if (tmp_rdc.rdcost < sum_rdc.rdcost) {
+ sum_rdc = tmp_rdc;
+ update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
+ supertx_size, pc_tree);
+ }
+ }
+
+ pc_tree->partitioning = best_partition;
+ }
+#endif // CONFIG_SUPERTX
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ sum_rdc.rate += partition_cost[PARTITION_HORZ];
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += partition_cost[PARTITION_HORZ];
+#endif // CONFIG_SUPERTX
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
+#if CONFIG_SUPERTX
+ best_rate_nocoef = sum_rate_nocoef;
+ assert(best_rate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
+ pc_tree->partitioning = PARTITION_HORZ;
+ }
+ }
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+ }
+
+ // PARTITION_VERT
+ if (partition_vert_allowed &&
+ (do_rect || vp10_active_v_edge(cpi, mi_col, mi_step))) {
+ subsize = get_subsize(bsize, PARTITION_VERT);
+
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+
+#if CONFIG_DUAL_FILTER
+ if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
+ partition_none_allowed)
+ pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter[0];
+#else
+ if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
+ partition_none_allowed)
+ pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+#endif
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
+#if CONFIG_SUPERTX
+ &sum_rate_nocoef,
+#endif // CONFIG_SUPERTX
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_VERT,
+#endif
+ subsize, &pc_tree->vertical[0], best_rdc.rdcost);
+#if CONFIG_SUPERTX
+ abort_flag = (sum_rdc.rdcost >= best_rd && bsize > BLOCK_8X8) ||
+ (sum_rdc.rate == INT_MAX && bsize == BLOCK_8X8);
+ if (sum_rdc.rdcost < INT64_MAX &&
+#else
+ if (sum_rdc.rdcost < best_rdc.rdcost &&
+#endif // CONFIG_SUPERTX
+ mi_col + mi_step < cm->mi_cols && bsize > BLOCK_8X8) {
+ update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
+ encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize,
+ &pc_tree->vertical[0]);
+
+ if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+
+#if CONFIG_DUAL_FILTER
+ if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
+ partition_none_allowed)
+ pc_tree->vertical[1].pred_interp_filter =
+ ctx->mic.mbmi.interp_filter[0];
+#else
+ if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
+ partition_none_allowed)
+ pc_tree->vertical[1].pred_interp_filter = ctx->mic.mbmi.interp_filter;
+#endif
+#if CONFIG_SUPERTX
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
+ &this_rate_nocoef,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_VERT,
+#endif
+ subsize, &pc_tree->vertical[1],
+ INT64_MAX - sum_rdc.rdcost);
+#else
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_VERT,
+#endif
+ subsize, &pc_tree->vertical[1],
+ best_rdc.rdcost - sum_rdc.rdcost);
+#endif // CONFIG_SUPERTX
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef = INT_MAX;
+#endif // CONFIG_SUPERTX
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += this_rate_nocoef;
+#endif // CONFIG_SUPERTX
+ }
+ }
+#if CONFIG_SUPERTX
+ if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && !abort_flag) {
+ TX_SIZE supertx_size = max_txsize_lookup[bsize];
+ const PARTITION_TYPE best_partition = pc_tree->partitioning;
+
+ pc_tree->partitioning = PARTITION_VERT;
+
+ sum_rdc.rate += vp10_cost_bit(
+ cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_VERT]]
+ [supertx_size],
+ 0);
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+
+ if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) {
+ TX_TYPE best_tx = DCT_DCT;
+ RD_COST tmp_rdc = { sum_rate_nocoef, 0, 0 };
+
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+
+ rd_supertx_sb(cpi, td, tile_info, mi_row, mi_col, bsize, &tmp_rdc.rate,
+ &tmp_rdc.dist, &best_tx, pc_tree);
+
+ tmp_rdc.rate += vp10_cost_bit(
+ cm->fc
+ ->supertx_prob[partition_supertx_context_lookup[PARTITION_VERT]]
+ [supertx_size],
+ 1);
+ tmp_rdc.rdcost =
+ RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist);
+ if (tmp_rdc.rdcost < sum_rdc.rdcost) {
+ sum_rdc = tmp_rdc;
+ update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx,
+ supertx_size, pc_tree);
+ }
+ }
+
+ pc_tree->partitioning = best_partition;
+ }
+#endif // CONFIG_SUPERTX
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ sum_rdc.rate += partition_cost[PARTITION_VERT];
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
+#if CONFIG_SUPERTX
+ sum_rate_nocoef += partition_cost[PARTITION_VERT];
+#endif // CONFIG_SUPERTX
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
+#if CONFIG_SUPERTX
+ best_rate_nocoef = sum_rate_nocoef;
+ assert(best_rate_nocoef >= 0);
+#endif // CONFIG_SUPERTX
+ pc_tree->partitioning = PARTITION_VERT;
+ }
+ }
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+ }
+
+#if CONFIG_EXT_PARTITION_TYPES
+ // PARTITION_HORZ_A
+ if (partition_horz_allowed && do_rect && bsize > BLOCK_8X8 &&
+ partition_none_allowed) {
+ subsize = get_subsize(bsize, PARTITION_HORZ_A);
+ rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
+ pc_tree->horizontala, ctx, mi_row, mi_col, bsize,
+ PARTITION_HORZ_A,
+#if CONFIG_SUPERTX
+ best_rd, &best_rate_nocoef, &x_ctx,
+#endif
+ mi_row, mi_col, bsize2, mi_row, mi_col + mi_step, bsize2,
+ mi_row + mi_step, mi_col, subsize);
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+ }
+ // PARTITION_HORZ_B
+ if (partition_horz_allowed && do_rect && bsize > BLOCK_8X8 &&
+ partition_none_allowed) {
+ subsize = get_subsize(bsize, PARTITION_HORZ_B);
+ rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
+ pc_tree->horizontalb, ctx, mi_row, mi_col, bsize,
+ PARTITION_HORZ_B,
+#if CONFIG_SUPERTX
+ best_rd, &best_rate_nocoef, &x_ctx,
+#endif
+ mi_row, mi_col, subsize, mi_row + mi_step, mi_col,
+ bsize2, mi_row + mi_step, mi_col + mi_step, bsize2);
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+ }
+ // PARTITION_VERT_A
+ if (partition_vert_allowed && do_rect && bsize > BLOCK_8X8 &&
+ partition_none_allowed) {
+ subsize = get_subsize(bsize, PARTITION_VERT_A);
+ rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
+ pc_tree->verticala, ctx, mi_row, mi_col, bsize,
+ PARTITION_VERT_A,
+#if CONFIG_SUPERTX
+ best_rd, &best_rate_nocoef, &x_ctx,
+#endif
+ mi_row, mi_col, bsize2, mi_row + mi_step, mi_col, bsize2,
+ mi_row, mi_col + mi_step, subsize);
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+ }
+ // PARTITION_VERT_B
+ if (partition_vert_allowed && do_rect && bsize > BLOCK_8X8 &&
+ partition_none_allowed) {
+ subsize = get_subsize(bsize, PARTITION_VERT_B);
+ rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
+ pc_tree->verticalb, ctx, mi_row, mi_col, bsize,
+ PARTITION_VERT_B,
+#if CONFIG_SUPERTX
+ best_rd, &best_rate_nocoef, &x_ctx,
+#endif
+ mi_row, mi_col, subsize, mi_row, mi_col + mi_step,
+ bsize2, mi_row + mi_step, mi_col + mi_step, bsize2);
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize);
+ }
+#endif // CONFIG_EXT_PARTITION_TYPES
+
+ // TODO(jbb): This code added so that we avoid static analysis
+ // warning related to the fact that best_rd isn't used after this
+ // point. This code should be refactored so that the duplicate
+ // checks occur in some sub function and thus are used...
+ (void)best_rd;
+ *rd_cost = best_rdc;
+#if CONFIG_SUPERTX
+ *rate_nocoef = best_rate_nocoef;
+#endif // CONFIG_SUPERTX
+
+ if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
+ pc_tree->index != 3) {
+ int output_enabled = (bsize == cm->sb_size);
+ encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
+ pc_tree);
+ }
+
+ if (bsize == cm->sb_size) {
+ assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip));
+ assert(best_rdc.rate < INT_MAX);
+ assert(best_rdc.dist < INT64_MAX);
+ } else {
+ assert(tp_orig == *tp);
+ }
+}
+
+// Encodes every superblock of one superblock row of a tile.
+//
+// cpi: encoder instance; speed features and segmentation state are read
+// from it.
+// td: per-thread data providing the macroblock state and the PC trees.
+// tile_data: tile being coded; its tile_info gives the column range.
+// mi_row: row position of this superblock row (same units as
+// tile_info->mi_col_start / cm->mib_size).
+// tp: forwarded unchanged to rd_use_partition() / rd_pick_partition().
+//
+// With CONFIG_ENTROPY, once the row has been coded the coefficient
+// probabilities may additionally be partially adapted and snapshotted
+// (see the end of the function).
+static void encode_rd_sb_row(VP10_COMP *cpi, ThreadData *td,
+ TileDataEnc *tile_data, int mi_row,
+ TOKENEXTRA **tp) {
+ VP10_COMMON *const cm = &cpi->common;
+ const TileInfo *const tile_info = &tile_data->tile_info;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ SPEED_FEATURES *const sf = &cpi->sf;
+ int mi_col;
+ // Number of td->leaf_tree / td->pc_tree entries reset per superblock below.
+#if CONFIG_EXT_PARTITION
+ const int leaf_nodes = 256;
+#else
+ const int leaf_nodes = 64;
+#endif // CONFIG_EXT_PARTITION
+
+ // Initialize the left context for the new SB row
+ vp10_zero_left_context(xd);
+
+ // Code each SB in the row
+ for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
+ mi_col += cm->mib_size) {
+ const struct segmentation *const seg = &cm->seg;
+ // Outputs required by the partition routines; they are not read again in
+ // this function.
+ int dummy_rate;
+ int64_t dummy_dist;
+ RD_COST dummy_rdc;
+#if CONFIG_SUPERTX
+ int dummy_rate_nocoef;
+#endif // CONFIG_SUPERTX
+ int i;
+ int seg_skip = 0;
+
+ // Position of this superblock inside the visible mode-info grid.
+ const int idx_str = cm->mi_stride * mi_row + mi_col;
+ MODE_INFO **mi = cm->mi_grid_visible + idx_str;
+ // Root PC tree, selected by the superblock size (cm->mib_size_log2).
+ PC_TREE *const pc_root = td->pc_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2];
+
+ // Reset every cached interpolation-filter prediction to SWITCHABLE before
+ // coding this superblock.
+ if (sf->adaptive_pred_interp_filter) {
+ for (i = 0; i < leaf_nodes; ++i)
+ td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
+
+ for (i = 0; i < leaf_nodes; ++i) {
+ td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
+ td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
+ td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
+ td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
+ }
+ }
+
+ vp10_zero(x->pred_mv);
+ pc_root->index = 0;
+
+ // Check whether SEG_LVL_SKIP is active for this superblock's segment. The
+ // segment id is read from the encoder's map when the map is being updated,
+ // otherwise from the last frame's map.
+ if (seg->enabled) {
+ const uint8_t *const map =
+ seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
+ int segment_id = get_segment_id(cm, map, cm->sb_size, mi_row, mi_col);
+ seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
+ }
+
+ x->source_variance = UINT_MAX;
+ // Case 1: fixed block size (forced to the superblock size for segments
+ // with the skip feature).
+ if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
+ BLOCK_SIZE bsize;
+ set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->sb_size);
+ bsize = seg_skip ? cm->sb_size : sf->always_this_block_size;
+ set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
+ rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, cm->sb_size,
+ &dummy_rate, &dummy_dist,
+#if CONFIG_SUPERTX
+ &dummy_rate_nocoef,
+#endif // CONFIG_SUPERTX
+ 1, pc_root);
+ // Case 2: fixed block size chosen by get_rd_var_based_fixed_partition().
+ } else if (cpi->partition_search_skippable_frame) {
+ BLOCK_SIZE bsize;
+ set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->sb_size);
+ bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
+ set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
+ rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, cm->sb_size,
+ &dummy_rate, &dummy_dist,
+#if CONFIG_SUPERTX
+ &dummy_rate_nocoef,
+#endif // CONFIG_SUPERTX
+ 1, pc_root);
+ // Case 3: partitioning set up by choose_partitioning().
+ } else if (sf->partition_search_type == VAR_BASED_PARTITION) {
+ choose_partitioning(cpi, td, tile_info, x, mi_row, mi_col);
+ rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, cm->sb_size,
+ &dummy_rate, &dummy_dist,
+#if CONFIG_SUPERTX
+ &dummy_rate_nocoef,
+#endif // CONFIG_SUPERTX
+ 1, pc_root);
+ // Case 4: partition search via rd_pick_partition(); INT64_MAX is passed
+ // as its rate-distortion bound argument.
+ } else {
+ // If required set upper and lower partition size limits
+ if (sf->auto_min_max_partition_size) {
+ set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->sb_size);
+ rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
+ &x->min_partition_size, &x->max_partition_size);
+ }
+ rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size,
+ &dummy_rdc,
+#if CONFIG_SUPERTX
+ &dummy_rate_nocoef,
+#endif // CONFIG_SUPERTX
+ INT64_MAX, pc_root);
+ }
+ }
+#if CONFIG_ENTROPY
+ // Sub-frame coefficient probability update; only considered with backward
+ // frame-context refresh.
+ if (cm->do_subframe_update &&
+ cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+ // Fire at regular row intervals (period MI_SIZE *
+ // max(mi_rows / MI_SIZE / COEF_PROBS_BUFS, 1)), not for the final rows of
+ // the frame, and only while coef_probs_update_idx is still below
+ // COEF_PROBS_BUFS - 1.
+ if ((mi_row + MI_SIZE) %
+ (MI_SIZE *
+ VPXMAX(cm->mi_rows / MI_SIZE / COEF_PROBS_BUFS, 1)) ==
+ 0 &&
+ mi_row + MI_SIZE < cm->mi_rows &&
+ cm->coef_probs_update_idx < COEF_PROBS_BUFS - 1) {
+ TX_SIZE t;
+ SUBFRAME_STATS *subframe_stats = &cpi->subframe_stats;
+
+ for (t = TX_4X4; t <= TX_32X32; ++t)
+ vp10_full_to_model_counts(cpi->td.counts->coef[t],
+ cpi->td.rd_counts.coef_counts[t]);
+ // NOTE: mi_col is the value left behind by the column loop above.
+ vp10_partial_adapt_probs(cm, mi_row, mi_col);
+ ++cm->coef_probs_update_idx;
+ // Snapshot the adapted probabilities and the counts into the slot for
+ // the new update index, then refresh the token costs from them.
+ vp10_copy(subframe_stats->coef_probs_buf[cm->coef_probs_update_idx],
+ cm->fc->coef_probs);
+ vp10_copy(subframe_stats->coef_counts_buf[cm->coef_probs_update_idx],
+ cpi->td.rd_counts.coef_counts);
+ vp10_copy(subframe_stats->eob_counts_buf[cm->coef_probs_update_idx],
+ cm->counts.eob_branch);
+ vp10_fill_token_costs(x->token_costs,
+#if CONFIG_ANS
+ cm->fc->coef_cdfs,
+#endif // CONFIG_ANS
+ cm->fc->coef_probs);
+ }
+ }
+#endif // CONFIG_ENTROPY
+}
+
+// Sets up the frame-level macroblock state: the source planes come from
+// cpi->Source at position (0, 0), and the block planes are configured from
+// the frame's chroma subsampling.
+static void init_encode_frame_mb_context(VP10_COMP *cpi) {
+  MACROBLOCK *const mb = &cpi->td.mb;
+
+  vp10_setup_src_planes(mb, cpi->Source, 0, 0);
+  vp10_setup_block_planes(&mb->e_mbd, cpi->common.subsampling_x,
+                          cpi->common.subsampling_y);
+}
+
+// Returns 1 when at least two reference-frame bits are set in
+// cpi->ref_frame_flags, 0 otherwise. Always returns 0 when the
+// SEG_LVL_REF_FRAME feature is active for segment 1.
+static int check_dual_ref_flags(VP10_COMP *cpi) {
+  const int ref_flags = cpi->ref_frame_flags;
+  int enabled_refs = 0;
+
+  if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) return 0;
+
+  if (ref_flags & VPX_GOLD_FLAG) ++enabled_refs;
+  if (ref_flags & VPX_LAST_FLAG) ++enabled_refs;
+#if CONFIG_EXT_REFS
+  if (ref_flags & VPX_LAST2_FLAG) ++enabled_refs;
+  if (ref_flags & VPX_LAST3_FLAG) ++enabled_refs;
+  if (ref_flags & VPX_BWD_FLAG) ++enabled_refs;
+#endif  // CONFIG_EXT_REFS
+  if (ref_flags & VPX_ALT_FLAG) ++enabled_refs;
+
+  return enabled_refs >= 2;
+}
+
+#if !CONFIG_VAR_TX
+// Walks the whole visible mode-info grid and lowers any stored transform
+// size whose txsize_sqr_up_map[] value exceeds |max_tx_size| down to
+// |max_tx_size|.
+static void reset_skip_tx_size(VP10_COMMON *cm, TX_SIZE max_tx_size) {
+  MODE_INFO **grid_row = cm->mi_grid_visible;
+  int row;
+
+  for (row = 0; row < cm->mi_rows; ++row) {
+    int col;
+
+    for (col = 0; col < cm->mi_cols; ++col) {
+      MB_MODE_INFO *const mbmi = &grid_row[col]->mbmi;
+
+      if (txsize_sqr_up_map[mbmi->tx_size] > max_tx_size)
+        mbmi->tx_size = max_tx_size;
+    }
+    // Advance to the next grid row.
+    grid_row += cm->mi_stride;
+  }
+}
+#endif
+
+// Classifies the frame being coded as INTRA / ALTREF / GOLDEN / LAST from the
+// encoder's refresh flags and rate-control state. The first matching rule
+// wins.
+static MV_REFERENCE_FRAME get_frame_type(const VP10_COMP *cpi) {
+  if (frame_is_intra_only(&cpi->common)) return INTRA_FRAME;
+
+#if CONFIG_EXT_REFS
+  // We will not update the golden frame with an internal overlay frame
+  if ((cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) ||
+      cpi->rc.is_src_frame_ext_arf) {
+    return ALTREF_FRAME;
+  }
+#else
+  if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) {
+    return ALTREF_FRAME;
+  }
+#endif
+
+  if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) {
+    return GOLDEN_FRAME;
+  }
+
+  // TODO(zoeliu): To investigate whether a frame_type other than
+  // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately.
+  return LAST_FRAME;
+}
+
+// Picks the frame transform mode:
+//   - ONLY_4X4 when segment 0 is lossless;
+//   - otherwise decided by the tx_size_search_method speed feature, falling
+//     back to the mode already stored in cpi->common.tx_mode.
+static TX_MODE select_tx_mode(const VP10_COMP *cpi, MACROBLOCKD *const xd) {
+  if (xd->lossless[0]) return ONLY_4X4;
+
+  switch (cpi->sf.tx_size_search_method) {
+    case USE_LARGESTALL: return ALLOW_32X32;
+    case USE_FULL_RD:
+    case USE_TX_8X8: return TX_MODE_SELECT;
+    default: return cpi->common.tx_mode;
+  }
+}
+
+// (Re)allocates the per-tile encoder data when it is missing or too small,
+// then initialises every tile's TileInfo and token buffer start.
+//
+// Freshly allocated tile entries get thresh_freq_fact = 32 and an identity
+// mode_map. Token buffers are laid out back to back starting at the pointer
+// held in cpi->tile_tok[0][0]: each tile begins allocated_tokens() entries
+// after the start of the previous one.
+void vp10_init_tile_data(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  const int tile_cols = cm->tile_cols;
+  const int tile_rows = cm->tile_rows;
+  const int num_tiles = tile_cols * tile_rows;
+  TOKENEXTRA *prev_start = cpi->tile_tok[0][0];
+  unsigned int prev_size = 0;
+  int row, col;
+
+  if (cpi->tile_data == NULL || cpi->allocated_tiles < num_tiles) {
+    if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
+    CHECK_MEM_ERROR(cm, cpi->tile_data,
+                    vpx_malloc(num_tiles * sizeof(*cpi->tile_data)));
+    cpi->allocated_tiles = num_tiles;
+
+    for (row = 0; row < tile_rows; ++row) {
+      for (col = 0; col < tile_cols; ++col) {
+        TileDataEnc *const tile = &cpi->tile_data[row * tile_cols + col];
+        int bs, mode;
+
+        for (bs = 0; bs < BLOCK_SIZES; ++bs) {
+          for (mode = 0; mode < MAX_MODES; ++mode) {
+            tile->thresh_freq_fact[bs][mode] = 32;
+            tile->mode_map[bs][mode] = mode;
+          }
+        }
+      }
+    }
+  }
+
+  for (row = 0; row < tile_rows; ++row) {
+    for (col = 0; col < tile_cols; ++col) {
+      TileInfo *const info = &cpi->tile_data[row * tile_cols + col].tile_info;
+
+      vp10_tile_init(info, cm, row, col);
+
+      // This tile starts where the previous tile's allocation ends.
+      cpi->tile_tok[row][col] = prev_start + prev_size;
+      prev_start = cpi->tile_tok[row][col];
+      prev_size = allocated_tokens(*info);
+    }
+  }
+}
+
+// Encodes one tile, superblock row by superblock row, and records how many
+// tokens were produced for it in cpi->tok_count[tile_row][tile_col].
+//
+// td supplies the thread-local macroblock state; its motion-search counter
+// pointers are aimed at td->rd_counts before coding starts.
+void vp10_encode_tile(VP10_COMP *cpi, ThreadData *td, int tile_row,
+                      int tile_col) {
+  VP10_COMMON *const cm = &cpi->common;
+  TileDataEnc *const tile = &cpi->tile_data[tile_row * cm->tile_cols + tile_col];
+  const TileInfo *const info = &tile->tile_info;
+  TOKENEXTRA *tok_cursor = cpi->tile_tok[tile_row][tile_col];
+  int sb_row;
+
+  vp10_zero_above_context(cm, info->mi_col_start, info->mi_col_end);
+
+  // Set up pointers to per thread motion search counters.
+  td->mb.m_search_count_ptr = &td->rd_counts.m_search_count;
+  td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count;
+
+  sb_row = info->mi_row_start;
+  while (sb_row < info->mi_row_end) {
+    encode_rd_sb_row(cpi, td, tile, sb_row, &tok_cursor);
+    sb_row += cm->mib_size;
+  }
+
+  // Tokens written = distance the cursor moved from the tile's buffer start.
+  cpi->tok_count[tile_row][tile_col] =
+      (unsigned int)(tok_cursor - cpi->tile_tok[tile_row][tile_col]);
+  assert(cpi->tok_count[tile_row][tile_col] <= allocated_tokens(*info));
+}
+
+// Single-threaded tile encode: initialises the tile data, then encodes every
+// tile in raster order (rows outer, columns inner) using cpi->td.
+static void encode_tiles(VP10_COMP *cpi) {
+  const VP10_COMMON *const cm = &cpi->common;
+  int row, col;
+
+  vp10_init_tile_data(cpi);
+
+  for (row = 0; row < cm->tile_rows; ++row) {
+    for (col = 0; col < cm->tile_cols; ++col) {
+      vp10_encode_tile(cpi, &cpi->td, row, col);
+    }
+  }
+}
+
+#if CONFIG_FP_MB_STATS
+// Locates the first-pass macroblock stats for the current frame.
+//
+// The frame's stats start cm->current_video_frame * cm->MBs bytes after
+// mb_stats_start. On success the address is stored in *this_frame_mb_stats
+// and 1 is returned. If the address lies beyond mb_stats_end, EOF is
+// returned and *this_frame_mb_stats is left untouched.
+static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
+                            VP10_COMMON *cm, uint8_t **this_frame_mb_stats) {
+  const size_t frame_offset =
+      cm->current_video_frame * cm->MBs * sizeof(uint8_t);
+  uint8_t *const frame_stats = firstpass_mb_stats->mb_stats_start + frame_offset;
+
+  if (frame_stats <= firstpass_mb_stats->mb_stats_end) {
+    *this_frame_mb_stats = frame_stats;
+    return 1;
+  }
+
+  return EOF;
+}
+#endif
+
+#if CONFIG_GLOBAL_MOTION
+// Magnitude below which both translation parameters are zeroed by
+// convert_to_params() when no other model parameter is non-zero.
+#define MIN_TRANS_THRESH 8
+// A global motion model is discarded in encode_frame_internal() when
+// vp10_warp_erroradv() returns a value above this threshold.
+#define GLOBAL_MOTION_ADVANTAGE_THRESH 0.60
+// Transformation model passed to compute_global_motion_feature_based() and
+// convert_model_to_params().
+#define GLOBAL_MOTION_MODEL ROTZOOM
+// Quantizes the floating-point model |H| into model->motion_params.wmmat.
+//
+// Entries 0 and 1 are rounded at GM_TRANS_PREC_BITS precision, clamped to
+// [GM_TRANS_MIN, GM_TRANS_MAX] and scaled by GM_TRANS_DECODE_FACTOR. Entries
+// 2 .. n_trans_model_params[type] - 1 get the same treatment with the
+// GM_ALPHA_* constants. If all of those higher entries quantize to zero and
+// both translation entries are smaller in magnitude than MIN_TRANS_THRESH,
+// the translation is zeroed as well.
+static void convert_to_params(double *H, TransformationType type,
+                              Global_Motion_Params *model) {
+  const int num_params = n_trans_model_params[type];
+  int has_alpha = 0;
+  int k;
+
+  // Translation part: round to nearest at translation precision.
+  for (k = 0; k < 2; ++k) {
+    model->motion_params.wmmat[k] =
+        (int)floor(H[k] * (1 << GM_TRANS_PREC_BITS) + 0.5);
+  }
+  for (k = 0; k < 2; ++k) {
+    model->motion_params.wmmat[k] =
+        clamp(model->motion_params.wmmat[k], GM_TRANS_MIN, GM_TRANS_MAX) *
+        GM_TRANS_DECODE_FACTOR;
+  }
+
+  // Remaining parameters use the alpha precision and range.
+  for (k = 2; k < num_params; ++k) {
+    model->motion_params.wmmat[k] =
+        (int)floor(H[k] * (1 << GM_ALPHA_PREC_BITS) + 0.5);
+    model->motion_params.wmmat[k] =
+        clamp(model->motion_params.wmmat[k], GM_ALPHA_MIN, GM_ALPHA_MAX) *
+        GM_ALPHA_DECODE_FACTOR;
+    if (model->motion_params.wmmat[k] != 0) has_alpha = 1;
+  }
+
+  // A translation-only model with a tiny translation collapses to zero.
+  if (!has_alpha && abs(model->motion_params.wmmat[0]) < MIN_TRANS_THRESH &&
+      abs(model->motion_params.wmmat[1]) < MIN_TRANS_THRESH) {
+    model->motion_params.wmmat[0] = 0;
+    model->motion_params.wmmat[1] = 0;
+  }
+}
+
+// Fills |model| from the floating-point model |H|: quantizes the parameters
+// (skipped unless |type| compares greater than HOMOGRAPHY), then derives
+// model->gmtype and the matching motion_params.wmtype.
+static void convert_model_to_params(double *H, TransformationType type,
+                                    Global_Motion_Params *model) {
+  // TODO(sarahparker) implement for homography
+  if (type > HOMOGRAPHY) {
+    convert_to_params(H, type, model);
+  }
+
+  model->gmtype = get_gmtype(model);
+  model->motion_params.wmtype = gm_to_trans_type(model->gmtype);
+}
+#endif // CONFIG_GLOBAL_MOTION
+
+// Encodes the current frame once, using the frame-level settings already
+// stored on cpi / cm. In order, it:
+// - clamps the partition size limits to the superblock size and resets the
+// per-frame counters;
+// - (CONFIG_GLOBAL_MOTION) estimates a global motion model per reference;
+// - derives the per-segment lossless flags and the frame transform mode;
+// - initialises the quantizer, RD and motion-estimation constants;
+// - decides whether previous-frame motion vectors may be used;
+// - encodes all tiles and adds the elapsed time to cpi->time_encode_sb_row.
+static void encode_frame_internal(VP10_COMP *cpi) {
+ ThreadData *const td = &cpi->td;
+ MACROBLOCK *const x = &td->mb;
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ RD_COUNTS *const rdc = &cpi->td.rd_counts;
+ int i;
+
+ // Neither partition size limit may exceed the superblock size.
+ x->min_partition_size = VPXMIN(x->min_partition_size, cm->sb_size);
+ x->max_partition_size = VPXMIN(x->max_partition_size, cm->sb_size);
+
+ xd->mi = cm->mi_grid_visible;
+ xd->mi[0] = cm->mi;
+
+ // Reset the per-frame statistics.
+ vp10_zero(*td->counts);
+ vp10_zero(rdc->coef_counts);
+ vp10_zero(rdc->comp_pred_diff);
+ rdc->m_search_count = 0; // Count of motion search hits.
+ rdc->ex_search_count = 0; // Exhaustive mesh search hits.
+
+#if CONFIG_GLOBAL_MOTION
+ vpx_clear_system_state();
+ vp10_zero(cpi->global_motion_used);
+ // Global motion is only estimated for inter frames with a source buffer.
+ if (cpi->common.frame_type == INTER_FRAME && cpi->Source) {
+ YV12_BUFFER_CONFIG *ref_buf;
+ int frame;
+ // Model estimated by compute_global_motion_feature_based().
+ double H[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
+ ref_buf = get_ref_frame_buffer(cpi, frame);
+ if (ref_buf) {
+ if (compute_global_motion_feature_based(GLOBAL_MOTION_MODEL,
+ cpi->Source, ref_buf, H)) {
+ convert_model_to_params(H, GLOBAL_MOTION_MODEL,
+ &cm->global_motion[frame]);
+ if (get_gmtype(&cm->global_motion[frame]) > GLOBAL_ZERO) {
+ // compute the advantage of using gm parameters over 0 motion
+ double erroradvantage = vp10_warp_erroradv(
+ &cm->global_motion[frame].motion_params,
+#if CONFIG_VP9_HIGHBITDEPTH
+ xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ ref_buf->y_buffer, ref_buf->y_width, ref_buf->y_height,
+ ref_buf->y_stride, cpi->Source->y_buffer, 0, 0,
+ cpi->Source->y_width, cpi->Source->y_height,
+ cpi->Source->y_stride, 0, 0, 16, 16);
+ // The model is kept only when the returned value does not exceed
+ // the threshold.
+ if (erroradvantage > GLOBAL_MOTION_ADVANTAGE_THRESH)
+ // Not enough advantage in using a global model. Make 0.
+ memset(&cm->global_motion[frame], 0,
+ sizeof(cm->global_motion[frame]));
+ }
+ }
+ }
+ }
+ }
+#endif // CONFIG_GLOBAL_MOTION
+
+ // A segment is lossless when its qindex and all delta-q values are 0.
+ for (i = 0; i < MAX_SEGMENTS; ++i) {
+ const int qindex = cm->seg.enabled
+ ? vp10_get_qindex(&cm->seg, i, cm->base_qindex)
+ : cm->base_qindex;
+ xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
+ cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
+ }
+
+ // Without segmentation every segment shares base_qindex, so segment 0 being
+ // lossless covers the whole frame; x->optimize is cleared in that case.
+ if (!cm->seg.enabled && xd->lossless[0]) x->optimize = 0;
+
+ cm->tx_mode = select_tx_mode(cpi, xd);
+ vp10_frame_init_quantizer(cpi);
+
+ vp10_initialize_rd_consts(cpi);
+ vp10_initialize_me_consts(cpi, x, cm->base_qindex);
+ init_encode_frame_mb_context(cpi);
+
+ // Previous-frame MVs require: no error-resilient mode, unchanged frame
+ // dimensions, not an intra-only frame, and the last frame having been shown.
+ cm->use_prev_frame_mvs =
+ !cm->error_resilient_mode && cm->width == cm->last_width &&
+ cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame;
+#if CONFIG_EXT_REFS
+ // NOTE(zoeliu): As cm->prev_frame can take neither a frame of
+ // show_existing_frame=1, nor can it take a frame not used as
+ // a reference, it is probable that by the time it is being
+ // referred to, the frame buffer it originally points to may
+ // already get expired and have been reassigned to the current
+ // newly coded frame. Hence, we need to check whether this is
+ // the case, and if yes, we have 2 choices:
+ // (1) Simply disable the use of previous frame mvs; or
+ // (2) Have cm->prev_frame point to one reference frame buffer,
+ // e.g. LAST_FRAME.
+ if (cm->use_prev_frame_mvs && !enc_is_ref_frame_buf(cpi, cm->prev_frame)) {
+ // Reassign the LAST_FRAME buffer to cm->prev_frame.
+ const int last_fb_buf_idx = get_ref_frame_buf_idx(cpi, LAST_FRAME);
+ cm->prev_frame = &cm->buffer_pool->frame_bufs[last_fb_buf_idx];
+ }
+#endif // CONFIG_EXT_REFS
+
+ // Special case: set prev_mi to NULL when the previous mode info
+ // context cannot be used.
+ cm->prev_mi =
+ cm->use_prev_frame_mvs ? cm->prev_mip + cm->mi_stride + 1 : NULL;
+
+#if CONFIG_VAR_TX
+#if CONFIG_REF_MV
+ vp10_zero(x->blk_skip_drl);
+#endif
+#endif
+
+ // Build the variance tree the first time variance-based partitioning is
+ // selected (var_root[0] is still NULL).
+ if (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
+ cpi->td.var_root[0] == NULL)
+ vp10_setup_var_tree(&cpi->common, &cpi->td);
+
+ {
+ // Time the tile encode; the elapsed value accumulates on cpi.
+ struct vpx_usec_timer emr_timer;
+ vpx_usec_timer_start(&emr_timer);
+
+#if CONFIG_FP_MB_STATS
+ // Locate this frame's first-pass MB stats. The return value of
+ // input_fpmb_stats() is not checked here.
+ if (cpi->use_fp_mb_stats) {
+ input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
+ &cpi->twopass.this_frame_mb_stats);
+ }
+#endif
+
+ // If allowed, encoding tiles in parallel with one thread handling one tile.
+ // TODO(geza.lore): The multi-threaded encoder is not safe with more than
+ // 1 tile rows, as it uses the single above_context et al arrays from
+ // cpi->common
+ if (VPXMIN(cpi->oxcf.max_threads, cm->tile_cols) > 1 && cm->tile_rows == 1)
+ vp10_encode_tiles_mt(cpi);
+ else
+ encode_tiles(cpi);
+
+ vpx_usec_timer_mark(&emr_timer);
+ cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
+ }
+
+#if 0
+ // Keep record of the total distortion this time around for future use
+ cpi->last_frame_distortion = cpi->frame_distortion;
+#endif
+}
+
+// Top-level frame encode.
+//
+// First decides whether compound inter prediction is allowed for this frame.
+// Then, when sf.frame_parameter_update is set, it picks cm->reference_mode
+// from per-frame-type thresholds, encodes the frame, updates those
+// thresholds, and narrows cm->reference_mode / cm->tx_mode to what the frame
+// actually used. Otherwise it simply calls encode_frame_internal().
+void vp10_encode_frame(VP10_COMP *cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+
+ // In the longer term the encoder should be generalized to match the
+ // decoder such that we allow compound where one of the 3 buffers has a
+ // different sign bias and that buffer is then the fixed ref. However, this
+ // requires further work in the rd loop. For now the only supported encoder
+ // side behavior is where the ALT ref buffer has opposite sign bias to
+ // the other two.
+ if (!frame_is_intra_only(cm)) {
+ // Compound is allowed only if ALTREF's sign bias differs from both
+ // GOLDEN's and LAST's.
+ if ((cm->ref_frame_sign_bias[ALTREF_FRAME] ==
+ cm->ref_frame_sign_bias[GOLDEN_FRAME]) ||
+ (cm->ref_frame_sign_bias[ALTREF_FRAME] ==
+ cm->ref_frame_sign_bias[LAST_FRAME])) {
+ cpi->allow_comp_inter_inter = 0;
+ } else {
+ cpi->allow_comp_inter_inter = 1;
+
+#if CONFIG_EXT_REFS
+ cm->comp_fwd_ref[0] = LAST_FRAME;
+ cm->comp_fwd_ref[1] = LAST2_FRAME;
+ cm->comp_fwd_ref[2] = LAST3_FRAME;
+ cm->comp_fwd_ref[3] = GOLDEN_FRAME;
+ cm->comp_bwd_ref[0] = BWDREF_FRAME;
+ cm->comp_bwd_ref[1] = ALTREF_FRAME;
+#else
+ cm->comp_fixed_ref = ALTREF_FRAME;
+ cm->comp_var_ref[0] = LAST_FRAME;
+ cm->comp_var_ref[1] = GOLDEN_FRAME;
+#endif // CONFIG_EXT_REFS
+ }
+ } else {
+ cpi->allow_comp_inter_inter = 0;
+ }
+
+ if (cpi->sf.frame_parameter_update) {
+ int i;
+ RD_OPT *const rd_opt = &cpi->rd;
+ FRAME_COUNTS *counts = cpi->td.counts;
+ RD_COUNTS *const rdc = &cpi->td.rd_counts;
+
+ // This code does a single RD pass over the whole frame assuming
+ // either compound, single or hybrid prediction as per whatever has
+ // worked best for that type of frame in the past.
+ // It also predicts whether another coding mode would have worked
+ // better than this coding mode. If that is the case, it remembers
+ // that for subsequent frames.
+ // It does the same analysis for transform size selection also.
+ //
+ // TODO(zoeliu): To investigate whether a frame_type other than
+ // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately.
+ const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
+ // Running thresholds for this frame type, indexed by reference mode.
+ int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
+ const int is_alt_ref = frame_type == ALTREF_FRAME;
+
+ /* prediction (compound, single or hybrid) mode selection */
+ if (is_alt_ref || !cpi->allow_comp_inter_inter)
+ cm->reference_mode = SINGLE_REFERENCE;
+ else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] &&
+ mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] &&
+ check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100)
+ cm->reference_mode = COMPOUND_REFERENCE;
+ else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT])
+ cm->reference_mode = SINGLE_REFERENCE;
+ else
+ cm->reference_mode = REFERENCE_MODE_SELECT;
+
+#if CONFIG_DUAL_FILTER
+ cm->interp_filter = SWITCHABLE;
+#endif
+
+ encode_frame_internal(cpi);
+
+ // New threshold = average of the old threshold and this frame's
+ // comp_pred_diff per macroblock.
+ for (i = 0; i < REFERENCE_MODES; ++i)
+ mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;
+
+ // If one of the two comp_inter outcomes was never counted, switch to the
+ // corresponding fixed reference mode and clear the comp_inter counts.
+ if (cm->reference_mode == REFERENCE_MODE_SELECT) {
+ int single_count_zero = 0;
+ int comp_count_zero = 0;
+
+ for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
+ single_count_zero += counts->comp_inter[i][0];
+ comp_count_zero += counts->comp_inter[i][1];
+ }
+
+ if (comp_count_zero == 0) {
+ cm->reference_mode = SINGLE_REFERENCE;
+ vp10_zero(counts->comp_inter);
+ } else if (single_count_zero == 0) {
+ cm->reference_mode = COMPOUND_REFERENCE;
+ vp10_zero(counts->comp_inter);
+ }
+ }
+
+#if !CONFIG_VAR_TX
+ // Narrow cm->tx_mode when the counts show that only a subset of transform
+ // sizes was selected. reset_skip_tx_size() then caps the stored tx sizes
+ // to the new maximum.
+ if (cm->tx_mode == TX_MODE_SELECT) {
+ // NOTE(review): the _lp / _NxNp suffixes appear to separate counts taken
+ // from tx_size categories that also allow larger sizes from counts taken
+ // from the category in which NxN is the largest size - confirm.
+ int count4x4 = 0;
+ int count8x8_lp = 0, count8x8_8x8p = 0;
+ int count16x16_16x16p = 0, count16x16_lp = 0;
+ int count32x32 = 0;
+ for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
+ count4x4 += counts->tx_size[0][i][TX_4X4];
+ count4x4 += counts->tx_size[1][i][TX_4X4];
+ count4x4 += counts->tx_size[2][i][TX_4X4];
+
+ count8x8_lp += counts->tx_size[1][i][TX_8X8];
+ count8x8_lp += counts->tx_size[2][i][TX_8X8];
+ count8x8_8x8p += counts->tx_size[0][i][TX_8X8];
+
+ count16x16_16x16p += counts->tx_size[1][i][TX_16X16];
+ count16x16_lp += counts->tx_size[2][i][TX_16X16];
+ count32x32 += counts->tx_size[2][i][TX_32X32];
+ }
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ count4x4 += counts->tx_size_implied[0][TX_4X4];
+ count4x4 += counts->tx_size_implied[1][TX_4X4];
+ count4x4 += counts->tx_size_implied[2][TX_4X4];
+ count4x4 += counts->tx_size_implied[3][TX_4X4];
+ count8x8_lp += counts->tx_size_implied[2][TX_8X8];
+ count8x8_lp += counts->tx_size_implied[3][TX_8X8];
+ count8x8_8x8p += counts->tx_size_implied[1][TX_8X8];
+ count16x16_lp += counts->tx_size_implied[3][TX_16X16];
+ count16x16_16x16p += counts->tx_size_implied[2][TX_16X16];
+ count32x32 += counts->tx_size_implied[3][TX_32X32];
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+ // No 4x4, 16x16 or 32x32 selections at all -> ALLOW_8X8.
+ if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
+#if CONFIG_SUPERTX
+ cm->counts.supertx_size[TX_16X16] == 0 &&
+ cm->counts.supertx_size[TX_32X32] == 0 &&
+#endif // CONFIG_SUPERTX
+ count32x32 == 0) {
+ cm->tx_mode = ALLOW_8X8;
+ reset_skip_tx_size(cm, TX_8X8);
+ // No 8x8, 16x16 or 32x32 selections at all -> ONLY_4X4.
+ } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
+ count8x8_lp == 0 && count16x16_lp == 0 &&
+#if CONFIG_SUPERTX
+ cm->counts.supertx_size[TX_8X8] == 0 &&
+ cm->counts.supertx_size[TX_16X16] == 0 &&
+ cm->counts.supertx_size[TX_32X32] == 0 &&
+#endif // CONFIG_SUPERTX
+ count32x32 == 0) {
+ cm->tx_mode = ONLY_4X4;
+ reset_skip_tx_size(cm, TX_4X4);
+ // count4x4, count8x8_lp and count16x16_lp are all zero -> ALLOW_32X32.
+ } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
+ cm->tx_mode = ALLOW_32X32;
+ // count32x32, count8x8_lp and count4x4 are all zero -> ALLOW_16X16.
+ } else if (count32x32 == 0 && count8x8_lp == 0 &&
+#if CONFIG_SUPERTX
+ cm->counts.supertx_size[TX_32X32] == 0 &&
+#endif // CONFIG_SUPERTX
+ count4x4 == 0) {
+ cm->tx_mode = ALLOW_16X16;
+ reset_skip_tx_size(cm, TX_16X16);
+ }
+ }
+#endif
+ } else {
+ encode_frame_internal(cpi);
+ }
+}
+
+// Accumulates the intra prediction modes of one block into |counts|.
+//
+// Luma: blocks smaller than 8x8 contribute one count per coded sub-block
+// (read from mi->bmi[]), larger blocks contribute a single count for
+// mbmi.mode. When |intraonly| is set the kf_y_mode table is used, indexed by
+// the above and left block modes; otherwise the y_mode table is used
+// (size group 0 for sub-8x8 blocks).
+// Chroma: uv_mode[y_mode][uv_mode] is always incremented once.
+static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi,
+                            const MODE_INFO *above_mi, const MODE_INFO *left_mi,
+                            const int intraonly) {
+  const BLOCK_SIZE bsize = mi->mbmi.sb_type;
+  const PREDICTION_MODE y_mode = mi->mbmi.mode;
+  const PREDICTION_MODE uv_mode = mi->mbmi.uv_mode;
+
+  if (bsize >= BLOCK_8X8) {
+    if (intraonly) {
+      const PREDICTION_MODE above = vp10_above_block_mode(mi, above_mi, 0);
+      const PREDICTION_MODE left = vp10_left_block_mode(mi, left_mi, 0);
+      ++counts->kf_y_mode[above][left][y_mode];
+    } else {
+      ++counts->y_mode[size_group_lookup[bsize]][y_mode];
+    }
+  } else {
+    // Step through the 2x2 grid of 4x4 units by the sub-block dimensions.
+    const int step_x = num_4x4_blocks_wide_lookup[bsize];
+    const int step_y = num_4x4_blocks_high_lookup[bsize];
+    int row, col;
+
+    for (row = 0; row < 2; row += step_y) {
+      for (col = 0; col < 2; col += step_x) {
+        const int blk = row * 2 + col;
+        const PREDICTION_MODE sub_mode = mi->bmi[blk].as_mode;
+
+        if (!intraonly) {
+          ++counts->y_mode[0][sub_mode];
+        } else {
+          const PREDICTION_MODE above =
+              vp10_above_block_mode(mi, above_mi, blk);
+          const PREDICTION_MODE left = vp10_left_block_mode(mi, left_mi, blk);
+          ++counts->kf_y_mode[above][left][sub_mode];
+        }
+      }
+    }
+  }
+
+  ++counts->uv_mode[y_mode][uv_mode];
+}
+
+#if CONFIG_VAR_TX
+// Recursively counts the transform-partition decisions of the current block
+// into counts->txfm_partition and updates the above/left transform contexts.
+//
+// |tx_size| is the transform size being tested at (blk_row, blk_col).
+// Positions outside the visible part of the block are ignored. If the block
+// uses |tx_size| here, a "no split" is counted; otherwise a "split" is
+// counted and, unless |tx_size| is TX_8X8 (which bottoms out at TX_4X4), the
+// four quadrants are visited with the next smaller size.
+static void update_txfm_count(MACROBLOCKD *xd, FRAME_COUNTS *counts,
+                              TX_SIZE tx_size, int blk_row, int blk_col) {
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const int tx_row = blk_row >> 1;
+  const int tx_col = blk_col >> 1;
+  int visible_rows = num_4x4_blocks_high_lookup[mbmi->sb_type];
+  int visible_cols = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+  const int ctx = txfm_partition_context(xd->above_txfm_context + tx_col,
+                                         xd->left_txfm_context + tx_row,
+                                         tx_size);
+  const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
+
+  // Shrink the block extent when it crosses the bottom / right frame edge.
+  if (xd->mb_to_bottom_edge < 0) visible_rows += xd->mb_to_bottom_edge >> 5;
+  if (xd->mb_to_right_edge < 0) visible_cols += xd->mb_to_right_edge >> 5;
+
+  if (blk_row >= visible_rows || blk_col >= visible_cols) return;
+
+  if (tx_size == plane_tx_size) {
+    ++counts->txfm_partition[ctx][0];
+    mbmi->tx_size = tx_size;
+    txfm_partition_update(xd->above_txfm_context + tx_col,
+                          xd->left_txfm_context + tx_row, tx_size);
+    return;
+  }
+
+  ++counts->txfm_partition[ctx][1];
+
+  if (tx_size == TX_8X8) {
+    mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4;
+    mbmi->tx_size = TX_4X4;
+    txfm_partition_update(xd->above_txfm_context + tx_col,
+                          xd->left_txfm_context + tx_row, TX_4X4);
+    return;
+  }
+
+  {
+    const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+    const int half = num_4x4_blocks_high_lookup[bsize] / 2;
+    int qr, qc;
+
+    // Quadrants in raster order: top-left, top-right, bottom-left,
+    // bottom-right.
+    for (qr = 0; qr < 2; ++qr) {
+      for (qc = 0; qc < 2; ++qc) {
+        update_txfm_count(xd, counts, tx_size - 1, blk_row + qr * half,
+                          blk_col + qc * half);
+      }
+    }
+  }
+}
+
+// Collects transform-partition counts for a whole block.
+//
+// Points xd's above/left transform-context pointers at the entries for
+// (mi_row, mi_col), then steps over the block in units of the largest
+// transform size allowed for |plane_bsize| and calls update_txfm_count() on
+// each of those units. Counts are accumulated into |td_counts|.
+static void tx_partition_count_update(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                      BLOCK_SIZE plane_bsize, int mi_row,
+                                      int mi_col, FRAME_COUNTS *td_counts) {
+  // Block dimensions and step, all taken from the 4x4-unit lookup tables.
+  const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+  const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+  const TX_SIZE largest_tx = max_txsize_lookup[plane_bsize];
+  const int step = num_4x4_blocks_wide_lookup[txsize_to_bsize[largest_tx]];
+  int row, col;
+
+  xd->above_txfm_context = cm->above_txfm_context + mi_col;
+  xd->left_txfm_context =
+      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+
+  for (row = 0; row < blocks_high; row += step) {
+    for (col = 0; col < blocks_wide; col += step) {
+      update_txfm_count(xd, td_counts, largest_tx, row, col);
+    }
+  }
+}
+
+// Updates the above/left transform contexts (and mbmi->tx_size) for the
+// transform block at (blk_row, blk_col), recursing into its four quadrants
+// whenever the size stored in mbmi->inter_tx_size differs from |tx_size|.
+//
+// This follows the same traversal as update_txfm_count() but does not touch
+// any counters. blk_row / blk_col are offsets in 4x4 units inside the current
+// block (they are compared against num_4x4_blocks_*_lookup values).
+static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
+                             int blk_col) {
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const int tx_row = blk_row >> 1;
+  const int tx_col = blk_col >> 1;
+  const TX_SIZE coded_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
+  int rows_available = num_4x4_blocks_high_lookup[mbmi->sb_type];
+  int cols_available = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+  int bsl, half, quad_row, quad_col;
+
+  // A negative mb_to_*_edge shrinks the usable extent of the block.
+  // NOTE(review): presumably ">> 5" converts the distance to 4x4 units --
+  // confirm the units of mb_to_*_edge.
+  if (xd->mb_to_bottom_edge < 0) {
+    rows_available += xd->mb_to_bottom_edge >> 5;
+  }
+  if (xd->mb_to_right_edge < 0) {
+    cols_available += xd->mb_to_right_edge >> 5;
+  }
+
+  if (blk_row >= rows_available || blk_col >= cols_available) return;
+
+  if (tx_size == coded_tx_size) {
+    // Leaf: record the size and propagate it into the contexts.
+    mbmi->tx_size = tx_size;
+    txfm_partition_update(xd->above_txfm_context + tx_col,
+                          xd->left_txfm_context + tx_row, tx_size);
+    return;
+  }
+
+  if (tx_size == TX_8X8) {
+    // An 8x8 transform can only split into 4x4, so recursion stops here.
+    mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4;
+    mbmi->tx_size = TX_4X4;
+    txfm_partition_update(xd->above_txfm_context + tx_col,
+                          xd->left_txfm_context + tx_row, TX_4X4);
+    return;
+  }
+
+  bsl = b_width_log2_lookup[txsize_to_bsize[tx_size]];
+  assert(bsl > 0);
+
+  // Quadrants are visited in raster order, each offset by half the width of
+  // the current transform block.
+  half = 1 << (bsl - 1);
+  for (quad_row = 0; quad_row < 2; ++quad_row) {
+    for (quad_col = 0; quad_col < 2; ++quad_col) {
+      set_txfm_context(xd, tx_size - 1, blk_row + quad_row * half,
+                       blk_col + quad_col * half);
+    }
+  }
+}
+
+// Refreshes the transform contexts for a whole block without counting.
+//
+// Points xd's above/left transform-context pointers at the entries for
+// (mi_row, mi_col), then steps over the block in units of the largest
+// transform size allowed for |plane_bsize| and calls set_txfm_context() on
+// each of those units.
+static void tx_partition_set_contexts(VP10_COMMON *cm, MACROBLOCKD *xd,
+                                      BLOCK_SIZE plane_bsize, int mi_row,
+                                      int mi_col) {
+  // Block dimensions and step, all taken from the 4x4-unit lookup tables.
+  const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+  const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+  const TX_SIZE largest_tx = max_txsize_lookup[plane_bsize];
+  const int step = num_4x4_blocks_wide_lookup[txsize_to_bsize[largest_tx]];
+  int row, col;
+
+  xd->above_txfm_context = cm->above_txfm_context + mi_col;
+  xd->left_txfm_context =
+      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+
+  for (row = 0; row < blocks_high; row += step) {
+    for (col = 0; col < blocks_wide; col += step) {
+      set_txfm_context(xd, largest_tx, row, col);
+    }
+  }
+}
+#endif
+
+ // Runs the final encode of one block of size |bsize| at (mi_row, mi_col):
+ // builds the intra or inter prediction, encodes the residual through the
+ // vp10_encode_* calls and tokenizes the result into |*t|.
+ //
+ // cpi: encoder instance; cpi->common supplies the frame-level state.
+ // td: per-thread data; td->mb is the macroblock being coded and
+ // td->counts receives the statistics.
+ // t: token output cursor, forwarded to the tokenizer calls.
+ // output_enabled: when non-zero, symbol statistics are accumulated in
+ // td->counts; its negation is passed to the tokenizers.
+ // NOTE(review): presumably that argument is a dry-run
+ // flag -- confirm against the tokenizer prototypes.
+ // ctx: mode-decision context for this block (is_coded and
+ // pred_pixel_ready are read, is_coded is set to 1).
+static void encode_superblock(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+ int output_enabled, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO **mi_8x8 = xd->mi;
+ MODE_INFO *mi = mi_8x8[0];
+ MB_MODE_INFO *mbmi = &mi->mbmi;
+ const int seg_skip =
+ segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
+ const int mis = cm->mi_stride;
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+
+ // skip_optimize takes the previous value of ctx->is_coded, i.e. whether this
+ // context has already been through this function once.
+ x->skip_optimize = ctx->is_coded;
+ ctx->is_coded = 1;
+ x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
+
+ if (!is_inter_block(mbmi)) {
+ // Intra path. Block sizes below 8x8 are processed as one 8x8 unit
+ // (VPXMAX(bsize, BLOCK_8X8)).
+ int plane;
+ // NOTE(review): skip starts at 1 here; presumably the encode calls clear it
+ // when a plane produces coefficients -- confirm.
+ mbmi->skip = 1;
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+ vp10_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1);
+ if (output_enabled)
+ sum_intra_stats(td->counts, mi, xd->above_mi, xd->left_mi,
+ frame_is_intra_only(cm));
+
+#if CONFIG_EXT_INTRA
+ if (output_enabled && bsize >= BLOCK_8X8) {
+ FRAME_COUNTS *counts = td->counts;
+ // The ext-intra flag is only counted for DC_PRED planes that do not use a
+ // palette.
+ if (mbmi->mode == DC_PRED && mbmi->palette_mode_info.palette_size[0] == 0)
+ ++counts->ext_intra[0][mbmi->ext_intra_mode_info.use_ext_intra_mode[0]];
+ if (mbmi->uv_mode == DC_PRED &&
+ mbmi->palette_mode_info.palette_size[1] == 0)
+ ++counts->ext_intra[1][mbmi->ext_intra_mode_info.use_ext_intra_mode[1]];
+ // The intra filter is only counted for modes other than DC_PRED/TM_PRED
+ // whose prediction angle allows a switchable filter.
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) {
+ int p_angle;
+ const int intra_filter_ctx = vp10_get_pred_context_intra_interp(xd);
+ p_angle =
+ mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+ if (vp10_is_intra_filter_switchable(p_angle))
+ ++counts->intra_filter[intra_filter_ctx][mbmi->intra_filter];
+ }
+ }
+#endif // CONFIG_EXT_INTRA
+
+ if (bsize >= BLOCK_8X8 && output_enabled) {
+ // Planes 0 and 1 only: record the first color index and tokenize the
+ // palette map of every plane that uses a palette.
+ for (plane = 0; plane <= 1; ++plane) {
+ if (mbmi->palette_mode_info.palette_size[plane] > 0) {
+ mbmi->palette_mode_info.palette_first_color_idx[plane] =
+ xd->plane[plane].color_index_map[0];
+ // TODO(huisu): this increases the use of token buffer. Needs stretch
+ // test to verify.
+ vp10_tokenize_palette_sb(td, bsize, plane, t);
+ }
+ }
+ }
+ vp10_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8));
+ } else {
+ // Inter path: set up the reference planes (one or two references), build
+ // the prediction, then encode and tokenize the residual.
+ int ref;
+ const int is_compound = has_second_ref(mbmi);
+
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]);
+ assert(cfg != NULL);
+ vp10_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
+ &xd->block_refs[ref]->sf);
+ }
+ // The luma predictor is rebuilt unless the speed feature allows reuse and
+ // the context already holds predicted pixels; seg_skip always rebuilds it.
+ if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip)
+ vp10_build_inter_predictors_sby(xd, mi_row, mi_col,
+ VPXMAX(bsize, BLOCK_8X8));
+
+ vp10_build_inter_predictors_sbuv(xd, mi_row, mi_col,
+ VPXMAX(bsize, BLOCK_8X8));
+
+#if CONFIG_OBMC
+ if (mbmi->motion_variation == OBMC_CAUSAL) {
+ // Two scratch buffers hold the predictions built from the above and the
+ // left neighbors; each is laid out as MAX_MB_PLANE consecutive planes of
+ // MAX_SB_SQUARE samples. The high-bitdepth build doubles the byte size.
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
+ int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+
+ assert(mbmi->sb_type >= BLOCK_8X8);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ // Plane offsets are scaled by the 16-bit sample size.
+ int len = sizeof(uint16_t);
+ dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
+ dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
+ dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len);
+ dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
+ dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
+ dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len);
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ dst_buf1[0] = tmp_buf1;
+ dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
+ dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2;
+ dst_buf2[0] = tmp_buf2;
+ dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
+ dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2;
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vp10_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1,
+ dst_width1, dst_height1,
+ dst_stride1);
+ vp10_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
+ dst_width2, dst_height2, dst_stride2);
+ // Point the destination planes back at the new frame buffer before the
+ // OBMC prediction is built from the two scratch predictions.
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row,
+ mi_col);
+ vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1,
+ dst_stride1, dst_buf2, dst_stride2);
+ }
+#endif // CONFIG_OBMC
+
+ vp10_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
+#if CONFIG_VAR_TX
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ // Transform sizes at or beyond TX_SIZES use the regular tokenizer instead
+ // of the inter-specific one.
+ if (mbmi->tx_size >= TX_SIZES)
+ vp10_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8));
+ else
+#endif
+ vp10_tokenize_sb_inter(cpi, td, t, !output_enabled, mi_row, mi_col,
+ VPXMAX(bsize, BLOCK_8X8));
+#else
+ vp10_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8));
+#endif
+ }
+
+ if (output_enabled) {
+ // Transform-size statistics. The size is counted as a coded symbol only
+ // with TX_MODE_SELECT, for blocks of at least 8x8 that are not skipped
+ // inter blocks; otherwise a transform size is derived and written to every
+ // mode-info entry covered by the block.
+ if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8 &&
+ !(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) {
+ const int is_inter = is_inter_block(mbmi);
+ const int tx_size_ctx = get_tx_size_context(xd);
+ const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
+ : intra_tx_size_cat_lookup[bsize];
+ const TX_SIZE coded_tx_size = txsize_sqr_up_map[mbmi->tx_size];
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(mbmi)));
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+#if CONFIG_VAR_TX
+ if (is_inter)
+ tx_partition_count_update(cm, xd, bsize, mi_row, mi_col, td->counts);
+#endif
+ ++td->counts->tx_size[tx_size_cat][tx_size_ctx][coded_tx_size];
+ } else {
+ // Note: this local 'x' is a loop index and shadows the MACROBLOCK pointer
+ // 'x' declared at function scope.
+ int x, y;
+ TX_SIZE tx_size;
+ // The new intra coding scheme requires no change of transform size
+ if (is_inter_block(&mi->mbmi)) {
+ if (xd->lossless[mbmi->segment_id]) {
+ tx_size = TX_4X4;
+ } else {
+ tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, 1);
+ }
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ ++td->counts->tx_size_implied[max_txsize_lookup[bsize]]
+ [txsize_sqr_up_map[mbmi->tx_size]];
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+ } else {
+ tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
+ }
+
+ // Only entries that lie inside the frame are written.
+ for (y = 0; y < mi_height; y++)
+ for (x = 0; x < mi_width; x++)
+ if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows)
+ mi_8x8[mis * y + x]->mbmi.tx_size = tx_size;
+ }
+ // One total for the block's own transform size and one for the size
+ // returned by get_uv_tx_size() for plane 1.
+ ++td->counts->tx_size_totals[txsize_sqr_map[mbmi->tx_size]];
+ ++td->counts->tx_size_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
+#if CONFIG_EXT_TX
+ // Transform-type statistics: only counted when more than one type is
+ // available, base_qindex is positive and the block is not skipped.
+ if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter_block(mbmi)) > 1 &&
+ cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ int eset = get_ext_tx_set(mbmi->tx_size, bsize, is_inter_block(mbmi));
+ if (eset > 0) {
+ if (is_inter_block(mbmi)) {
+ ++td->counts->inter_ext_tx[eset][txsize_sqr_map[mbmi->tx_size]]
+ [mbmi->tx_type];
+ } else {
+ ++td->counts
+ ->intra_ext_tx[eset][mbmi->tx_size][mbmi->mode][mbmi->tx_type];
+ }
+ }
+ }
+#else
+ // Transform-type statistics: only counted below TX_32X32, with a positive
+ // base_qindex, for blocks that are not skipped.
+ if (mbmi->tx_size < TX_32X32 && cm->base_qindex > 0 && !mbmi->skip &&
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ if (is_inter_block(mbmi)) {
+ ++td->counts->inter_ext_tx[mbmi->tx_size][mbmi->tx_type];
+ } else {
+ ++td->counts
+ ->intra_ext_tx[mbmi->tx_size][intra_mode_to_tx_type_context
+ [mbmi->mode]][mbmi->tx_type];
+ }
+ }
+#endif // CONFIG_EXT_TX
+ }
+
+#if CONFIG_VAR_TX
+ // Transform-context maintenance. For non-skipped inter blocks of at least
+ // 8x8 under TX_MODE_SELECT the contexts are only refreshed here when
+ // output is disabled (with output enabled, tx_partition_count_update() above
+ // already walked the partition). Every other block gets a single transform
+ // size, which is stored in mbmi and pushed into the contexts.
+ if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8 &&
+ is_inter_block(mbmi) && !(mbmi->skip || seg_skip)) {
+ if (!output_enabled)
+ tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);
+ } else {
+ TX_SIZE tx_size;
+ // The new intra coding scheme requires no change of transform size
+ if (is_inter_block(mbmi))
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ {
+ // Largest size permitted by both the frame tx_mode and the block size;
+ // the rectangular maximum is used when its square counterpart fits, and
+ // lossless segments always use TX_4X4.
+ tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
+ max_txsize_lookup[bsize]);
+ if (txsize_sqr_map[max_txsize_rect_lookup[bsize]] <= tx_size)
+ tx_size = max_txsize_rect_lookup[bsize];
+ if (xd->lossless[mbmi->segment_id]) tx_size = TX_4X4;
+ }
+#else
+ tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
+ max_txsize_lookup[bsize]);
+#endif
+ else
+ tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
+ mbmi->tx_size = tx_size;
+ set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, xd);
+ }
+#endif
+}
+
+#if CONFIG_SUPERTX
+// Returns 1 when the mode stored in |ctx| is not an inter mode (or, with
+// CONFIG_EXT_INTER, when its second reference is INTRA_FRAME); 0 otherwise.
+static int check_intra_b(PICK_MODE_CONTEXT *ctx) {
+  const MB_MODE_INFO *const mbmi = &ctx->mic.mbmi;
+  int is_intra = !is_inter_mode(mbmi->mode);
+#if CONFIG_EXT_INTER
+  // A block whose second reference is intra also counts as intra.
+  is_intra = is_intra || mbmi->ref_frame[1] == INTRA_FRAME;
+#endif  // CONFIG_EXT_INTER
+  return is_intra;
+}
+
+// Returns 1 if any coded block inside the partition tree rooted at |pc_tree|
+// is reported as intra by check_intra_b(), and 0 otherwise. A position that
+// starts outside the frame (mi_row >= cm->mi_rows or mi_col >= cm->mi_cols)
+// also yields 1.
+//
+// (mi_row, mi_col) is the position of the block and |bsize| its size, which
+// must be at least BLOCK_8X8. |tile| is only forwarded to the recursive calls.
+static int check_intra_sb(VP10_COMP *cpi, const TileInfo *const tile,
+                          int mi_row, int mi_col, BLOCK_SIZE bsize,
+                          PC_TREE *pc_tree) {
+  const VP10_COMMON *const cm = &cpi->common;
+  const PARTITION_TYPE partition = pc_tree->partitioning;
+  const BLOCK_SIZE subsize = get_subsize(bsize, partition);
+  const int half_mi = num_8x8_blocks_wide_lookup[bsize] / 2;
+  int k;
+
+  assert(bsize >= BLOCK_8X8);
+
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return 1;
+
+  switch (partition) {
+    case PARTITION_NONE: return check_intra_b(&pc_tree->none);
+    case PARTITION_VERT: {
+      // The second half is only checked when it starts inside the frame and
+      // the block is larger than 8x8.
+      const int has_second =
+          mi_col + half_mi < cm->mi_cols && bsize > BLOCK_8X8;
+      if (check_intra_b(&pc_tree->vertical[0])) return 1;
+      if (has_second && check_intra_b(&pc_tree->vertical[1])) return 1;
+      break;
+    }
+    case PARTITION_HORZ: {
+      const int has_second =
+          mi_row + half_mi < cm->mi_rows && bsize > BLOCK_8X8;
+      if (check_intra_b(&pc_tree->horizontal[0])) return 1;
+      if (has_second && check_intra_b(&pc_tree->horizontal[1])) return 1;
+      break;
+    }
+    case PARTITION_SPLIT:
+      if (bsize == BLOCK_8X8) {
+        // At 8x8 only the first leaf context is inspected.
+        if (check_intra_b(pc_tree->leaf_split[0])) return 1;
+        break;
+      }
+      // Recurse into the four quadrants in raster order.
+      for (k = 0; k < 4; ++k) {
+        const int sub_row = mi_row + (k >> 1) * half_mi;
+        const int sub_col = mi_col + (k & 1) * half_mi;
+        if (check_intra_sb(cpi, tile, sub_row, sub_col, subsize,
+                           pc_tree->split[k]))
+          return 1;
+      }
+      break;
+#if CONFIG_EXT_PARTITION_TYPES
+    case PARTITION_HORZ_A:
+      for (k = 0; k < 3; ++k) {
+        if (check_intra_b(&pc_tree->horizontala[k])) return 1;
+      }
+      break;
+    case PARTITION_HORZ_B:
+      for (k = 0; k < 3; ++k) {
+        if (check_intra_b(&pc_tree->horizontalb[k])) return 1;
+      }
+      break;
+    case PARTITION_VERT_A:
+      for (k = 0; k < 3; ++k) {
+        if (check_intra_b(&pc_tree->verticala[k])) return 1;
+      }
+      break;
+    case PARTITION_VERT_B:
+      for (k = 0; k < 3; ++k) {
+        if (check_intra_b(&pc_tree->verticalb[k])) return 1;
+      }
+      break;
+#endif  // CONFIG_EXT_PARTITION_TYPES
+    default: assert(0);
+  }
+  return 0;
+}
+
+// Returns non-zero when the transform size stored in |ctx| equals
+// |supertx_size|.
+static int check_supertx_b(TX_SIZE supertx_size, PICK_MODE_CONTEXT *ctx) {
+  const TX_SIZE coded_tx_size = ctx->mic.mbmi.tx_size;
+  return coded_tx_size == supertx_size;
+}
+
+// Returns non-zero when the first coded block of the partition tree rooted at
+// |pc_tree| carries a transform size equal to |supertx_size|.
+//
+// Only index 0 of each partition is inspected; for PARTITION_SPLIT above
+// BLOCK_8X8 the check descends into split[0].
+static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size,
+                            PC_TREE *pc_tree) {
+  const PARTITION_TYPE partition = pc_tree->partitioning;
+  const BLOCK_SIZE subsize = get_subsize(bsize, partition);
+  PICK_MODE_CONTEXT *first_ctx = NULL;
+
+  switch (partition) {
+    case PARTITION_NONE: first_ctx = &pc_tree->none; break;
+    case PARTITION_VERT: first_ctx = &pc_tree->vertical[0]; break;
+    case PARTITION_HORZ: first_ctx = &pc_tree->horizontal[0]; break;
+    case PARTITION_SPLIT:
+      if (bsize != BLOCK_8X8) {
+        return check_supertx_sb(subsize, supertx_size, pc_tree->split[0]);
+      }
+      first_ctx = pc_tree->leaf_split[0];
+      break;
+#if CONFIG_EXT_PARTITION_TYPES
+    case PARTITION_HORZ_A: first_ctx = &pc_tree->horizontala[0]; break;
+    case PARTITION_HORZ_B: first_ctx = &pc_tree->horizontalb[0]; break;
+    case PARTITION_VERT_A: first_ctx = &pc_tree->verticala[0]; break;
+    case PARTITION_VERT_B: first_ctx = &pc_tree->verticalb[0]; break;
+#endif  // CONFIG_EXT_PARTITION_TYPES
+    default: assert(0); return 0;
+  }
+
+  return check_supertx_b(supertx_size, first_ctx);
+}
+
+// Builds the inter prediction for one region; used by the supertx code.
+//
+// (mi_row_ori, mi_col_ori): location the motion vector is taken from
+//                           (only present with CONFIG_EXT_INTER).
+// (mi_row_pred, mi_col_pred, bsize_pred): region to predict.
+// b_sub8x8: non-zero selects the sub-8x8 predictor, which additionally takes
+//           |block| as the sub-block index.
+static void predict_superblock(VP10_COMP *cpi, ThreadData *td,
+#if CONFIG_EXT_INTER
+                               int mi_row_ori, int mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+                               int mi_row_pred, int mi_col_pred,
+                               BLOCK_SIZE bsize_pred, int b_sub8x8, int block) {
+  VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &td->mb.e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const int num_refs = 1 + has_second_ref(mbmi);
+  int ref_idx;
+
+  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+
+  // Point the "pre" planes of every reference in use at the predicted region.
+  for (ref_idx = 0; ref_idx < num_refs; ++ref_idx) {
+    YV12_BUFFER_CONFIG *ref_buf =
+        get_ref_frame_buffer(cpi, mbmi->ref_frame[ref_idx]);
+    vp10_setup_pre_planes(xd, ref_idx, ref_buf, mi_row_pred, mi_col_pred,
+                          &xd->block_refs[ref_idx]->sf);
+  }
+
+  if (b_sub8x8) {
+    vp10_build_inter_predictors_sb_sub8x8_extend(xd,
+#if CONFIG_EXT_INTER
+                                                 mi_row_ori, mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+                                                 mi_row_pred, mi_col_pred,
+                                                 bsize_pred, block);
+  } else {
+    vp10_build_inter_predictors_sb_extend(xd,
+#if CONFIG_EXT_INTER
+                                          mi_row_ori, mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+                                          mi_row_pred, mi_col_pred, bsize_pred);
+  }
+}
+
+// Predicts one region into the caller-supplied buffers; used by the supertx
+// code.
+//
+// (mi_row_ori, mi_col_ori): location the motion vector is taken from.
+// (mi_row_pred, mi_col_pred, bsize_pred): region to predict.
+// (mi_row_top, mi_col_top, bsize_top): region of the top partition size.
+// block: sub location of sub8x8 blocks.
+// b_sub8x8: 1: ori is sub8x8; 0: ori is not sub8x8.
+// bextend: 1: region to predict is an extension of ori; 0: not.
+// dst_buf / dst_stride: per-plane destination buffers, addressed relative to
+//                       the top-left corner of the top region.
+//
+// Nothing is done when the region to predict starts outside the top region or
+// outside the frame. Statistics are updated only when |output_enabled| is set
+// and the region is not an extension.
+static void predict_b_extend(VP10_COMP *cpi, ThreadData *td,
+                             const TileInfo *const tile, int block,
+                             int mi_row_ori, int mi_col_ori, int mi_row_pred,
+                             int mi_col_pred, int mi_row_top, int mi_col_top,
+                             uint8_t *dst_buf[3], int dst_stride[3],
+                             BLOCK_SIZE bsize_top, BLOCK_SIZE bsize_pred,
+                             int output_enabled, int b_sub8x8, int bextend) {
+  VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &td->mb.e_mbd;
+  const int top_rows = num_8x8_blocks_high_lookup[bsize_top];
+  const int top_cols = num_8x8_blocks_wide_lookup[bsize_top];
+  // Luma pixel offset of the predicted region inside the top region.
+  const int row_offset = (mi_row_pred - mi_row_top) * MI_SIZE;
+  const int col_offset = (mi_col_pred - mi_col_top) * MI_SIZE;
+  const int inside_top =
+      mi_row_pred >= mi_row_top && mi_col_pred >= mi_col_top &&
+      mi_row_pred < mi_row_top + top_rows &&
+      mi_col_pred < mi_col_top + top_cols;
+  const int inside_frame =
+      mi_row_pred < cm->mi_rows && mi_col_pred < cm->mi_cols;
+  int plane;
+
+  if (!inside_top || !inside_frame) return;
+
+  set_offsets_extend(cpi, td, tile, mi_row_pred, mi_col_pred, mi_row_ori,
+                     mi_col_ori, bsize_pred);
+
+  // Redirect every plane's destination to the matching position in dst_buf,
+  // scaling the luma offsets by that plane's subsampling.
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    struct macroblockd_plane *const pd = &xd->plane[plane];
+    pd->dst.stride = dst_stride[plane];
+    pd->dst.buf = dst_buf[plane] +
+                  (row_offset >> pd->subsampling_y) * dst_stride[plane] +
+                  (col_offset >> pd->subsampling_x);
+  }
+
+  predict_superblock(cpi, td,
+#if CONFIG_EXT_INTER
+                     mi_row_ori, mi_col_ori,
+#endif  // CONFIG_EXT_INTER
+                     mi_row_pred, mi_col_pred, bsize_pred, b_sub8x8, block);
+
+  if (output_enabled && !bextend) update_stats(cm, td, 1);
+}
+
+// Extends the prediction of the block at (mi_row, mi_col) by one strip of
+// neighboring units in the direction |dir|, using the block's own motion.
+//
+// dir: 0-lower, 1-upper, 2-left, 3-right
+//      4-lowerleft, 5-upperleft, 6-lowerright, 7-upperright
+//
+// For the four edge directions the strip is predicted in units of
+// |extend_bsize| along the block edge; each corner direction is a single 8x8
+// unit. All predictions go through predict_b_extend() with bextend = 1.
+static void extend_dir(VP10_COMP *cpi, ThreadData *td,
+                       const TileInfo *const tile, int block, BLOCK_SIZE bsize,
+                       BLOCK_SIZE top_bsize, int mi_row, int mi_col,
+                       int mi_row_top, int mi_col_top, int output_enabled,
+                       uint8_t *dst_buf[3], int dst_stride[3], int dir) {
+  MACROBLOCKD *xd = &td->mb.e_mbd;
+  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+  const int mi_height = num_8x8_blocks_high_lookup[bsize];
+  const int xss = xd->plane[1].subsampling_x;
+  const int yss = xd->plane[1].subsampling_y;
+  const int is_sub8x8 = bsize < BLOCK_8X8;
+  BLOCK_SIZE extend_bsize;
+  int unit, num_units, k;
+
+  switch (dir) {
+    case 0:
+    case 1: {  // lower and upper
+      const int row_pred = mi_row + ((dir == 0) ? mi_height : -1);
+      if (mi_width == 1 || is_sub8x8 || xss < yss) {
+        extend_bsize = BLOCK_8X8;
+      } else {
+        extend_bsize = BLOCK_16X8;
+      }
+      unit = num_8x8_blocks_wide_lookup[extend_bsize];
+      // One unit at the block's first column, plus further units when the
+      // block is wider than a single unit.
+      num_units = (mi_width > unit) ? mi_width / unit : 1;
+      for (k = 0; k < num_units; ++k) {
+        predict_b_extend(cpi, td, tile, block, mi_row, mi_col, row_pred,
+                         mi_col + k * unit, mi_row_top, mi_col_top, dst_buf,
+                         dst_stride, top_bsize, extend_bsize, output_enabled,
+                         is_sub8x8, 1);
+      }
+      break;
+    }
+    case 2:
+    case 3: {  // left and right
+      const int col_pred = mi_col + ((dir == 3) ? mi_width : -1);
+      if (mi_height == 1 || is_sub8x8 || yss < xss) {
+        extend_bsize = BLOCK_8X8;
+      } else {
+        extend_bsize = BLOCK_8X16;
+      }
+      unit = num_8x8_blocks_high_lookup[extend_bsize];
+      // One unit at the block's first row, plus further units when the block
+      // is taller than a single unit.
+      num_units = (mi_height > unit) ? mi_height / unit : 1;
+      for (k = 0; k < num_units; ++k) {
+        predict_b_extend(cpi, td, tile, block, mi_row, mi_col,
+                         mi_row + k * unit, col_pred, mi_row_top, mi_col_top,
+                         dst_buf, dst_stride, top_bsize, extend_bsize,
+                         output_enabled, is_sub8x8, 1);
+      }
+      break;
+    }
+    default: {  // corners
+      const int row_pred = mi_row + ((dir == 4 || dir == 6) ? mi_height : -1);
+      const int col_pred = mi_col + ((dir == 6 || dir == 7) ? mi_width : -1);
+      extend_bsize = BLOCK_8X8;
+      predict_b_extend(cpi, td, tile, block, mi_row, mi_col, row_pred,
+                       col_pred, mi_row_top, mi_col_top, dst_buf, dst_stride,
+                       top_bsize, extend_bsize, output_enabled, is_sub8x8, 1);
+      break;
+    }
+  }
+}
+
+// Extends the prediction of the block at (mi_row, mi_col) in all eight
+// directions understood by extend_dir(), visiting them in order 0 through 7.
+// |block| must be in the range [0, 3].
+static void extend_all(VP10_COMP *cpi, ThreadData *td,
+                       const TileInfo *const tile, int block, BLOCK_SIZE bsize,
+                       BLOCK_SIZE top_bsize, int mi_row, int mi_col,
+                       int mi_row_top, int mi_col_top, int output_enabled,
+                       uint8_t *dst_buf[3], int dst_stride[3]) {
+  int dir;
+
+  assert(block >= 0 && block < 4);
+
+  for (dir = 0; dir < 8; ++dir) {
+    extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col,
+               mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride,
+               dir);
+  }
+}
+
+// This function generates the prediction for multiple blocks, between which
+// discontinuities around the boundaries are reduced by smoothing masks. The
+// basic smoothing mask is a soft step function along the horz/vert direction.
+// In the more complicated case where a block is split into 4 subblocks, the
+// basic mask is first applied to neighboring subblocks (2 pairs) in the
+// horizontal direction and then applied to the 2 masked predictions mentioned
+// above in the vertical direction. If the block is split into more than one
+// level, at every stage the masked prediction is stored in the dst_buf[]
+// passed from the higher level.
+static void predict_sb_complex(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile, int mi_row,
+ int mi_col, int mi_row_top, int mi_col_top,
+ int output_enabled, BLOCK_SIZE bsize,
+ BLOCK_SIZE top_bsize, uint8_t *dst_buf[3],
+ int dst_stride[3], PC_TREE *pc_tree) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+
+ const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
+ const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
+ const PARTITION_TYPE partition = pc_tree->partitioning;
+ const BLOCK_SIZE subsize = get_subsize(bsize, partition);
+#if CONFIG_EXT_PARTITION_TYPES
+ const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT);
+#endif
+
+ int i;
+ uint8_t *dst_buf1[3], *dst_buf2[3], *dst_buf3[3];
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf3[MAX_MB_PLANE * MAX_TX_SQUARE * 2]);
+ int dst_stride1[3] = { MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE };
+ int dst_stride2[3] = { MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE };
+ int dst_stride3[3] = { MAX_TX_SIZE, MAX_TX_SIZE, MAX_TX_SIZE };
+
+ assert(bsize >= BLOCK_8X8);
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int len = sizeof(uint16_t);
+ dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
+ dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_TX_SQUARE * len);
+ dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_TX_SQUARE * len);
+ dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
+ dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_TX_SQUARE * len);
+ dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_TX_SQUARE * len);
+ dst_buf3[0] = CONVERT_TO_BYTEPTR(tmp_buf3);
+ dst_buf3[1] = CONVERT_TO_BYTEPTR(tmp_buf3 + MAX_TX_SQUARE * len);
+ dst_buf3[2] = CONVERT_TO_BYTEPTR(tmp_buf3 + 2 * MAX_TX_SQUARE * len);
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ dst_buf1[0] = tmp_buf1;
+ dst_buf1[1] = tmp_buf1 + MAX_TX_SQUARE;
+ dst_buf1[2] = tmp_buf1 + 2 * MAX_TX_SQUARE;
+ dst_buf2[0] = tmp_buf2;
+ dst_buf2[1] = tmp_buf2 + MAX_TX_SQUARE;
+ dst_buf2[2] = tmp_buf2 + 2 * MAX_TX_SQUARE;
+ dst_buf3[0] = tmp_buf3;
+ dst_buf3[1] = tmp_buf3 + MAX_TX_SQUARE;
+ dst_buf3[2] = tmp_buf3 + 2 * MAX_TX_SQUARE;
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ if (output_enabled && bsize < top_bsize)
+ cm->counts.partition[ctx][partition]++;
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ }
+
+ switch (partition) {
+ case PARTITION_NONE:
+ assert(bsize < top_bsize);
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
+ bsize, output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, mi_col, mi_row_top,
+ mi_col_top, output_enabled, dst_buf, dst_stride);
+ break;
+ case PARTITION_HORZ:
+ if (bsize == BLOCK_8X8) {
+ // First half
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
+ BLOCK_8X8, output_enabled, 1, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf,
+ dst_stride);
+
+ // Second half
+ predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ top_bsize, BLOCK_8X8, output_enabled, 1, 1);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf1,
+ dst_stride1);
+
+ // Smooth
+ xd->plane[0].dst.buf = dst_buf[0];
+ xd->plane[0].dst.stride = dst_stride[0];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[0], dst_stride[0], dst_buf1[0], dst_stride1[0], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_HORZ,
+ 0);
+ } else {
+ // First half
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
+ subsize, output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf,
+ dst_stride);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf,
+ dst_stride, 0);
+
+ if (mi_row + hbs < cm->mi_rows) {
+ // Second half
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1, top_bsize, subsize, output_enabled, 0,
+ 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, output_enabled, dst_buf1,
+ dst_stride1);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, output_enabled, dst_buf1,
+ dst_stride1, 1);
+
+ // Smooth
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ }
+ }
+ break;
+ case PARTITION_VERT:
+ if (bsize == BLOCK_8X8) {
+ // First half
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
+ BLOCK_8X8, output_enabled, 1, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf,
+ dst_stride);
+
+ // Second half
+ predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ top_bsize, BLOCK_8X8, output_enabled, 1, 1);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf1,
+ dst_stride1);
+
+ // Smooth
+ xd->plane[0].dst.buf = dst_buf[0];
+ xd->plane[0].dst.stride = dst_stride[0];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[0], dst_stride[0], dst_buf1[0], dst_stride1[0], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_VERT,
+ 0);
+ } else {
+ // bsize: not important, not useful
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
+ subsize, output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf,
+ dst_stride);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf,
+ dst_stride, 3);
+
+ if (mi_col + hbs < cm->mi_cols) {
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1, top_bsize, subsize, output_enabled, 0,
+ 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, output_enabled,
+ dst_buf1, dst_stride1);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, output_enabled,
+ dst_buf1, dst_stride1, 2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+ }
+ }
+ break;
+ case PARTITION_SPLIT:
+ if (bsize == BLOCK_8X8) {
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
+ BLOCK_8X8, output_enabled, 1, 0);
+ predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ top_bsize, BLOCK_8X8, output_enabled, 1, 1);
+ predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf2, dst_stride2,
+ top_bsize, BLOCK_8X8, output_enabled, 1, 1);
+ predict_b_extend(cpi, td, tile, 3, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf3, dst_stride3,
+ top_bsize, BLOCK_8X8, output_enabled, 1, 1);
+
+ if (bsize < top_bsize) {
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf,
+ dst_stride);
+ extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf1,
+ dst_stride1);
+ extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf2,
+ dst_stride2);
+ extend_all(cpi, td, tile, 3, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf3,
+ dst_stride3);
+ }
+ } else {
+ predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row_top,
+ mi_col_top, output_enabled, subsize, top_bsize,
+ dst_buf, dst_stride, pc_tree->split[0]);
+ if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+ predict_sb_complex(cpi, td, tile, mi_row, mi_col + hbs, mi_row_top,
+ mi_col_top, output_enabled, subsize, top_bsize,
+ dst_buf1, dst_stride1, pc_tree->split[1]);
+ if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols)
+ predict_sb_complex(cpi, td, tile, mi_row + hbs, mi_col, mi_row_top,
+ mi_col_top, output_enabled, subsize, top_bsize,
+ dst_buf2, dst_stride2, pc_tree->split[2]);
+ if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols)
+ predict_sb_complex(cpi, td, tile, mi_row + hbs, mi_col + hbs,
+ mi_row_top, mi_col_top, output_enabled, subsize,
+ top_bsize, dst_buf3, dst_stride3,
+ pc_tree->split[3]);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ if (bsize == BLOCK_8X8 && i != 0)
+ continue; // Skip <4x4 chroma smoothing
+ if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_VERT, i);
+ if (mi_row + hbs < cm->mi_rows) {
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf2[i], dst_stride2[i], dst_buf3[i], dst_stride3[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_VERT, i);
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) {
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ }
+ break;
+#if CONFIG_EXT_PARTITION_TYPES
+ case PARTITION_HORZ_A:
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
+ bsize2, output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1, top_bsize, bsize2, output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, output_enabled, dst_buf1, dst_stride1);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf2, dst_stride2,
+ top_bsize, subsize, output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf2,
+ dst_stride2);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf2,
+ dst_stride2, 1);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_VERT,
+ i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_HORZ,
+ i);
+ }
+
+ break;
+ case PARTITION_VERT_A:
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
+ bsize2, output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ top_bsize, bsize2, output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf1, dst_stride1);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf2,
+ dst_stride2, top_bsize, subsize, output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, output_enabled, dst_buf2,
+ dst_stride2);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, output_enabled, dst_buf2,
+ dst_stride2, 2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_HORZ,
+ i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_VERT,
+ i);
+ }
+ break;
+ case PARTITION_HORZ_B:
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
+ subsize, output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride,
+ 0);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, mi_row + hbs,
+ mi_col, mi_row_top, mi_col_top, dst_buf1, dst_stride1,
+ top_bsize, bsize2, output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row + hbs, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf1, dst_stride1);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col + hbs,
+ mi_row + hbs, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2, top_bsize, bsize2, output_enabled,
+ 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row + hbs,
+ mi_col + hbs, mi_row_top, mi_col_top, output_enabled, dst_buf2,
+ dst_stride2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf1[i];
+ xd->plane[i].dst.stride = dst_stride1[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf1[i], dst_stride1[i], dst_buf2[i], dst_stride2[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_VERT, i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_HORZ,
+ i);
+ }
+ break;
+ case PARTITION_VERT_B:
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col,
+ mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize,
+ subsize, output_enabled, 0, 0);
+ if (bsize < top_bsize)
+ extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride);
+ else
+ extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col,
+ mi_row_top, mi_col_top, output_enabled, dst_buf, dst_stride,
+ 3);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row,
+ mi_col + hbs, mi_row_top, mi_col_top, dst_buf1,
+ dst_stride1, top_bsize, bsize2, output_enabled, 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row, mi_col + hbs,
+ mi_row_top, mi_col_top, output_enabled, dst_buf1, dst_stride1);
+
+ predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col + hbs,
+ mi_row + hbs, mi_col + hbs, mi_row_top, mi_col_top,
+ dst_buf2, dst_stride2, top_bsize, bsize2, output_enabled,
+ 0, 0);
+ extend_all(cpi, td, tile, 0, bsize2, top_bsize, mi_row + hbs,
+ mi_col + hbs, mi_row_top, mi_col_top, output_enabled, dst_buf2,
+ dst_stride2);
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf1[i];
+ xd->plane[i].dst.stride = dst_stride1[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf1[i], dst_stride1[i], dst_buf2[i], dst_stride2[i],
+ mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize,
+ PARTITION_HORZ, i);
+ }
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = dst_buf[i];
+ xd->plane[i].dst.stride = dst_stride[i];
+ vp10_build_masked_inter_predictor_complex(
+ xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], mi_row,
+ mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_VERT,
+ i);
+ }
+ break;
+#endif // CONFIG_EXT_PARTITION_TYPES
+ default: assert(0);
+ }
+
+#if CONFIG_EXT_PARTITION_TYPES
+ if (bsize < top_bsize)
+ update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
+#else
+ if (bsize < top_bsize && (partition != PARTITION_SPLIT || bsize == BLOCK_8X8))
+ update_partition_context(xd, mi_row, mi_col, subsize, bsize);
+#endif // CONFIG_EXT_PARTITION_TYPES
+}
+
+// Rate-distortion evaluation of coding the whole `bsize` block at
+// (mi_row, mi_col) with a single transform per plane (the "supertx" mode this
+// file section is compiled for).
+//
+// The prediction for the partition tree `pc_tree` is built with
+// predict_sb_complex(), chroma rate/distortion is accumulated once, and then
+// every permitted luma transform type is tried in turn.
+//
+// In/out:
+//   *tmp_rate - on entry, a base rate that is added to every candidate;
+//               on return, the rate of the selected candidate.
+// Out:
+//   *tmp_dist - distortion of the selected candidate.
+//   *best_tx  - selected transform type (initialised to DCT_DCT).
+// Side effects visible here: x->skip is set to the selected candidate's skip
+// decision, and mbmi->tx_type is restored to the value it had before the
+// transform-type loop.
+static void rd_supertx_sb(VP10_COMP *cpi, ThreadData *td,
+ const TileInfo *const tile, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int *tmp_rate, int64_t *tmp_dist,
+ TX_TYPE *best_tx, PC_TREE *pc_tree) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int plane, pnskip, skippable, skippable_uv, rate_uv, this_rate,
+ base_rate = *tmp_rate;
+ int64_t sse, pnsse, sse_uv, this_dist, dist_uv;
+ uint8_t *dst_buf[3];
+ int dst_stride[3];
+ TX_SIZE tx_size;
+ MB_MODE_INFO *mbmi;
+ TX_TYPE tx_type, best_tx_nostx;
+#if CONFIG_EXT_TX
+ int ext_tx_set;
+#endif // CONFIG_EXT_TX
+ // Rate / distortion / skip flag of the best candidate found so far.
+ int tmp_rate_tx = 0, skip_tx = 0;
+ int64_t tmp_dist_tx = 0, rd_tx, bestrd_tx = INT64_MAX;
+
+ set_skip_context(xd, mi_row, mi_col);
+ set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
+ update_state_sb_supertx(cpi, td, tile, mi_row, mi_col, bsize, 0, pc_tree);
+ // Point the destination planes at the new frame buffer and remember those
+ // pointers/strides; they are handed to predict_sb_complex() as its output.
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+ for (plane = 0; plane < MAX_MB_PLANE; plane++) {
+ dst_buf[plane] = xd->plane[plane].dst.buf;
+ dst_stride[plane] = xd->plane[plane].dst.stride;
+ }
+ // Top-level call: the "top" position and size are the block itself, and
+ // output_enabled is 0.
+ predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row, mi_col, 0, bsize,
+ bsize, dst_buf, dst_stride, pc_tree);
+
+ set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
+ set_segment_id_supertx(cpi, x, mi_row, mi_col, bsize);
+
+ mbmi = &xd->mi[0]->mbmi;
+ // Saved so the search below can overwrite mbmi->tx_type and restore it at
+ // the end of the function.
+ best_tx_nostx = mbmi->tx_type;
+
+ *best_tx = DCT_DCT;
+
+ // chroma
+ // Evaluated once, outside the transform-type loop; the totals are added to
+ // every luma candidate below.
+ skippable_uv = 1;
+ rate_uv = 0;
+ dist_uv = 0;
+ sse_uv = 0;
+ for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_VAR_TX
+ ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
+ ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ int coeff_ctx = 1;
+
+ this_rate = 0;
+ this_dist = 0;
+ pnsse = 0;
+ pnskip = 1;
+
+ // Largest transform for bsize, mapped to its chroma size for the frame's
+ // subsampling.
+ tx_size = max_txsize_lookup[bsize];
+ tx_size = get_uv_tx_size_impl(tx_size, bsize, cm->subsampling_x,
+ cm->subsampling_y);
+ vp10_get_entropy_contexts(bsize, tx_size, pd, ctxa, ctxl);
+ coeff_ctx = combine_entropy_contexts(ctxa[0], ctxl[0]);
+
+ vp10_subtract_plane(x, bsize, plane);
+ vp10_tx_block_rd_b(cpi, x, tx_size, 0, 0, plane, 0,
+ get_plane_block_size(bsize, pd), coeff_ctx, &this_rate,
+ &this_dist, &pnsse, &pnskip);
+#else
+ tx_size = max_txsize_lookup[bsize];
+ tx_size = get_uv_tx_size_impl(tx_size, bsize, cm->subsampling_x,
+ cm->subsampling_y);
+ vp10_subtract_plane(x, bsize, plane);
+ vp10_txfm_rd_in_plane_supertx(x, cpi, &this_rate, &this_dist, &pnskip,
+ &pnsse, INT64_MAX, plane, bsize, tx_size, 0);
+#endif // CONFIG_VAR_TX
+
+ rate_uv += this_rate;
+ dist_uv += this_dist;
+ sse_uv += pnsse;
+ skippable_uv &= pnskip;
+ }
+
+ // luma
+ tx_size = max_txsize_lookup[bsize];
+ vp10_subtract_plane(x, bsize, 0);
+#if CONFIG_EXT_TX
+ ext_tx_set = get_ext_tx_set(tx_size, bsize, 1);
+#endif // CONFIG_EXT_TX
+ // Try every transform type allowed for this transform size / block size.
+ for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+#if CONFIG_VAR_TX
+ ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
+ ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
+ const struct macroblockd_plane *const pd = &xd->plane[0];
+ int coeff_ctx = 1;
+#endif // CONFIG_VAR_TX
+#if CONFIG_EXT_TX
+ if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
+#else
+ if (tx_size >= TX_32X32 && tx_type != DCT_DCT) continue;
+#endif // CONFIG_EXT_TX
+ // The RD helpers below read the candidate type from mbmi.
+ mbmi->tx_type = tx_type;
+
+#if CONFIG_VAR_TX
+ this_rate = 0;
+ this_dist = 0;
+ pnsse = 0;
+ pnskip = 1;
+
+ vp10_get_entropy_contexts(bsize, tx_size, pd, ctxa, ctxl);
+ coeff_ctx = combine_entropy_contexts(ctxa[0], ctxl[0]);
+ vp10_tx_block_rd_b(cpi, x, tx_size, 0, 0, 0, 0, bsize, coeff_ctx,
+ &this_rate, &this_dist, &pnsse, &pnskip);
+#else
+ vp10_txfm_rd_in_plane_supertx(x, cpi, &this_rate, &this_dist, &pnskip,
+ &pnsse, INT64_MAX, 0, bsize, tx_size, 0);
+#endif // CONFIG_VAR_TX
+
+ // Add the cost of signalling the transform type, except for lossless
+ // segments or when the luma rate is already INT_MAX.
+#if CONFIG_EXT_TX
+ // NOTE(review): the cost table is indexed with mbmi->tx_size here, while
+ // the rest of this loop uses the local tx_size - confirm they agree.
+ if (get_ext_tx_types(tx_size, bsize, 1) > 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id] && this_rate != INT_MAX) {
+ if (ext_tx_set > 0)
+ this_rate +=
+ cpi->inter_tx_type_costs[ext_tx_set][mbmi->tx_size][mbmi->tx_type];
+ }
+#else
+ if (tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+ this_rate != INT_MAX) {
+ this_rate += cpi->inter_tx_type_costs[tx_size][mbmi->tx_type];
+ }
+#endif // CONFIG_EXT_TX
+ *tmp_rate = rate_uv + this_rate;
+ *tmp_dist = dist_uv + this_dist;
+ sse = sse_uv + pnsse;
+ skippable = skippable_uv && pnskip;
+ if (skippable) {
+ // Every plane reported skippable: only the skip flag is costed.
+ *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ x->skip = 1;
+ } else {
+ // Compare coding the residual against skipping it (zero rate, distortion
+ // equal to the accumulated sse) and keep the cheaper option.
+ if (RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist) <
+ RDCOST(x->rdmult, x->rddiv, 0, sse)) {
+ *tmp_rate += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
+ x->skip = 0;
+ } else {
+ *tmp_dist = sse;
+ *tmp_rate = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ x->skip = 1;
+ }
+ }
+ *tmp_rate += base_rate;
+ rd_tx = RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist);
+ // DCT_DCT is accepted unconditionally whenever it is evaluated; any other
+ // type must be below 99% of the best cost so far. The comparison is done
+ // in floating point because of the 0.99 factor.
+ if (rd_tx < bestrd_tx * 0.99 || tx_type == DCT_DCT) {
+ *best_tx = tx_type;
+ bestrd_tx = rd_tx;
+ tmp_rate_tx = *tmp_rate;
+ tmp_dist_tx = *tmp_dist;
+ skip_tx = x->skip;
+ }
+ }
+ // Report the winning candidate rather than the last one evaluated.
+ *tmp_rate = tmp_rate_tx;
+ *tmp_dist = tmp_dist_tx;
+ x->skip = skip_tx;
+#if CONFIG_VAR_TX
+ // The loop bound is 1, so only plane 0's blk_skip entries are written.
+ for (plane = 0; plane < 1; ++plane)
+ memset(x->blk_skip[plane], x->skip,
+ sizeof(uint8_t) * pc_tree->none.num_4x4_blk);
+#endif // CONFIG_VAR_TX
+ // Undo the per-candidate overwrite of mbmi->tx_type; the selected type is
+ // returned through *best_tx instead.
+ xd->mi[0]->mbmi.tx_type = best_tx_nostx;
+}
+#endif // CONFIG_SUPERTX
diff --git a/av1/encoder/encodeframe.h b/av1/encoder/encodeframe.h
new file mode 100644
index 0000000..338cb86
--- /dev/null
+++ b/av1/encoder/encodeframe.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_ENCODEFRAME_H_
+#define VP10_ENCODER_ENCODEFRAME_H_
+
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct macroblock;
+struct yv12_buffer_config;
+struct VP10_COMP;
+struct ThreadData;
+
+// Constants used in SOURCE_VAR_BASED_PARTITION
+#define VAR_HIST_MAX_BG_VAR 1000
+#define VAR_HIST_FACTOR 10
+#define VAR_HIST_BINS (VAR_HIST_MAX_BG_VAR / VAR_HIST_FACTOR + 1)
+#define VAR_HIST_LARGE_CUT_OFF 75
+#define VAR_HIST_SMALL_CUT_OFF 45
+
+void vp10_setup_src_planes(struct macroblock *x,
+ const struct yv12_buffer_config *src, int mi_row,
+ int mi_col);
+
+void vp10_encode_frame(struct VP10_COMP *cpi);
+
+void vp10_init_tile_data(struct VP10_COMP *cpi);
+void vp10_encode_tile(struct VP10_COMP *cpi, struct ThreadData *td,
+ int tile_row, int tile_col);
+
+void vp10_set_variance_partition_thresholds(struct VP10_COMP *cpi, int q);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_ENCODEFRAME_H_
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
new file mode 100644
index 0000000..e72db2d
--- /dev/null
+++ b/av1/encoder/encodemb.c
@@ -0,0 +1,1158 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "aom_dsp/quantize.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+
+#include "av1/common/idct.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/reconintra.h"
+#include "av1/common/scan.h"
+
+#include "av1/encoder/encodemb.h"
+#include "av1/encoder/hybrid_fwd_txfm.h"
+#include "av1/encoder/quantize.h"
+#include "av1/encoder/rd.h"
+#include "av1/encoder/tokenize.h"
+
+// Fills the residual buffer (src_diff) of one plane for a block of size
+// `bsize`.
+//
+// The plane-level block size is derived from `bsize` and the plane's
+// descriptor; the source and destination buffers of that plane are then
+// handed to vpx_subtract_block(), or to vpx_highbd_subtract_block() when the
+// current buffer is flagged YV12_FLAG_HIGHBITDEPTH. The residual is stored
+// with a row stride equal to the block width.
+void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
+  struct macroblock_plane *const enc_plane = &x->plane[plane];
+  const struct macroblockd_plane *const dec_plane = &x->e_mbd.plane[plane];
+  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, dec_plane);
+  // The lookups count 4-sample units; convert to samples.
+  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    vpx_highbd_subtract_block(height, width, enc_plane->src_diff, width,
+                              enc_plane->src.buf, enc_plane->src.stride,
+                              dec_plane->dst.buf, dec_plane->dst.stride,
+                              x->e_mbd.bd);
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  vpx_subtract_block(height, width, enc_plane->src_diff, width,
+                     enc_plane->src.buf, enc_plane->src.stride,
+                     dec_plane->dst.buf, dec_plane->dst.stride);
+}
+
+// One candidate state of the coefficient trellis built by vp10_optimize_b().
+// Two of these are kept per scan position.
+typedef struct vp10_token_state {
+ // Accumulated rate of the path from this node to the end of the block.
+ int rate;
+ // Accumulated squared error of the same path.
+ int64_t error;
+ // Scan index of the following trellis node.
+ int next;
+ // Token for this state (EOB_TOKEN for the sentinel node).
+ int16_t token;
+ // Quantized coefficient value for this state.
+ tran_low_t qc;
+ // Dequantized coefficient value matching qc.
+ tran_low_t dqc;
+} vp10_token_state;
+
+// These numbers are empirically obtained.
+// Indexed [ref][plane type]; vp10_optimize_b() multiplies mb->rdmult by the
+// selected entry and halves the product to get its trellis RD multiplier.
+static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
+ { 10, 6 }, { 8, 5 },
+};
+
+#define UPDATE_RD_COST() \
+ { \
+ rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \
+ rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \
+ }
+
+// Trellis optimization of the quantized coefficients of one transform block.
+//
+// The nonzero coefficients are visited from scan position eob - 1 back to 0.
+// Each one gets two candidate states: state 0 keeps the current quantized
+// value; state 1 moves it one step toward zero when `shortcut` is set for
+// that coefficient, and is otherwise a copy of state 0. The path with the
+// lower RDCOST is written back to qcoeff/dqcoeff and the block's eob is
+// updated.
+//
+// mb      - encoder macroblock state (coefficients, token costs, rdmult/rddiv)
+// plane   - plane index
+// block   - coefficient-block index within the plane
+// tx_size - transform size of the block
+// ctx     - entropy context used to cost the first token of the block; with
+//           CONFIG_NEW_QUANT it also selects the dequantization profile
+//
+// Returns the new end-of-block position, which is also stored in
+// mb->plane[plane].eobs[block].
+int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
+ int ctx) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ struct macroblock_plane *const p = &mb->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int ref = is_inter_block(&xd->mi[0]->mbmi);
+ // tokens[i][s] / best_index[i][s]: state s (0 or 1) at scan position i.
+ // Index eob holds the sentinel node.
+ vp10_token_state tokens[MAX_TX_SQUARE + 1][2];
+ unsigned best_index[MAX_TX_SQUARE + 1][2];
+ uint8_t token_cache[MAX_TX_SQUARE];
+ const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
+ tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ const int eob = p->eobs[block];
+ const PLANE_TYPE type = pd->plane_type;
+ const int default_eob = get_tx2d_size(tx_size);
+ const int16_t *const dequant_ptr = pd->dequant;
+ const uint8_t *const band_translate = get_band_translate(tx_size);
+ TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
+ const scan_order *const so =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+ const int16_t *const scan = so->scan;
+ const int16_t *const nb = so->neighbors;
+#if CONFIG_AOM_QM
+ int seg_id = xd->mi[0]->mbmi.segment_id;
+ int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
+ const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
+#endif
+ const int shift = get_tx_scale(xd, tx_type, tx_size);
+#if CONFIG_NEW_QUANT
+ int dq = get_dq_profile_from_ctx(ctx);
+ const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
+#else
+ // Dequantizer steps (index 0 and index 1 of dequant_ptr) scaled down by
+ // the transform scale.
+ const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
+#endif // CONFIG_NEW_QUANT
+ int next = eob, sz = 0;
+ // RD multiplier scaled by the per-(ref, plane type) factor, then halved.
+ const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][type]) >> 1;
+ const int64_t rddiv = mb->rddiv;
+ int64_t rd_cost0, rd_cost1;
+ int rate0, rate1;
+ int64_t error0, error1;
+ int16_t t0, t1;
+ int best, band = (eob < default_eob) ? band_translate[eob]
+ : band_translate[eob - 1];
+ int pt, i, final_eob;
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
+#else
+ const int *cat6_high_cost = vp10_get_high_cost_table(8);
+#endif
+ unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
+ mb->token_costs[txsize_sqr_map[tx_size]][type][ref];
+ // band_counts / band_left track how many positions remain in the current
+ // coefficient band while walking backwards.
+ const uint16_t *band_counts = &band_count_table[tx_size][band];
+ uint16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
+ // shortcut: set while processing a position; read on the next iteration
+ // (as next_shortcut) to decide whether both successor states are costed.
+ int shortcut = 0;
+ int next_shortcut = 0;
+
+ // Start at the cost table of the band containing the last coefficient.
+ token_costs += band;
+
+ assert((!type && !plane) || (type && plane));
+ assert(eob <= default_eob);
+
+ /* Now set up a Viterbi trellis to evaluate alternative roundings. */
+ /* Initialize the sentinel node of the trellis. */
+ // The sentinel's dqc field is not assigned here.
+ tokens[eob][0].rate = 0;
+ tokens[eob][0].error = 0;
+ tokens[eob][0].next = default_eob;
+ tokens[eob][0].token = EOB_TOKEN;
+ tokens[eob][0].qc = 0;
+ tokens[eob][1] = tokens[eob][0];
+
+ // Pre-compute the token and its cost for every coefficient as currently
+ // quantized, and seed token_cache with each token's energy class.
+ for (i = 0; i < eob; i++) {
+ const int rc = scan[i];
+ tokens[i][0].rate = vp10_get_token_cost(qcoeff[rc], &t0, cat6_high_cost);
+ tokens[i][0].token = t0;
+ token_cache[rc] = vp10_pt_energy_class[t0];
+ }
+
+ // Visit scan positions eob - 1 down to 0.
+ for (i = eob; i-- > 0;) {
+ int base_bits, dx;
+ int64_t d2;
+ const int rc = scan[i];
+#if CONFIG_AOM_QM
+ int iwt = iqmatrix[rc];
+#endif
+ int x = qcoeff[rc];
+ next_shortcut = shortcut;
+
+ /* Only add a trellis state for non-zero coefficients. */
+ if (UNLIKELY(x)) {
+ error0 = tokens[next][0].error;
+ error1 = tokens[next][1].error;
+ /* Evaluate the first possibility for this state. */
+ rate0 = tokens[next][0].rate;
+ rate1 = tokens[next][1].rate;
+
+ if (next_shortcut) {
+ /* Consider both possible successor states. */
+ if (next < default_eob) {
+ pt = get_coef_context(nb, token_cache, i + 1);
+ rate0 += (*token_costs)[0][pt][tokens[next][0].token];
+ rate1 += (*token_costs)[0][pt][tokens[next][1].token];
+ }
+ UPDATE_RD_COST();
+ /* And pick the best. */
+ best = rd_cost1 < rd_cost0;
+ } else {
+ // Only successor state 0 is costed and chosen on this path.
+ if (next < default_eob) {
+ pt = get_coef_context(nb, token_cache, i + 1);
+ rate0 += (*token_costs)[0][pt][tokens[next][0].token];
+ }
+ best = 0;
+ }
+
+ // Error of keeping the current value: dequantized minus original
+ // coefficient, scaled up by the transform scale.
+ dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ dx >>= xd->bd - 8;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ d2 = (int64_t)dx * dx;
+ tokens[i][0].rate += (best ? rate1 : rate0);
+ tokens[i][0].error = d2 + (best ? error1 : error0);
+ tokens[i][0].next = next;
+ tokens[i][0].qc = x;
+ tokens[i][0].dqc = dqcoeff[rc];
+ best_index[i][0] = best;
+
+ /* Evaluate the second possibility for this state. */
+ rate0 = tokens[next][0].rate;
+ rate1 = tokens[next][1].rate;
+
+ // The threshold of 3 is empirically obtained.
+ if (UNLIKELY(abs(x) > 3)) {
+ shortcut = 0;
+ } else {
+ // shortcut is set only when the dequantized magnitude for |x| lies
+ // strictly above the scaled original magnitude while the value one
+ // step lower lies strictly below it.
+#if CONFIG_NEW_QUANT
+ shortcut = ((vp10_dequant_abscoeff_nuq(abs(x), dequant_ptr[rc != 0],
+ dequant_val[band_translate[i]]) >
+ (abs(coeff[rc]) << shift)) &&
+ (vp10_dequant_abscoeff_nuq(abs(x) - 1, dequant_ptr[rc != 0],
+ dequant_val[band_translate[i]]) <
+ (abs(coeff[rc]) << shift)));
+#else // CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+ if ((abs(x) * dequant_ptr[rc != 0] * iwt >
+ ((abs(coeff[rc]) << shift) << AOM_QM_BITS)) &&
+ (abs(x) * dequant_ptr[rc != 0] * iwt <
+ (((abs(coeff[rc]) << shift) + dequant_ptr[rc != 0])
+ << AOM_QM_BITS)))
+#else
+ if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
+ (abs(x) * dequant_ptr[rc != 0] <
+ (abs(coeff[rc]) << shift) + dequant_ptr[rc != 0]))
+#endif // CONFIG_AOM_QM
+ shortcut = 1;
+ else
+ shortcut = 0;
+#endif // CONFIG_NEW_QUANT
+ }
+
+ if (shortcut) {
+ // sz is 0 for positive x and -1 for negative x, so this moves x one
+ // step toward zero.
+ sz = -(x < 0);
+ x -= 2 * sz + 1;
+ } else {
+ // No alternative value is tried: state 1 duplicates state 0.
+ tokens[i][1] = tokens[i][0];
+ best_index[i][1] = best_index[i][0];
+ next = i;
+
+ // Same band bookkeeping as at the bottom of the loop, repeated here
+ // because of the early continue.
+ if (UNLIKELY(!(--band_left))) {
+ --band_counts;
+ band_left = *band_counts;
+ --token_costs;
+ }
+ continue;
+ }
+
+ /* Consider both possible successor states. */
+ if (!x) {
+ /* If we reduced this coefficient to zero, check to see if
+ * we need to move the EOB back here.
+ */
+ t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
+ t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
+ base_bits = 0;
+ } else {
+ base_bits = vp10_get_token_cost(x, &t0, cat6_high_cost);
+ t1 = t0;
+ }
+
+ if (next_shortcut) {
+ if (LIKELY(next < default_eob)) {
+ // token_cache[rc] is overwritten with the candidate token's energy
+ // class before the successor's context is looked up.
+ if (t0 != EOB_TOKEN) {
+ token_cache[rc] = vp10_pt_energy_class[t0];
+ pt = get_coef_context(nb, token_cache, i + 1);
+ rate0 += (*token_costs)[!x][pt][tokens[next][0].token];
+ }
+ if (t1 != EOB_TOKEN) {
+ token_cache[rc] = vp10_pt_energy_class[t1];
+ pt = get_coef_context(nb, token_cache, i + 1);
+ rate1 += (*token_costs)[!x][pt][tokens[next][1].token];
+ }
+ }
+
+ UPDATE_RD_COST();
+ /* And pick the best. */
+ best = rd_cost1 < rd_cost0;
+ } else {
+ // The two states in next stage are identical.
+ if (next < default_eob && t0 != EOB_TOKEN) {
+ token_cache[rc] = vp10_pt_energy_class[t0];
+ pt = get_coef_context(nb, token_cache, i + 1);
+ rate0 += (*token_costs)[!x][pt][tokens[next][0].token];
+ }
+ best = 0;
+ }
+
+#if CONFIG_NEW_QUANT
+ dx = vp10_dequant_coeff_nuq(x, dequant_ptr[rc != 0],
+ dequant_val[band_translate[i]]) -
+ (coeff[rc] << shift);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ dx >>= xd->bd - 8;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#else // CONFIG_NEW_QUANT
+ // dx still holds the state-0 error from above. It is adjusted by one
+ // dequantizer step in the direction of the sign: (v + sz) ^ sz yields v
+ // when sz == 0 and -v when sz == -1.
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
+ } else {
+ dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
+ }
+#else
+ dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_NEW_QUANT
+ d2 = (int64_t)dx * dx;
+
+ tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
+ tokens[i][1].error = d2 + (best ? error1 : error0);
+ tokens[i][1].next = next;
+ tokens[i][1].token = best ? t1 : t0;
+ tokens[i][1].qc = x;
+
+ // Dequantized value that goes with the reduced coefficient.
+ if (x) {
+#if CONFIG_NEW_QUANT
+ tokens[i][1].dqc = vp10_dequant_abscoeff_nuq(
+ abs(x), dequant_ptr[rc != 0], dequant_val[band_translate[i]]);
+ tokens[i][1].dqc = shift ? ROUND_POWER_OF_TWO(tokens[i][1].dqc, shift)
+ : tokens[i][1].dqc;
+ if (sz) tokens[i][1].dqc = -tokens[i][1].dqc;
+#else
+ tran_low_t offset = dq_step[rc != 0];
+ // The 32x32 transform coefficient uses half quantization step size.
+ // Account for the rounding difference in the dequantized coefficient
+ // value when the quantization index is dropped from an even number
+ // to an odd number.
+ if (shift & x) offset += (dequant_ptr[rc != 0] & 0x01);
+
+ if (sz == 0)
+ tokens[i][1].dqc = dqcoeff[rc] - offset;
+ else
+ tokens[i][1].dqc = dqcoeff[rc] + offset;
+#endif // CONFIG_NEW_QUANT
+ } else {
+ tokens[i][1].dqc = 0;
+ }
+
+ best_index[i][1] = best;
+ /* Finally, make this the new head of the trellis. */
+ next = i;
+ } else {
+ /* There's no choice to make for a zero coefficient, so we don't
+ * add a new trellis node, but we do need to update the costs.
+ */
+ t0 = tokens[next][0].token;
+ t1 = tokens[next][1].token;
+ pt = get_coef_context(nb, token_cache, i + 1);
+ /* Update the cost of each path if we're past the EOB token. */
+ if (t0 != EOB_TOKEN) {
+ tokens[next][0].rate += (*token_costs)[1][pt][t0];
+ tokens[next][0].token = ZERO_TOKEN;
+ }
+ if (t1 != EOB_TOKEN) {
+ tokens[next][1].rate += (*token_costs)[1][pt][t1];
+ tokens[next][1].token = ZERO_TOKEN;
+ }
+ best_index[i][0] = best_index[i][1] = 0;
+ // The successor's two states are treated as distinct only if their
+ // rates now differ.
+ shortcut = (tokens[next][0].rate != tokens[next][1].rate);
+ /* Don't update next, because we didn't add a new node. */
+ }
+
+ // When the current band is exhausted, step back to the previous band's
+ // count and cost table.
+ if (UNLIKELY(!(--band_left))) {
+ --band_counts;
+ band_left = *band_counts;
+ --token_costs;
+ }
+ }
+
+ /* Now pick the best path through the whole trellis. */
+ rate0 = tokens[next][0].rate;
+ rate1 = tokens[next][1].rate;
+ error0 = tokens[next][0].error;
+ error1 = tokens[next][1].error;
+ t0 = tokens[next][0].token;
+ t1 = tokens[next][1].token;
+ // The first token is costed with the caller-supplied context.
+ rate0 += (*token_costs)[0][ctx][t0];
+ rate1 += (*token_costs)[0][ctx][t1];
+ UPDATE_RD_COST();
+ best = rd_cost1 < rd_cost0;
+
+ final_eob = -1;
+
+ // Follow the chosen path forward, writing the selected values back and
+ // recording the position of the last nonzero coefficient.
+ for (i = next; i < eob; i = next) {
+ const int x = tokens[i][best].qc;
+ const int rc = scan[i];
+#if CONFIG_AOM_QM
+ // NOTE(review): iwt and dequant are computed here but nothing in this
+ // loop body reads dequant - confirm whether they are still needed.
+ const int iwt = iqmatrix[rc];
+ const int dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+#endif
+
+ if (x) final_eob = i;
+ qcoeff[rc] = x;
+ dqcoeff[rc] = tokens[i][best].dqc;
+
+ next = tokens[i][best].next;
+ best = best_index[i][best];
+ }
+ // Convert the index of the last nonzero coefficient into a count.
+ final_eob++;
+
+ mb->plane[plane].eobs[block] = final_eob;
+ assert(final_eob <= default_eob);
+ return final_eob;
+}
+
+// Quantizer dispatch table used by vp10_xform_quant(). The first index is
+// the VP10_XFORM_QUANT value passed as xform_quant_idx, the second a
+// QUANT_FUNC column. The high-bitdepth column exists only when
+// CONFIG_VP9_HIGHBITDEPTH is enabled.
+// NOTE(review): the all-NULL last row is presumably the
+// VP10_XFORM_QUANT_SKIP_QUANT entry, for which vp10_xform_quant() never calls
+// through the table - confirm against the enum order.
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef enum QUANT_FUNC {
+ QUANT_FUNC_LOWBD = 0,
+ QUANT_FUNC_HIGHBD = 1,
+ QUANT_FUNC_LAST = 2
+} QUANT_FUNC;
+
+static VP10_QUANT_FACADE
+ quant_func_list[VP10_XFORM_QUANT_LAST][QUANT_FUNC_LAST] = {
+ { vp10_quantize_fp_facade, vp10_highbd_quantize_fp_facade },
+ { vp10_quantize_b_facade, vp10_highbd_quantize_b_facade },
+ { vp10_quantize_dc_facade, vp10_highbd_quantize_dc_facade },
+ { NULL, NULL }
+ };
+
+#else
+typedef enum QUANT_FUNC {
+ QUANT_FUNC_LOWBD = 0,
+ QUANT_FUNC_LAST = 1
+} QUANT_FUNC;
+
+static VP10_QUANT_FACADE
+ quant_func_list[VP10_XFORM_QUANT_LAST][QUANT_FUNC_LAST] = {
+ { vp10_quantize_fp_facade },
+ { vp10_quantize_b_facade },
+ { vp10_quantize_dc_facade },
+ { NULL }
+ };
+#endif
+
+// Forward-transform option for each VP10_XFORM_QUANT index; vp10_xform_quant()
+// copies the selected entry into fwd_txfm_param.fwd_txfm_opt.
+// NOTE(review): entries presumably line up with the rows of quant_func_list
+// (the third, FWD_TXFM_OPT_DC, pairing with the dc quantizer) - confirm.
+static FWD_TXFM_OPT fwd_txfm_opt_list[VP10_XFORM_QUANT_LAST] = {
+ FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_DC, FWD_TXFM_OPT_NORMAL
+};
+
+// Forward-transforms the residual of one transform block and, unless
+// xform_quant_idx is VP10_XFORM_QUANT_SKIP_QUANT, quantizes the result.
+//
+//   x               encoder macroblock state; the residual is read from
+//                   plane[plane].src_diff and the outputs go to that plane's
+//                   coeff / qcoeff / dqcoeff / eobs entries for `block`.
+//   plane, block    plane index and coefficient-block index within it.
+//   blk_row/blk_col block position inside the plane, in units of 4 samples.
+//   plane_bsize     plane block size; determines the residual row stride.
+//   tx_size         transform size.
+//   xform_quant_idx selects the quantizer facade and the forward-transform
+//                   option.
+//
+// When x->skip_block is set, vp10_quantize_skip() is called instead of the
+// selected quantizer. With CONFIG_VP9_HIGHBITDEPTH, buffers flagged
+// YV12_FLAG_HIGHBITDEPTH take the high-bitdepth transform and quantizer.
+void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
+                      int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+                      VP10_XFORM_QUANT xform_quant_idx) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const PLANE_TYPE plane_type = plane ? PLANE_TYPE_UV : PLANE_TYPE_Y;
+  const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+  const scan_order *const so =
+      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  // Residual rows are as wide as the plane block.
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+#if CONFIG_AOM_QM
+  const int seg_id = xd->mi[0]->mbmi.segment_id;
+  const int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
+  const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size];
+  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
+#endif
+  const int16_t *const src_diff =
+      &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+  const int tx2d_size = get_tx2d_size(tx_size);
+  const int quantize = (xform_quant_idx != VP10_XFORM_QUANT_SKIP_QUANT);
+
+  FWD_TXFM_PARAM fwd_txfm_param;
+  QUANT_PARAM qparam;
+
+  fwd_txfm_param.tx_type = tx_type;
+  fwd_txfm_param.tx_size = tx_size;
+  fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[xform_quant_idx];
+  fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+  fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+  qparam.log_scale = get_tx_scale(xd, tx_type, tx_size);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  fwd_txfm_param.bd = xd->bd;
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+    if (!quantize) return;
+    if (LIKELY(!x->skip_block)) {
+      quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD](
+          coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, so, &qparam
+#if CONFIG_AOM_QM
+          ,
+          qmatrix, iqmatrix
+#endif  // CONFIG_AOM_QM
+          );
+    } else {
+      vp10_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
+    }
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+  if (!quantize) return;
+  if (LIKELY(!x->skip_block)) {
+    quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD](
+        coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, so, &qparam
+#if CONFIG_AOM_QM
+        ,
+        qmatrix, iqmatrix
+#endif  // CONFIG_AOM_QM
+        );
+  } else {
+    vp10_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
+  }
+}
+// vp10_xform_quant_nuq: forward transform, then the *_nuq quantizer kernels.
+#if CONFIG_NEW_QUANT
+void vp10_xform_quant_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
+ int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ int ctx) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const scan_order *const scan_order =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+ tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+ tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ int dq = get_dq_profile_from_ctx(ctx);  // selects the bins/dequant tables
+ uint16_t *const eob = &p->eobs[block];
+ const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+ const int16_t *src_diff;
+ const uint8_t *band = get_band_translate(tx_size);
+
+ FWD_TXFM_PARAM fwd_txfm_param;
+
+ fwd_txfm_param.tx_type = tx_type;
+ fwd_txfm_param.tx_size = tx_size;
+ fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_FP];
+ fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+ fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+ // blk_row/blk_col are scaled by 4 to index the block's first residual sample.
+ src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+// TODO(sarahparker) add all of these new quant quantize functions
+// to quant_func_list, just trying to get this expr to work for now
+#if CONFIG_VP9_HIGHBITDEPTH
+ fwd_txfm_param.bd = xd->bd;
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+ if (tx_size == TX_32X32) {
+ highbd_quantize_32x32_nuq(
+ coeff, get_tx2d_size(tx_size), x->skip_block, p->quant,
+ p->quant_shift, pd->dequant,
+ (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff,
+ dqcoeff, eob, scan_order->scan, band);
+ } else {
+ highbd_quantize_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant, p->quant_shift, pd->dequant,
+ (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
+ qcoeff, dqcoeff, eob, scan_order->scan, band);
+ }
+ return;  // high bit-depth buffers are fully handled in this branch
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // Low bit-depth path; the count comes from get_tx2d_size(), not a literal.
+ fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+ if (tx_size == TX_32X32) {
+ quantize_32x32_nuq(coeff, get_tx2d_size(tx_size), x->skip_block, p->quant,
+ p->quant_shift, pd->dequant,
+ (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
+ qcoeff, dqcoeff, eob, scan_order->scan, band);
+ } else {
+ quantize_nuq(coeff, get_tx2d_size(tx_size), x->skip_block, p->quant,
+ p->quant_shift, pd->dequant,
+ (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff,
+ dqcoeff, eob, scan_order->scan, band);
+ }
+}
+// Forward transform, then the *_fp_nuq quantizer kernels (uses p->quant_fp).
+void vp10_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
+ int blk_col, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, int ctx) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ int dq = get_dq_profile_from_ctx(ctx);  // selects the bins/dequant tables
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const scan_order *const scan_order =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+ tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+ tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ uint16_t *const eob = &p->eobs[block];
+ const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+ const int16_t *src_diff;
+ const uint8_t *band = get_band_translate(tx_size);
+
+ FWD_TXFM_PARAM fwd_txfm_param;
+
+ fwd_txfm_param.tx_type = tx_type;
+ fwd_txfm_param.tx_size = tx_size;
+ fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_FP];
+ fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+ fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+ // blk_row/blk_col are scaled by 4 to index the block's first residual sample.
+ src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+// TODO(sarahparker) add all of these new quant quantize functions
+// to quant_func_list, just trying to get this expr to work for now
+#if CONFIG_VP9_HIGHBITDEPTH
+ fwd_txfm_param.bd = xd->bd;
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+ if (tx_size == TX_32X32) {
+ highbd_quantize_32x32_fp_nuq(
+ coeff, get_tx2d_size(tx_size), x->skip_block, p->quant_fp,
+ pd->dequant, (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff,
+ dqcoeff, eob, scan_order->scan, band);
+ } else {
+ highbd_quantize_fp_nuq(
+ coeff, get_tx2d_size(tx_size), x->skip_block, p->quant_fp,
+ pd->dequant, (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff,
+ dqcoeff, eob, scan_order->scan, band);
+ }
+ return;  // high bit-depth buffers are fully handled in this branch
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // Low bit-depth path; TX_32X32 has its own kernel, other sizes share one.
+ fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+ if (tx_size == TX_32X32) {
+ quantize_32x32_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant_fp, pd->dequant,
+ (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
+ qcoeff, dqcoeff, eob, scan_order->scan, band);
+ } else {
+ quantize_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block, p->quant_fp,
+ pd->dequant,
+ (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+ (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
+ qcoeff, dqcoeff, eob, scan_order->scan, band);
+ }
+}
+// Transform with the VP10_XFORM_QUANT_DC option, then *_dc_nuq quantizers.
+void vp10_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
+ int blk_col, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, int ctx) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+ tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ uint16_t *const eob = &p->eobs[block];
+ const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+ const int16_t *src_diff;
+ int dq = get_dq_profile_from_ctx(ctx);  // selects the bins/dequant tables
+
+ FWD_TXFM_PARAM fwd_txfm_param;
+
+ fwd_txfm_param.tx_type = tx_type;
+ fwd_txfm_param.tx_size = tx_size;
+ fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_DC];
+ fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+ fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+ // blk_row/blk_col are scaled by 4 to index the block's first residual sample.
+ src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+// TODO(sarahparker) add all of these new quant quantize functions
+// to quant_func_list, just trying to get this expr to work for now
+#if CONFIG_VP9_HIGHBITDEPTH
+ fwd_txfm_param.bd = xd->bd;
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+ if (tx_size == TX_32X32) {
+ highbd_quantize_dc_32x32_nuq(
+ coeff, get_tx2d_size(tx_size), x->skip_block, p->quant[0],
+ p->quant_shift[0], pd->dequant[0], p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob);
+ } else {
+ highbd_quantize_dc_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant[0], p->quant_shift[0], pd->dequant[0],
+ p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob);
+ }
+ return;  // high bit-depth buffers are fully handled in this branch
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // Low bit-depth path; only index-0 quantizer/dequantizer entries are passed.
+ fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+ if (tx_size == TX_32X32) {
+ quantize_dc_32x32_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant[0], p->quant_shift[0], pd->dequant[0],
+ p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0],
+ qcoeff, dqcoeff, eob);
+ } else {
+ quantize_dc_nuq(coeff, get_tx2d_size(tx_size), x->skip_block, p->quant[0],
+ p->quant_shift[0], pd->dequant[0], p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob);
+ }
+}
+// Transform with the VP10_XFORM_QUANT_DC option, then *_dc_fp_nuq quantizers.
+void vp10_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,
+ int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ int ctx) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+ tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ uint16_t *const eob = &p->eobs[block];
+ const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+ const int16_t *src_diff;
+ int dq = get_dq_profile_from_ctx(ctx);  // selects the bins/dequant tables
+
+ FWD_TXFM_PARAM fwd_txfm_param;
+
+ fwd_txfm_param.tx_type = tx_type;
+ fwd_txfm_param.tx_size = tx_size;
+ fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_DC];
+ fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+ fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+ // blk_row/blk_col are scaled by 4 to index the block's first residual sample.
+ src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+// TODO(sarahparker) add all of these new quant quantize functions
+// to quant_func_list, just trying to get this expr to work for now
+#if CONFIG_VP9_HIGHBITDEPTH
+ fwd_txfm_param.bd = xd->bd;
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+ if (tx_size == TX_32X32) {
+ highbd_quantize_dc_32x32_fp_nuq(
+ coeff, get_tx2d_size(tx_size), x->skip_block, p->quant_fp[0],
+ pd->dequant[0], p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0],
+ qcoeff, dqcoeff, eob);
+ } else {
+ highbd_quantize_dc_fp_nuq(
+ coeff, get_tx2d_size(tx_size), x->skip_block, p->quant_fp[0],
+ pd->dequant[0], p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0],
+ qcoeff, dqcoeff, eob);
+ }
+ return;  // high bit-depth buffers are fully handled in this branch
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // Low bit-depth path; only index-0 quantizer/dequantizer entries are passed.
+ fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+ if (tx_size == TX_32X32) {
+ quantize_dc_32x32_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant_fp[0], pd->dequant[0],
+ p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob);
+ } else {
+ quantize_dc_fp_nuq(coeff, get_tx2d_size(tx_size), x->skip_block,
+ p->quant_fp[0], pd->dequant[0], p->cuml_bins_nuq[dq][0],
+ pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob);
+ }
+}
+#endif // CONFIG_NEW_QUANT
+// Per-block callback: transform/quantize, optimize, update a/l, reconstruct.
+static void encode_block(int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
+ struct encode_b_args *const args = arg;
+ MACROBLOCK *const x = args->x;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int ctx;
+ struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ uint8_t *dst;
+ ENTROPY_CONTEXT *a, *l;
+ INV_TXFM_PARAM inv_txfm_param;
+#if CONFIG_VAR_TX
+ int i;
+ const int bwl = b_width_log2_lookup[plane_bsize];
+#endif
+ dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
+ a = &args->ta[blk_col];
+ l = &args->tl[blk_row];
+#if CONFIG_VAR_TX
+ ctx = get_entropy_context(tx_size, a, l);
+#else
+ ctx = combine_entropy_contexts(*a, *l);
+#endif
+
+#if CONFIG_VAR_TX
+ // Assert not magic number (uninitialised).
+ assert(x->blk_skip[plane][(blk_row << bwl) + blk_col] != 234);
+ // A non-zero blk_skip entry bypasses the transform and forces eob to 0.
+ if (x->blk_skip[plane][(blk_row << bwl) + blk_col] == 0) {
+#else
+ {
+#endif
+#if CONFIG_NEW_QUANT
+ vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, ctx);
+#else
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ VP10_XFORM_QUANT_FP);
+#endif // CONFIG_NEW_QUANT
+ }
+#if CONFIG_VAR_TX
+ else {
+ p->eobs[block] = 0;
+ }
+#endif
+ // vp10_optimize_b runs only when the quantizer left some coefficients.
+ if (p->eobs[block]) {
+ *a = *l = vp10_optimize_b(x, plane, block, tx_size, ctx) > 0;
+ } else {
+ *a = *l = p->eobs[block] > 0;
+ }
+ // Copy the new context into every a[]/l[] entry the transform spans.
+#if CONFIG_VAR_TX
+ for (i = 0; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i) {
+ a[i] = a[0];
+ }
+ for (i = 0; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i) {
+ l[i] = l[0];
+ }
+#endif
+ // Any remaining coefficient clears the caller's skip flag.
+ if (p->eobs[block]) *(args->skip) = 0;
+ // Nothing to reconstruct when the block has no coefficients.
+ if (p->eobs[block] == 0) return;
+
+ // inverse transform parameters
+ inv_txfm_param.tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
+ inv_txfm_param.tx_size = tx_size;
+ inv_txfm_param.eob = p->eobs[block];
+ inv_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ inv_txfm_param.bd = xd->bd;
+ highbd_inv_txfm_add(dqcoeff, dst, pd->dst.stride, &inv_txfm_param);
+ return;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ inv_txfm_add(dqcoeff, dst, pd->dst.stride, &inv_txfm_param);
+}
+// Recurses over transform sizes and calls encode_block at the matching size.
+#if CONFIG_VAR_TX
+static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ void *arg) {
+ struct encode_b_args *const args = arg;
+ MACROBLOCK *const x = args->x;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ TX_SIZE plane_tx_size;
+
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+ // A negative mb_to_*_edge reduces the number of 4x4 rows/columns visited.
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+ // Non-zero planes derive their size from inter_tx_size via the uv helper.
+ plane_tx_size = plane ? get_uv_tx_size_impl(
+ mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0)
+ : mbmi->inter_tx_size[tx_row][tx_col];
+
+ if (tx_size == plane_tx_size) {
+ encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
+ } else {
+ int bsl = b_width_log2_lookup[bsize];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+#if CONFIG_EXT_TX
+ assert(tx_size < TX_SIZES);
+#endif // CONFIG_EXT_TX
+ // Visit the four quadrants, each at tx_size - 1.
+ for (i = 0; i < 4; ++i) {
+ const int offsetr = blk_row + ((i >> 1) << bsl);
+ const int offsetc = blk_col + ((i & 0x01) << bsl);
+ int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
+
+ if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
+
+ encode_block_inter(plane, block + i * step, offsetr, offsetc, plane_bsize,
+ tx_size - 1, arg);
+ }
+ }
+}
+#endif
+// Pass-1 callback: transform/quantize, then 4x4 inverse transform into dst.
+static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ void *arg) {
+ MACROBLOCK *const x = (MACROBLOCK *)arg;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ uint8_t *dst;
+#if CONFIG_NEW_QUANT
+ int ctx;
+#endif // CONFIG_NEW_QUANT
+ dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
+
+#if CONFIG_NEW_QUANT
+ ctx = 0;
+ vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, ctx);
+#else
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ VP10_XFORM_QUANT_B);
+#endif // CONFIG_NEW_QUANT
+ // Reconstruct only when coefficients remain; lossless segments use iwht4x4.
+ if (p->eobs[block] > 0) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ vp10_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
+ xd->bd);
+ } else {
+ vp10_highbd_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
+ xd->bd);
+ }
+ return;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ vp10_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+ } else {
+ vp10_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+ }
+ }
+}
+// Runs vp10_subtract_plane on plane 0, then encode_block_pass1 over its blocks.
+void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
+ vp10_subtract_plane(x, bsize, 0);
+ vp10_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
+ encode_block_pass1, x);
+}
+// Residual-codes every plane of a block; returns early when x->skip is set.
+void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct optimize_ctx ctx;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ struct encode_b_args arg = { x, &ctx, &mbmi->skip, NULL, NULL, 1 };
+ int plane;
+ // Start as skipped; encode_block clears the flag through arg.skip.
+ mbmi->skip = 1;
+
+ if (x->skip) return;
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_VAR_TX
+ // TODO(jingning): Clean this up.
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ const int bh = num_4x4_blocks_wide_lookup[txb_size];  // step for idy and idx
+ int idx, idy;
+ int block = 0;
+ int step = num_4x4_blocks_txsize_lookup[max_tx_size];
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
+#endif
+ vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctx.ta[plane], ctx.tl[plane]);
+#else
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
+ vp10_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
+#endif
+ vp10_subtract_plane(x, bsize, plane);
+ arg.ta = ctx.ta[plane];
+ arg.tl = ctx.tl[plane];
+ // VAR_TX walks max_tx_size units through encode_block_inter.
+#if CONFIG_VAR_TX
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ if (tx_size >= TX_SIZES) {
+ vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
+ &arg);
+ } else {
+#endif
+ for (idy = 0; idy < mi_height; idy += bh) {
+ for (idx = 0; idx < mi_width; idx += bh) {
+ encode_block_inter(plane, block, idy, idx, plane_bsize, max_tx_size,
+ &arg);
+ block += step;
+ }
+ }
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ }
+#endif
+#else
+ vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
+ &arg);
+#endif
+ }
+}
+// SUPERTX variant of vp10_encode_sb: every plane goes through encode_block.
+#if CONFIG_SUPERTX
+void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ struct optimize_ctx ctx;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ struct encode_b_args arg = { x, &ctx, &mbmi->skip, NULL, NULL, 1 };
+ int plane;
+ // Start as skipped; encode_block clears the flag through arg.skip.
+ mbmi->skip = 1;
+ if (x->skip) return;
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+#if CONFIG_VAR_TX
+ const TX_SIZE tx_size = TX_4X4;
+#else
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
+#endif
+ vp10_subtract_plane(x, bsize, plane);
+ vp10_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
+ arg.ta = ctx.ta[plane];
+ arg.tl = ctx.tl[plane];
+ vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
+ &arg);
+ }
+}
+#endif // CONFIG_SUPERTX
+// Intra-predicts, transforms, quantizes and reconstructs one transform block.
+void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ void *arg) {
+ struct encode_b_args *const args = arg;
+ MACROBLOCK *const x = args->x;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ PREDICTION_MODE mode;
+ const int bwl = b_width_log2_lookup[plane_bsize];
+ const int bhl = b_height_log2_lookup[plane_bsize];
+ const int diff_stride = 4 * (1 << bwl);
+ uint8_t *src, *dst;
+ int16_t *src_diff;
+ uint16_t *eob = &p->eobs[block];
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+ const int tx1d_width = num_4x4_blocks_wide_txsize_lookup[tx_size] << 2;
+ const int tx1d_height = num_4x4_blocks_high_txsize_lookup[tx_size] << 2;
+ ENTROPY_CONTEXT *a = NULL, *l = NULL;
+ int ctx;
+
+ INV_TXFM_PARAM inv_txfm_param;
+
+ assert(tx1d_width == tx1d_height);
+
+ dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
+ src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
+ src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+ // Predict with dst as both reference and output, then fill src_diff.
+ mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
+ vp10_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride, dst,
+ dst_stride, blk_col, blk_row, plane);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vpx_highbd_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride,
+ src, src_stride, dst, dst_stride, xd->bd);
+ } else {
+ vpx_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride, src,
+ src_stride, dst, dst_stride);
+ }
+#else
+ vpx_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride, src,
+ src_stride, dst, dst_stride);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // NOTE: *a/*l are read here on every call; ctx is only used when optimizing.
+ a = &args->ta[blk_col];
+ l = &args->tl[blk_row];
+ ctx = combine_entropy_contexts(*a, *l);
+
+ if (args->enable_optimize_b) {
+#if CONFIG_NEW_QUANT
+ vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, ctx);
+#else // CONFIG_NEW_QUANT
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ VP10_XFORM_QUANT_FP);
+#endif // CONFIG_NEW_QUANT
+ if (p->eobs[block]) {
+ *a = *l = vp10_optimize_b(x, plane, block, tx_size, ctx) > 0;
+ } else {
+ *a = *l = 0;
+ }
+ } else {
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ VP10_XFORM_QUANT_B);
+ *a = *l = p->eobs[block] > 0;
+ }
+
+ if (*eob) {
+ // inverse transform
+ inv_txfm_param.tx_type = tx_type;
+ inv_txfm_param.tx_size = tx_size;
+ inv_txfm_param.eob = *eob;
+ inv_txfm_param.lossless = xd->lossless[mbmi->segment_id];
+#if CONFIG_VP9_HIGHBITDEPTH
+ inv_txfm_param.bd = xd->bd;
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ highbd_inv_txfm_add(dqcoeff, dst, dst_stride, &inv_txfm_param);
+ } else {
+ inv_txfm_add(dqcoeff, dst, dst_stride, &inv_txfm_param);
+ }
+#else
+ inv_txfm_add(dqcoeff, dst, dst_stride, &inv_txfm_param);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // A remaining coefficient means the block is not skipped.
+ *(args->skip) = 0;
+ }
+}
+// Intra-encodes one plane of a block through vp10_encode_block_intra.
+void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
+ int enable_optimize_b) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE] = { 0 };
+ ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE] = { 0 };
+ // Zeroed above: the per-block callback reads *a/*l even when not optimizing.
+ struct encode_b_args arg = { x, NULL, &xd->mi[0]->mbmi.skip,
+ ta, tl, enable_optimize_b };
+ if (enable_optimize_b) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const TX_SIZE tx_size =
+ plane ? get_uv_tx_size(&xd->mi[0]->mbmi, pd) : xd->mi[0]->mbmi.tx_size;
+ vp10_get_entropy_contexts(bsize, tx_size, pd, ta, tl);
+ }
+ vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
+ vp10_encode_block_intra, &arg);
+}
diff --git a/av1/encoder/encodemb.c.orig b/av1/encoder/encodemb.c.orig
new file mode 100644
index 0000000..4c94032
--- /dev/null
+++ b/av1/encoder/encodemb.c.orig
@@ -0,0 +1,1158 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "aom_dsp/quantize.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+
+#include "av1/common/idct.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/reconintra.h"
+#include "av1/common/scan.h"
+
+#include "av1/encoder/encodemb.h"
+#include "av1/encoder/hybrid_fwd_txfm.h"
+#include "av1/encoder/quantize.h"
+#include "av1/encoder/rd.h"
+#include "av1/encoder/tokenize.h"
+// Fills p->src_diff for one plane from the source and destination buffers.
+void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
+ struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+ const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
+ // bw is passed both as the block width and as the src_diff stride.
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
+ p->src.stride, pd->dst.buf, pd->dst.stride,
+ x->e_mbd.bd);
+ return;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride);
+}
+// One trellis node used by vp10_optimize_b.
+typedef struct vp10_token_state {
+ int rate;  // accumulated token cost
+ int64_t error;  // accumulated squared error
+ int next;  // index of the successor node
+ int16_t token;
+ tran_low_t qc;  // quantized coefficient value
+ tran_low_t dqc;  // dequantized coefficient value
+} vp10_token_state;
+// Scales mb->rdmult in vp10_optimize_b; indexed as [ref][plane type].
+// These numbers are empirically obtained.
+static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
+ { 10, 6 }, { 8, 5 },
+};
+// Recomputes rd_cost0/rd_cost1 from the caller's rate0/1 and error0/1 locals.
+#define UPDATE_RD_COST() \
+ { \
+ rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \
+ rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \
+ }
+
+int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
+ int ctx) {
+ MACROBLOCKD *const xd = &mb->e_mbd;
+ struct macroblock_plane *const p = &mb->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int ref = is_inter_block(&xd->mi[0]->mbmi);
+ vp10_token_state tokens[MAX_TX_SQUARE + 1][2];
+ unsigned best_index[MAX_TX_SQUARE + 1][2];
+ uint8_t token_cache[MAX_TX_SQUARE];
+ const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
+ tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ const int eob = p->eobs[block];
+ const PLANE_TYPE type = pd->plane_type;
+ const int default_eob = get_tx2d_size(tx_size);
+ const int16_t *const dequant_ptr = pd->dequant;
+ const uint8_t *const band_translate = get_band_translate(tx_size);
+ TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
+ const scan_order *const so =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+ const int16_t *const scan = so->scan;
+ const int16_t *const nb = so->neighbors;
+<<<<<<< HEAD
+ const int shift = get_tx_scale(xd, tx_type, tx_size);
+=======
+#if CONFIG_AOM_QM
+ int seg_id = xd->mi[0]->mbmi.segment_id;
+ int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
+ const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
+#endif
+#if CONFIG_AOM_QM
+ int seg_id = xd->mi[0]->mbmi.segment_id;
+ int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
+ const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
+#endif
+>>>>>>> 10d6f02... Port commits related to clpf and qm experiments
+#if CONFIG_NEW_QUANT
+ int dq = get_dq_profile_from_ctx(ctx);
+ const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
+#else
+ const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
+#endif // CONFIG_NEW_QUANT
+ int next = eob, sz = 0;
+ const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][type]) >> 1;
+ const int64_t rddiv = mb->rddiv;
+ int64_t rd_cost0, rd_cost1;
+ int rate0, rate1;
+ int64_t error0, error1;
+ int16_t t0, t1;
+ int best, band = (eob < default_eob) ? band_translate[eob]
+ : band_translate[eob - 1];
+ int pt, i, final_eob;
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
+#else
+ const int *cat6_high_cost = vp10_get_high_cost_table(8);
+#endif
+ unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
+ mb->token_costs[txsize_sqr_map[tx_size]][type][ref];
+ const uint16_t *band_counts = &band_count_table[tx_size][band];
+ uint16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
+ int shortcut = 0;
+ int next_shortcut = 0;
+
+ token_costs += band;
+
+ assert((!type && !plane) || (type && plane));
+ assert(eob <= default_eob);
+
+ /* Now set up a Viterbi trellis to evaluate alternative roundings. */
+ /* Initialize the sentinel node of the trellis. */
+ tokens[eob][0].rate = 0;
+ tokens[eob][0].error = 0;
+ tokens[eob][0].next = default_eob;
+ tokens[eob][0].token = EOB_TOKEN;
+ tokens[eob][0].qc = 0;
+ tokens[eob][1] = tokens[eob][0];
+
+ for (i = 0; i < eob; i++) {
+ const int rc = scan[i];
+ tokens[i][0].rate = vp10_get_token_cost(qcoeff[rc], &t0, cat6_high_cost);
+ tokens[i][0].token = t0;
+ token_cache[rc] = vp10_pt_energy_class[t0];
+ }
+
+ for (i = eob; i-- > 0;) {
+ int base_bits, dx;
+ int64_t d2;
+ const int rc = scan[i];
+#if CONFIG_AOM_QM
+ int iwt = iqmatrix[rc];
+#endif
+ int x = qcoeff[rc];
+ next_shortcut = shortcut;
+
+ /* Only add a trellis state for non-zero coefficients. */
+ if (UNLIKELY(x)) {
+ error0 = tokens[next][0].error;
+ error1 = tokens[next][1].error;
+ /* Evaluate the first possibility for this state. */
+ rate0 = tokens[next][0].rate;
+ rate1 = tokens[next][1].rate;
+
+ if (next_shortcut) {
+ /* Consider both possible successor states. */
+ if (next < default_eob) {
+ pt = get_coef_context(nb, token_cache, i + 1);
+ rate0 += (*token_costs)[0][pt][tokens[next][0].token];
+ rate1 += (*token_costs)[0][pt][tokens[next][1].token];
+ }
+ UPDATE_RD_COST();
+ /* And pick the best. */
+ best = rd_cost1 < rd_cost0;
+ } else {
+ if (next < default_eob) {
+ pt = get_coef_context(nb, token_cache, i + 1);
+ rate0 += (*token_costs)[0][pt][tokens[next][0].token];
+ }
+ best = 0;
+ }
+
+ dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ dx >>= xd->bd - 8;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ d2 = (int64_t)dx * dx;
+ tokens[i][0].rate += (best ? rate1 : rate0);
+ tokens[i][0].error = d2 + (best ? error1 : error0);
+ tokens[i][0].next = next;
+ tokens[i][0].qc = x;
+ tokens[i][0].dqc = dqcoeff[rc];
+ best_index[i][0] = best;
+
+ /* Evaluate the second possibility for this state. */
+ rate0 = tokens[next][0].rate;
+ rate1 = tokens[next][1].rate;
+
+ // The threshold of 3 is empirically obtained.
+ if (UNLIKELY(abs(x) > 3)) {
+ shortcut = 0;
+ } else {
+#if CONFIG_NEW_QUANT
+ shortcut = ((vp10_dequant_abscoeff_nuq(abs(x), dequant_ptr[rc != 0],
+ dequant_val[band_translate[i]]) >
+ (abs(coeff[rc]) << shift)) &&
+ (vp10_dequant_abscoeff_nuq(abs(x) - 1, dequant_ptr[rc != 0],
+ dequant_val[band_translate[i]]) <
+ (abs(coeff[rc]) << shift)));
+#else // CONFIG_NEW_QUANT
+#if CONFIG_AOM_QM
+ if ((abs(x) * dequant_ptr[rc != 0] * iwt >
+ ((abs(coeff[rc]) << shift) << AOM_QM_BITS)) &&
+ (abs(x) * dequant_ptr[rc != 0] * iwt <
+ (((abs(coeff[rc]) << shift) + dequant_ptr[rc != 0]) << AOM_QM_BITS)))
+#else
+ if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
+ (abs(x) * dequant_ptr[rc != 0] <
+ (abs(coeff[rc]) << shift) + dequant_ptr[rc != 0]))
+#endif // CONFIG_AOM_QM
+ shortcut = 1;
+ else
+ shortcut = 0;
+#endif // CONFIG_NEW_QUANT
+ }
+
+ if (shortcut) {
+ sz = -(x < 0);
+ x -= 2 * sz + 1;
+ } else {
+ tokens[i][1] = tokens[i][0];
+ best_index[i][1] = best_index[i][0];
+ next = i;
+
+ if (UNLIKELY(!(--band_left))) {
+ --band_counts;
+ band_left = *band_counts;
+ --token_costs;
+ }
+ continue;
+ }
+
+ /* Consider both possible successor states. */
+ if (!x) {
+ /* If we reduced this coefficient to zero, check to see if
+ * we need to move the EOB back here.
+ */
+ t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
+ t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
+ base_bits = 0;
+ } else {
+ base_bits = vp10_get_token_cost(x, &t0, cat6_high_cost);
+ t1 = t0;
+ }
+
+ if (next_shortcut) {
+ if (LIKELY(next < default_eob)) {
+ if (t0 != EOB_TOKEN) {
+ token_cache[rc] = vp10_pt_energy_class[t0];
+ pt = get_coef_context(nb, token_cache, i + 1);
+ rate0 += (*token_costs)[!x][pt][tokens[next][0].token];
+ }
+ if (t1 != EOB_TOKEN) {
+ token_cache[rc] = vp10_pt_energy_class[t1];
+ pt = get_coef_context(nb, token_cache, i + 1);
+ rate1 += (*token_costs)[!x][pt][tokens[next][1].token];
+ }
+ }
+
+ UPDATE_RD_COST();
+ /* And pick the best. */
+ best = rd_cost1 < rd_cost0;
+ } else {
+ // The two states in next stage are identical.
+ if (next < default_eob && t0 != EOB_TOKEN) {
+ token_cache[rc] = vp10_pt_energy_class[t0];
+ pt = get_coef_context(nb, token_cache, i + 1);
+ rate0 += (*token_costs)[!x][pt][tokens[next][0].token];
+ }
+ best = 0;
+ }
+
+#if CONFIG_NEW_QUANT
+ dx = vp10_dequant_coeff_nuq(x, dequant_ptr[rc != 0],
+ dequant_val[band_translate[i]]) -
+ (coeff[rc] << shift);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ dx >>= xd->bd - 8;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#else // CONFIG_NEW_QUANT
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
+ } else {
+ dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
+ }
+#else
+ dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_NEW_QUANT
+ d2 = (int64_t)dx * dx;
+
+ tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
+ tokens[i][1].error = d2 + (best ? error1 : error0);
+ tokens[i][1].next = next;
+ tokens[i][1].token = best ? t1 : t0;
+ tokens[i][1].qc = x;
+
+ if (x) {
+#if CONFIG_NEW_QUANT
+ tokens[i][1].dqc = vp10_dequant_abscoeff_nuq(
+ abs(x), dequant_ptr[rc != 0], dequant_val[band_translate[i]]);
+ tokens[i][1].dqc = shift ? ROUND_POWER_OF_TWO(tokens[i][1].dqc, shift)
+ : tokens[i][1].dqc;
+ if (sz) tokens[i][1].dqc = -tokens[i][1].dqc;
+#else
+ tran_low_t offset = dq_step[rc != 0];
+ // The 32x32 transform coefficient uses half quantization step size.
+ // Account for the rounding difference in the dequantized coefficient
+ // value when the quantization index is dropped from an even number
+ // to an odd number.
+ if (shift & x) offset += (dequant_ptr[rc != 0] & 0x01);
+
+ if (sz == 0)
+ tokens[i][1].dqc = dqcoeff[rc] - offset;
+ else
+ tokens[i][1].dqc = dqcoeff[rc] + offset;
+#endif // CONFIG_NEW_QUANT
+ } else {
+ tokens[i][1].dqc = 0;
+ }
+
+ best_index[i][1] = best;
+ /* Finally, make this the new head of the trellis. */
+ next = i;
+ } else {
+ /* There's no choice to make for a zero coefficient, so we don't
+ * add a new trellis node, but we do need to update the costs.
+ */
+ t0 = tokens[next][0].token;
+ t1 = tokens[next][1].token;
+ pt = get_coef_context(nb, token_cache, i + 1);
+ /* Update the cost of each path if we're past the EOB token. */
+ if (t0 != EOB_TOKEN) {
+ tokens[next][0].rate += (*token_costs)[1][pt][t0];
+ tokens[next][0].token = ZERO_TOKEN;
+ }
+ if (t1 != EOB_TOKEN) {
+ tokens[next][1].rate += (*token_costs)[1][pt][t1];
+ tokens[next][1].token = ZERO_TOKEN;
+ }
+ best_index[i][0] = best_index[i][1] = 0;
+ shortcut = (tokens[next][0].rate != tokens[next][1].rate);
+ /* Don't update next, because we didn't add a new node. */
+ }
+
+ if (UNLIKELY(!(--band_left))) {
+ --band_counts;
+ band_left = *band_counts;
+ --token_costs;
+ }
+ }
+
+ /* Now pick the best path through the whole trellis. */
+ rate0 = tokens[next][0].rate;
+ rate1 = tokens[next][1].rate;
+ error0 = tokens[next][0].error;
+ error1 = tokens[next][1].error;
+ t0 = tokens[next][0].token;
+ t1 = tokens[next][1].token;
+ rate0 += (*token_costs)[0][ctx][t0];
+ rate1 += (*token_costs)[0][ctx][t1];
+ UPDATE_RD_COST();
+ best = rd_cost1 < rd_cost0;
+
+ final_eob = -1;
+
+ for (i = next; i < eob; i = next) {
+ const int x = tokens[i][best].qc;
+ const int rc = scan[i];
+#if CONFIG_AOM_QM
+ const int iwt = iqmatrix[rc];
+ const int dequant =
+ (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+#endif
+
+ if (x) final_eob = i;
+ qcoeff[rc] = x;
+ dqcoeff[rc] = tokens[i][best].dqc;
+
+ next = tokens[i][best].next;
+ best = best_index[i][best];
+ }
+ final_eob++;
+
+ mb->plane[plane].eobs[block] = final_eob;
+ assert(final_eob <= default_eob);
+ return final_eob;
+}
+
+/*
+ * QUANT_FUNC is the column index of quant_func_list: it picks the low- or
+ * high-bit-depth flavour of a quantizer. QUANT_FUNC_LAST is the column count.
+ *
+ * quant_func_list is the quantizer dispatch table, indexed by
+ * [VP10_XFORM_QUANT][QUANT_FUNC]. Its final row holds NULL entries.
+ * vp10_xform_quant() only reads the table, so it is declared const.
+ * NOTE(review): confirm nothing elsewhere in this file assigns to it.
+ */
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef enum QUANT_FUNC {
+  QUANT_FUNC_LOWBD = 0,
+  QUANT_FUNC_HIGHBD = 1,
+  QUANT_FUNC_LAST = 2
+} QUANT_FUNC;
+
+static const VP10_QUANT_FACADE
+    quant_func_list[VP10_XFORM_QUANT_LAST][QUANT_FUNC_LAST] = {
+      { vp10_quantize_fp_facade, vp10_highbd_quantize_fp_facade },
+      { vp10_quantize_b_facade, vp10_highbd_quantize_b_facade },
+      { vp10_quantize_dc_facade, vp10_highbd_quantize_dc_facade },
+      { NULL, NULL }
+    };
+
+#else
+typedef enum QUANT_FUNC {
+  QUANT_FUNC_LOWBD = 0,
+  QUANT_FUNC_LAST = 1
+} QUANT_FUNC;
+
+static const VP10_QUANT_FACADE
+    quant_func_list[VP10_XFORM_QUANT_LAST][QUANT_FUNC_LAST] = {
+      { vp10_quantize_fp_facade },
+      { vp10_quantize_b_facade },
+      { vp10_quantize_dc_facade },
+      { NULL }
+    };
+#endif
+
+/*
+ * Forward-transform option for each VP10_XFORM_QUANT value, indexed by that
+ * enum. Only the third entry differs (FWD_TXFM_OPT_DC); the rest are
+ * FWD_TXFM_OPT_NORMAL. The visible code only reads this table, so it is
+ * declared const.
+ */
+static const FWD_TXFM_OPT fwd_txfm_opt_list[VP10_XFORM_QUANT_LAST] = {
+  FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_DC, FWD_TXFM_OPT_NORMAL
+};
+
+/*
+ * Forward-transform one transform block of the residual and, unless
+ * xform_quant_idx is VP10_XFORM_QUANT_SKIP_QUANT, quantize the coefficients.
+ *
+ *   x                 macroblock being encoded
+ *   plane, block      select the coeff / qcoeff / dqcoeff / eob slots
+ *   blk_row, blk_col  block position; both are scaled by 4 to index src_diff
+ *   plane_bsize       plane block size; determines the src_diff stride
+ *   tx_size           transform size
+ *   xform_quant_idx   row of quant_func_list / fwd_txfm_opt_list to use
+ *
+ * When x->skip_block is set, vp10_quantize_skip() is called in place of the
+ * quantizer.
+ *
+ * This body resolves the merge conflict that had been committed here: it
+ * keeps the skip_block handling from HEAD and the CONFIG_AOM_QM quantization
+ * matrix arguments from the ported change.
+ * NOTE(review): assumes the VP10_QUANT_FACADE functions take the trailing
+ * qmatrix / iqmatrix arguments when CONFIG_AOM_QM is enabled - confirm
+ * against the facade declarations.
+ */
+void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
+                      int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+                      VP10_XFORM_QUANT xform_quant_idx) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+  const scan_order *const scan_order =
+      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+#if CONFIG_AOM_QM
+  int seg_id = xd->mi[0]->mbmi.segment_id;
+  int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
+  const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size];
+  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
+#endif
+  const int16_t *src_diff;
+  const int tx2d_size = get_tx2d_size(tx_size);
+
+  FWD_TXFM_PARAM fwd_txfm_param;
+  QUANT_PARAM qparam;
+
+  fwd_txfm_param.tx_type = tx_type;
+  fwd_txfm_param.tx_size = tx_size;
+  fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[xform_quant_idx];
+  fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+  fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+  qparam.log_scale = get_tx_scale(xd, tx_type, tx_size);
+#if CONFIG_VP9_HIGHBITDEPTH
+  fwd_txfm_param.bd = xd->bd;
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+    if (xform_quant_idx != VP10_XFORM_QUANT_SKIP_QUANT) {
+      if (LIKELY(!x->skip_block)) {
+        quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD](
+            coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam
+#if CONFIG_AOM_QM
+            ,
+            qmatrix, iqmatrix
+#endif  // CONFIG_AOM_QM
+            );
+      } else {
+        vp10_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
+      }
+    }
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+  if (xform_quant_idx != VP10_XFORM_QUANT_SKIP_QUANT) {
+    if (LIKELY(!x->skip_block)) {
+      quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD](
+          coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam
+#if CONFIG_AOM_QM
+          ,
+          qmatrix, iqmatrix
+#endif  // CONFIG_AOM_QM
+          );
+    } else {
+      vp10_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
+    }
+  }
+}
+
+#if CONFIG_NEW_QUANT
+/*
+ * Forward-transform one transform block and quantize it with the non-uniform
+ * quantizer (quantize_nuq / quantize_32x32_nuq, or the highbd_ variants when
+ * the current buffer has YV12_FLAG_HIGHBITDEPTH set).
+ *
+ * ctx is mapped to a dequantization profile by get_dq_profile_from_ctx();
+ * that profile selects the cuml_bins_nuq / dequant_val_nuq tables.
+ * The remaining parameters have the same meaning as in vp10_xform_quant().
+ */
+void vp10_xform_quant_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
+                          int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+                          int ctx) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+  const scan_order *const scan_order =
+      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  int dq = get_dq_profile_from_ctx(ctx);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int16_t *src_diff;
+  const uint8_t *band = get_band_translate(tx_size);
+  // One coefficient count for every quantizer call below. The low-bit-depth
+  // 32x32 call used to pass a literal 1024 here while all the other calls
+  // used get_tx2d_size(tx_size).
+  // NOTE(review): assumes get_tx2d_size(TX_32X32) == 1024 - confirm.
+  const int tx2d_size = get_tx2d_size(tx_size);
+
+  FWD_TXFM_PARAM fwd_txfm_param;
+
+  fwd_txfm_param.tx_type = tx_type;
+  fwd_txfm_param.tx_size = tx_size;
+  // The FP and B rows of fwd_txfm_opt_list both hold FWD_TXFM_OPT_NORMAL.
+  fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_FP];
+  fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+  fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+// TODO(sarahparker) add all of these new quant quantize functions
+// to quant_func_list, just trying to get this expr to work for now
+#if CONFIG_VP9_HIGHBITDEPTH
+  fwd_txfm_param.bd = xd->bd;
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+    if (tx_size == TX_32X32) {
+      highbd_quantize_32x32_nuq(
+          coeff, tx2d_size, x->skip_block, p->quant, p->quant_shift,
+          pd->dequant, (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+          (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff,
+          dqcoeff, eob, scan_order->scan, band);
+    } else {
+      highbd_quantize_nuq(coeff, tx2d_size, x->skip_block, p->quant,
+                          p->quant_shift, pd->dequant,
+                          (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+                          (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
+                          qcoeff, dqcoeff, eob, scan_order->scan, band);
+    }
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+  if (tx_size == TX_32X32) {
+    quantize_32x32_nuq(coeff, tx2d_size, x->skip_block, p->quant,
+                       p->quant_shift, pd->dequant,
+                       (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+                       (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq],
+                       qcoeff, dqcoeff, eob, scan_order->scan, band);
+  } else {
+    quantize_nuq(coeff, tx2d_size, x->skip_block, p->quant, p->quant_shift,
+                 pd->dequant, (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq],
+                 (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff,
+                 dqcoeff, eob, scan_order->scan, band);
+  }
+}
+
+/*
+ * Forward-transform one transform block, then quantize it with the "fp"
+ * non-uniform quantizer (the *_fp_nuq routines, driven by p->quant_fp).
+ * ctx is mapped to a dequantization profile by get_dq_profile_from_ctx().
+ * The high-bit-depth routines are used when the current buffer has
+ * YV12_FLAG_HIGHBITDEPTH set.
+ */
+void vp10_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
+                             int blk_col, BLOCK_SIZE plane_bsize,
+                             TX_SIZE tx_size, int ctx) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const int dq_profile = get_dq_profile_from_ctx(ctx);
+  const PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+  const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+  const scan_order *const so =
+      get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int16_t *const src_diff =
+      &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+  const uint8_t *const band_translate = get_band_translate(tx_size);
+  const int n_coeffs = get_tx2d_size(tx_size);
+  // Per-profile tables, shared by all four quantizer calls below.
+  const cuml_bins_type_nuq *const cuml_bins =
+      (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq_profile];
+  const dequant_val_type_nuq *const dequant_val =
+      (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq_profile];
+  const int is_32x32 = (tx_size == TX_32X32);
+  FWD_TXFM_PARAM fwd_txfm_param;
+
+  fwd_txfm_param.tx_type = tx_type;
+  fwd_txfm_param.tx_size = tx_size;
+  fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_FP];
+  fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+  fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+// TODO(sarahparker) add all of these new quant quantize functions
+// to quant_func_list, just trying to get this expr to work for now
+#if CONFIG_VP9_HIGHBITDEPTH
+  fwd_txfm_param.bd = xd->bd;
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+    if (is_32x32) {
+      highbd_quantize_32x32_fp_nuq(coeff, n_coeffs, x->skip_block, p->quant_fp,
+                                   pd->dequant, cuml_bins, dequant_val, qcoeff,
+                                   dqcoeff, eob, so->scan, band_translate);
+    } else {
+      highbd_quantize_fp_nuq(coeff, n_coeffs, x->skip_block, p->quant_fp,
+                             pd->dequant, cuml_bins, dequant_val, qcoeff,
+                             dqcoeff, eob, so->scan, band_translate);
+    }
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+  if (is_32x32) {
+    quantize_32x32_fp_nuq(coeff, n_coeffs, x->skip_block, p->quant_fp,
+                          pd->dequant, cuml_bins, dequant_val, qcoeff, dqcoeff,
+                          eob, so->scan, band_translate);
+  } else {
+    quantize_fp_nuq(coeff, n_coeffs, x->skip_block, p->quant_fp, pd->dequant,
+                    cuml_bins, dequant_val, qcoeff, dqcoeff, eob, so->scan,
+                    band_translate);
+  }
+}
+
+/*
+ * Forward-transform one transform block with the FWD_TXFM_OPT_DC option and
+ * quantize it with the DC-only non-uniform quantizer (the *_dc_nuq
+ * routines), which is handed only element [0] of the quant / dequant tables.
+ * ctx is mapped to a dequantization profile by get_dq_profile_from_ctx().
+ */
+void vp10_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
+                             int blk_col, BLOCK_SIZE plane_bsize,
+                             TX_SIZE tx_size, int ctx) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+  const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int16_t *const src_diff =
+      &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+  const int dq_profile = get_dq_profile_from_ctx(ctx);
+  const int n_coeffs = get_tx2d_size(tx_size);
+  const int is_32x32 = (tx_size == TX_32X32);
+  FWD_TXFM_PARAM fwd_txfm_param;
+
+  fwd_txfm_param.tx_type = tx_type;
+  fwd_txfm_param.tx_size = tx_size;
+  fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_DC];
+  fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+  fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+// TODO(sarahparker) add all of these new quant quantize functions
+// to quant_func_list, just trying to get this expr to work for now
+#if CONFIG_VP9_HIGHBITDEPTH
+  fwd_txfm_param.bd = xd->bd;
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+    if (is_32x32) {
+      highbd_quantize_dc_32x32_nuq(
+          coeff, n_coeffs, x->skip_block, p->quant[0], p->quant_shift[0],
+          pd->dequant[0], p->cuml_bins_nuq[dq_profile][0],
+          pd->dequant_val_nuq[dq_profile][0], qcoeff, dqcoeff, eob);
+    } else {
+      highbd_quantize_dc_nuq(
+          coeff, n_coeffs, x->skip_block, p->quant[0], p->quant_shift[0],
+          pd->dequant[0], p->cuml_bins_nuq[dq_profile][0],
+          pd->dequant_val_nuq[dq_profile][0], qcoeff, dqcoeff, eob);
+    }
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+  if (is_32x32) {
+    quantize_dc_32x32_nuq(coeff, n_coeffs, x->skip_block, p->quant[0],
+                          p->quant_shift[0], pd->dequant[0],
+                          p->cuml_bins_nuq[dq_profile][0],
+                          pd->dequant_val_nuq[dq_profile][0], qcoeff, dqcoeff,
+                          eob);
+  } else {
+    quantize_dc_nuq(coeff, n_coeffs, x->skip_block, p->quant[0],
+                    p->quant_shift[0], pd->dequant[0],
+                    p->cuml_bins_nuq[dq_profile][0],
+                    pd->dequant_val_nuq[dq_profile][0], qcoeff, dqcoeff, eob);
+  }
+}
+
+/*
+ * Forward-transform one transform block with the FWD_TXFM_OPT_DC option and
+ * quantize it with the DC-only "fp" non-uniform quantizer (the *_dc_fp_nuq
+ * routines), which is handed only element [0] of the quant_fp / dequant
+ * tables. ctx is mapped to a dequantization profile by
+ * get_dq_profile_from_ctx().
+ */
+void vp10_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,
+                                int blk_row, int blk_col,
+                                BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+                                int ctx) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+  const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint16_t *const eob = &p->eobs[block];
+  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int16_t *const src_diff =
+      &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+  const int dq_profile = get_dq_profile_from_ctx(ctx);
+  const int n_coeffs = get_tx2d_size(tx_size);
+  const int is_32x32 = (tx_size == TX_32X32);
+  FWD_TXFM_PARAM fwd_txfm_param;
+
+  fwd_txfm_param.tx_type = tx_type;
+  fwd_txfm_param.tx_size = tx_size;
+  fwd_txfm_param.fwd_txfm_opt = fwd_txfm_opt_list[VP10_XFORM_QUANT_DC];
+  fwd_txfm_param.rd_transform = x->use_lp32x32fdct;
+  fwd_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+// TODO(sarahparker) add all of these new quant quantize functions
+// to quant_func_list, just trying to get this expr to work for now
+#if CONFIG_VP9_HIGHBITDEPTH
+  fwd_txfm_param.bd = xd->bd;
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+    if (is_32x32) {
+      highbd_quantize_dc_32x32_fp_nuq(
+          coeff, n_coeffs, x->skip_block, p->quant_fp[0], pd->dequant[0],
+          p->cuml_bins_nuq[dq_profile][0], pd->dequant_val_nuq[dq_profile][0],
+          qcoeff, dqcoeff, eob);
+    } else {
+      highbd_quantize_dc_fp_nuq(
+          coeff, n_coeffs, x->skip_block, p->quant_fp[0], pd->dequant[0],
+          p->cuml_bins_nuq[dq_profile][0], pd->dequant_val_nuq[dq_profile][0],
+          qcoeff, dqcoeff, eob);
+    }
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
+  if (is_32x32) {
+    quantize_dc_32x32_fp_nuq(coeff, n_coeffs, x->skip_block, p->quant_fp[0],
+                             pd->dequant[0], p->cuml_bins_nuq[dq_profile][0],
+                             pd->dequant_val_nuq[dq_profile][0], qcoeff,
+                             dqcoeff, eob);
+  } else {
+    quantize_dc_fp_nuq(coeff, n_coeffs, x->skip_block, p->quant_fp[0],
+                       pd->dequant[0], p->cuml_bins_nuq[dq_profile][0],
+                       pd->dequant_val_nuq[dq_profile][0], qcoeff, dqcoeff,
+                       eob);
+  }
+}
+#endif // CONFIG_NEW_QUANT
+
+/*
+ * Per-transform-block encode callback; arg is a struct encode_b_args *.
+ *
+ * Transforms and quantizes the block (skipped under CONFIG_VAR_TX when
+ * x->blk_skip marks it, in which case the EOB is forced to 0), runs
+ * vp10_optimize_b() when coefficients remain, records the outcome in the
+ * above/left entropy contexts, and finally adds the inverse transform of
+ * dqcoeff into the destination buffer. *args->skip is cleared when the block
+ * ends up with a non-zero EOB.
+ */
+static void encode_block(int plane, int block, int blk_row, int blk_col,
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
+  struct encode_b_args *const args = arg;
+  MACROBLOCK *const x = args->x;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  struct macroblock_plane *const p = &x->plane[plane];
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint8_t *const dst =
+      &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
+  ENTROPY_CONTEXT *const above = &args->ta[blk_col];
+  ENTROPY_CONTEXT *const left = &args->tl[blk_row];
+  INV_TXFM_PARAM inv_txfm_param;
+  int ctx;
+#if CONFIG_VAR_TX
+  int i;
+  const int skip_idx =
+      (blk_row << b_width_log2_lookup[plane_bsize]) + blk_col;
+#endif
+
+#if CONFIG_VAR_TX
+  ctx = get_entropy_context(tx_size, above, left);
+
+  // Assert not magic number (uninitialised).
+  assert(x->blk_skip[plane][skip_idx] != 234);
+
+  if (x->blk_skip[plane][skip_idx] == 0) {
+#else
+  ctx = combine_entropy_contexts(*above, *left);
+
+  {
+#endif
+#if CONFIG_NEW_QUANT
+    vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+                            tx_size, ctx);
+#else
+    vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+                     VP10_XFORM_QUANT_FP);
+#endif  // CONFIG_NEW_QUANT
+  }
+#if CONFIG_VAR_TX
+  else {
+    p->eobs[block] = 0;
+  }
+#endif
+
+  if (p->eobs[block] == 0) {
+    // Nothing was coded: both contexts become 0.
+    *above = *left = 0;
+  } else {
+    *above = *left = vp10_optimize_b(x, plane, block, tx_size, ctx) > 0;
+  }
+
+#if CONFIG_VAR_TX
+  // Spread the first context value across the whole transform block.
+  for (i = 1; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i) {
+    above[i] = above[0];
+  }
+  for (i = 1; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i) {
+    left[i] = left[0];
+  }
+#endif
+
+  // The EOB is read again here rather than cached, because
+  // vp10_optimize_b() runs in between.
+  if (p->eobs[block] == 0) return;
+
+  *(args->skip) = 0;
+
+  // inverse transform parameters
+  inv_txfm_param.tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
+  inv_txfm_param.tx_size = tx_size;
+  inv_txfm_param.eob = p->eobs[block];
+  inv_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    inv_txfm_param.bd = xd->bd;
+    highbd_inv_txfm_add(dqcoeff, dst, pd->dst.stride, &inv_txfm_param);
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  inv_txfm_add(dqcoeff, dst, pd->dst.stride, &inv_txfm_param);
+}
+
+#if CONFIG_VAR_TX
+/*
+ * Recursive walker over the transform partition of an inter block.
+ *
+ * If tx_size equals the transform size recorded for this position
+ * (mbmi->inter_tx_size, converted with get_uv_tx_size_impl() for plane != 0),
+ * the block is encoded with encode_block(). Otherwise it is split into four
+ * quadrants and each one is visited with tx_size - 1. Positions outside the
+ * visible area (per mb_to_bottom_edge / mb_to_right_edge) are skipped.
+ */
+static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
+                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+                               void *arg) {
+  struct encode_b_args *const args = arg;
+  MACROBLOCK *const x = args->x;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const int tx_row = blk_row >> (1 - pd->subsampling_y);
+  const int tx_col = blk_col >> (1 - pd->subsampling_x);
+  TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
+  int rows_4x4 = num_4x4_blocks_high_lookup[plane_bsize];
+  int cols_4x4 = num_4x4_blocks_wide_lookup[plane_bsize];
+
+  if (plane) plane_tx_size = get_uv_tx_size_impl(plane_tx_size, bsize, 0, 0);
+
+  // Clip the block extent to the part that lies inside the frame edges.
+  if (xd->mb_to_bottom_edge < 0)
+    rows_4x4 += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+  if (xd->mb_to_right_edge < 0)
+    cols_4x4 += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+  if (blk_row >= rows_4x4 || blk_col >= cols_4x4) return;
+
+  if (tx_size != plane_tx_size) {
+    int bsl = b_width_log2_lookup[bsize];
+    int step;
+    int quad;
+
+    assert(bsl > 0);
+    --bsl;
+
+#if CONFIG_EXT_TX
+    assert(tx_size < TX_SIZES);
+#endif  // CONFIG_EXT_TX
+
+    // Number of 4x4 blocks covered by each of the four sub-transforms.
+    step = num_4x4_blocks_txsize_lookup[tx_size - 1];
+
+    for (quad = 0; quad < 4; ++quad) {
+      const int sub_row = blk_row + ((quad >> 1) << bsl);
+      const int sub_col = blk_col + ((quad & 0x01) << bsl);
+
+      if (sub_row >= rows_4x4 || sub_col >= cols_4x4) continue;
+
+      encode_block_inter(plane, block + quad * step, sub_row, sub_col,
+                         plane_bsize, tx_size - 1, arg);
+    }
+  } else {
+    encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
+  }
+}
+#endif
+
+/*
+ * Per-transform-block callback for vp10_encode_sby_pass1(); arg is the
+ * MACROBLOCK itself. Transforms and quantizes the block and, when the EOB is
+ * non-zero, adds the 4x4 inverse transform of dqcoeff into the destination:
+ * the WHT for lossless segments, the DCT otherwise.
+ */
+static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
+                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+                               void *arg) {
+  MACROBLOCK *const x = (MACROBLOCK *)arg;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  struct macroblock_plane *const p = &x->plane[plane];
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  uint8_t *const dst =
+      &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
+
+#if CONFIG_NEW_QUANT
+  // Context 0 is always used here.
+  vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+                          tx_size, 0);
+#else
+  vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+                   VP10_XFORM_QUANT_B);
+#endif  // CONFIG_NEW_QUANT
+
+  if (p->eobs[block] == 0) return;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+      vp10_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
+                              xd->bd);
+    } else {
+      vp10_highbd_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
+                              xd->bd);
+    }
+    return;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+    vp10_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+  } else {
+    vp10_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
+  }
+}
+
+/*
+ * Encode plane 0 of a block: compute the residual with vp10_subtract_plane(),
+ * then run encode_block_pass1() on every transform block of that plane.
+ */
+void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const int y_plane = 0;
+
+  vp10_subtract_plane(x, bsize, y_plane);
+  vp10_foreach_transformed_block_in_plane(xd, bsize, y_plane,
+                                          encode_block_pass1, x);
+}
+
+/*
+ * Encode every plane of a block. mbmi->skip is first set to 1 and is cleared
+ * by the per-block callback when a transform block has a non-zero EOB. When
+ * x->skip is set the function returns right after setting mbmi->skip.
+ *
+ * For each plane the entropy contexts are fetched, the residual is computed
+ * with vp10_subtract_plane(), and the transform blocks are encoded: through
+ * encode_block_inter() on a grid of maximum-size transforms under
+ * CONFIG_VAR_TX, otherwise through encode_block() for every transform block.
+ */
+void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  struct optimize_ctx ctx;
+  struct encode_b_args arg;
+  int plane;
+
+  arg.x = x;
+  arg.ctx = &ctx;
+  arg.skip = &mbmi->skip;
+  arg.ta = NULL;
+  arg.tl = NULL;
+  arg.enable_optimize_b = 1;
+
+  mbmi->skip = 1;
+
+  if (x->skip) return;
+
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+#if CONFIG_VAR_TX
+    // TODO(jingning): Clean this up.
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+    const int cols_4x4 = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int rows_4x4 = num_4x4_blocks_high_lookup[plane_bsize];
+    const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+    // The same stride, taken from the width table, steps rows and columns.
+    const int tx_stride =
+        num_4x4_blocks_wide_lookup[txsize_to_bsize[max_tx_size]];
+    const int blocks_per_tx = num_4x4_blocks_txsize_lookup[max_tx_size];
+    int row, col;
+    int block = 0;
+    vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctx.ta[plane], ctx.tl[plane]);
+#else
+    const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
+    vp10_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
+#endif
+    vp10_subtract_plane(x, bsize, plane);
+    arg.ta = ctx.ta[plane];
+    arg.tl = ctx.tl[plane];
+
+#if CONFIG_VAR_TX
+    for (row = 0; row < rows_4x4; row += tx_stride) {
+      for (col = 0; col < cols_4x4; col += tx_stride) {
+        encode_block_inter(plane, block, row, col, plane_bsize, max_tx_size,
+                           &arg);
+        block += blocks_per_tx;
+      }
+    }
+#else
+    vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
+                                            &arg);
+#endif
+  }
+}
+
+#if CONFIG_SUPERTX
+/*
+ * SUPERTX counterpart of vp10_encode_sb(): for every plane, compute the
+ * residual, fetch the entropy contexts (for TX_4X4 under CONFIG_VAR_TX,
+ * otherwise for the plane's transform size) and run encode_block() on every
+ * transform block. mbmi->skip starts at 1; nothing else happens when x->skip
+ * is set.
+ */
+void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  struct optimize_ctx ctx;
+  struct encode_b_args arg;
+  int plane;
+
+  arg.x = x;
+  arg.ctx = &ctx;
+  arg.skip = &mbmi->skip;
+  arg.ta = NULL;
+  arg.tl = NULL;
+  arg.enable_optimize_b = 1;
+
+  mbmi->skip = 1;
+  if (x->skip) return;
+
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+#if CONFIG_VAR_TX
+    const TX_SIZE tx_size = TX_4X4;
+#else
+    const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
+#endif
+    ENTROPY_CONTEXT *const above = ctx.ta[plane];
+    ENTROPY_CONTEXT *const left = ctx.tl[plane];
+
+    vp10_subtract_plane(x, bsize, plane);
+    vp10_get_entropy_contexts(bsize, tx_size, pd, above, left);
+    arg.ta = above;
+    arg.tl = left;
+    vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
+                                            &arg);
+  }
+}
+#endif // CONFIG_SUPERTX
+
+/*
+ * Per-transform-block intra encode callback; arg is a struct encode_b_args *.
+ *
+ * Steps: intra-predict the block into dst, subtract the prediction from the
+ * source to form src_diff, transform and quantize, and, if the EOB is
+ * non-zero, add the inverse transform back into dst and clear *args->skip.
+ *
+ * With args->enable_optimize_b set, the FP quantizer is used and
+ * vp10_optimize_b() runs on blocks that have coefficients; otherwise the B
+ * quantizer is used. Either way *a and *l are written with whether
+ * coefficients remain.
+ *
+ * The entropy context is now derived from *a / *l only inside the
+ * enable_optimize_b branch, its sole consumer. vp10_encode_intra_block_plane()
+ * fills ta / tl only when optimization is enabled, so the previous
+ * unconditional read could touch uninitialized memory.
+ */
+void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
+                             BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+                             void *arg) {
+  struct encode_b_args *const args = arg;
+  MACROBLOCK *const x = args->x;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  struct macroblock_plane *const p = &x->plane[plane];
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+  const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+  PREDICTION_MODE mode;
+  const int bwl = b_width_log2_lookup[plane_bsize];
+  const int bhl = b_height_log2_lookup[plane_bsize];
+  const int diff_stride = 4 * (1 << bwl);
+  uint8_t *src, *dst;
+  int16_t *src_diff;
+  uint16_t *eob = &p->eobs[block];
+  const int src_stride = p->src.stride;
+  const int dst_stride = pd->dst.stride;
+  const int tx1d_width = num_4x4_blocks_wide_txsize_lookup[tx_size] << 2;
+  const int tx1d_height = num_4x4_blocks_high_txsize_lookup[tx_size] << 2;
+  ENTROPY_CONTEXT *a = NULL, *l = NULL;
+
+  INV_TXFM_PARAM inv_txfm_param;
+
+  assert(tx1d_width == tx1d_height);
+
+  dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
+  src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
+  src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+  mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
+  vp10_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride, dst,
+                           dst_stride, blk_col, blk_row, plane);
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    vpx_highbd_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride,
+                              src, src_stride, dst, dst_stride, xd->bd);
+  } else {
+    vpx_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride, src,
+                       src_stride, dst, dst_stride);
+  }
+#else
+  vpx_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride, src,
+                     src_stride, dst, dst_stride);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  a = &args->ta[blk_col];
+  l = &args->tl[blk_row];
+
+  if (args->enable_optimize_b) {
+    // Only this path consumes the context, so only this path reads *a / *l.
+    const int ctx = combine_entropy_contexts(*a, *l);
+#if CONFIG_NEW_QUANT
+    vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+                            tx_size, ctx);
+#else   // CONFIG_NEW_QUANT
+    vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+                     VP10_XFORM_QUANT_FP);
+#endif  // CONFIG_NEW_QUANT
+    if (p->eobs[block]) {
+      *a = *l = vp10_optimize_b(x, plane, block, tx_size, ctx) > 0;
+    } else {
+      *a = *l = 0;
+    }
+  } else {
+    vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+                     VP10_XFORM_QUANT_B);
+    *a = *l = p->eobs[block] > 0;
+  }
+
+  if (*eob) {
+    // inverse transform
+    inv_txfm_param.tx_type = tx_type;
+    inv_txfm_param.tx_size = tx_size;
+    inv_txfm_param.eob = *eob;
+    inv_txfm_param.lossless = xd->lossless[mbmi->segment_id];
+#if CONFIG_VP9_HIGHBITDEPTH
+    inv_txfm_param.bd = xd->bd;
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+      highbd_inv_txfm_add(dqcoeff, dst, dst_stride, &inv_txfm_param);
+    } else {
+      inv_txfm_add(dqcoeff, dst, dst_stride, &inv_txfm_param);
+    }
+#else
+    inv_txfm_add(dqcoeff, dst, dst_stride, &inv_txfm_param);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+    *(args->skip) = 0;
+  }
+}
+
+/*
+ * Intra-encode every transform block of one plane by running
+ * vp10_encode_block_intra() over it.
+ *
+ *   x                  macroblock being encoded
+ *   bsize              block size
+ *   plane              plane index
+ *   enable_optimize_b  forwarded to the callback; when non-zero the entropy
+ *                      contexts are loaded with vp10_get_entropy_contexts()
+ *
+ * ta / tl are zero-initialized because they are handed to the callback even
+ * when enable_optimize_b is 0, the case in which nothing here fills them.
+ */
+void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
+                                   int enable_optimize_b) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE] = { 0 };
+  ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE] = { 0 };
+
+  struct encode_b_args arg = { x,  NULL, &xd->mi[0]->mbmi.skip,
+                               ta, tl,   enable_optimize_b };
+  if (enable_optimize_b) {
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    const TX_SIZE tx_size =
+        plane ? get_uv_tx_size(&xd->mi[0]->mbmi, pd) : xd->mi[0]->mbmi.tx_size;
+    vp10_get_entropy_contexts(bsize, tx_size, pd, ta, tl);
+  }
+  vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
+                                          vp10_encode_block_intra, &arg);
+}
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
new file mode 100644
index 0000000..4b88831
--- /dev/null
+++ b/av1/encoder/encodemb.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_ENCODEMB_H_
+#define VP10_ENCODER_ENCODEMB_H_
+
+#include "./vpx_config.h"
+#include "av1/encoder/block.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct optimize_ctx { // one row of entropy contexts per plane, in two sets
+ ENTROPY_CONTEXT ta[MAX_MB_PLANE][2 * MAX_MIB_SIZE];
+ ENTROPY_CONTEXT tl[MAX_MB_PLANE][2 * MAX_MIB_SIZE];
+};
+
+struct encode_b_args { // argument bundle passed as `void *arg` to block callbacks
+ MACROBLOCK *x;
+ struct optimize_ctx *ctx; // vp10_encode_intra_block_plane passes NULL here
+ int8_t *skip; // vp10_encode_block_intra stores 0 here when a block has eob != 0
+ ENTROPY_CONTEXT *ta;
+ ENTROPY_CONTEXT *tl;
+ int8_t enable_optimize_b; // nonzero: vp10_optimize_b runs on blocks with eob != 0
+};
+
+typedef enum VP10_XFORM_QUANT { // selector passed to vp10_xform_quant()
+ VP10_XFORM_QUANT_FP = 0,
+ VP10_XFORM_QUANT_B = 1,
+ VP10_XFORM_QUANT_DC = 2,
+ VP10_XFORM_QUANT_SKIP_QUANT = 3,
+ VP10_XFORM_QUANT_LAST = 4
+} VP10_XFORM_QUANT;
+
+void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
+#if CONFIG_SUPERTX
+void vp10_encode_sb_supertx(MACROBLOCK *x, BLOCK_SIZE bsize);
+#endif // CONFIG_SUPERTX
+void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
+void vp10_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
+ int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ VP10_XFORM_QUANT xform_quant_idx);
+#if CONFIG_NEW_QUANT
+void vp10_xform_quant_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
+ int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ int ctx);
+void vp10_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
+ int blk_col, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, int ctx);
+void vp10_xform_quant_fp_nuq(MACROBLOCK *x, int plane, int block, int blk_row,
+ int blk_col, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, int ctx);
+void vp10_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block,
+ int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ int ctx);
+#endif
+
+int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
+ int ctx);
+
+void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
+
+void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+ void *arg);
+
+void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
+ int enable_optimize_b);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_ENCODEMB_H_
diff --git a/av1/encoder/encodemv.c b/av1/encoder/encodemv.c
new file mode 100644
index 0000000..78da2b7
--- /dev/null
+++ b/av1/encoder/encodemv.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "av1/common/common.h"
+#include "av1/common/entropymode.h"
+
+#include "av1/encoder/cost.h"
+#include "av1/encoder/encodemv.h"
+#include "av1/encoder/subexp.h"
+
+#include "aom_dsp/vpx_dsp_common.h"
+
+static struct vp10_token mv_joint_encodings[MV_JOINTS];
+static struct vp10_token mv_class_encodings[MV_CLASSES];
+static struct vp10_token mv_fp_encodings[MV_FP_SIZE];
+static struct vp10_token mv_class0_encodings[CLASS0_SIZE];
+
+void vp10_entropy_mv_init(void) { // fills the file-local MV token tables
+ vp10_tokens_from_tree(mv_joint_encodings, vp10_mv_joint_tree);
+ vp10_tokens_from_tree(mv_class_encodings, vp10_mv_class_tree);
+ vp10_tokens_from_tree(mv_class0_encodings, vp10_mv_class0_tree);
+ vp10_tokens_from_tree(mv_fp_encodings, vp10_mv_fp_tree);
+}
+
+static void encode_mv_component(vp10_writer *w, int comp,
+ const nmv_component *mvcomp, int usehp) {
+ int offset;
+ const int sign = comp < 0;
+ const int mag = sign ? -comp : comp;
+ const int mv_class = vp10_get_mv_class(mag - 1, &offset);
+ const int d = offset >> 3; // integer mv data: offset bits 3 and up
+ const int fr = (offset >> 1) & 3; // fractional mv data: offset bits 1-2
+ const int hp = offset & 1; // high precision mv data: offset bit 0
+
+ assert(comp != 0); // a zero component must not be coded through here
+
+ // Sign: 1 for a negative component
+ vp10_write(w, sign, mvcomp->sign);
+
+ // Class of (magnitude - 1)
+ vp10_write_token(w, vp10_mv_class_tree, mvcomp->classes,
+ &mv_class_encodings[mv_class]);
+
+ // Integer bits: a class0 token, or n single bits of d, lowest bit first
+ if (mv_class == MV_CLASS_0) {
+ vp10_write_token(w, vp10_mv_class0_tree, mvcomp->class0,
+ &mv_class0_encodings[d]);
+ } else {
+ int i;
+ const int n = mv_class + CLASS0_BITS - 1; // number of bits
+ for (i = 0; i < n; ++i) vp10_write(w, (d >> i) & 1, mvcomp->bits[i]);
+ }
+
+ // Fractional bits; class 0 uses the probabilities selected by d
+ vp10_write_token(w, vp10_mv_fp_tree,
+ mv_class == MV_CLASS_0 ? mvcomp->class0_fp[d] : mvcomp->fp,
+ &mv_fp_encodings[fr]);
+
+ // High precision bit, written only when usehp is nonzero
+ if (usehp)
+ vp10_write(w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp : mvcomp->hp);
+}
+
+// Builds the cost of coding each motion vector component difference.
+// mvcost[v] / mvcost[-v] receive the cost of magnitude v with sign bit 0 /
+// sign bit 1 for v in [1, MV_MAX]; mvcost[0] is 0. The high precision bit is
+// only costed when usehp is nonzero.
+static void build_nmv_component_cost_table(int *mvcost,
+                                           const nmv_component *const mvcomp,
+                                           int usehp) {
+  int i, v;
+  int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
+  int bits_cost[MV_OFFSET_BITS][2];
+  int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE], fp_cost[MV_FP_SIZE];
+  int class0_hp_cost[2], hp_cost[2];
+
+  // Per-symbol costs derived from this component's probabilities.
+  sign_cost[0] = vp10_cost_zero(mvcomp->sign);
+  sign_cost[1] = vp10_cost_one(mvcomp->sign);
+  vp10_cost_tokens(class_cost, mvcomp->classes, vp10_mv_class_tree);
+  vp10_cost_tokens(class0_cost, mvcomp->class0, vp10_mv_class0_tree);
+  for (i = 0; i < MV_OFFSET_BITS; ++i) {
+    bits_cost[i][0] = vp10_cost_zero(mvcomp->bits[i]);
+    bits_cost[i][1] = vp10_cost_one(mvcomp->bits[i]);
+  }
+
+  for (i = 0; i < CLASS0_SIZE; ++i)
+    vp10_cost_tokens(class0_fp_cost[i], mvcomp->class0_fp[i], vp10_mv_fp_tree);
+  vp10_cost_tokens(fp_cost, mvcomp->fp, vp10_mv_fp_tree);
+
+  if (usehp) {  // the hp tables are only filled, and only read, when usehp
+    class0_hp_cost[0] = vp10_cost_zero(mvcomp->class0_hp);
+    class0_hp_cost[1] = vp10_cost_one(mvcomp->class0_hp);
+    hp_cost[0] = vp10_cost_zero(mvcomp->hp);
+    hp_cost[1] = vp10_cost_one(mvcomp->hp);
+  }
+
+  // Fill the table for every magnitude; the caller's pointer must allow
+  // indexing from -MV_MAX to MV_MAX.
+  mvcost[0] = 0;
+  for (v = 1; v <= MV_MAX; ++v) {
+    int offset;
+    const int mv_class = vp10_get_mv_class(v - 1, &offset);
+    const int is_class0 = (mv_class == MV_CLASS_0);
+    const int d = offset >> 3;         // integer mv data
+    const int fr = (offset >> 1) & 3;  // fractional pel mv data
+    const int hp = offset & 1;         // high precision mv data
+    int cost = class_cost[mv_class];
+
+    // Class 0 has its own integer and fractional tables, selected by d.
+    if (is_class0) {
+      cost += class0_cost[d] + class0_fp_cost[d][fr];
+    } else {
+      const int n = mv_class + CLASS0_BITS - 1;  // number of bits
+      for (i = 0; i < n; ++i) cost += bits_cost[i][(d >> i) & 1];
+      cost += fp_cost[fr];
+    }
+    if (usehp) cost += is_class0 ? class0_hp_cost[hp] : hp_cost[hp];
+
+    mvcost[v] = cost + sign_cost[0];
+    mvcost[-v] = cost + sign_cost[1];
+  }
+}
+
+static void update_mv(vp10_writer *w, const unsigned int ct[2], vpx_prob *cur_p,
+                      vpx_prob upd_p) {
+  vp10_cond_prob_diff_update(w, cur_p, ct);
+  (void)upd_p;  // accepted for the callers' sake; this update does not use it
+}
+
+static void write_mv_update(const vpx_tree_index *tree,
+                            vpx_prob probs[/*n - 1*/],
+                            const unsigned int counts[/*n - 1*/], int n,
+                            vp10_writer *w) {
+  // branch_ct has room for 32 branch count pairs, which bounds n.
+  unsigned int branch_ct[32][2];
+  int node = 0;
+  assert(n <= 32);
+  vp10_tree_probs_from_distribution(tree, branch_ct, counts);
+  while (node < n - 1) {
+    update_mv(w, branch_ct[node], &probs[node], MV_UPDATE_PROB);
+    ++node;
+  }
+}
+
+void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vp10_writer *w,
+ nmv_context_counts *const nmv_counts) {
+ int i, j; // i: component index (0, 1); j: index within a component's tables
+#if CONFIG_REF_MV
+ int nmv_ctx = 0;
+ for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) { // one set per context
+ nmv_context *const mvc = &cm->fc->nmvc[nmv_ctx];
+ nmv_context_counts *const counts = &nmv_counts[nmv_ctx];
+ write_mv_update(vp10_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS,
+ w);
+
+ vp10_cond_prob_diff_update(w, &mvc->zero_rmv, counts->zero_rmv);
+
+ for (i = 0; i < 2; ++i) { // sign, class, class0 and integer-bit probabilities
+ nmv_component *comp = &mvc->comps[i];
+ nmv_component_counts *comp_counts = &counts->comps[i];
+
+ update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB);
+ write_mv_update(vp10_mv_class_tree, comp->classes, comp_counts->classes,
+ MV_CLASSES, w);
+ write_mv_update(vp10_mv_class0_tree, comp->class0, comp_counts->class0,
+ CLASS0_SIZE, w);
+ for (j = 0; j < MV_OFFSET_BITS; ++j)
+ update_mv(w, comp_counts->bits[j], &comp->bits[j], MV_UPDATE_PROB);
+ }
+
+ for (i = 0; i < 2; ++i) { // fractional probabilities
+ for (j = 0; j < CLASS0_SIZE; ++j)
+ write_mv_update(vp10_mv_fp_tree, mvc->comps[i].class0_fp[j],
+ counts->comps[i].class0_fp[j], MV_FP_SIZE, w);
+
+ write_mv_update(vp10_mv_fp_tree, mvc->comps[i].fp, counts->comps[i].fp,
+ MV_FP_SIZE, w);
+ }
+
+ if (usehp) { // high precision probabilities are only updated when usehp is set
+ for (i = 0; i < 2; ++i) {
+ update_mv(w, counts->comps[i].class0_hp, &mvc->comps[i].class0_hp,
+ MV_UPDATE_PROB);
+ update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, MV_UPDATE_PROB);
+ }
+ }
+ }
+#else
+ nmv_context *const mvc = &cm->fc->nmvc; // single context in this configuration
+ nmv_context_counts *const counts = nmv_counts;
+
+ write_mv_update(vp10_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS,
+ w);
+
+ for (i = 0; i < 2; ++i) { // sign, class, class0 and integer-bit probabilities
+ nmv_component *comp = &mvc->comps[i];
+ nmv_component_counts *comp_counts = &counts->comps[i];
+
+ update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB);
+ write_mv_update(vp10_mv_class_tree, comp->classes, comp_counts->classes,
+ MV_CLASSES, w);
+ write_mv_update(vp10_mv_class0_tree, comp->class0, comp_counts->class0,
+ CLASS0_SIZE, w);
+ for (j = 0; j < MV_OFFSET_BITS; ++j)
+ update_mv(w, comp_counts->bits[j], &comp->bits[j], MV_UPDATE_PROB);
+ }
+
+ for (i = 0; i < 2; ++i) { // fractional probabilities
+ for (j = 0; j < CLASS0_SIZE; ++j)
+ write_mv_update(vp10_mv_fp_tree, mvc->comps[i].class0_fp[j],
+ counts->comps[i].class0_fp[j], MV_FP_SIZE, w);
+
+ write_mv_update(vp10_mv_fp_tree, mvc->comps[i].fp, counts->comps[i].fp,
+ MV_FP_SIZE, w);
+ }
+
+ if (usehp) { // high precision probabilities are only updated when usehp is set
+ for (i = 0; i < 2; ++i) {
+ update_mv(w, counts->comps[i].class0_hp, &mvc->comps[i].class0_hp,
+ MV_UPDATE_PROB);
+ update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, MV_UPDATE_PROB);
+ }
+ }
+#endif
+}
+
+void vp10_encode_mv(VP10_COMP *cpi, vp10_writer *w, const MV *mv, const MV *ref,
+#if CONFIG_REF_MV
+ int is_compound,
+#endif
+ const nmv_context *mvctx, int usehp) {
+ const MV diff = { mv->row - ref->row, mv->col - ref->col }; // value that is coded
+ const MV_JOINT_TYPE j = vp10_get_mv_joint(&diff);
+ usehp = usehp && vp10_use_mv_hp(ref); // hp is dropped when ref does not allow it
+
+#if CONFIG_REF_MV && !CONFIG_EXT_INTER
+ if (is_compound) {
+ vp10_write(w, (j == MV_JOINT_ZERO), mvctx->zero_rmv); // flag a zero difference
+ if (j == MV_JOINT_ZERO) return; // nothing else is written for a zero difference
+ } else {
+ if (j == MV_JOINT_ZERO) assert(0); // non-compound: zero difference is unexpected
+ }
+#endif
+
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ (void)is_compound;
+#endif
+
+ vp10_write_token(w, vp10_mv_joint_tree, mvctx->joints,
+ &mv_joint_encodings[j]);
+ if (mv_joint_vertical(j)) // row component, coded with comps[0]
+ encode_mv_component(w, diff.row, &mvctx->comps[0], usehp);
+
+ if (mv_joint_horizontal(j)) // column component, coded with comps[1]
+ encode_mv_component(w, diff.col, &mvctx->comps[1], usehp);
+
+ // If auto_mv_step_size is enabled then keep track of the largest
+ // motion vector component used (taken from mv itself, shifted right by 3).
+ if (cpi->sf.mv.auto_mv_step_size) {
+ unsigned int maxv = VPXMAX(abs(mv->row), abs(mv->col)) >> 3;
+ cpi->max_mv_magnitude = VPXMAX(maxv, cpi->max_mv_magnitude);
+ }
+}
+
+void vp10_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
+ const nmv_context *ctx, int usehp) {
+ vp10_cost_tokens(mvjoint, ctx->joints, vp10_mv_joint_tree); // joint type costs
+ build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], usehp); // comps[0]
+ build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp); // comps[1]
+}
+
+#if CONFIG_EXT_INTER
+static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
+ const int_mv mvs[2],
+#if CONFIG_REF_MV
+ const int_mv pred_mvs[2],
+#endif
+ nmv_context_counts *nmv_counts) {
+ int i;
+ PREDICTION_MODE mode = mbmi->mode;
+ int mv_idx = (mode == NEWFROMNEARMV); // NEWFROMNEARMV reads ref_mvs[..][1]
+#if !CONFIG_REF_MV
+ nmv_context_counts *counts = nmv_counts;
+#endif
+
+ if (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV) {
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i) { // every reference is counted
+ const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][mv_idx].as_mv;
+ const MV diff = { mvs[i].as_mv.row - ref->row,
+ mvs[i].as_mv.col - ref->col };
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
+ nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+ (void)pred_mvs;
+#endif
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ }
+ } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) { // only mvs[1] is counted
+ const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv;
+ const MV diff = { mvs[1].as_mv.row - ref->row,
+ mvs[1].as_mv.col - ref->col };
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
+ nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) { // only mvs[0] is counted
+ const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
+ const MV diff = { mvs[0].as_mv.row - ref->row,
+ mvs[0].as_mv.col - ref->col };
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
+ nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ }
+}
+
+static void inc_mvs_sub8x8(const MODE_INFO *mi, int block, const int_mv mvs[2],
+#if CONFIG_REF_MV
+ const MB_MODE_INFO_EXT *mbmi_ext,
+#endif
+ nmv_context_counts *nmv_counts) {
+ int i;
+ PREDICTION_MODE mode = mi->bmi[block].as_mode; // mode of this sub-block
+#if CONFIG_REF_MV
+ const MB_MODE_INFO *mbmi = &mi->mbmi;
+#else
+ nmv_context_counts *counts = nmv_counts;
+#endif
+
+ if (mode == NEWMV || mode == NEWFROMNEARMV || mode == NEW_NEWMV) {
+ for (i = 0; i < 1 + has_second_ref(&mi->mbmi); ++i) { // every reference is counted
+ const MV *ref = &mi->bmi[block].ref_mv[i].as_mv;
+ const MV diff = { mvs[i].as_mv.row - ref->row,
+ mvs[i].as_mv.col - ref->col };
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
+ nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ }
+ } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) { // only mvs[1] is counted
+ const MV *ref = &mi->bmi[block].ref_mv[1].as_mv;
+ const MV diff = { mvs[1].as_mv.row - ref->row,
+ mvs[1].as_mv.col - ref->col };
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[1]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[1]]);
+ nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) { // only mvs[0] is counted
+ const MV *ref = &mi->bmi[block].ref_mv[0].as_mv;
+ const MV diff = { mvs[0].as_mv.row - ref->row,
+ mvs[0].as_mv.col - ref->col };
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[0]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]]);
+ nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+#endif
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ }
+}
+#else
+static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
+ const int_mv mvs[2],
+#if CONFIG_REF_MV
+ const int_mv pred_mvs[2],
+#endif
+ nmv_context_counts *nmv_counts) {
+ int i;
+#if !CONFIG_REF_MV
+ nmv_context_counts *counts = nmv_counts;
+#endif
+
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i) { // one or two references
+#if CONFIG_REF_MV
+ int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[mbmi->ref_frame[i]],
+ mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]]);
+ nmv_context_counts *counts = &nmv_counts[nmv_ctx];
+ const MV *ref = &pred_mvs[i].as_mv;
+#else
+ const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv;
+#endif
+ const MV diff = { mvs[i].as_mv.row - ref->row,
+ mvs[i].as_mv.col - ref->col }; // difference from the reference MV
+ vp10_inc_mv(&diff, counts, vp10_use_mv_hp(ref));
+ }
+}
+#endif // CONFIG_EXT_INTER
+
+void vp10_update_mv_count(ThreadData *td) {
+ const MACROBLOCKD *xd = &td->mb.e_mbd;
+ const MODE_INFO *mi = xd->mi[0];
+ const MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const MB_MODE_INFO_EXT *mbmi_ext = td->mb.mbmi_ext;
+
+ if (mbmi->sb_type < BLOCK_8X8) { // sub-8x8: count each sub-block separately
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int idx, idy;
+
+ for (idy = 0; idy < 2; idy += num_4x4_h) {
+ for (idx = 0; idx < 2; idx += num_4x4_w) {
+ const int i = idy * 2 + idx; // index into mi->bmi[]
+
+#if CONFIG_EXT_INTER
+ if (have_newmv_in_inter_mode(mi->bmi[i].as_mode))
+ inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv,
+#if CONFIG_REF_MV
+ mbmi_ext, td->counts->mv);
+#else
+ &td->counts->mv);
+#endif
+#else
+ if (mi->bmi[i].as_mode == NEWMV)
+ inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv,
+#if CONFIG_REF_MV
+ mi->bmi[i].pred_mv_s8, td->counts->mv);
+#else
+ &td->counts->mv);
+#endif
+#endif // CONFIG_EXT_INTER
+ }
+ }
+ } else { // 8x8 and larger: count the block-level motion vectors
+#if CONFIG_EXT_INTER
+ if (have_newmv_in_inter_mode(mbmi->mode))
+#else
+ if (mbmi->mode == NEWMV)
+#endif // CONFIG_EXT_INTER
+ inc_mvs(mbmi, mbmi_ext, mbmi->mv,
+#if CONFIG_REF_MV
+ mbmi->pred_mv, td->counts->mv);
+#else
+ &td->counts->mv);
+#endif
+ }
+}
diff --git a/av1/encoder/encodemv.h b/av1/encoder/encodemv.h
new file mode 100644
index 0000000..6cb57c2
--- /dev/null
+++ b/av1/encoder/encodemv.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_ENCODEMV_H_
+#define VP10_ENCODER_ENCODEMV_H_
+
+#include "av1/encoder/encoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_entropy_mv_init(void);
+
+void vp10_write_nmv_probs(VP10_COMMON *cm, int usehp, vp10_writer *w,
+ nmv_context_counts *const counts);
+
+void vp10_encode_mv(VP10_COMP *cpi, vp10_writer *w, const MV *mv, const MV *ref,
+#if CONFIG_REF_MV
+ int is_compound,
+#endif
+ const nmv_context *mvctx, int usehp);
+
+void vp10_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
+ const nmv_context *mvctx, int usehp);
+
+void vp10_update_mv_count(ThreadData *td);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_ENCODEMV_H_
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
new file mode 100644
index 0000000..5196d9c
--- /dev/null
+++ b/av1/encoder/encoder.c
@@ -0,0 +1,5607 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+
+#include "av1/common/alloccommon.h"
+#if CONFIG_CLPF
+#include "av1/common/clpf.h"
+#endif
+#if CONFIG_DERING
+#include "av1/common/dering.h"
+#endif // CONFIG_DERING
+#include "av1/common/filter.h"
+#include "av1/common/idct.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/reconintra.h"
+#include "av1/common/tile_common.h"
+
+#include "av1/encoder/aq_complexity.h"
+#include "av1/encoder/aq_cyclicrefresh.h"
+#include "av1/encoder/aq_variance.h"
+#include "av1/encoder/bitstream.h"
+#if CONFIG_ANS
+#include "av1/encoder/buf_ans.h"
+#endif
+#include "av1/encoder/context_tree.h"
+#include "av1/encoder/encodeframe.h"
+#include "av1/encoder/encodemv.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/ethread.h"
+#include "av1/encoder/firstpass.h"
+#include "av1/encoder/mbgraph.h"
+#include "av1/encoder/picklpf.h"
+#if CONFIG_LOOP_RESTORATION
+#include "av1/encoder/pickrst.h"
+#endif // CONFIG_LOOP_RESTORATION
+#include "av1/encoder/ratectrl.h"
+#include "av1/encoder/rd.h"
+#include "av1/encoder/resize.h"
+#include "av1/encoder/segmentation.h"
+#include "av1/encoder/speed_features.h"
+#include "av1/encoder/temporal_filter.h"
+
+#include "./vp10_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "./vpx_scale_rtcd.h"
+#include "aom_dsp/psnr.h"
+#if CONFIG_INTERNAL_STATS
+#include "aom_dsp/ssim.h"
+#endif
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_dsp/vpx_filter.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/system_state.h"
+#include "aom_ports/vpx_timer.h"
+#include "aom_scale/vpx_scale.h"
+#include "aom_util/debug_util.h"
+
+#define AM_SEGMENT_ID_INACTIVE 7
+#define AM_SEGMENT_ID_ACTIVE 0
+
+#define SHARP_FILTER_QTHRESH 0 /* Q threshold for 8-tap sharp filter */
+
+#define ALTREF_HIGH_PRECISION_MV 1 // Whether to use high precision mv
+ // for altref computation.
+#define HIGH_PRECISION_MV_QTHRESH 200 // Q threshold for high precision
+ // mv. Choose a very high value for
+ // now so that HIGH_PRECISION is always
+ // chosen.
+// #define OUTPUT_YUV_REC
+#ifdef OUTPUT_YUV_DENOISED
+FILE *yuv_denoised_file = NULL;
+#endif
+#ifdef OUTPUT_YUV_SKINMAP
+FILE *yuv_skinmap_file = NULL;
+#endif
+#ifdef OUTPUT_YUV_REC
+FILE *yuv_rec_file;
+#define FILE_NAME_LEN 100
+#endif
+
+#if 0
+FILE *framepsnr;
+FILE *kf_list;
+FILE *keyfile;
+#endif
+
+static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
+  // Reports the scaling mode as the pair hr, hs. Start from 1 and 1, which
+  // is what NORMAL and (after the assert) unrecognized modes report.
+  int num = 1;
+  int den = 1;
+
+  switch (mode) {
+    case NORMAL: break;
+    case FOURFIVE:
+      num = 4;
+      den = 5;
+      break;
+    case THREEFIVE:
+      num = 3;
+      den = 5;
+      break;
+    case ONETWO:
+      den = 2;
+      break;
+    default: assert(0); break;
+  }
+
+  *hr = num;
+  *hs = den;
+}
+
+// Resets every AM_SEGMENT_ID_INACTIVE entry of the segmentation map to
+// AM_SEGMENT_ID_ACTIVE; other segment ids are kept, so memset cannot be used.
+static void suppress_active_map(VP10_COMP *cpi) {
+  unsigned char *const seg_map = cpi->segmentation_map;
+  const int num_mi = cpi->common.mi_rows * cpi->common.mi_cols;
+  int i;
+  if (!cpi->active_map.enabled && !cpi->active_map.update) return;
+  for (i = 0; i < num_mi; ++i)
+    if (seg_map[i] == AM_SEGMENT_ID_INACTIVE) seg_map[i] = AM_SEGMENT_ID_ACTIVE;
+}
+
+static void apply_active_map(VP10_COMP *cpi) {
+ struct segmentation *const seg = &cpi->common.seg;
+ unsigned char *const seg_map = cpi->segmentation_map;
+ const unsigned char *const active_map = cpi->active_map.map;
+ int i;
+
+ assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE);
+
+ if (frame_is_intra_only(&cpi->common)) { // intra-only: disable and force an update
+ cpi->active_map.enabled = 0;
+ cpi->active_map.update = 1;
+ }
+
+ if (cpi->active_map.update) {
+ if (cpi->active_map.enabled) {
+ for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i) // only ACTIVE ids
+ if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i];
+ vp10_enable_segmentation(seg);
+ vp10_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
+ vp10_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
+ // Setting the data to -MAX_LOOP_FILTER will result in the computed loop
+ // filter level being zero regardless of the value of seg->abs_delta.
+ vp10_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF,
+ -MAX_LOOP_FILTER);
+ } else {
+ vp10_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP);
+ vp10_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF);
+ if (seg->enabled) { // flag both the segment data and the map for update
+ seg->update_data = 1;
+ seg->update_map = 1;
+ }
+ }
+ cpi->active_map.update = 0; // the pending update has been consumed
+ }
+}
+
+int vp10_set_active_map(VP10_COMP *cpi, unsigned char *new_map_16x16, int rows,
+                        int cols) {
+  unsigned char *const active_map_8x8 = cpi->active_map.map;
+  const int mi_rows = cpi->common.mi_rows;
+  const int mi_cols = cpi->common.mi_cols;
+  int r, c;
+
+  // The caller's map must use the frame's mb_rows x mb_cols layout.
+  if (rows != cpi->common.mb_rows || cols != cpi->common.mb_cols) return -1;
+
+  cpi->active_map.update = 1;
+  if (new_map_16x16 == NULL) {
+    // No map supplied: turn the active map off.
+    cpi->active_map.enabled = 0;
+    return 0;
+  }
+  // Entry (r >> 1, c >> 1) of the input decides mode-info position (r, c).
+  for (r = 0; r < mi_rows; ++r) {
+    const unsigned char *const src_row = new_map_16x16 + (r >> 1) * cols;
+    for (c = 0; c < mi_cols; ++c)
+      active_map_8x8[r * mi_cols + c] =
+          src_row[c >> 1] ? AM_SEGMENT_ID_ACTIVE : AM_SEGMENT_ID_INACTIVE;
+  }
+  cpi->active_map.enabled = 1;
+  return 0;
+}
+
+int vp10_get_active_map(VP10_COMP *cpi, unsigned char *new_map_16x16, int rows,
+                        int cols) {
+  const unsigned char *const seg_map_8x8 = cpi->segmentation_map;
+  const int mi_rows = cpi->common.mi_rows;
+  const int mi_cols = cpi->common.mi_cols;
+  int r, c;
+
+  if (new_map_16x16 == NULL) return -1;
+  if (rows != cpi->common.mb_rows || cols != cpi->common.mb_cols) return -1;
+
+  // With the active map disabled every entry is reported as 1.
+  memset(new_map_16x16, !cpi->active_map.enabled, rows * cols);
+  if (!cpi->active_map.enabled) return 0;
+
+  for (r = 0; r < mi_rows; ++r) {
+    unsigned char *const out_row = new_map_16x16 + (r >> 1) * cols;
+    for (c = 0; c < mi_cols; ++c) {
+      // Cyclic refresh segments are considered active despite not having
+      // AM_SEGMENT_ID_ACTIVE
+      out_row[c >> 1] |= seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE;
+    }
+  }
+  return 0;
+}
+
+// Records the MV precision setting in the common state and points the
+// macroblock's active cost tables at the matching precision variant.
+void vp10_set_high_precision_mv(VP10_COMP *cpi, int allow_high_precision_mv) {
+  MACROBLOCK *const mb = &cpi->td.mb;
+  cpi->common.allow_high_precision_mv = allow_high_precision_mv;
+#if CONFIG_REF_MV
+  if (cpi->common.allow_high_precision_mv) {
+    int i;
+    for (i = 0; i < NMV_CONTEXTS; ++i) mb->mv_cost_stack[i] = mb->nmvcost_hp[i];
+    mb->mvsadcost = mb->nmvsadcost_hp;  // context independent: set once
+  } else {
+    int i;
+    for (i = 0; i < NMV_CONTEXTS; ++i) mb->mv_cost_stack[i] = mb->nmvcost[i];
+    mb->mvsadcost = mb->nmvsadcost;  // context independent: set once
+  }
+#else
+  // NOTE(review): mvsadcost is pointed at the nmvcost tables here, while the
+  // CONFIG_REF_MV branch uses the nmvsadcost tables - confirm this is intended.
+  if (cpi->common.allow_high_precision_mv) {
+    mb->mvcost = mb->nmvcost_hp;
+    mb->mvsadcost = mb->nmvcost_hp;
+  } else {
+    mb->mvcost = mb->nmvcost;
+    mb->mvsadcost = mb->nmvcost;
+  }
+#endif
+}
+
+
+static BLOCK_SIZE select_sb_size(const VP10_COMP *const cpi) {
+#if CONFIG_EXT_PARTITION
+ if (cpi->oxcf.superblock_size == VPX_SUPERBLOCK_SIZE_64X64) // explicit request
+ return BLOCK_64X64;
+
+ if (cpi->oxcf.superblock_size == VPX_SUPERBLOCK_SIZE_128X128) // explicit request
+ return BLOCK_128X128;
+
+ assert(cpi->oxcf.superblock_size == VPX_SUPERBLOCK_SIZE_DYNAMIC); // only case left
+
+ assert(IMPLIES(cpi->common.tile_cols > 1,
+ cpi->common.tile_width % MAX_MIB_SIZE == 0));
+ assert(IMPLIES(cpi->common.tile_rows > 1,
+ cpi->common.tile_height % MAX_MIB_SIZE == 0));
+
+ // TODO(any): Possibly could improve this with a heuristic.
+ return BLOCK_128X128;
+#else
+ (void)cpi; // unused without CONFIG_EXT_PARTITION: the size is always 64x64
+ return BLOCK_64X64;
+#endif // CONFIG_EXT_PARTITION
+}
+
+static void setup_frame(VP10_COMP *cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ // Set up entropy context depending on frame type. The decoder mandates
+ // the use of the default context, index 0, for keyframes and inter
+ // frames where the error_resilient_mode or intra_only flag is set. For
+ // other inter-frames the context index is chosen by the if/else chain
+ // below from the frame's refresh flags.
+ if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
+ vp10_setup_past_independence(cm);
+ } else {
+#if CONFIG_EXT_REFS
+ const GF_GROUP *gf_group = &cpi->twopass.gf_group;
+ if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)
+ cm->frame_context_idx = EXT_ARF_FRAME;
+ else if (cpi->refresh_alt_ref_frame)
+ cm->frame_context_idx = ARF_FRAME;
+#else
+ if (cpi->refresh_alt_ref_frame) cm->frame_context_idx = ARF_FRAME;
+#endif
+ else if (cpi->rc.is_src_frame_alt_ref)
+ cm->frame_context_idx = OVERLAY_FRAME;
+ else if (cpi->refresh_golden_frame)
+ cm->frame_context_idx = GLD_FRAME;
+#if CONFIG_EXT_REFS
+ else if (cpi->refresh_bwd_ref_frame)
+ cm->frame_context_idx = BRF_FRAME;
+#endif
+ else
+ cm->frame_context_idx = REGULAR_FRAME;
+ }
+
+ if (cm->frame_type == KEY_FRAME) { // key frames refresh golden and alt-ref
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
+ vp10_zero(cpi->interp_filter_selected);
+ } else { // otherwise start from the saved context chosen above
+ *cm->fc = cm->frame_contexts[cm->frame_context_idx];
+ vp10_zero(cpi->interp_filter_selected[0]);
+ }
+
+ cpi->vaq_refresh = 0;
+
+ set_sb_size(cm, select_sb_size(cpi));
+}
+
+static void vp10_enc_setup_mi(VP10_COMMON *cm) {
+  const int stride = cm->mi_stride;
+  const int bordered_rows = cm->mi_rows + 1;
+  int row;
+  // Current frame: clear the whole mode-info array and its pointer grid.
+  memset(cm->mip, 0, stride * bordered_rows * sizeof(*cm->mip));
+  memset(cm->mi_grid_base, 0,
+         stride * bordered_rows * sizeof(*cm->mi_grid_base));
+  // Previous frame: clear only the top border row and left border column.
+  memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * stride);
+  for (row = 1; row < bordered_rows; ++row)
+    memset(&cm->prev_mip[row * stride], 0, sizeof(*cm->prev_mip));
+  cm->mi = cm->mip + stride + 1;
+  cm->prev_mi = cm->prev_mip + stride + 1;
+  cm->mi_grid_visible = cm->mi_grid_base + stride + 1;
+  cm->prev_mi_grid_visible = cm->prev_mi_grid_base + stride + 1;
+}
+
+static int vp10_enc_alloc_mi(VP10_COMMON *cm, int mi_size) {
+  // Returns 0 on success, 1 on the first failed allocation (nothing is freed).
+  cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
+  if (cm->mip == NULL) return 1;
+  cm->prev_mip = vpx_calloc(mi_size, sizeof(*cm->prev_mip));
+  if (cm->prev_mip == NULL) return 1;
+  cm->mi_alloc_size = mi_size;
+  cm->mi_grid_base =
+      (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->mi_grid_base));
+  if (cm->mi_grid_base == NULL) return 1;
+  cm->prev_mi_grid_base =
+      (MODE_INFO **)vpx_calloc(mi_size, sizeof(*cm->prev_mi_grid_base));
+  if (cm->prev_mi_grid_base == NULL) return 1;
+  return 0;
+}
+
+static void vp10_enc_free_mi(VP10_COMMON *cm) {
+  // Release the current and previous mode-info arrays together with their
+  // pointer grids, then reset all four pointers.
+  vpx_free(cm->mip);
+  vpx_free(cm->prev_mip);
+  vpx_free(cm->mi_grid_base);
+  vpx_free(cm->prev_mi_grid_base);
+  cm->mip = cm->prev_mip = NULL;
+  cm->mi_grid_base = cm->prev_mi_grid_base = NULL;
+}
+
+static void vp10_swap_mi_and_prev_mi(VP10_COMMON *cm) {
+  // Current mip will be the prev_mip for the next frame.
+  MODE_INFO *const old_prev_mip = cm->prev_mip;
+  MODE_INFO **const old_prev_grid = cm->prev_mi_grid_base;
+
+  cm->prev_mip = cm->mip;
+  cm->prev_mi_grid_base = cm->mi_grid_base;
+  cm->mip = old_prev_mip;
+  cm->mi_grid_base = old_prev_grid;
+
+  // Re-derive the visible pointers: each sits mi_stride + 1 past its base.
+  cm->mi = cm->mip + cm->mi_stride + 1;
+  cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
+  cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
+  cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
+}
+
+void vp10_initialize_enc(void) {
+  // One-shot guard: the setup calls below run on the first call only.
+  static volatile int init_done = 0;
+
+  if (init_done) return;
+  vp10_rtcd();
+  vpx_dsp_rtcd();
+  vpx_scale_rtcd();
+  vp10_init_intra_predictors();
+  vp10_init_me_luts();
+  vp10_rc_init_minq_luts();
+  vp10_entropy_mv_init();
+  vp10_encode_token_init();
+#if CONFIG_EXT_INTER
+  vp10_init_wedge_masks();
+#endif
+  init_done = 1;
+}
+
+static void dealloc_compressor_data(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  int i;
+  // Releases the encoder instance's buffers and cost tables.
+  vpx_free(cpi->mbmi_ext_base);
+  cpi->mbmi_ext_base = NULL;
+
+  vpx_free(cpi->tile_data);
+  cpi->tile_data = NULL;
+
+  // Delete segmentation map
+  vpx_free(cpi->segmentation_map);
+  cpi->segmentation_map = NULL;
+  vpx_free(cpi->coding_context.last_frame_seg_map_copy);
+  cpi->coding_context.last_frame_seg_map_copy = NULL;
+
+#if CONFIG_REF_MV
+  for (i = 0; i < NMV_CONTEXTS; ++i) {
+    vpx_free(cpi->nmv_costs[i][0]);
+    vpx_free(cpi->nmv_costs[i][1]);
+    vpx_free(cpi->nmv_costs_hp[i][0]);
+    vpx_free(cpi->nmv_costs_hp[i][1]);
+    cpi->nmv_costs[i][0] = NULL;
+    cpi->nmv_costs[i][1] = NULL;
+    cpi->nmv_costs_hp[i][0] = NULL;
+    cpi->nmv_costs_hp[i][1] = NULL;
+  }
+#endif
+
+  vpx_free(cpi->nmvcosts[0]);
+  vpx_free(cpi->nmvcosts[1]);
+  cpi->nmvcosts[0] = NULL;
+  cpi->nmvcosts[1] = NULL;
+
+  vpx_free(cpi->nmvcosts_hp[0]);
+  vpx_free(cpi->nmvcosts_hp[1]);
+  cpi->nmvcosts_hp[0] = NULL;
+  cpi->nmvcosts_hp[1] = NULL;
+
+  vpx_free(cpi->nmvsadcosts[0]);
+  vpx_free(cpi->nmvsadcosts[1]);
+  cpi->nmvsadcosts[0] = NULL;
+  cpi->nmvsadcosts[1] = NULL;
+
+  vpx_free(cpi->nmvsadcosts_hp[0]);
+  vpx_free(cpi->nmvsadcosts_hp[1]);
+  cpi->nmvsadcosts_hp[0] = NULL;
+  cpi->nmvsadcosts_hp[1] = NULL;
+
+  vp10_cyclic_refresh_free(cpi->cyclic_refresh);
+  cpi->cyclic_refresh = NULL;
+
+  vpx_free(cpi->active_map.map);
+  cpi->active_map.map = NULL;
+
+  // Free up-sampled reference buffers.
+  for (i = 0; i < (REF_FRAMES + 1); i++)
+    vpx_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf);
+
+  vp10_free_ref_frame_buffers(cm->buffer_pool);
+  vp10_free_context_buffers(cm);
+
+  vpx_free_frame_buffer(&cpi->last_frame_uf);
+#if CONFIG_LOOP_RESTORATION
+  vpx_free_frame_buffer(&cpi->last_frame_db);
+  vp10_free_restoration_buffers(cm);
+#endif  // CONFIG_LOOP_RESTORATION
+  vpx_free_frame_buffer(&cpi->scaled_source);
+  vpx_free_frame_buffer(&cpi->scaled_last_source);
+  vpx_free_frame_buffer(&cpi->alt_ref_buffer);
+  vp10_lookahead_destroy(cpi->lookahead);
+
+  vpx_free(cpi->tile_tok[0][0]);
+  cpi->tile_tok[0][0] = NULL;
+
+  vp10_free_pc_tree(&cpi->td);
+  vp10_free_var_tree(&cpi->td);
+
+  if (cpi->common.allow_screen_content_tools) {
+    vpx_free(cpi->td.mb.palette_buffer);
+    cpi->td.mb.palette_buffer = NULL;  // do not leave it dangling
+  }
+
+  vpx_free(cpi->source_diff_var);  // no NULL guard, as for the frees above
+  cpi->source_diff_var = NULL;
+#if CONFIG_ANS
+  vp10_buf_ans_free(&cpi->buf_ans);
+#endif  // CONFIG_ANS
+}
+
+static void save_coding_context(VP10_COMP *cpi) {
+ CODING_CONTEXT *const cc = &cpi->coding_context;
+ VP10_COMMON *cm = &cpi->common;
+#if CONFIG_REF_MV
+ int i;
+#endif
+
+// Stores a snapshot of key state variables in cpi->coding_context, which can
+// subsequently be restored with a call to restore_coding_context(). These
+// functions are intended for use in a re-code loop in vp10_compress_frame
+// where the quantizer value is adjusted between loop iterations.
+#if CONFIG_REF_MV
+ for (i = 0; i < NMV_CONTEXTS; ++i) {  // one set of MV cost tables per context
+ vp10_copy(cc->nmv_vec_cost[i], cpi->td.mb.nmv_vec_cost[i]);
+ memcpy(cc->nmv_costs[i][0], cpi->nmv_costs[i][0],
+ MV_VALS * sizeof(*cpi->nmv_costs[i][0]));
+ memcpy(cc->nmv_costs[i][1], cpi->nmv_costs[i][1],
+ MV_VALS * sizeof(*cpi->nmv_costs[i][1]));
+ memcpy(cc->nmv_costs_hp[i][0], cpi->nmv_costs_hp[i][0],
+ MV_VALS * sizeof(*cpi->nmv_costs_hp[i][0]));
+ memcpy(cc->nmv_costs_hp[i][1], cpi->nmv_costs_hp[i][1],
+ MV_VALS * sizeof(*cpi->nmv_costs_hp[i][1]));
+ }
+#else
+ vp10_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost);
+#endif
+
+ memcpy(cc->nmvcosts[0], cpi->nmvcosts[0],
+ MV_VALS * sizeof(*cpi->nmvcosts[0]));  // MV_VALS entries per table
+ memcpy(cc->nmvcosts[1], cpi->nmvcosts[1],
+ MV_VALS * sizeof(*cpi->nmvcosts[1]));
+ memcpy(cc->nmvcosts_hp[0], cpi->nmvcosts_hp[0],
+ MV_VALS * sizeof(*cpi->nmvcosts_hp[0]));
+ memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1],
+ MV_VALS * sizeof(*cpi->nmvcosts_hp[1]));
+
+ memcpy(cpi->coding_context.last_frame_seg_map_copy, cm->last_frame_seg_map,
+ (cm->mi_rows * cm->mi_cols));  // mi_rows * mi_cols bytes
+
+ vp10_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
+ vp10_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
+
+ cc->fc = *cm->fc;  // whole-struct copy of *cm->fc
+}
+
+static void restore_coding_context(VP10_COMP *cpi) {
+ CODING_CONTEXT *const cc = &cpi->coding_context;
+ VP10_COMMON *cm = &cpi->common;
+#if CONFIG_REF_MV
+ int i;
+#endif
+
+// Restore key state variables to the snapshot state stored in
+// cpi->coding_context by the previous call to save_coding_context().
+#if CONFIG_REF_MV
+ for (i = 0; i < NMV_CONTEXTS; ++i) {  // one set of MV cost tables per context
+ vp10_copy(cpi->td.mb.nmv_vec_cost[i], cc->nmv_vec_cost[i]);
+ memcpy(cpi->nmv_costs[i][0], cc->nmv_costs[i][0],
+ MV_VALS * sizeof(*cc->nmv_costs[i][0]));
+ memcpy(cpi->nmv_costs[i][1], cc->nmv_costs[i][1],
+ MV_VALS * sizeof(*cc->nmv_costs[i][1]));
+ memcpy(cpi->nmv_costs_hp[i][0], cc->nmv_costs_hp[i][0],
+ MV_VALS * sizeof(*cc->nmv_costs_hp[i][0]));
+ memcpy(cpi->nmv_costs_hp[i][1], cc->nmv_costs_hp[i][1],
+ MV_VALS * sizeof(*cc->nmv_costs_hp[i][1]));
+ }
+#else
+ vp10_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost);
+#endif
+
+ memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0]));
+ memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1]));
+ memcpy(cpi->nmvcosts_hp[0], cc->nmvcosts_hp[0],
+ MV_VALS * sizeof(*cc->nmvcosts_hp[0]));
+ memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1],
+ MV_VALS * sizeof(*cc->nmvcosts_hp[1]));
+
+ memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy,
+ (cm->mi_rows * cm->mi_cols));  // mi_rows * mi_cols bytes
+
+ vp10_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
+ vp10_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
+
+ *cm->fc = cc->fc;  // whole-struct copy back into *cm->fc
+}
+
+static void configure_static_seg_features(VP10_COMP *cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
+ struct segmentation *const seg = &cm->seg;
+ // Picks the segmentation state for this frame: key, alt-ref refresh, or other.
+ int high_q = (int)(rc->avg_q > 48.0);  // 1 when rc->avg_q is above 48
+ int qi_delta;
+
+ // Disable and clear down for KF
+ if (cm->frame_type == KEY_FRAME) {
+ // Clear down the global segmentation map
+ memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
+ seg->update_map = 0;
+ seg->update_data = 0;
+ cpi->static_mb_pct = 0;
+
+ // Disable segmentation
+ vp10_disable_segmentation(seg);
+
+ // Clear down the segment features.
+ vp10_clearall_segfeatures(seg);
+ } else if (cpi->refresh_alt_ref_frame) {
+ // If this is an alt ref frame
+ // Clear down the global segmentation map
+ memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
+ seg->update_map = 0;
+ seg->update_data = 0;
+ cpi->static_mb_pct = 0;
+
+ // Disable segmentation and individual segment features by default
+ vp10_disable_segmentation(seg);
+ vp10_clearall_segfeatures(seg);
+
+ // Scan frames from current to arf frame.
+ // This function re-enables segmentation if appropriate.
+ vp10_update_mbgraph_stats(cpi);
+
+ // If segmentation was enabled set those features needed for the
+ // arf itself.
+ if (seg->enabled) {
+ seg->update_map = 1;
+ seg->update_data = 1;
+
+ qi_delta =
+ vp10_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875, cm->bit_depth);
+ vp10_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);  // segment 1 only
+ vp10_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
+
+ vp10_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
+ vp10_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
+
+ // Where relevant assume segment data is delta data
+ seg->abs_delta = SEGMENT_DELTADATA;
+ }
+ } else if (seg->enabled) {
+ // All other frames if segmentation has been enabled
+
+ // First normal frame in a valid gf or alt ref group
+ if (rc->frames_since_golden == 0) {
+ // Set up segment features for normal frames in an arf group
+ if (rc->source_alt_ref_active) {
+ seg->update_map = 0;
+ seg->update_data = 1;
+ seg->abs_delta = SEGMENT_DELTADATA;
+
+ qi_delta = vp10_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125,
+ cm->bit_depth);
+ vp10_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2);
+ vp10_enable_segfeature(seg, 1, SEG_LVL_ALT_Q);
+
+ vp10_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2);
+ vp10_enable_segfeature(seg, 1, SEG_LVL_ALT_LF);
+
+ // Segment coding disabled for compred testing
+ if (high_q || (cpi->static_mb_pct == 100)) {
+ vp10_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
+ vp10_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
+ vp10_enable_segfeature(seg, 1, SEG_LVL_SKIP);
+ }
+ } else {
+ // Disable segmentation and clear down features if alt ref
+ // is not active for this group
+
+ vp10_disable_segmentation(seg);
+
+ memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
+
+ seg->update_map = 0;
+ seg->update_data = 0;
+
+ vp10_clearall_segfeatures(seg);
+ }
+ } else if (rc->is_src_frame_alt_ref) {
+ // Special case where we are coding over the top of a previous
+ // alt ref frame.
+ // Segment coding disabled for compred testing
+
+ // Enable ref frame features for segment 0 as well
+ vp10_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME);
+ vp10_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME);
+
+ // All mbs should use ALTREF_FRAME
+ vp10_clear_segdata(seg, 0, SEG_LVL_REF_FRAME);
+ vp10_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME);
+ vp10_clear_segdata(seg, 1, SEG_LVL_REF_FRAME);
+ vp10_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME);
+
+ // Skip all MBs if high Q (0,0 mv and skip coeffs)
+ if (high_q) {
+ vp10_enable_segfeature(seg, 0, SEG_LVL_SKIP);
+ vp10_enable_segfeature(seg, 1, SEG_LVL_SKIP);
+ }
+ // Enable data update
+ seg->update_data = 1;
+ } else {
+ // All other frames.
+
+ // No updates.. leave things as they are.
+ seg->update_map = 0;
+ seg->update_data = 0;
+ }
+ }  // when none of the three cases applies, seg is left untouched
+}
+
+static void update_reference_segmentation_map(VP10_COMP *cpi) {
+  // Copy each visible block's segment id into cm->last_frame_seg_map.
+  VP10_COMMON *const cm = &cpi->common;
+  MODE_INFO **const grid = cm->mi_grid_visible;
+  uint8_t *const seg_map = cm->last_frame_seg_map;
+  int r, c;
+
+  for (r = 0; r < cm->mi_rows; ++r) {
+    // Grid rows are mi_stride apart; the map is packed at mi_cols per row.
+    MODE_INFO **const mi_row = grid + r * cm->mi_stride;
+    uint8_t *const map_row = seg_map + r * cm->mi_cols;
+    for (c = 0; c < cm->mi_cols; ++c)
+      map_row[c] = mi_row[c]->mbmi.segment_id;
+  }
+}
+
+static void alloc_raw_frame_buffers(VP10_COMP *cpi) {
+ VP10_COMMON *cm = &cpi->common;
+ const VP10EncoderConfig *oxcf = &cpi->oxcf;
+ // Sets up cpi->lookahead and cpi->alt_ref_buffer at oxcf->width x oxcf->height.
+ if (!cpi->lookahead)  // an existing lookahead is kept as is
+ cpi->lookahead = vp10_lookahead_init(oxcf->width, oxcf->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ oxcf->lag_in_frames);
+ if (!cpi->lookahead)  // still NULL: report the failure through cm->error
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate lag buffers");
+
+ // TODO(agrange) Check if ARF is enabled and skip allocation if not.
+ if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, oxcf->width, oxcf->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VPX_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
+ NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate altref buffer");
+}
+
+static void alloc_util_frame_buffers(VP10_COMP *cpi) {
+ VP10_COMMON *const cm = &cpi->common;  // every buffer below uses cm's size
+ if (vpx_realloc_frame_buffer(&cpi->last_frame_uf, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VPX_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
+ NULL, NULL, NULL))  // non-zero return is treated as failure
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate last frame buffer");
+
+#if CONFIG_LOOP_RESTORATION
+ if (vpx_realloc_frame_buffer(&cpi->last_frame_db, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VPX_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
+ NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate last frame deblocked buffer");
+#endif // CONFIG_LOOP_RESTORATION
+
+ if (vpx_realloc_frame_buffer(&cpi->scaled_source, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VPX_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
+ NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate scaled source buffer");
+
+ if (vpx_realloc_frame_buffer(&cpi->scaled_last_source, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VPX_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
+ NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate scaled last source buffer");
+}
+
+// (Re)allocates cpi->mbmi_ext_base, zeroed; returns 0 on success, 1 on failure.
+static int alloc_context_buffers_ext(VP10_COMP *cpi) {
+  VP10_COMMON *cm = &cpi->common;
+  int mi_size = cm->mi_cols * cm->mi_rows;  // one entry per mi unit
+
+  vpx_free(cpi->mbmi_ext_base);  // drop any earlier buffer instead of leaking
+  cpi->mbmi_ext_base = vpx_calloc(mi_size, sizeof(*cpi->mbmi_ext_base));
+  return cpi->mbmi_ext_base == NULL;
+}
+
+void vp10_alloc_compressor_data(VP10_COMP *cpi) {
+  VP10_COMMON *cm = &cpi->common;
+  // Allocates context buffers, mbmi_ext storage, the token buffer, pc tree.
+  // Allocation failure is reported via cm->error instead of being ignored.
+  if (vp10_alloc_context_buffers(cm, cm->width, cm->height) ||
+      alloc_context_buffers_ext(cpi))
+    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate context buffers");
+
+  vpx_free(cpi->tile_tok[0][0]);  // replaced by a freshly sized buffer below
+  {
+    unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
+    CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
+                    vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
+#if CONFIG_ANS
+    vp10_buf_ans_alloc(&cpi->buf_ans, cm, tokens);
+#endif  // CONFIG_ANS
+  }
+  vp10_setup_pc_tree(&cpi->common, &cpi->td);
+}
+
+void vp10_new_framerate(VP10_COMP *cpi, double framerate) {
+ cpi->framerate = framerate < 0.1 ? 30 : framerate;  // below 0.1 -> use 30
+ vp10_rc_update_framerate(cpi);  // called after cpi->framerate is stored
+}
+
+static void set_tile_info(VP10_COMP *cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ // Derives cm's tile dimensions and tile counts from cpi->oxcf and the mi grid.
+#if CONFIG_EXT_TILE
+#if CONFIG_EXT_PARTITION
+ if (cpi->oxcf.superblock_size != VPX_SUPERBLOCK_SIZE_64X64) {
+ cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 32);
+ cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 32);
+ cm->tile_width <<= MAX_MIB_SIZE_LOG2;
+ cm->tile_height <<= MAX_MIB_SIZE_LOG2;
+ } else {
+ cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 64);
+ cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64);
+ cm->tile_width <<= MAX_MIB_SIZE_LOG2 - 1;
+ cm->tile_height <<= MAX_MIB_SIZE_LOG2 - 1;
+ }
+#else
+ cm->tile_width = clamp(cpi->oxcf.tile_columns, 1, 64);
+ cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64);
+ cm->tile_width <<= MAX_MIB_SIZE_LOG2;
+ cm->tile_height <<= MAX_MIB_SIZE_LOG2;
+#endif // CONFIG_EXT_PARTITION
+
+ cm->tile_width = VPXMIN(cm->tile_width, cm->mi_cols);  // cap at frame size
+ cm->tile_height = VPXMIN(cm->tile_height, cm->mi_rows);
+ // NOTE(review): shifts by MAX_MIB_SIZE, not MAX_MIB_SIZE_LOG2 -- confirm.
+ assert(cm->tile_width >> MAX_MIB_SIZE <= 32);
+ assert(cm->tile_height >> MAX_MIB_SIZE <= 32);
+
+ // Get the number of tiles
+ cm->tile_cols = 1;
+ while (cm->tile_cols * cm->tile_width < cm->mi_cols) ++cm->tile_cols;
+
+ cm->tile_rows = 1;
+ while (cm->tile_rows * cm->tile_height < cm->mi_rows) ++cm->tile_rows;
+#else
+ int min_log2_tile_cols, max_log2_tile_cols;
+ vp10_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
+
+ cm->log2_tile_cols =
+ clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
+ cm->log2_tile_rows = cpi->oxcf.tile_rows;  // taken as is, not clamped here
+
+ cm->tile_cols = 1 << cm->log2_tile_cols;
+ cm->tile_rows = 1 << cm->log2_tile_rows;
+
+ cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+ cm->tile_width >>= cm->log2_tile_cols;
+ cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
+ cm->tile_height >>= cm->log2_tile_rows;
+
+ // round to integer multiples of max superblock size
+ cm->tile_width = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2);
+ cm->tile_height = ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2);
+#endif // CONFIG_EXT_TILE
+}
+
+static void update_frame_size(VP10_COMP *cpi) {
+  // Recompute the mi/mb geometry for cm->width x cm->height, then reset the
+  // context buffers, macroblockd, mbmi_ext entries and tile layout.
+  VP10_COMMON *const cm = &cpi->common;
+
+  vp10_set_mb_mi(cm, cm->width, cm->height);
+  vp10_init_context_buffers(cm);
+  vp10_init_macroblockd(cm, &cpi->td.mb.e_mbd, NULL);
+  memset(cpi->mbmi_ext_base, 0,
+         cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
+  set_tile_info(cpi);
+}
+
+static void init_buffer_indices(VP10_COMP *cpi) {
+#if CONFIG_EXT_REFS
+  // Slots run consecutively: LAST entries, golden, backward, then alt/ARF map.
+  int slot = 0, n;
+  for (n = 0; n < LAST_REF_FRAMES; ++n) cpi->lst_fb_idxes[n] = slot++;
+  cpi->gld_fb_idx = slot++;
+  cpi->bwd_fb_idx = slot++;
+  cpi->alt_fb_idx = slot;
+  for (n = 0; n <= MAX_EXT_ARFS; ++n) cpi->arf_map[n] = slot + n;
+#else
+  // Last, golden and alt-ref indices are 0, 1 and 2.
+  cpi->lst_fb_idx = 0;
+  cpi->gld_fb_idx = 1;
+  cpi->alt_fb_idx = 2;
+#endif  // CONFIG_EXT_REFS
+}
+
+static void init_config(struct VP10_COMP *cpi, VP10EncoderConfig *oxcf) {
+ VP10_COMMON *const cm = &cpi->common;
+ // Copies *oxcf into cpi and seeds cm from it before the first allocation.
+ cpi->oxcf = *oxcf;
+ cpi->framerate = oxcf->init_framerate;
+
+ cm->profile = oxcf->profile;
+ cm->bit_depth = oxcf->bit_depth;
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth = oxcf->use_highbitdepth;
+#endif
+ cm->color_space = oxcf->color_space;
+ cm->color_range = oxcf->color_range;
+
+ cm->width = oxcf->width;
+ cm->height = oxcf->height;
+ vp10_alloc_compressor_data(cpi);  // reads cm->width / cm->height set above
+
+ // Single thread case: use counts in common.
+ cpi->td.counts = &cm->counts;
+
+ // change includes all joint functionality
+ vp10_change_config(cpi, oxcf);
+
+ cpi->static_mb_pct = 0;
+ cpi->ref_frame_flags = 0;
+
+ init_buffer_indices(cpi);
+}
+
+static void set_rc_buffer_sizes(RATE_CONTROL *rc,
+                                const VP10EncoderConfig *oxcf) {
+  // Levels are ms * bandwidth / 1000; zero optimal/maximum -> bandwidth / 8.
+  const int64_t bps = oxcf->target_bandwidth;
+  const int64_t dflt = bps / 8;
+  const int64_t start_ms = oxcf->starting_buffer_level_ms;
+  const int64_t opt_ms = oxcf->optimal_buffer_level_ms;
+  const int64_t max_ms = oxcf->maximum_buffer_size_ms;
+
+  rc->starting_buffer_level = start_ms * bps / 1000;
+  rc->optimal_buffer_level = opt_ms ? opt_ms * bps / 1000 : dflt;
+  rc->maximum_buffer_size = max_ms ? max_ms * bps / 1000 : dflt;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF) \
+ cpi->fn_ptr[BT].sdf = SDF; \
+ cpi->fn_ptr[BT].sdaf = SDAF; \
+ cpi->fn_ptr[BT].vf = VF; \
+ cpi->fn_ptr[BT].svf = SVF; \
+ cpi->fn_ptr[BT].svaf = SVAF; \
+ cpi->fn_ptr[BT].sdx3f = SDX3F; \
+ cpi->fn_ptr[BT].sdx8f = SDX8F; \
+ cpi->fn_ptr[BT].sdx4df = SDX4DF;  // needs `cpi` in scope; ends with ';'
+// MAKE_BFP_SAD_WRAPPER(fn): defines fn_bits8/_bits10/_bits12 = fn, fn>>2, fn>>4.
+#define MAKE_BFP_SAD_WRAPPER(fnname) \
+ static unsigned int fnname##_bits8(const uint8_t *src_ptr, \
+ int source_stride, \
+ const uint8_t *ref_ptr, int ref_stride) { \
+ return fnname(src_ptr, source_stride, ref_ptr, ref_stride); \
+ } \
+ static unsigned int fnname##_bits10( \
+ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
+ int ref_stride) { \
+ return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2; \
+ } \
+ static unsigned int fnname##_bits12( \
+ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
+ int ref_stride) { \
+ return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4; \
+ }
+// MAKE_BFP_SADAVG_WRAPPER(fn): same three variants, with a second_pred argument.
+#define MAKE_BFP_SADAVG_WRAPPER(fnname) \
+ static unsigned int fnname##_bits8( \
+ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
+ int ref_stride, const uint8_t *second_pred) { \
+ return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred); \
+ } \
+ static unsigned int fnname##_bits10( \
+ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
+ int ref_stride, const uint8_t *second_pred) { \
+ return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
+ 2; \
+ } \
+ static unsigned int fnname##_bits12( \
+ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
+ int ref_stride, const uint8_t *second_pred) { \
+ return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \
+ 4; \
+ }
+// MAKE_BFP_SAD3_WRAPPER(fn): same shifts applied to sad_array[0..2] in place.
+#define MAKE_BFP_SAD3_WRAPPER(fnname) \
+ static void fnname##_bits8(const uint8_t *src_ptr, int source_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ unsigned int *sad_array) { \
+ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
+ } \
+ static void fnname##_bits10(const uint8_t *src_ptr, int source_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ unsigned int *sad_array) { \
+ int i; \
+ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
+ for (i = 0; i < 3; i++) sad_array[i] >>= 2; \
+ } \
+ static void fnname##_bits12(const uint8_t *src_ptr, int source_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ unsigned int *sad_array) { \
+ int i; \
+ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
+ for (i = 0; i < 3; i++) sad_array[i] >>= 4; \
+ }
+// MAKE_BFP_SAD8_WRAPPER(fn): same shifts applied to sad_array[0..7] in place.
+#define MAKE_BFP_SAD8_WRAPPER(fnname) \
+ static void fnname##_bits8(const uint8_t *src_ptr, int source_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ unsigned int *sad_array) { \
+ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
+ } \
+ static void fnname##_bits10(const uint8_t *src_ptr, int source_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ unsigned int *sad_array) { \
+ int i; \
+ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
+ for (i = 0; i < 8; i++) sad_array[i] >>= 2; \
+ } \
+ static void fnname##_bits12(const uint8_t *src_ptr, int source_stride, \
+ const uint8_t *ref_ptr, int ref_stride, \
+ unsigned int *sad_array) { \
+ int i; \
+ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
+ for (i = 0; i < 8; i++) sad_array[i] >>= 4; \
+ }
+#define MAKE_BFP_SAD4D_WRAPPER(fnname) \
+ static void fnname##_bits8(const uint8_t *src_ptr, int source_stride, \
+ const uint8_t *const ref_ptr[], int ref_stride, \
+ unsigned int *sad_array) { \
+ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
+ } \
+ static void fnname##_bits10(const uint8_t *src_ptr, int source_stride, \
+ const uint8_t *const ref_ptr[], int ref_stride, \
+ unsigned int *sad_array) { \
+ int i; \
+ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
+ for (i = 0; i < 4; i++) sad_array[i] >>= 2; \
+ } \
+ static void fnname##_bits12(const uint8_t *src_ptr, int source_stride, \
+ const uint8_t *const ref_ptr[], int ref_stride, \
+ unsigned int *sad_array) { \
+ int i; \
+ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \
+ for (i = 0; i < 4; i++) sad_array[i] >>= 4; \
+ }  // MAKE_BFP_SAD4D_WRAPPER: ref_ptr is an array; shifts sad_array[0..3]
+// Wrapper instantiations per block size; x3/x8 exist only where listed below.
+#if CONFIG_EXT_PARTITION
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad128x128)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad128x128_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad128x128x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad128x128x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad128x128x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad128x64)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad128x64_avg)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad128x64x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x128)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x128_avg)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x128x4d)
+#endif // CONFIG_EXT_PARTITION
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x32)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x32_avg)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x32x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x32)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x32_avg)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x32x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x64)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x64_avg)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x64x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x32)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x32_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad32x32x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad32x32x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x32x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x64)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x64_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad64x64x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad64x64x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x64x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x16)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x16_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad16x16x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad16x16x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x16x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x8)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x8_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad16x8x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad16x8x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x8x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x16)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x16_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad8x16x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad8x16x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x16x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x8)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x8_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad8x8x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad8x8x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x8x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x4)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x4_avg)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad8x4x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x4x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x8)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x8_avg)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad4x8x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x8x4d)
+MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x4)
+MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x4_avg)
+MAKE_BFP_SAD3_WRAPPER(vpx_highbd_sad4x4x3)
+MAKE_BFP_SAD8_WRAPPER(vpx_highbd_sad4x4x8)
+MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d)
+
+#if CONFIG_EXT_INTER
+#define HIGHBD_MBFP(BT, MSDF, MVF, MSVF) \
+ cpi->fn_ptr[BT].msdf = MSDF; \
+ cpi->fn_ptr[BT].mvf = MVF; \
+ cpi->fn_ptr[BT].msvf = MSVF;  // needs `cpi` in scope; ends with ';'
+// MAKE_MBFP_SAD_WRAPPER(fn): fn_bits8/_bits10/_bits12 = fn, fn >> 2, fn >> 4.
+#define MAKE_MBFP_SAD_WRAPPER(fnname) \
+ static unsigned int fnname##_bits8( \
+ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
+ int ref_stride, const uint8_t *m, int m_stride) { \
+ return fnname(src_ptr, source_stride, ref_ptr, ref_stride, m, m_stride); \
+ } \
+ static unsigned int fnname##_bits10( \
+ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
+ int ref_stride, const uint8_t *m, int m_stride) { \
+ return fnname(src_ptr, source_stride, ref_ptr, ref_stride, m, m_stride) >> \
+ 2; \
+ } \
+ static unsigned int fnname##_bits12( \
+ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \
+ int ref_stride, const uint8_t *m, int m_stride) { \
+ return fnname(src_ptr, source_stride, ref_ptr, ref_stride, m, m_stride) >> \
+ 4; \
+ }
+// One set of wrappers per block size.
+#if CONFIG_EXT_PARTITION
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad128x128)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad128x64)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad64x128)
+#endif // CONFIG_EXT_PARTITION
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad64x64)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad64x32)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad32x64)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad32x32)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad32x16)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad16x32)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad16x16)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad16x8)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad8x16)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad8x8)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad8x4)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x8)
+MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x4)
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC
+#define HIGHBD_OBFP(BT, OSDF, OVF, OSVF) \
+ cpi->fn_ptr[BT].osdf = OSDF; \
+ cpi->fn_ptr[BT].ovf = OVF; \
+ cpi->fn_ptr[BT].osvf = OSVF;  // needs `cpi` in scope; ends with ';'
+// MAKE_OBFP_SAD_WRAPPER(fn): fn_bits8/_bits10/_bits12 = fn, fn >> 2, fn >> 4.
+#define MAKE_OBFP_SAD_WRAPPER(fnname) \
+ static unsigned int fnname##_bits8(const uint8_t *ref, int ref_stride, \
+ const int32_t *wsrc, \
+ const int32_t *msk) { \
+ return fnname(ref, ref_stride, wsrc, msk); \
+ } \
+ static unsigned int fnname##_bits10(const uint8_t *ref, int ref_stride, \
+ const int32_t *wsrc, \
+ const int32_t *msk) { \
+ return fnname(ref, ref_stride, wsrc, msk) >> 2; \
+ } \
+ static unsigned int fnname##_bits12(const uint8_t *ref, int ref_stride, \
+ const int32_t *wsrc, \
+ const int32_t *msk) { \
+ return fnname(ref, ref_stride, wsrc, msk) >> 4; \
+ }
+// One set of wrappers per block size.
+#if CONFIG_EXT_PARTITION
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad128x128)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad128x64)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad64x128)
+#endif // CONFIG_EXT_PARTITION
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad64x64)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad64x32)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad32x64)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad32x32)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad32x16)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad16x32)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad16x16)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad16x8)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad8x16)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad8x8)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad8x4)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad4x8)
+MAKE_OBFP_SAD_WRAPPER(vpx_highbd_obmc_sad4x4)
+#endif // CONFIG_OBMC
+
+static void highbd_set_var_fns(VP10_COMP *const cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ if (cm->use_highbitdepth) {
+ switch (cm->bit_depth) {
+ case VPX_BITS_8:
+ HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits8,
+ vpx_highbd_sad32x16_avg_bits8, vpx_highbd_8_variance32x16,
+ vpx_highbd_8_sub_pixel_variance32x16,
+ vpx_highbd_8_sub_pixel_avg_variance32x16, NULL, NULL,
+ vpx_highbd_sad32x16x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits8,
+ vpx_highbd_sad16x32_avg_bits8, vpx_highbd_8_variance16x32,
+ vpx_highbd_8_sub_pixel_variance16x32,
+ vpx_highbd_8_sub_pixel_avg_variance16x32, NULL, NULL,
+ vpx_highbd_sad16x32x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits8,
+ vpx_highbd_sad64x32_avg_bits8, vpx_highbd_8_variance64x32,
+ vpx_highbd_8_sub_pixel_variance64x32,
+ vpx_highbd_8_sub_pixel_avg_variance64x32, NULL, NULL,
+ vpx_highbd_sad64x32x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits8,
+ vpx_highbd_sad32x64_avg_bits8, vpx_highbd_8_variance32x64,
+ vpx_highbd_8_sub_pixel_variance32x64,
+ vpx_highbd_8_sub_pixel_avg_variance32x64, NULL, NULL,
+ vpx_highbd_sad32x64x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits8,
+ vpx_highbd_sad32x32_avg_bits8, vpx_highbd_8_variance32x32,
+ vpx_highbd_8_sub_pixel_variance32x32,
+ vpx_highbd_8_sub_pixel_avg_variance32x32,
+ vpx_highbd_sad32x32x3_bits8, vpx_highbd_sad32x32x8_bits8,
+ vpx_highbd_sad32x32x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits8,
+ vpx_highbd_sad64x64_avg_bits8, vpx_highbd_8_variance64x64,
+ vpx_highbd_8_sub_pixel_variance64x64,
+ vpx_highbd_8_sub_pixel_avg_variance64x64,
+ vpx_highbd_sad64x64x3_bits8, vpx_highbd_sad64x64x8_bits8,
+ vpx_highbd_sad64x64x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits8,
+ vpx_highbd_sad16x16_avg_bits8, vpx_highbd_8_variance16x16,
+ vpx_highbd_8_sub_pixel_variance16x16,
+ vpx_highbd_8_sub_pixel_avg_variance16x16,
+ vpx_highbd_sad16x16x3_bits8, vpx_highbd_sad16x16x8_bits8,
+ vpx_highbd_sad16x16x4d_bits8)
+
+ HIGHBD_BFP(
+ BLOCK_16X8, vpx_highbd_sad16x8_bits8, vpx_highbd_sad16x8_avg_bits8,
+ vpx_highbd_8_variance16x8, vpx_highbd_8_sub_pixel_variance16x8,
+ vpx_highbd_8_sub_pixel_avg_variance16x8, vpx_highbd_sad16x8x3_bits8,
+ vpx_highbd_sad16x8x8_bits8, vpx_highbd_sad16x8x4d_bits8)
+
+ HIGHBD_BFP(
+ BLOCK_8X16, vpx_highbd_sad8x16_bits8, vpx_highbd_sad8x16_avg_bits8,
+ vpx_highbd_8_variance8x16, vpx_highbd_8_sub_pixel_variance8x16,
+ vpx_highbd_8_sub_pixel_avg_variance8x16, vpx_highbd_sad8x16x3_bits8,
+ vpx_highbd_sad8x16x8_bits8, vpx_highbd_sad8x16x4d_bits8)
+
+ HIGHBD_BFP(
+ BLOCK_8X8, vpx_highbd_sad8x8_bits8, vpx_highbd_sad8x8_avg_bits8,
+ vpx_highbd_8_variance8x8, vpx_highbd_8_sub_pixel_variance8x8,
+ vpx_highbd_8_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x3_bits8,
+ vpx_highbd_sad8x8x8_bits8, vpx_highbd_sad8x8x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits8,
+ vpx_highbd_sad8x4_avg_bits8, vpx_highbd_8_variance8x4,
+ vpx_highbd_8_sub_pixel_variance8x4,
+ vpx_highbd_8_sub_pixel_avg_variance8x4, NULL,
+ vpx_highbd_sad8x4x8_bits8, vpx_highbd_sad8x4x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits8,
+ vpx_highbd_sad4x8_avg_bits8, vpx_highbd_8_variance4x8,
+ vpx_highbd_8_sub_pixel_variance4x8,
+ vpx_highbd_8_sub_pixel_avg_variance4x8, NULL,
+ vpx_highbd_sad4x8x8_bits8, vpx_highbd_sad4x8x4d_bits8)
+
+ HIGHBD_BFP(
+ BLOCK_4X4, vpx_highbd_sad4x4_bits8, vpx_highbd_sad4x4_avg_bits8,
+ vpx_highbd_8_variance4x4, vpx_highbd_8_sub_pixel_variance4x4,
+ vpx_highbd_8_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x3_bits8,
+ vpx_highbd_sad4x4x8_bits8, vpx_highbd_sad4x4x4d_bits8)
+
+#if CONFIG_EXT_PARTITION
+ HIGHBD_BFP(BLOCK_128X128, vpx_highbd_sad128x128_bits8,
+ vpx_highbd_sad128x128_avg_bits8,
+ vpx_highbd_8_variance128x128,
+ vpx_highbd_8_sub_pixel_variance128x128,
+ vpx_highbd_8_sub_pixel_avg_variance128x128,
+ vpx_highbd_sad128x128x3_bits8, vpx_highbd_sad128x128x8_bits8,
+ vpx_highbd_sad128x128x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_128X64, vpx_highbd_sad128x64_bits8,
+ vpx_highbd_sad128x64_avg_bits8, vpx_highbd_8_variance128x64,
+ vpx_highbd_8_sub_pixel_variance128x64,
+ vpx_highbd_8_sub_pixel_avg_variance128x64, NULL, NULL,
+ vpx_highbd_sad128x64x4d_bits8)
+
+ HIGHBD_BFP(BLOCK_64X128, vpx_highbd_sad64x128_bits8,
+ vpx_highbd_sad64x128_avg_bits8, vpx_highbd_8_variance64x128,
+ vpx_highbd_8_sub_pixel_variance64x128,
+ vpx_highbd_8_sub_pixel_avg_variance64x128, NULL, NULL,
+ vpx_highbd_sad64x128x4d_bits8)
+#endif // CONFIG_EXT_PARTITION
+
+#if CONFIG_EXT_INTER
+#if CONFIG_EXT_PARTITION
+ HIGHBD_MBFP(BLOCK_128X128, vpx_highbd_masked_sad128x128_bits8,
+ vpx_highbd_masked_variance128x128,
+ vpx_highbd_masked_sub_pixel_variance128x128)
+ HIGHBD_MBFP(BLOCK_128X64, vpx_highbd_masked_sad128x64_bits8,
+ vpx_highbd_masked_variance128x64,
+ vpx_highbd_masked_sub_pixel_variance128x64)
+ HIGHBD_MBFP(BLOCK_64X128, vpx_highbd_masked_sad64x128_bits8,
+ vpx_highbd_masked_variance64x128,
+ vpx_highbd_masked_sub_pixel_variance64x128)
+#endif // CONFIG_EXT_PARTITION
+ HIGHBD_MBFP(BLOCK_64X64, vpx_highbd_masked_sad64x64_bits8,
+ vpx_highbd_masked_variance64x64,
+ vpx_highbd_masked_sub_pixel_variance64x64)
+ HIGHBD_MBFP(BLOCK_64X32, vpx_highbd_masked_sad64x32_bits8,
+ vpx_highbd_masked_variance64x32,
+ vpx_highbd_masked_sub_pixel_variance64x32)
+ HIGHBD_MBFP(BLOCK_32X64, vpx_highbd_masked_sad32x64_bits8,
+ vpx_highbd_masked_variance32x64,
+ vpx_highbd_masked_sub_pixel_variance32x64)
+ HIGHBD_MBFP(BLOCK_32X32, vpx_highbd_masked_sad32x32_bits8,
+ vpx_highbd_masked_variance32x32,
+ vpx_highbd_masked_sub_pixel_variance32x32)
+ HIGHBD_MBFP(BLOCK_32X16, vpx_highbd_masked_sad32x16_bits8,
+ vpx_highbd_masked_variance32x16,
+ vpx_highbd_masked_sub_pixel_variance32x16)
+ HIGHBD_MBFP(BLOCK_16X32, vpx_highbd_masked_sad16x32_bits8,
+ vpx_highbd_masked_variance16x32,
+ vpx_highbd_masked_sub_pixel_variance16x32)
+ HIGHBD_MBFP(BLOCK_16X16, vpx_highbd_masked_sad16x16_bits8,
+ vpx_highbd_masked_variance16x16,
+ vpx_highbd_masked_sub_pixel_variance16x16)
+ HIGHBD_MBFP(BLOCK_8X16, vpx_highbd_masked_sad8x16_bits8,
+ vpx_highbd_masked_variance8x16,
+ vpx_highbd_masked_sub_pixel_variance8x16)
+ HIGHBD_MBFP(BLOCK_16X8, vpx_highbd_masked_sad16x8_bits8,
+ vpx_highbd_masked_variance16x8,
+ vpx_highbd_masked_sub_pixel_variance16x8)
+ HIGHBD_MBFP(BLOCK_8X8, vpx_highbd_masked_sad8x8_bits8,
+ vpx_highbd_masked_variance8x8,
+ vpx_highbd_masked_sub_pixel_variance8x8)
+ HIGHBD_MBFP(BLOCK_4X8, vpx_highbd_masked_sad4x8_bits8,
+ vpx_highbd_masked_variance4x8,
+ vpx_highbd_masked_sub_pixel_variance4x8)
+ HIGHBD_MBFP(BLOCK_8X4, vpx_highbd_masked_sad8x4_bits8,
+ vpx_highbd_masked_variance8x4,
+ vpx_highbd_masked_sub_pixel_variance8x4)
+ HIGHBD_MBFP(BLOCK_4X4, vpx_highbd_masked_sad4x4_bits8,
+ vpx_highbd_masked_variance4x4,
+ vpx_highbd_masked_sub_pixel_variance4x4)
+#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+#if CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_128X128, vpx_highbd_obmc_sad128x128_bits8,
+ vpx_highbd_obmc_variance128x128,
+ vpx_highbd_obmc_sub_pixel_variance128x128)
+ HIGHBD_OBFP(BLOCK_128X64, vpx_highbd_obmc_sad128x64_bits8,
+ vpx_highbd_obmc_variance128x64,
+ vpx_highbd_obmc_sub_pixel_variance128x64)
+ HIGHBD_OBFP(BLOCK_64X128, vpx_highbd_obmc_sad64x128_bits8,
+ vpx_highbd_obmc_variance64x128,
+ vpx_highbd_obmc_sub_pixel_variance64x128)
+#endif // CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_64X64, vpx_highbd_obmc_sad64x64_bits8,
+ vpx_highbd_obmc_variance64x64,
+ vpx_highbd_obmc_sub_pixel_variance64x64)
+ HIGHBD_OBFP(BLOCK_64X32, vpx_highbd_obmc_sad64x32_bits8,
+ vpx_highbd_obmc_variance64x32,
+ vpx_highbd_obmc_sub_pixel_variance64x32)
+ HIGHBD_OBFP(BLOCK_32X64, vpx_highbd_obmc_sad32x64_bits8,
+ vpx_highbd_obmc_variance32x64,
+ vpx_highbd_obmc_sub_pixel_variance32x64)
+ HIGHBD_OBFP(BLOCK_32X32, vpx_highbd_obmc_sad32x32_bits8,
+ vpx_highbd_obmc_variance32x32,
+ vpx_highbd_obmc_sub_pixel_variance32x32)
+ HIGHBD_OBFP(BLOCK_32X16, vpx_highbd_obmc_sad32x16_bits8,
+ vpx_highbd_obmc_variance32x16,
+ vpx_highbd_obmc_sub_pixel_variance32x16)
+ HIGHBD_OBFP(BLOCK_16X32, vpx_highbd_obmc_sad16x32_bits8,
+ vpx_highbd_obmc_variance16x32,
+ vpx_highbd_obmc_sub_pixel_variance16x32)
+ HIGHBD_OBFP(BLOCK_16X16, vpx_highbd_obmc_sad16x16_bits8,
+ vpx_highbd_obmc_variance16x16,
+ vpx_highbd_obmc_sub_pixel_variance16x16)
+ HIGHBD_OBFP(BLOCK_8X16, vpx_highbd_obmc_sad8x16_bits8,
+ vpx_highbd_obmc_variance8x16,
+ vpx_highbd_obmc_sub_pixel_variance8x16)
+ HIGHBD_OBFP(BLOCK_16X8, vpx_highbd_obmc_sad16x8_bits8,
+ vpx_highbd_obmc_variance16x8,
+ vpx_highbd_obmc_sub_pixel_variance16x8)
+ HIGHBD_OBFP(BLOCK_8X8, vpx_highbd_obmc_sad8x8_bits8,
+ vpx_highbd_obmc_variance8x8,
+ vpx_highbd_obmc_sub_pixel_variance8x8)
+ HIGHBD_OBFP(BLOCK_4X8, vpx_highbd_obmc_sad4x8_bits8,
+ vpx_highbd_obmc_variance4x8,
+ vpx_highbd_obmc_sub_pixel_variance4x8)
+ HIGHBD_OBFP(BLOCK_8X4, vpx_highbd_obmc_sad8x4_bits8,
+ vpx_highbd_obmc_variance8x4,
+ vpx_highbd_obmc_sub_pixel_variance8x4)
+ HIGHBD_OBFP(BLOCK_4X4, vpx_highbd_obmc_sad4x4_bits8,
+ vpx_highbd_obmc_variance4x4,
+ vpx_highbd_obmc_sub_pixel_variance4x4)
+#endif // CONFIG_OBMC
+ break;
+
+ case VPX_BITS_10:
+ HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits10,
+ vpx_highbd_sad32x16_avg_bits10, vpx_highbd_10_variance32x16,
+ vpx_highbd_10_sub_pixel_variance32x16,
+ vpx_highbd_10_sub_pixel_avg_variance32x16, NULL, NULL,
+ vpx_highbd_sad32x16x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits10,
+ vpx_highbd_sad16x32_avg_bits10, vpx_highbd_10_variance16x32,
+ vpx_highbd_10_sub_pixel_variance16x32,
+ vpx_highbd_10_sub_pixel_avg_variance16x32, NULL, NULL,
+ vpx_highbd_sad16x32x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits10,
+ vpx_highbd_sad64x32_avg_bits10, vpx_highbd_10_variance64x32,
+ vpx_highbd_10_sub_pixel_variance64x32,
+ vpx_highbd_10_sub_pixel_avg_variance64x32, NULL, NULL,
+ vpx_highbd_sad64x32x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits10,
+ vpx_highbd_sad32x64_avg_bits10, vpx_highbd_10_variance32x64,
+ vpx_highbd_10_sub_pixel_variance32x64,
+ vpx_highbd_10_sub_pixel_avg_variance32x64, NULL, NULL,
+ vpx_highbd_sad32x64x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits10,
+ vpx_highbd_sad32x32_avg_bits10, vpx_highbd_10_variance32x32,
+ vpx_highbd_10_sub_pixel_variance32x32,
+ vpx_highbd_10_sub_pixel_avg_variance32x32,
+ vpx_highbd_sad32x32x3_bits10, vpx_highbd_sad32x32x8_bits10,
+ vpx_highbd_sad32x32x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits10,
+ vpx_highbd_sad64x64_avg_bits10, vpx_highbd_10_variance64x64,
+ vpx_highbd_10_sub_pixel_variance64x64,
+ vpx_highbd_10_sub_pixel_avg_variance64x64,
+ vpx_highbd_sad64x64x3_bits10, vpx_highbd_sad64x64x8_bits10,
+ vpx_highbd_sad64x64x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits10,
+ vpx_highbd_sad16x16_avg_bits10, vpx_highbd_10_variance16x16,
+ vpx_highbd_10_sub_pixel_variance16x16,
+ vpx_highbd_10_sub_pixel_avg_variance16x16,
+ vpx_highbd_sad16x16x3_bits10, vpx_highbd_sad16x16x8_bits10,
+ vpx_highbd_sad16x16x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits10,
+ vpx_highbd_sad16x8_avg_bits10, vpx_highbd_10_variance16x8,
+ vpx_highbd_10_sub_pixel_variance16x8,
+ vpx_highbd_10_sub_pixel_avg_variance16x8,
+ vpx_highbd_sad16x8x3_bits10, vpx_highbd_sad16x8x8_bits10,
+ vpx_highbd_sad16x8x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits10,
+ vpx_highbd_sad8x16_avg_bits10, vpx_highbd_10_variance8x16,
+ vpx_highbd_10_sub_pixel_variance8x16,
+ vpx_highbd_10_sub_pixel_avg_variance8x16,
+ vpx_highbd_sad8x16x3_bits10, vpx_highbd_sad8x16x8_bits10,
+ vpx_highbd_sad8x16x4d_bits10)
+
+ HIGHBD_BFP(
+ BLOCK_8X8, vpx_highbd_sad8x8_bits10, vpx_highbd_sad8x8_avg_bits10,
+ vpx_highbd_10_variance8x8, vpx_highbd_10_sub_pixel_variance8x8,
+ vpx_highbd_10_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x3_bits10,
+ vpx_highbd_sad8x8x8_bits10, vpx_highbd_sad8x8x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits10,
+ vpx_highbd_sad8x4_avg_bits10, vpx_highbd_10_variance8x4,
+ vpx_highbd_10_sub_pixel_variance8x4,
+ vpx_highbd_10_sub_pixel_avg_variance8x4, NULL,
+ vpx_highbd_sad8x4x8_bits10, vpx_highbd_sad8x4x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits10,
+ vpx_highbd_sad4x8_avg_bits10, vpx_highbd_10_variance4x8,
+ vpx_highbd_10_sub_pixel_variance4x8,
+ vpx_highbd_10_sub_pixel_avg_variance4x8, NULL,
+ vpx_highbd_sad4x8x8_bits10, vpx_highbd_sad4x8x4d_bits10)
+
+ HIGHBD_BFP(
+ BLOCK_4X4, vpx_highbd_sad4x4_bits10, vpx_highbd_sad4x4_avg_bits10,
+ vpx_highbd_10_variance4x4, vpx_highbd_10_sub_pixel_variance4x4,
+ vpx_highbd_10_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x3_bits10,
+ vpx_highbd_sad4x4x8_bits10, vpx_highbd_sad4x4x4d_bits10)
+
+#if CONFIG_EXT_PARTITION
+ HIGHBD_BFP(
+ BLOCK_128X128, vpx_highbd_sad128x128_bits10,
+ vpx_highbd_sad128x128_avg_bits10, vpx_highbd_10_variance128x128,
+ vpx_highbd_10_sub_pixel_variance128x128,
+ vpx_highbd_10_sub_pixel_avg_variance128x128,
+ vpx_highbd_sad128x128x3_bits10, vpx_highbd_sad128x128x8_bits10,
+ vpx_highbd_sad128x128x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_128X64, vpx_highbd_sad128x64_bits10,
+ vpx_highbd_sad128x64_avg_bits10,
+ vpx_highbd_10_variance128x64,
+ vpx_highbd_10_sub_pixel_variance128x64,
+ vpx_highbd_10_sub_pixel_avg_variance128x64, NULL, NULL,
+ vpx_highbd_sad128x64x4d_bits10)
+
+ HIGHBD_BFP(BLOCK_64X128, vpx_highbd_sad64x128_bits10,
+ vpx_highbd_sad64x128_avg_bits10,
+ vpx_highbd_10_variance64x128,
+ vpx_highbd_10_sub_pixel_variance64x128,
+ vpx_highbd_10_sub_pixel_avg_variance64x128, NULL, NULL,
+ vpx_highbd_sad64x128x4d_bits10)
+#endif // CONFIG_EXT_PARTITION
+
+#if CONFIG_EXT_INTER
+#if CONFIG_EXT_PARTITION
+ HIGHBD_MBFP(BLOCK_128X128, vpx_highbd_masked_sad128x128_bits10,
+ vpx_highbd_10_masked_variance128x128,
+ vpx_highbd_10_masked_sub_pixel_variance128x128)
+ HIGHBD_MBFP(BLOCK_128X64, vpx_highbd_masked_sad128x64_bits10,
+ vpx_highbd_10_masked_variance128x64,
+ vpx_highbd_10_masked_sub_pixel_variance128x64)
+ HIGHBD_MBFP(BLOCK_64X128, vpx_highbd_masked_sad64x128_bits10,
+ vpx_highbd_10_masked_variance64x128,
+ vpx_highbd_10_masked_sub_pixel_variance64x128)
+#endif // CONFIG_EXT_PARTITION
+ HIGHBD_MBFP(BLOCK_64X64, vpx_highbd_masked_sad64x64_bits10,
+ vpx_highbd_10_masked_variance64x64,
+ vpx_highbd_10_masked_sub_pixel_variance64x64)
+ HIGHBD_MBFP(BLOCK_64X32, vpx_highbd_masked_sad64x32_bits10,
+ vpx_highbd_10_masked_variance64x32,
+ vpx_highbd_10_masked_sub_pixel_variance64x32)
+ HIGHBD_MBFP(BLOCK_32X64, vpx_highbd_masked_sad32x64_bits10,
+ vpx_highbd_10_masked_variance32x64,
+ vpx_highbd_10_masked_sub_pixel_variance32x64)
+ HIGHBD_MBFP(BLOCK_32X32, vpx_highbd_masked_sad32x32_bits10,
+ vpx_highbd_10_masked_variance32x32,
+ vpx_highbd_10_masked_sub_pixel_variance32x32)
+ HIGHBD_MBFP(BLOCK_32X16, vpx_highbd_masked_sad32x16_bits10,
+ vpx_highbd_10_masked_variance32x16,
+ vpx_highbd_10_masked_sub_pixel_variance32x16)
+ HIGHBD_MBFP(BLOCK_16X32, vpx_highbd_masked_sad16x32_bits10,
+ vpx_highbd_10_masked_variance16x32,
+ vpx_highbd_10_masked_sub_pixel_variance16x32)
+ HIGHBD_MBFP(BLOCK_16X16, vpx_highbd_masked_sad16x16_bits10,
+ vpx_highbd_10_masked_variance16x16,
+ vpx_highbd_10_masked_sub_pixel_variance16x16)
+ HIGHBD_MBFP(BLOCK_8X16, vpx_highbd_masked_sad8x16_bits10,
+ vpx_highbd_10_masked_variance8x16,
+ vpx_highbd_10_masked_sub_pixel_variance8x16)
+ HIGHBD_MBFP(BLOCK_16X8, vpx_highbd_masked_sad16x8_bits10,
+ vpx_highbd_10_masked_variance16x8,
+ vpx_highbd_10_masked_sub_pixel_variance16x8)
+ HIGHBD_MBFP(BLOCK_8X8, vpx_highbd_masked_sad8x8_bits10,
+ vpx_highbd_10_masked_variance8x8,
+ vpx_highbd_10_masked_sub_pixel_variance8x8)
+ HIGHBD_MBFP(BLOCK_4X8, vpx_highbd_masked_sad4x8_bits10,
+ vpx_highbd_10_masked_variance4x8,
+ vpx_highbd_10_masked_sub_pixel_variance4x8)
+ HIGHBD_MBFP(BLOCK_8X4, vpx_highbd_masked_sad8x4_bits10,
+ vpx_highbd_10_masked_variance8x4,
+ vpx_highbd_10_masked_sub_pixel_variance8x4)
+ HIGHBD_MBFP(BLOCK_4X4, vpx_highbd_masked_sad4x4_bits10,
+ vpx_highbd_10_masked_variance4x4,
+ vpx_highbd_10_masked_sub_pixel_variance4x4)
+#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC
+#if CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_128X128, vpx_highbd_obmc_sad128x128_bits10,
+ vpx_highbd_10_obmc_variance128x128,
+ vpx_highbd_10_obmc_sub_pixel_variance128x128)
+ HIGHBD_OBFP(BLOCK_128X64, vpx_highbd_obmc_sad128x64_bits10,
+ vpx_highbd_10_obmc_variance128x64,
+ vpx_highbd_10_obmc_sub_pixel_variance128x64)
+ HIGHBD_OBFP(BLOCK_64X128, vpx_highbd_obmc_sad64x128_bits10,
+ vpx_highbd_10_obmc_variance64x128,
+ vpx_highbd_10_obmc_sub_pixel_variance64x128)
+#endif // CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_64X64, vpx_highbd_obmc_sad64x64_bits10,
+ vpx_highbd_10_obmc_variance64x64,
+ vpx_highbd_10_obmc_sub_pixel_variance64x64)
+ HIGHBD_OBFP(BLOCK_64X32, vpx_highbd_obmc_sad64x32_bits10,
+ vpx_highbd_10_obmc_variance64x32,
+ vpx_highbd_10_obmc_sub_pixel_variance64x32)
+ HIGHBD_OBFP(BLOCK_32X64, vpx_highbd_obmc_sad32x64_bits10,
+ vpx_highbd_10_obmc_variance32x64,
+ vpx_highbd_10_obmc_sub_pixel_variance32x64)
+ HIGHBD_OBFP(BLOCK_32X32, vpx_highbd_obmc_sad32x32_bits10,
+ vpx_highbd_10_obmc_variance32x32,
+ vpx_highbd_10_obmc_sub_pixel_variance32x32)
+ HIGHBD_OBFP(BLOCK_32X16, vpx_highbd_obmc_sad32x16_bits10,
+ vpx_highbd_10_obmc_variance32x16,
+ vpx_highbd_10_obmc_sub_pixel_variance32x16)
+ HIGHBD_OBFP(BLOCK_16X32, vpx_highbd_obmc_sad16x32_bits10,
+ vpx_highbd_10_obmc_variance16x32,
+ vpx_highbd_10_obmc_sub_pixel_variance16x32)
+ HIGHBD_OBFP(BLOCK_16X16, vpx_highbd_obmc_sad16x16_bits10,
+ vpx_highbd_10_obmc_variance16x16,
+ vpx_highbd_10_obmc_sub_pixel_variance16x16)
+ HIGHBD_OBFP(BLOCK_8X16, vpx_highbd_obmc_sad8x16_bits10,
+ vpx_highbd_10_obmc_variance8x16,
+ vpx_highbd_10_obmc_sub_pixel_variance8x16)
+ HIGHBD_OBFP(BLOCK_16X8, vpx_highbd_obmc_sad16x8_bits10,
+ vpx_highbd_10_obmc_variance16x8,
+ vpx_highbd_10_obmc_sub_pixel_variance16x8)
+ HIGHBD_OBFP(BLOCK_8X8, vpx_highbd_obmc_sad8x8_bits10,
+ vpx_highbd_10_obmc_variance8x8,
+ vpx_highbd_10_obmc_sub_pixel_variance8x8)
+ HIGHBD_OBFP(BLOCK_4X8, vpx_highbd_obmc_sad4x8_bits10,
+ vpx_highbd_10_obmc_variance4x8,
+ vpx_highbd_10_obmc_sub_pixel_variance4x8)
+ HIGHBD_OBFP(BLOCK_8X4, vpx_highbd_obmc_sad8x4_bits10,
+ vpx_highbd_10_obmc_variance8x4,
+ vpx_highbd_10_obmc_sub_pixel_variance8x4)
+ HIGHBD_OBFP(BLOCK_4X4, vpx_highbd_obmc_sad4x4_bits10,
+ vpx_highbd_10_obmc_variance4x4,
+ vpx_highbd_10_obmc_sub_pixel_variance4x4)
+#endif // CONFIG_OBMC
+ break;
+
+ case VPX_BITS_12:
+ HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12,
+ vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16,
+ vpx_highbd_12_sub_pixel_variance32x16,
+ vpx_highbd_12_sub_pixel_avg_variance32x16, NULL, NULL,
+ vpx_highbd_sad32x16x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits12,
+ vpx_highbd_sad16x32_avg_bits12, vpx_highbd_12_variance16x32,
+ vpx_highbd_12_sub_pixel_variance16x32,
+ vpx_highbd_12_sub_pixel_avg_variance16x32, NULL, NULL,
+ vpx_highbd_sad16x32x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits12,
+ vpx_highbd_sad64x32_avg_bits12, vpx_highbd_12_variance64x32,
+ vpx_highbd_12_sub_pixel_variance64x32,
+ vpx_highbd_12_sub_pixel_avg_variance64x32, NULL, NULL,
+ vpx_highbd_sad64x32x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits12,
+ vpx_highbd_sad32x64_avg_bits12, vpx_highbd_12_variance32x64,
+ vpx_highbd_12_sub_pixel_variance32x64,
+ vpx_highbd_12_sub_pixel_avg_variance32x64, NULL, NULL,
+ vpx_highbd_sad32x64x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits12,
+ vpx_highbd_sad32x32_avg_bits12, vpx_highbd_12_variance32x32,
+ vpx_highbd_12_sub_pixel_variance32x32,
+ vpx_highbd_12_sub_pixel_avg_variance32x32,
+ vpx_highbd_sad32x32x3_bits12, vpx_highbd_sad32x32x8_bits12,
+ vpx_highbd_sad32x32x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits12,
+ vpx_highbd_sad64x64_avg_bits12, vpx_highbd_12_variance64x64,
+ vpx_highbd_12_sub_pixel_variance64x64,
+ vpx_highbd_12_sub_pixel_avg_variance64x64,
+ vpx_highbd_sad64x64x3_bits12, vpx_highbd_sad64x64x8_bits12,
+ vpx_highbd_sad64x64x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits12,
+ vpx_highbd_sad16x16_avg_bits12, vpx_highbd_12_variance16x16,
+ vpx_highbd_12_sub_pixel_variance16x16,
+ vpx_highbd_12_sub_pixel_avg_variance16x16,
+ vpx_highbd_sad16x16x3_bits12, vpx_highbd_sad16x16x8_bits12,
+ vpx_highbd_sad16x16x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits12,
+ vpx_highbd_sad16x8_avg_bits12, vpx_highbd_12_variance16x8,
+ vpx_highbd_12_sub_pixel_variance16x8,
+ vpx_highbd_12_sub_pixel_avg_variance16x8,
+ vpx_highbd_sad16x8x3_bits12, vpx_highbd_sad16x8x8_bits12,
+ vpx_highbd_sad16x8x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits12,
+ vpx_highbd_sad8x16_avg_bits12, vpx_highbd_12_variance8x16,
+ vpx_highbd_12_sub_pixel_variance8x16,
+ vpx_highbd_12_sub_pixel_avg_variance8x16,
+ vpx_highbd_sad8x16x3_bits12, vpx_highbd_sad8x16x8_bits12,
+ vpx_highbd_sad8x16x4d_bits12)
+
+ HIGHBD_BFP(
+ BLOCK_8X8, vpx_highbd_sad8x8_bits12, vpx_highbd_sad8x8_avg_bits12,
+ vpx_highbd_12_variance8x8, vpx_highbd_12_sub_pixel_variance8x8,
+ vpx_highbd_12_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x3_bits12,
+ vpx_highbd_sad8x8x8_bits12, vpx_highbd_sad8x8x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits12,
+ vpx_highbd_sad8x4_avg_bits12, vpx_highbd_12_variance8x4,
+ vpx_highbd_12_sub_pixel_variance8x4,
+ vpx_highbd_12_sub_pixel_avg_variance8x4, NULL,
+ vpx_highbd_sad8x4x8_bits12, vpx_highbd_sad8x4x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits12,
+ vpx_highbd_sad4x8_avg_bits12, vpx_highbd_12_variance4x8,
+ vpx_highbd_12_sub_pixel_variance4x8,
+ vpx_highbd_12_sub_pixel_avg_variance4x8, NULL,
+ vpx_highbd_sad4x8x8_bits12, vpx_highbd_sad4x8x4d_bits12)
+
+ HIGHBD_BFP(
+ BLOCK_4X4, vpx_highbd_sad4x4_bits12, vpx_highbd_sad4x4_avg_bits12,
+ vpx_highbd_12_variance4x4, vpx_highbd_12_sub_pixel_variance4x4,
+ vpx_highbd_12_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x3_bits12,
+ vpx_highbd_sad4x4x8_bits12, vpx_highbd_sad4x4x4d_bits12)
+
+#if CONFIG_EXT_PARTITION
+ HIGHBD_BFP(
+ BLOCK_128X128, vpx_highbd_sad128x128_bits12,
+ vpx_highbd_sad128x128_avg_bits12, vpx_highbd_12_variance128x128,
+ vpx_highbd_12_sub_pixel_variance128x128,
+ vpx_highbd_12_sub_pixel_avg_variance128x128,
+ vpx_highbd_sad128x128x3_bits12, vpx_highbd_sad128x128x8_bits12,
+ vpx_highbd_sad128x128x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_128X64, vpx_highbd_sad128x64_bits12,
+ vpx_highbd_sad128x64_avg_bits12,
+ vpx_highbd_12_variance128x64,
+ vpx_highbd_12_sub_pixel_variance128x64,
+ vpx_highbd_12_sub_pixel_avg_variance128x64, NULL, NULL,
+ vpx_highbd_sad128x64x4d_bits12)
+
+ HIGHBD_BFP(BLOCK_64X128, vpx_highbd_sad64x128_bits12,
+ vpx_highbd_sad64x128_avg_bits12,
+ vpx_highbd_12_variance64x128,
+ vpx_highbd_12_sub_pixel_variance64x128,
+ vpx_highbd_12_sub_pixel_avg_variance64x128, NULL, NULL,
+ vpx_highbd_sad64x128x4d_bits12)
+#endif // CONFIG_EXT_PARTITION
+
+#if CONFIG_EXT_INTER
+#if CONFIG_EXT_PARTITION
+ HIGHBD_MBFP(BLOCK_128X128, vpx_highbd_masked_sad128x128_bits12,
+ vpx_highbd_12_masked_variance128x128,
+ vpx_highbd_12_masked_sub_pixel_variance128x128)
+ HIGHBD_MBFP(BLOCK_128X64, vpx_highbd_masked_sad128x64_bits12,
+ vpx_highbd_12_masked_variance128x64,
+ vpx_highbd_12_masked_sub_pixel_variance128x64)
+ HIGHBD_MBFP(BLOCK_64X128, vpx_highbd_masked_sad64x128_bits12,
+ vpx_highbd_12_masked_variance64x128,
+ vpx_highbd_12_masked_sub_pixel_variance64x128)
+#endif // CONFIG_EXT_PARTITION
+ HIGHBD_MBFP(BLOCK_64X64, vpx_highbd_masked_sad64x64_bits12,
+ vpx_highbd_12_masked_variance64x64,
+ vpx_highbd_12_masked_sub_pixel_variance64x64)
+ HIGHBD_MBFP(BLOCK_64X32, vpx_highbd_masked_sad64x32_bits12,
+ vpx_highbd_12_masked_variance64x32,
+ vpx_highbd_12_masked_sub_pixel_variance64x32)
+ HIGHBD_MBFP(BLOCK_32X64, vpx_highbd_masked_sad32x64_bits12,
+ vpx_highbd_12_masked_variance32x64,
+ vpx_highbd_12_masked_sub_pixel_variance32x64)
+ HIGHBD_MBFP(BLOCK_32X32, vpx_highbd_masked_sad32x32_bits12,
+ vpx_highbd_12_masked_variance32x32,
+ vpx_highbd_12_masked_sub_pixel_variance32x32)
+ HIGHBD_MBFP(BLOCK_32X16, vpx_highbd_masked_sad32x16_bits12,
+ vpx_highbd_12_masked_variance32x16,
+ vpx_highbd_12_masked_sub_pixel_variance32x16)
+ HIGHBD_MBFP(BLOCK_16X32, vpx_highbd_masked_sad16x32_bits12,
+ vpx_highbd_12_masked_variance16x32,
+ vpx_highbd_12_masked_sub_pixel_variance16x32)
+ HIGHBD_MBFP(BLOCK_16X16, vpx_highbd_masked_sad16x16_bits12,
+ vpx_highbd_12_masked_variance16x16,
+ vpx_highbd_12_masked_sub_pixel_variance16x16)
+ HIGHBD_MBFP(BLOCK_8X16, vpx_highbd_masked_sad8x16_bits12,
+ vpx_highbd_12_masked_variance8x16,
+ vpx_highbd_12_masked_sub_pixel_variance8x16)
+ HIGHBD_MBFP(BLOCK_16X8, vpx_highbd_masked_sad16x8_bits12,
+ vpx_highbd_12_masked_variance16x8,
+ vpx_highbd_12_masked_sub_pixel_variance16x8)
+ HIGHBD_MBFP(BLOCK_8X8, vpx_highbd_masked_sad8x8_bits12,
+ vpx_highbd_12_masked_variance8x8,
+ vpx_highbd_12_masked_sub_pixel_variance8x8)
+ HIGHBD_MBFP(BLOCK_4X8, vpx_highbd_masked_sad4x8_bits12,
+ vpx_highbd_12_masked_variance4x8,
+ vpx_highbd_12_masked_sub_pixel_variance4x8)
+ HIGHBD_MBFP(BLOCK_8X4, vpx_highbd_masked_sad8x4_bits12,
+ vpx_highbd_12_masked_variance8x4,
+ vpx_highbd_12_masked_sub_pixel_variance8x4)
+ HIGHBD_MBFP(BLOCK_4X4, vpx_highbd_masked_sad4x4_bits12,
+ vpx_highbd_12_masked_variance4x4,
+ vpx_highbd_12_masked_sub_pixel_variance4x4)
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC
+#if CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_128X128, vpx_highbd_obmc_sad128x128_bits12,
+ vpx_highbd_12_obmc_variance128x128,
+ vpx_highbd_12_obmc_sub_pixel_variance128x128)
+ HIGHBD_OBFP(BLOCK_128X64, vpx_highbd_obmc_sad128x64_bits12,
+ vpx_highbd_12_obmc_variance128x64,
+ vpx_highbd_12_obmc_sub_pixel_variance128x64)
+ HIGHBD_OBFP(BLOCK_64X128, vpx_highbd_obmc_sad64x128_bits12,
+ vpx_highbd_12_obmc_variance64x128,
+ vpx_highbd_12_obmc_sub_pixel_variance64x128)
+#endif // CONFIG_EXT_PARTITION
+ HIGHBD_OBFP(BLOCK_64X64, vpx_highbd_obmc_sad64x64_bits12,
+ vpx_highbd_12_obmc_variance64x64,
+ vpx_highbd_12_obmc_sub_pixel_variance64x64)
+ HIGHBD_OBFP(BLOCK_64X32, vpx_highbd_obmc_sad64x32_bits12,
+ vpx_highbd_12_obmc_variance64x32,
+ vpx_highbd_12_obmc_sub_pixel_variance64x32)
+ HIGHBD_OBFP(BLOCK_32X64, vpx_highbd_obmc_sad32x64_bits12,
+ vpx_highbd_12_obmc_variance32x64,
+ vpx_highbd_12_obmc_sub_pixel_variance32x64)
+ HIGHBD_OBFP(BLOCK_32X32, vpx_highbd_obmc_sad32x32_bits12,
+ vpx_highbd_12_obmc_variance32x32,
+ vpx_highbd_12_obmc_sub_pixel_variance32x32)
+ HIGHBD_OBFP(BLOCK_32X16, vpx_highbd_obmc_sad32x16_bits12,
+ vpx_highbd_12_obmc_variance32x16,
+ vpx_highbd_12_obmc_sub_pixel_variance32x16)
+ HIGHBD_OBFP(BLOCK_16X32, vpx_highbd_obmc_sad16x32_bits12,
+ vpx_highbd_12_obmc_variance16x32,
+ vpx_highbd_12_obmc_sub_pixel_variance16x32)
+ HIGHBD_OBFP(BLOCK_16X16, vpx_highbd_obmc_sad16x16_bits12,
+ vpx_highbd_12_obmc_variance16x16,
+ vpx_highbd_12_obmc_sub_pixel_variance16x16)
+ HIGHBD_OBFP(BLOCK_8X16, vpx_highbd_obmc_sad8x16_bits12,
+ vpx_highbd_12_obmc_variance8x16,
+ vpx_highbd_12_obmc_sub_pixel_variance8x16)
+ HIGHBD_OBFP(BLOCK_16X8, vpx_highbd_obmc_sad16x8_bits12,
+ vpx_highbd_12_obmc_variance16x8,
+ vpx_highbd_12_obmc_sub_pixel_variance16x8)
+ HIGHBD_OBFP(BLOCK_8X8, vpx_highbd_obmc_sad8x8_bits12,
+ vpx_highbd_12_obmc_variance8x8,
+ vpx_highbd_12_obmc_sub_pixel_variance8x8)
+ HIGHBD_OBFP(BLOCK_4X8, vpx_highbd_obmc_sad4x8_bits12,
+ vpx_highbd_12_obmc_variance4x8,
+ vpx_highbd_12_obmc_sub_pixel_variance4x8)
+ HIGHBD_OBFP(BLOCK_8X4, vpx_highbd_obmc_sad8x4_bits12,
+ vpx_highbd_12_obmc_variance8x4,
+ vpx_highbd_12_obmc_sub_pixel_variance8x4)
+ HIGHBD_OBFP(BLOCK_4X4, vpx_highbd_obmc_sad4x4_bits12,
+ vpx_highbd_12_obmc_variance4x4,
+ vpx_highbd_12_obmc_sub_pixel_variance4x4)
+#endif // CONFIG_OBMC
+ break;
+
+ default:
+ assert(0 &&
+ "cm->bit_depth should be VPX_BITS_8, "
+ "VPX_BITS_10 or VPX_BITS_12");
+ }
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static void realloc_segmentation_maps(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  const int mi_count = cm->mi_rows * cm->mi_cols;  // entries per byte map
+
+  // Encoder segmentation map: drop the old one, allocate a zero-filled one.
+  vpx_free(cpi->segmentation_map);
+  CHECK_MEM_ERROR(cm, cpi->segmentation_map, vpx_calloc(mi_count, 1));
+
+  // Cyclic background refresh state: release any existing instance first.
+  if (cpi->cyclic_refresh != NULL) {
+    vp10_cyclic_refresh_free(cpi->cyclic_refresh);
+  }
+  CHECK_MEM_ERROR(cm, cpi->cyclic_refresh,
+                  vp10_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols));
+
+  // Map used to mark inactive areas.
+  vpx_free(cpi->active_map.map);
+  CHECK_MEM_ERROR(cm, cpi->active_map.map, vpx_calloc(mi_count, 1));
+
+  // Coding-context buffer for saving/restoring the last frame's seg map.
+  vpx_free(cpi->coding_context.last_frame_seg_map_copy);
+  CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy,
+                  vpx_calloc(mi_count, 1));
+}
+
+void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) {
+ VP10_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ // Applies the encoder configuration *oxcf to cpi and resets dependent state.
+ if (cm->profile != oxcf->profile) cm->profile = oxcf->profile;
+ cm->bit_depth = oxcf->bit_depth;
+ cm->color_space = oxcf->color_space;
+ cm->color_range = oxcf->color_range;
+ // Sanity check (assert only): profiles <= PROFILE_1 are 8-bit, others deeper.
+ if (cm->profile <= PROFILE_1)
+ assert(cm->bit_depth == VPX_BITS_8);
+ else
+ assert(cm->bit_depth > VPX_BITS_8);
+ // Keep a full copy of the caller's configuration in cpi->oxcf.
+ cpi->oxcf = *oxcf;
+#if CONFIG_VP9_HIGHBITDEPTH
+ cpi->td.mb.e_mbd.bd = (int)cm->bit_depth;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_GLOBAL_MOTION
+ cpi->td.mb.e_mbd.global_motion = cm->global_motion;
+#endif // CONFIG_GLOBAL_MOTION
+ // Baseline GF interval: fixed for pass 0 in VPX_Q mode, else the midpoint.
+ if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
+ rc->baseline_gf_interval = FIXED_GF_INTERVAL;
+ } else {
+ rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
+ }
+ // Refresh flags: last frame on, golden (and backward ref, if built) off.
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+#if CONFIG_EXT_REFS
+ cpi->refresh_bwd_ref_frame = 0;
+#endif // CONFIG_EXT_REFS
+ // Error-resilient / frame-parallel decoding => forward context refresh.
+ cm->refresh_frame_context =
+ (oxcf->error_resilient_mode || oxcf->frame_parallel_decoding_mode)
+ ? REFRESH_FRAME_CONTEXT_FORWARD
+ : REFRESH_FRAME_CONTEXT_BACKWARD;
+ cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE;
+ // Screen content: enable its tools, ensure a palette buffer, rebuild pc_tree.
+ cm->allow_screen_content_tools = (cpi->oxcf.content == VPX_CONTENT_SCREEN);
+ if (cm->allow_screen_content_tools) {
+ MACROBLOCK *x = &cpi->td.mb;
+ if (x->palette_buffer == 0) {
+ CHECK_MEM_ERROR(cm, x->palette_buffer,
+ vpx_memalign(16, sizeof(*x->palette_buffer)));
+ }
+ // Reallocate the pc_tree, as its contents depend on
+ // the state of cm->allow_screen_content_tools
+ vp10_free_pc_tree(&cpi->td);
+ vp10_setup_pc_tree(&cpi->common, &cpi->td);
+ }
+
+ vp10_reset_segment_features(cm);
+ vp10_set_high_precision_mv(cpi, 0);
+
+ {
+ int i;
+
+ for (i = 0; i < MAX_SEGMENTS; i++)
+ cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
+ }
+ cpi->encode_breakout = cpi->oxcf.encode_breakout;
+
+ set_rc_buffer_sizes(rc, &cpi->oxcf);
+
+ // Under a configuration change, where maximum_buffer_size may change,
+ // keep buffer level clipped to the maximum allowed buffer size.
+ rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
+ rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size);
+
+ // Set up frame rate and related parameters rate control values.
+ vp10_new_framerate(cpi, cpi->framerate);
+
+ // Set absolute upper and lower quality limits
+ rc->worst_quality = cpi->oxcf.worst_allowed_q;
+ rc->best_quality = cpi->oxcf.best_allowed_q;
+
+ cm->interp_filter = cpi->sf.default_interp_filter;
+ // Render size: use the configured one if both are positive, else frame size.
+ if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
+ cm->render_width = cpi->oxcf.render_width;
+ cm->render_height = cpi->oxcf.render_height;
+ } else {
+ cm->render_width = cpi->oxcf.width;
+ cm->render_height = cpi->oxcf.height;
+ }
+ cm->width = cpi->oxcf.width;
+ cm->height = cpi->oxcf.height;
+ // If the frame outgrew the initial size, reallocate and forget that size.
+ if (cpi->initial_width) {
+ if (cm->width > cpi->initial_width || cm->height > cpi->initial_height) {
+ vp10_free_context_buffers(cm);
+ vp10_alloc_compressor_data(cpi);
+ realloc_segmentation_maps(cpi);
+ cpi->initial_width = cpi->initial_height = 0;
+ }
+ }
+ update_frame_size(cpi);
+
+ cpi->alt_ref_source = NULL;
+ rc->is_src_frame_alt_ref = 0;
+
+#if CONFIG_EXT_REFS
+ rc->is_bwd_ref_frame = 0;
+ rc->is_last_bipred_frame = 0;
+ rc->is_bipred_frame = 0;
+#endif // CONFIG_EXT_REFS
+
+#if 0
+ // Experimental RD Code
+ cpi->frame_distortion = 0;
+ cpi->last_frame_distortion = 0;
+#endif
+
+ set_tile_info(cpi);
+
+ cpi->ext_refresh_frame_flags_pending = 0;
+ cpi->ext_refresh_frame_context_pending = 0;
+ // Re-run the high-bitdepth function setup now that cm->bit_depth is set.
+#if CONFIG_VP9_HIGHBITDEPTH
+ highbd_set_var_fns(cpi);
+#endif
+}
+
+#ifndef M_LOG2_E
+#define M_LOG2_E 0.693147180559945309417 // NOTE: this fallback value is ln(2)
+#endif
+#define log2f(x) (log(x) / (float)M_LOG2_E) // log2(x) when M_LOG2_E is ln(2)
+
+#if !CONFIG_REF_MV
+// Fills the four joint SAD cost entries: 600 at index 0, 300 at indices 1-3.
+static void cal_nmvjointsadcost(int *mvjointsadcost) {
+  int joint;
+  mvjointsadcost[0] = 600;
+  for (joint = 1; joint < 4; ++joint) mvjointsadcost[joint] = 300;
+}
+#endif
+
+static void cal_nmvsadcosts(int *mvsadcost[2]) {
+  // Both tables are written at signed offsets, so each pointer must have
+  // MV_MAX valid entries on either side of element 0.
+  int *const row0 = mvsadcost[0];
+  int *const row1 = mvsadcost[1];
+  int mag = 1;
+  row0[0] = row1[0] = 0;
+  do {
+    const double z = 256 * (2 * (log2f(8 * mag) + .6));
+    const int cost = (int)z;
+    row0[mag] = row1[mag] = cost;
+    row0[-mag] = row1[-mag] = cost;
+  } while (++mag <= MV_MAX);
+}
+
+static void cal_nmvsadcosts_hp(int *mvsadcost[2]) {
+  // Writes offsets 1..MV_MAX and their negatives around element 0 of each
+  // table; the cost depends only on the magnitude of the offset.
+  int *const row0 = mvsadcost[0];
+  int *const row1 = mvsadcost[1];
+  int mag = 1;
+  row0[0] = row1[0] = 0;
+  do {
+    const double z = 256 * (2 * (log2f(8 * mag) + .6));
+    const int cost = (int)z;
+    row0[mag] = row1[mag] = cost;
+    row0[-mag] = row1[-mag] = cost;
+  } while (++mag <= MV_MAX);
+}
+
+static INLINE void init_upsampled_ref_frame_bufs(VP10_COMP *cpi) {
+  int slot;  // index into the REF_FRAMES + 1 upsampled-reference slots
+  // Zero each slot's reference count and set its index to INVALID_IDX.
+  for (slot = 0; slot < (REF_FRAMES + 1); ++slot) {
+    cpi->upsampled_ref_idx[slot] = INVALID_IDX;
+    cpi->upsampled_ref_bufs[slot].ref_count = 0;
+  }
+}
+
+VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
+ BufferPool *const pool) {
+ unsigned int i;
+ VP10_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP10_COMP));
+ VP10_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
+
+ if (!cm) return NULL;
+
+ vp10_zero(*cpi);
+
+ if (setjmp(cm->error.jmp)) {
+ cm->error.setjmp = 0;
+ vp10_remove_compressor(cpi);
+ return 0;
+ }
+
+ cm->error.setjmp = 1;
+ cm->alloc_mi = vp10_enc_alloc_mi;
+ cm->free_mi = vp10_enc_free_mi;
+ cm->setup_mi = vp10_enc_setup_mi;
+
+ CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
+ CHECK_MEM_ERROR(
+ cm, cm->frame_contexts,
+ (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
+
+ cpi->resize_state = 0;
+ cpi->resize_avg_qp = 0;
+ cpi->resize_buffer_underflow = 0;
+ cpi->common.buffer_pool = pool;
+
+ init_config(cpi, oxcf);
+ vp10_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
+
+ cm->current_video_frame = 0;
+ cpi->partition_search_skippable_frame = 0;
+ cpi->tile_data = NULL;
+ cpi->last_show_frame_buf_idx = INVALID_IDX;
+
+ realloc_segmentation_maps(cpi);
+
+#if CONFIG_REF_MV
+ for (i = 0; i < NMV_CONTEXTS; ++i) {
+ CHECK_MEM_ERROR(cm, cpi->nmv_costs[i][0],
+ vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs[i][0])));
+ CHECK_MEM_ERROR(cm, cpi->nmv_costs[i][1],
+ vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs[i][1])));
+ CHECK_MEM_ERROR(cm, cpi->nmv_costs_hp[i][0],
+ vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs_hp[i][0])));
+ CHECK_MEM_ERROR(cm, cpi->nmv_costs_hp[i][1],
+ vpx_calloc(MV_VALS, sizeof(*cpi->nmv_costs_hp[i][1])));
+ }
+#endif
+
+ CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
+ vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
+ CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
+ vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[1])));
+ CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[0],
+ vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[0])));
+ CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[1],
+ vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[1])));
+ CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[0],
+ vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[0])));
+ CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[1],
+ vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[1])));
+ CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[0],
+ vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[0])));
+ CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1],
+ vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1])));
+
+ for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
+ i++) {
+ CHECK_MEM_ERROR(
+ cm, cpi->mbgraph_stats[i].mb_stats,
+ vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
+ }
+
+#if CONFIG_FP_MB_STATS
+ cpi->use_fp_mb_stats = 0;
+ if (cpi->use_fp_mb_stats) {
+ // a place holder used to store the first pass mb stats in the first pass
+ CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf,
+ vpx_calloc(cm->MBs * sizeof(uint8_t), 1));
+ } else {
+ cpi->twopass.frame_mb_stats_buf = NULL;
+ }
+#endif
+
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->multi_arf_last_grp_enabled = 0;
+
+ cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
+#if CONFIG_INTERNAL_STATS
+ cpi->b_calculate_blockiness = 1;
+ cpi->b_calculate_consistency = 1;
+ cpi->total_inconsistency = 0;
+ cpi->psnr.worst = 100.0;
+ cpi->worst_ssim = 100.0;
+
+ cpi->count = 0;
+ cpi->bytes = 0;
+
+ if (cpi->b_calculate_psnr) {
+ cpi->total_sq_error = 0;
+ cpi->total_samples = 0;
+ cpi->tot_recode_hits = 0;
+ cpi->summed_quality = 0;
+ cpi->summed_weights = 0;
+ }
+
+ cpi->fastssim.worst = 100.0;
+ cpi->psnrhvs.worst = 100.0;
+
+ if (cpi->b_calculate_blockiness) {
+ cpi->total_blockiness = 0;
+ cpi->worst_blockiness = 0.0;
+ }
+
+ if (cpi->b_calculate_consistency) {
+ CHECK_MEM_ERROR(cm, cpi->ssim_vars,
+ vpx_malloc(sizeof(*cpi->ssim_vars) * 4 *
+ cpi->common.mi_rows * cpi->common.mi_cols));
+ cpi->worst_consistency = 100.0;
+ }
+#endif
+
+ cpi->first_time_stamp_ever = INT64_MAX;
+
+#if CONFIG_REF_MV
+ for (i = 0; i < NMV_CONTEXTS; ++i) {
+ cpi->td.mb.nmvcost[i][0] = &cpi->nmv_costs[i][0][MV_MAX];
+ cpi->td.mb.nmvcost[i][1] = &cpi->nmv_costs[i][1][MV_MAX];
+ cpi->td.mb.nmvcost_hp[i][0] = &cpi->nmv_costs_hp[i][0][MV_MAX];
+ cpi->td.mb.nmvcost_hp[i][1] = &cpi->nmv_costs_hp[i][1][MV_MAX];
+ }
+#else
+ cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
+ cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
+ cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
+ cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX];
+ cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX];
+#endif
+ cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX];
+ cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX];
+ cal_nmvsadcosts(cpi->td.mb.nmvsadcost);
+
+ cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX];
+ cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX];
+ cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp);
+
+#ifdef OUTPUT_YUV_SKINMAP
+ yuv_skinmap_file = fopen("skinmap.yuv", "ab");
+#endif
+#ifdef OUTPUT_YUV_REC
+ yuv_rec_file = fopen("rec.yuv", "wb");
+#endif
+
+#if 0
+ framepsnr = fopen("framepsnr.stt", "a");
+ kf_list = fopen("kf_list.stt", "w");
+#endif
+
+ cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
+
+ if (oxcf->pass == 1) {
+ vp10_init_first_pass(cpi);
+ } else if (oxcf->pass == 2) {
+ const size_t packet_sz = sizeof(FIRSTPASS_STATS);
+ const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz);
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ const size_t psz = cpi->common.MBs * sizeof(uint8_t);
+ const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
+
+ cpi->twopass.firstpass_mb_stats.mb_stats_start =
+ oxcf->firstpass_mb_stats_in.buf;
+ cpi->twopass.firstpass_mb_stats.mb_stats_end =
+ cpi->twopass.firstpass_mb_stats.mb_stats_start +
+ (ps - 1) * cpi->common.MBs * sizeof(uint8_t);
+ }
+#endif
+
+ cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
+ cpi->twopass.stats_in = cpi->twopass.stats_in_start;
+ cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
+
+ vp10_init_second_pass(cpi);
+ }
+
+ init_upsampled_ref_frame_bufs(cpi);
+
+ vp10_set_speed_features_framesize_independent(cpi);
+ vp10_set_speed_features_framesize_dependent(cpi);
+
+ // Allocate memory to store variances for a frame.
+ CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
+ cpi->source_var_thresh = 0;
+ cpi->frames_till_next_var_check = 0;
+
+#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF) \
+ cpi->fn_ptr[BT].sdf = SDF; \
+ cpi->fn_ptr[BT].sdaf = SDAF; \
+ cpi->fn_ptr[BT].vf = VF; \
+ cpi->fn_ptr[BT].svf = SVF; \
+ cpi->fn_ptr[BT].svaf = SVAF; \
+ cpi->fn_ptr[BT].sdx3f = SDX3F; \
+ cpi->fn_ptr[BT].sdx8f = SDX8F; \
+ cpi->fn_ptr[BT].sdx4df = SDX4DF;
+
+#if CONFIG_EXT_PARTITION
+ BFP(BLOCK_128X128, vpx_sad128x128, vpx_sad128x128_avg, vpx_variance128x128,
+ vpx_sub_pixel_variance128x128, vpx_sub_pixel_avg_variance128x128,
+ vpx_sad128x128x3, vpx_sad128x128x8, vpx_sad128x128x4d)
+
+ BFP(BLOCK_128X64, vpx_sad128x64, vpx_sad128x64_avg, vpx_variance128x64,
+ vpx_sub_pixel_variance128x64, vpx_sub_pixel_avg_variance128x64, NULL,
+ NULL, vpx_sad128x64x4d)
+
+ BFP(BLOCK_64X128, vpx_sad64x128, vpx_sad64x128_avg, vpx_variance64x128,
+ vpx_sub_pixel_variance64x128, vpx_sub_pixel_avg_variance64x128, NULL,
+ NULL, vpx_sad64x128x4d)
+#endif // CONFIG_EXT_PARTITION
+
+ BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
+ vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16, NULL, NULL,
+ vpx_sad32x16x4d)
+
+ BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
+ vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32, NULL, NULL,
+ vpx_sad16x32x4d)
+
+ BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
+ vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32, NULL, NULL,
+ vpx_sad64x32x4d)
+
+ BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
+ vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64, NULL, NULL,
+ vpx_sad32x64x4d)
+
+ BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
+ vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
+ vpx_sad32x32x3, vpx_sad32x32x8, vpx_sad32x32x4d)
+
+ BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
+ vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
+ vpx_sad64x64x3, vpx_sad64x64x8, vpx_sad64x64x4d)
+
+ BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
+ vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
+ vpx_sad16x16x3, vpx_sad16x16x8, vpx_sad16x16x4d)
+
+ BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
+ vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8, vpx_sad16x8x3,
+ vpx_sad16x8x8, vpx_sad16x8x4d)
+
+ BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
+ vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16, vpx_sad8x16x3,
+ vpx_sad8x16x8, vpx_sad8x16x4d)
+
+ BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
+ vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x3,
+ vpx_sad8x8x8, vpx_sad8x8x4d)
+
+ BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
+ vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, NULL,
+ vpx_sad8x4x8, vpx_sad8x4x4d)
+
+ BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
+ vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, NULL,
+ vpx_sad4x8x8, vpx_sad4x8x4d)
+
+ BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
+ vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x3,
+ vpx_sad4x4x8, vpx_sad4x4x4d)
+
+#if CONFIG_OBMC
+#define OBFP(BT, OSDF, OVF, OSVF) \
+ cpi->fn_ptr[BT].osdf = OSDF; \
+ cpi->fn_ptr[BT].ovf = OVF; \
+ cpi->fn_ptr[BT].osvf = OSVF;
+
+#if CONFIG_EXT_PARTITION
+ OBFP(BLOCK_128X128, vpx_obmc_sad128x128, vpx_obmc_variance128x128,
+ vpx_obmc_sub_pixel_variance128x128)
+ OBFP(BLOCK_128X64, vpx_obmc_sad128x64, vpx_obmc_variance128x64,
+ vpx_obmc_sub_pixel_variance128x64)
+ OBFP(BLOCK_64X128, vpx_obmc_sad64x128, vpx_obmc_variance64x128,
+ vpx_obmc_sub_pixel_variance64x128)
+#endif // CONFIG_EXT_PARTITION
+ OBFP(BLOCK_64X64, vpx_obmc_sad64x64, vpx_obmc_variance64x64,
+ vpx_obmc_sub_pixel_variance64x64)
+ OBFP(BLOCK_64X32, vpx_obmc_sad64x32, vpx_obmc_variance64x32,
+ vpx_obmc_sub_pixel_variance64x32)
+ OBFP(BLOCK_32X64, vpx_obmc_sad32x64, vpx_obmc_variance32x64,
+ vpx_obmc_sub_pixel_variance32x64)
+ OBFP(BLOCK_32X32, vpx_obmc_sad32x32, vpx_obmc_variance32x32,
+ vpx_obmc_sub_pixel_variance32x32)
+ OBFP(BLOCK_32X16, vpx_obmc_sad32x16, vpx_obmc_variance32x16,
+ vpx_obmc_sub_pixel_variance32x16)
+ OBFP(BLOCK_16X32, vpx_obmc_sad16x32, vpx_obmc_variance16x32,
+ vpx_obmc_sub_pixel_variance16x32)
+ OBFP(BLOCK_16X16, vpx_obmc_sad16x16, vpx_obmc_variance16x16,
+ vpx_obmc_sub_pixel_variance16x16)
+ OBFP(BLOCK_16X8, vpx_obmc_sad16x8, vpx_obmc_variance16x8,
+ vpx_obmc_sub_pixel_variance16x8)
+ OBFP(BLOCK_8X16, vpx_obmc_sad8x16, vpx_obmc_variance8x16,
+ vpx_obmc_sub_pixel_variance8x16)
+ OBFP(BLOCK_8X8, vpx_obmc_sad8x8, vpx_obmc_variance8x8,
+ vpx_obmc_sub_pixel_variance8x8)
+ OBFP(BLOCK_4X8, vpx_obmc_sad4x8, vpx_obmc_variance4x8,
+ vpx_obmc_sub_pixel_variance4x8)
+ OBFP(BLOCK_8X4, vpx_obmc_sad8x4, vpx_obmc_variance8x4,
+ vpx_obmc_sub_pixel_variance8x4)
+ OBFP(BLOCK_4X4, vpx_obmc_sad4x4, vpx_obmc_variance4x4,
+ vpx_obmc_sub_pixel_variance4x4)
+#endif // CONFIG_OBMC
+
+#if CONFIG_EXT_INTER
+#define MBFP(BT, MSDF, MVF, MSVF) \
+ cpi->fn_ptr[BT].msdf = MSDF; \
+ cpi->fn_ptr[BT].mvf = MVF; \
+ cpi->fn_ptr[BT].msvf = MSVF;
+
+#if CONFIG_EXT_PARTITION
+ MBFP(BLOCK_128X128, vpx_masked_sad128x128, vpx_masked_variance128x128,
+ vpx_masked_sub_pixel_variance128x128)
+ MBFP(BLOCK_128X64, vpx_masked_sad128x64, vpx_masked_variance128x64,
+ vpx_masked_sub_pixel_variance128x64)
+ MBFP(BLOCK_64X128, vpx_masked_sad64x128, vpx_masked_variance64x128,
+ vpx_masked_sub_pixel_variance64x128)
+#endif // CONFIG_EXT_PARTITION
+ MBFP(BLOCK_64X64, vpx_masked_sad64x64, vpx_masked_variance64x64,
+ vpx_masked_sub_pixel_variance64x64)
+ MBFP(BLOCK_64X32, vpx_masked_sad64x32, vpx_masked_variance64x32,
+ vpx_masked_sub_pixel_variance64x32)
+ MBFP(BLOCK_32X64, vpx_masked_sad32x64, vpx_masked_variance32x64,
+ vpx_masked_sub_pixel_variance32x64)
+ MBFP(BLOCK_32X32, vpx_masked_sad32x32, vpx_masked_variance32x32,
+ vpx_masked_sub_pixel_variance32x32)
+ MBFP(BLOCK_32X16, vpx_masked_sad32x16, vpx_masked_variance32x16,
+ vpx_masked_sub_pixel_variance32x16)
+ MBFP(BLOCK_16X32, vpx_masked_sad16x32, vpx_masked_variance16x32,
+ vpx_masked_sub_pixel_variance16x32)
+ MBFP(BLOCK_16X16, vpx_masked_sad16x16, vpx_masked_variance16x16,
+ vpx_masked_sub_pixel_variance16x16)
+ MBFP(BLOCK_16X8, vpx_masked_sad16x8, vpx_masked_variance16x8,
+ vpx_masked_sub_pixel_variance16x8)
+ MBFP(BLOCK_8X16, vpx_masked_sad8x16, vpx_masked_variance8x16,
+ vpx_masked_sub_pixel_variance8x16)
+ MBFP(BLOCK_8X8, vpx_masked_sad8x8, vpx_masked_variance8x8,
+ vpx_masked_sub_pixel_variance8x8)
+ MBFP(BLOCK_4X8, vpx_masked_sad4x8, vpx_masked_variance4x8,
+ vpx_masked_sub_pixel_variance4x8)
+ MBFP(BLOCK_8X4, vpx_masked_sad8x4, vpx_masked_variance8x4,
+ vpx_masked_sub_pixel_variance8x4)
+ MBFP(BLOCK_4X4, vpx_masked_sad4x4, vpx_masked_variance4x4,
+ vpx_masked_sub_pixel_variance4x4)
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ highbd_set_var_fns(cpi);
+#endif
+
+ /* vp10_init_quantizer() is first called here. Add check in
+ * vp10_frame_init_quantizer() so that vp10_init_quantizer is only
+ * called later when needed. This will avoid unnecessary calls of
+ * vp10_init_quantizer() for every frame.
+ */
+ vp10_init_quantizer(cpi);
+#if CONFIG_AOM_QM
+ aom_qm_init(cm);
+#endif
+
+ vp10_loop_filter_init(cm);
+#if CONFIG_LOOP_RESTORATION
+ vp10_loop_restoration_precal();
+#endif // CONFIG_LOOP_RESTORATION
+
+ cm->error.setjmp = 0;
+
+ return cpi;
+}
+
+// Appends T to the NUL-terminated text already held in the char array H,
+// limiting the write to the space remaining in H. H must be a real array (not
+// a pointer) because sizeof(H) is used as its capacity. T is handed to
+// snprintf as the format string.
+#define SNPRINT(H, T) snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T))
+
+// Appends to the char array H the result of formatting the single value V
+// with the format string T, limited to the space remaining in H.
+#define SNPRINT2(H, T, V) \
+ snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
+
+// Releases everything owned by |cpi| and finally frees |cpi| itself. A NULL
+// |cpi| is ignored.
+//
+// When CONFIG_INTERNAL_STATS is enabled, at least one frame has been counted
+// in cm->current_video_frame, and this is not a first-pass encode, a summary
+// of the collected quality statistics is appended to "opsnr.stt". If that file
+// cannot be opened the report is silently skipped.
+void vp10_remove_compressor(VP10_COMP *cpi) {
+  VP10_COMMON *cm;
+  unsigned int i;
+  int t;
+
+  if (!cpi) return;
+
+  cm = &cpi->common;
+  if (cm->current_video_frame > 0) {
+#if CONFIG_INTERNAL_STATS
+    vpx_clear_system_state();
+
+    if (cpi->oxcf.pass != 1) {
+      char headings[512] = { 0 };
+      char results[512] = { 0 };
+      // fopen() returns NULL on failure (e.g. unwritable working directory);
+      // every use of |f| below is guarded so teardown never writes to or
+      // closes a NULL stream.
+      FILE *f = fopen("opsnr.stt", "a");
+      double time_encoded =
+          (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
+          10000000.000;
+      double total_encode_time =
+          (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
+      const double dr =
+          (double)cpi->bytes * (double)8 / (double)1000 / time_encoded;
+      const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
+      const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
+      const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
+
+      if (f != NULL && cpi->b_calculate_psnr) {
+        const double total_psnr = vpx_sse_to_psnr(
+            (double)cpi->total_samples, peak, (double)cpi->total_sq_error);
+        const double total_ssim =
+            100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
+        snprintf(headings, sizeof(headings),
+                 "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
+                 "VPXSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
+                 "WstPsnr\tWstSsim\tWstFast\tWstHVS");
+        snprintf(results, sizeof(results),
+                 "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
+                 "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
+                 "%7.3f\t%7.3f\t%7.3f\t%7.3f",
+                 dr, cpi->psnr.stat[ALL] / cpi->count, total_psnr,
+                 cpi->psnr.stat[ALL] / cpi->count, total_psnr, total_ssim,
+                 total_ssim, cpi->fastssim.stat[ALL] / cpi->count,
+                 cpi->psnrhvs.stat[ALL] / cpi->count, cpi->psnr.worst,
+                 cpi->worst_ssim, cpi->fastssim.worst, cpi->psnrhvs.worst);
+
+        if (cpi->b_calculate_blockiness) {
+          SNPRINT(headings, "\t  Block\tWstBlck");
+          SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count);
+          SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness);
+        }
+
+        if (cpi->b_calculate_consistency) {
+          double consistency =
+              vpx_sse_to_psnr((double)cpi->total_samples, peak,
+                              (double)cpi->total_inconsistency);
+
+          SNPRINT(headings, "\tConsist\tWstCons");
+          SNPRINT2(results, "\t%7.3f", consistency);
+          SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
+        }
+
+        fprintf(f, "%s\t    Time  Rc-Err Abs Err\n", headings);
+        fprintf(f, "%s\t%8.0f %7.2f %7.2f\n", results, total_encode_time,
+                rate_err, fabs(rate_err));
+      }
+
+      if (f != NULL) fclose(f);
+    }
+
+#endif
+
+#if 0
+    {
+      printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
+      printf("\n_frames recive_data encod_mb_row compress_frame  Total\n");
+      printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame,
+             cpi->time_receive_data / 1000, cpi->time_encode_sb_row / 1000,
+             cpi->time_compress_data / 1000,
+             (cpi->time_receive_data + cpi->time_compress_data) / 1000);
+    }
+#endif
+  }
+
+  for (t = 0; t < cpi->num_workers; ++t) {
+    VPxWorker *const worker = &cpi->workers[t];
+    EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
+
+    // Deallocate allocated threads.
+    vpx_get_worker_interface()->end(worker);
+
+    // Deallocate allocated thread data. The last worker's thread data is not
+    // freed here.
+    if (t < cpi->num_workers - 1) {
+      if (cpi->common.allow_screen_content_tools)
+        vpx_free(thread_data->td->mb.palette_buffer);
+      vpx_free(thread_data->td->counts);
+      vp10_free_pc_tree(thread_data->td);
+      vp10_free_var_tree(thread_data->td);
+      vpx_free(thread_data->td);
+    }
+  }
+  vpx_free(cpi->tile_thr_data);
+  vpx_free(cpi->workers);
+
+  if (cpi->num_workers > 1) vp10_loop_filter_dealloc(&cpi->lf_row_sync);
+
+  dealloc_compressor_data(cpi);
+
+  for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]);
+       ++i) {
+    vpx_free(cpi->mbgraph_stats[i].mb_stats);
+  }
+
+#if CONFIG_FP_MB_STATS
+  if (cpi->use_fp_mb_stats) {
+    vpx_free(cpi->twopass.frame_mb_stats_buf);
+    cpi->twopass.frame_mb_stats_buf = NULL;
+  }
+#endif
+
+  // |cm| points into |cpi|, so both calls below must precede vpx_free(cpi).
+  vp10_remove_common(cm);
+  vp10_free_ref_frame_buffers(cm->buffer_pool);
+  vpx_free(cpi);
+
+  // The debug dump files are opened without a NULL check, so guard the close.
+#ifdef OUTPUT_YUV_SKINMAP
+  if (yuv_skinmap_file) fclose(yuv_skinmap_file);
+#endif
+#ifdef OUTPUT_YUV_REC
+  if (yuv_rec_file) fclose(yuv_rec_file);
+#endif
+
+#if 0
+
+  if (keyfile)
+    fclose(keyfile);
+
+  if (framepsnr)
+    fclose(framepsnr);
+
+  if (kf_list)
+    fclose(kf_list);
+
+#endif
+}
+
+// Computes PSNR statistics between cpi->Source and the frame to show, copies
+// them into a VPX_CODEC_PSNR_PKT packet and adds that packet to
+// cpi->output_pkt_list.
+static void generate_psnr_packet(VP10_COMP *cpi) {
+  PSNR_STATS stats;
+  struct vpx_codec_cx_pkt packet;
+  int idx = 0;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  vpx_calc_highbd_psnr(cpi->Source, cpi->common.frame_to_show, &stats,
+                       cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
+#else
+  vpx_calc_psnr(cpi->Source, cpi->common.frame_to_show, &stats);
+#endif
+
+  packet.kind = VPX_CODEC_PSNR_PKT;
+
+  // Four entries of each statistic are transferred to the packet payload.
+  while (idx < 4) {
+    packet.data.psnr.samples[idx] = stats.samples[idx];
+    packet.data.psnr.sse[idx] = stats.sse[idx];
+    packet.data.psnr.psnr[idx] = stats.psnr[idx];
+    ++idx;
+  }
+
+  vpx_codec_pkt_list_add(cpi->output_pkt_list, &packet);
+}
+
+// Stores |ref_frame_flags| in cpi->ref_frame_flags. Returns 0 on success, or
+// -1 (leaving |cpi| untouched) when the value is larger than the mask formed
+// by the low INTER_REFS_PER_FRAME bits.
+int vp10_use_as_reference(VP10_COMP *cpi, int ref_frame_flags) {
+  const int max_flags = (1 << INTER_REFS_PER_FRAME) - 1;
+
+  if (ref_frame_flags <= max_flags) {
+    cpi->ref_frame_flags = ref_frame_flags;
+    return 0;
+  }
+  return -1;
+}
+
+// Splits |ref_frame_flags| into the three external refresh requests (last,
+// golden, alt-ref), each stored as 0 or 1, and marks the request as pending.
+void vp10_update_reference(VP10_COMP *cpi, int ref_frame_flags) {
+  const int wants_golden = (ref_frame_flags & VPX_GOLD_FLAG) ? 1 : 0;
+  const int wants_alt_ref = (ref_frame_flags & VPX_ALT_FLAG) ? 1 : 0;
+  const int wants_last = (ref_frame_flags & VPX_LAST_FLAG) ? 1 : 0;
+
+  cpi->ext_refresh_golden_frame = wants_golden;
+  cpi->ext_refresh_alt_ref_frame = wants_alt_ref;
+  cpi->ext_refresh_last_frame = wants_last;
+  cpi->ext_refresh_frame_flags_pending = 1;
+}
+
+// Maps a single reference flag to its reference frame and returns that
+// frame's buffer via get_ref_frame_buffer(). Returns NULL when
+// |ref_frame_flag| is not exactly one of the recognised flags.
+static YV12_BUFFER_CONFIG *get_vp10_ref_frame_buffer(
+    VP10_COMP *cpi, VPX_REFFRAME ref_frame_flag) {
+  MV_REFERENCE_FRAME ref_frame = NONE;
+
+  switch (ref_frame_flag) {
+    case VPX_LAST_FLAG: ref_frame = LAST_FRAME; break;
+#if CONFIG_EXT_REFS
+    case VPX_LAST2_FLAG: ref_frame = LAST2_FRAME; break;
+    case VPX_LAST3_FLAG: ref_frame = LAST3_FRAME; break;
+    case VPX_BWD_FLAG: ref_frame = BWDREF_FRAME; break;
+#endif  // CONFIG_EXT_REFS
+    case VPX_GOLD_FLAG: ref_frame = GOLDEN_FRAME; break;
+    case VPX_ALT_FLAG: ref_frame = ALTREF_FRAME; break;
+    default: break;  // Unknown or combined flags leave ref_frame == NONE.
+  }
+
+  if (ref_frame == NONE) return NULL;
+  return get_ref_frame_buffer(cpi, ref_frame);
+}
+
+// Copies the encoder's reference frame selected by |ref_frame_flag| into
+// |sd|. Returns 0 on success, -1 when the flag does not select a buffer.
+int vp10_copy_reference_enc(VP10_COMP *cpi, VPX_REFFRAME ref_frame_flag,
+                            YV12_BUFFER_CONFIG *sd) {
+  YV12_BUFFER_CONFIG *const ref_buf =
+      get_vp10_ref_frame_buffer(cpi, ref_frame_flag);
+
+  if (!ref_buf) return -1;
+
+  vpx_yv12_copy_frame(ref_buf, sd);
+  return 0;
+}
+
+// Overwrites the encoder's reference frame selected by |ref_frame_flag| with
+// the contents of |sd|. Returns 0 on success, -1 when the flag does not
+// select a buffer.
+int vp10_set_reference_enc(VP10_COMP *cpi, VPX_REFFRAME ref_frame_flag,
+                           YV12_BUFFER_CONFIG *sd) {
+  YV12_BUFFER_CONFIG *const ref_buf =
+      get_vp10_ref_frame_buffer(cpi, ref_frame_flag);
+
+  if (!ref_buf) return -1;
+
+  vpx_yv12_copy_frame(sd, ref_buf);
+  return 0;
+}
+
+// Records an external request for the frame-context refresh setting: stores
+// |update| and flags the request as pending. Always returns 0.
+int vp10_update_entropy(VP10_COMP *cpi, int update) {
+  cpi->ext_refresh_frame_context_pending = 1;
+  cpi->ext_refresh_frame_context = update;
+
+  return 0;
+}
+
+#if defined(OUTPUT_YUV_DENOISED) || defined(OUTPUT_YUV_SKINMAP)
+// The denoiser buffer is allocated as a YUV 440 buffer. This function writes it
+// as YUV 420. We simply use the top-left pixels of the UV buffers, since we do
+// not denoise the UV channels at this time. If ever we implement UV channel
+// denoising we will have to modify this.
+//
+// Rows are written plane by plane (Y, U, V) to |f|; fwrite() results are not
+// checked.
+void vp10_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f) {
+  const uint8_t *const plane_bufs[3] = { s->y_buffer, s->u_buffer,
+                                         s->v_buffer };
+  const int plane_widths[3] = { s->y_width, s->uv_width, s->uv_width };
+  const int plane_heights[3] = { s->y_height, s->uv_height, s->uv_height };
+  const int plane_strides[3] = { s->y_stride, s->uv_stride, s->uv_stride };
+  int plane;
+
+  for (plane = 0; plane < 3; ++plane) {
+    const uint8_t *row = plane_bufs[plane];
+    int rows_left = plane_heights[plane];
+
+    // do/while on purpose: the first row is written before the count is
+    // tested.
+    do {
+      fwrite(row, plane_widths[plane], 1, f);
+      row += plane_strides[plane];
+    } while (--rows_left);
+  }
+}
+#endif
+
+#if CONFIG_EXT_REFS
+// Sets cm->show_existing_frame (and, when it becomes 1,
+// cpi->existing_fb_idx_to_show) from the rate-control state and the current
+// GF-group entry. cpi->rc.is_src_frame_ext_arf is cleared on every call.
+static void check_show_existing_frame(VP10_COMP *cpi) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ VP10_COMMON *const cm = &cpi->common;
+ // Update type and ARF index stored for the GF-group entry at
+ // gf_group->index.
+ const FRAME_UPDATE_TYPE next_frame_update_type =
+ gf_group->update_type[gf_group->index];
+ const int which_arf = gf_group->arf_update_idx[gf_group->index];
+ if (cpi->rc.is_last_bipred_frame) {
+ // NOTE(zoeliu): If the current frame is a last bi-predictive frame, it is
+ // needed next to show the BWDREF_FRAME, which is pointed by
+ // the last_fb_idxes[0] after reference frame buffer update
+ cpi->rc.is_last_bipred_frame = 0;
+ cm->show_existing_frame = 1;
+ cpi->existing_fb_idx_to_show = cpi->lst_fb_idxes[0];
+ } else if (cpi->is_arf_filter_off[which_arf] &&
+ (next_frame_update_type == OVERLAY_UPDATE ||
+ next_frame_update_type == INTNL_OVERLAY_UPDATE)) {
+ // Other parameters related to OVERLAY_UPDATE will be taken care of
+ // in vp10_rc_get_second_pass_params(cpi)
+ cm->show_existing_frame = 1;
+ cpi->rc.is_src_frame_alt_ref = 1;
+ cpi->existing_fb_idx_to_show = cpi->alt_fb_idx;
+ // The filter-off flag is consumed here: it is reset once acted upon.
+ cpi->is_arf_filter_off[which_arf] = 0;
+ } else {
+ cm->show_existing_frame = 0;
+ }
+ cpi->rc.is_src_frame_ext_arf = 0;
+}
+#endif // CONFIG_EXT_REFS
+
+#ifdef OUTPUT_YUV_REC
+// Debug helper (built only when OUTPUT_YUV_REC is defined): writes the Y, U
+// and V planes of |s| row by row to yuv_rec_file and then flushes the file.
+// The Y plane uses cm->height rows of s->y_width samples; each chroma plane
+// uses s->uv_height rows of s->uv_width samples. fwrite() results are not
+// checked.
+void vp10_write_one_yuv_frame(VP10_COMMON *cm, YV12_BUFFER_CONFIG *s) {
+ uint8_t *src = s->y_buffer;
+ int h = cm->height;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ // High bit-depth buffers: rows are accessed through a uint16_t pointer and
+ // each fwrite() call emits 2 items of <width> bytes, i.e. two bytes per
+ // sample.
+ if (s->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *src16 = CONVERT_TO_SHORTPTR(s->y_buffer);
+
+ do {
+ fwrite(src16, s->y_width, 2, yuv_rec_file);
+ src16 += s->y_stride;
+ } while (--h);
+
+ src16 = CONVERT_TO_SHORTPTR(s->u_buffer);
+ h = s->uv_height;
+
+ do {
+ fwrite(src16, s->uv_width, 2, yuv_rec_file);
+ src16 += s->uv_stride;
+ } while (--h);
+
+ src16 = CONVERT_TO_SHORTPTR(s->v_buffer);
+ h = s->uv_height;
+
+ do {
+ fwrite(src16, s->uv_width, 2, yuv_rec_file);
+ src16 += s->uv_stride;
+ } while (--h);
+
+ fflush(yuv_rec_file);
+ return;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // 8-bit path: one byte per sample. Each do/while writes its first row
+ // before testing the remaining row count.
+ do {
+ fwrite(src, s->y_width, 1, yuv_rec_file);
+ src += s->y_stride;
+ } while (--h);
+
+ src = s->u_buffer;
+ h = s->uv_height;
+
+ do {
+ fwrite(src, s->uv_width, 1, yuv_rec_file);
+ src += s->uv_stride;
+ } while (--h);
+
+ src = s->v_buffer;
+ h = s->uv_height;
+
+ do {
+ fwrite(src, s->uv_width, 1, yuv_rec_file);
+ src += s->uv_stride;
+ } while (--h);
+
+ fflush(yuv_rec_file);
+}
+#endif // OUTPUT_YUV_REC
+
+// Resizes every plane of |src| (using its cropped dimensions) into the
+// matching plane of |dst| (using |dst|'s cropped dimensions) and then extends
+// |dst|'s borders. In high bit-depth builds the extra |bd| argument is
+// forwarded to vp10_highbd_resize_plane() for buffers flagged
+// YV12_FLAG_HIGHBITDEPTH.
+#if CONFIG_VP9_HIGHBITDEPTH
+static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst,
+ int bd) {
+#else
+static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst) {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
+ int i;
+ // Per-plane views (index 0 = Y, 1 = U, 2 = V); U and V share stride and
+ // crop dimensions.
+ const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
+ src->v_buffer };
+ const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
+ const int src_widths[3] = { src->y_crop_width, src->uv_crop_width,
+ src->uv_crop_width };
+ const int src_heights[3] = { src->y_crop_height, src->uv_crop_height,
+ src->uv_crop_height };
+ uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
+ const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
+ const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width,
+ dst->uv_crop_width };
+ const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height,
+ dst->uv_crop_height };
+
+ // NOTE(review): the tables above hold 3 entries, so this loop assumes
+ // MAX_MB_PLANE <= 3 - confirm against the definition.
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp10_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i],
+ src_strides[i], dsts[i], dst_heights[i],
+ dst_widths[i], dst_strides[i], bd);
+ } else {
+ vp10_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
+ dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
+ }
+#else
+ vp10_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
+ dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ vpx_extend_frame_borders(dst);
+}
+
+// Scales the first |planes| planes of |src| into |dst| with the
+// EIGHTTAP_REGULAR interpolation kernel, working in 16x16 luma-sized tiles of
+// the destination, then extends |dst|'s borders (luma only when |planes| is
+// 1). In high bit-depth builds the extra |bd| argument is forwarded to
+// vpx_highbd_convolve8() for buffers flagged YV12_FLAG_HIGHBITDEPTH.
+#if CONFIG_VP9_HIGHBITDEPTH
+static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst, int planes,
+ int bd) {
+#else
+static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst, int planes) {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ const int src_w = src->y_crop_width;
+ const int src_h = src->y_crop_height;
+ const int dst_w = dst->y_crop_width;
+ const int dst_h = dst->y_crop_height;
+ const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
+ src->v_buffer };
+ const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
+ uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
+ const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
+ const InterpFilterParams interp_filter_params =
+ vp10_get_interp_filter_params(EIGHTTAP_REGULAR);
+ const int16_t *kernel = interp_filter_params.filter_ptr;
+ const int taps = interp_filter_params.taps;
+ int x, y, i;
+
+ // (x, y) is the top-left corner of the current tile in destination luma
+ // coordinates.
+ for (y = 0; y < dst_h; y += 16) {
+ for (x = 0; x < dst_w; x += 16) {
+ for (i = 0; i < planes; ++i) {
+ // Planes other than 0 (and 3) are addressed at half the luma
+ // coordinates. NOTE(review): this presumably assumes 4:2:0 chroma
+ // subsampling - confirm.
+ const int factor = (i == 0 || i == 3 ? 1 : 2);
+ // Source position of the tile in 1/16-sample units; the low 4 bits
+ // select the kernel phase below.
+ const int x_q4 = x * (16 / factor) * src_w / dst_w;
+ const int y_q4 = y * (16 / factor) * src_h / dst_h;
+ const int src_stride = src_strides[i];
+ const int dst_stride = dst_strides[i];
+ const uint8_t *src_ptr = srcs[i] +
+ (y / factor) * src_h / dst_h * src_stride +
+ (x / factor) * src_w / dst_w;
+ uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
+
+ // The step arguments (16 * src / dst) are the source advance per output
+ // sample in 1/16 units; the output tile is (16 / factor) square.
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vpx_highbd_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+ &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w,
+ &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h,
+ 16 / factor, 16 / factor, bd);
+ } else {
+ vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
+ &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w,
+ &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h,
+ 16 / factor, 16 / factor);
+ }
+#else
+ vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
+ &kernel[(x_q4 & 0xf) * taps], 16 * src_w / dst_w,
+ &kernel[(y_q4 & 0xf) * taps], 16 * src_h / dst_h,
+ 16 / factor, 16 / factor);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ }
+ }
+
+ if (planes == 1)
+ vpx_extend_frame_borders_y(dst);
+ else
+ vpx_extend_frame_borders(dst);
+}
+
+// Returns 1 when the frame should be coded at a reduced size, 0 otherwise.
+// That requires an UNSCALED frame whose |q| has reached the maximum q stored
+// for the current GF-group rate-factor level, and a projected size above the
+// SCALE_STEP1 rate threshold. Must only be called for key/golden/alt-ref
+// frames (asserted).
+static int scale_down(VP10_COMP *cpi, int q) {
+  RATE_CONTROL *const rc = &cpi->rc;
+  GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+  int size_limit;
+
+  assert(frame_is_kf_gf_arf(cpi));
+
+  if (rc->frame_size_selector != UNSCALED) return 0;
+  if (q < rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) return 0;
+
+  size_limit =
+      (int)(rate_thresh_mult[SCALE_STEP1] *
+            VPXMAX(rc->this_frame_target, rc->avg_frame_bandwidth));
+
+  return rc->projected_frame_size > size_limit;
+}
+
+// Decides whether the frame just coded at quantizer |q| has to be coded
+// again. Returns 1 to request a recode and 0 otherwise. When a dynamic
+// resize is triggered, cpi->resize_pending is also set to 1.
+static int recode_loop_test(VP10_COMP *cpi, int high_limit, int low_limit,
+                            int q, int maxq, int minq) {
+  const RATE_CONTROL *const rc = &cpi->rc;
+  const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+  const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi);
+  const int recode_permitted =
+      (rc->projected_frame_size >= rc->max_frame_bandwidth) ||
+      (cpi->sf.recode_loop == ALLOW_RECODE) ||
+      (frame_is_kfgfarf && (cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF));
+
+  if (!recode_permitted) return 0;
+
+  if (frame_is_kfgfarf && (oxcf->resize_mode == RESIZE_DYNAMIC) &&
+      scale_down(cpi, q)) {
+    // Code this group at a lower resolution.
+    cpi->resize_pending = 1;
+    return 1;
+  }
+
+  // TODO(agrange) high_limit could be greater than the scale-down threshold.
+  if (rc->projected_frame_size > high_limit && q < maxq) return 1;
+  if (rc->projected_frame_size < low_limit && q > minq) return 1;
+
+  // In CQ mode, also recode on undershoot while q is still above the
+  // configured cq level.
+  if (cpi->oxcf.rc_mode == VPX_CQ && q > oxcf->cq_level &&
+      rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) {
+    return 1;
+  }
+
+  return 0;
+}
+
+// Returns the index of the first of the REF_FRAMES + 1 entries in |ubufs|
+// whose ref_count is zero, or INVALID_IDX when every entry is in use.
+static INLINE int get_free_upsampled_ref_buf(EncRefCntBuffer *ubufs) {
+  int slot = 0;
+
+  while (slot < (REF_FRAMES + 1)) {
+    if (ubufs[slot].ref_count == 0) return slot;
+    ++slot;
+  }
+  return INVALID_IDX;
+}
+
+// Up-samples one reference frame into a free entry of
+// cpi->upsampled_ref_bufs and returns that entry's index, or INVALID_IDX when
+// no entry is free. The destination buffer is (re)allocated at 8x the frame
+// width and height when its current allocation is too small.
+static INLINE int upsample_ref_frame(VP10_COMP *cpi,
+                                     const YV12_BUFFER_CONFIG *const ref) {
+  VP10_COMMON *const cm = &cpi->common;
+  EncRefCntBuffer *const slots = cpi->upsampled_ref_bufs;
+  const int slot_idx = get_free_upsampled_ref_buf(slots);
+  YV12_BUFFER_CONFIG *upsampled;
+
+  if (slot_idx == INVALID_IDX) return INVALID_IDX;
+
+  upsampled = &slots[slot_idx].buf;
+
+  // Can allocate buffer for Y plane only.
+  if (upsampled->buffer_alloc_sz < (ref->buffer_alloc_sz << 6)) {
+    const int alloc_failed = vpx_realloc_frame_buffer(
+        upsampled, (cm->width << 3), (cm->height << 3), cm->subsampling_x,
+        cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+        cm->use_highbitdepth,
+#endif
+        (VPX_ENC_BORDER_IN_PIXELS << 3), cm->byte_alignment, NULL, NULL, NULL);
+
+    if (alloc_failed) {
+      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                         "Failed to allocate up-sampled frame buffer");
+    }
+  }
+
+  // Currently, only Y plane is up-sampled, U, V are not used.
+#if CONFIG_VP9_HIGHBITDEPTH
+  scale_and_extend_frame(ref, upsampled, 1, (int)cm->bit_depth);
+#else
+  scale_and_extend_frame(ref, upsampled, 1);
+#endif
+
+  return slot_idx;
+}
+
+#define DUMP_REF_FRAME_IMAGES 0
+
+#if DUMP_REF_FRAME_IMAGES == 1
+// Debug helper: writes |ref_buf| to |file_name| as raw planar data - cm->height
+// rows of cm->width bytes for Y, then half that many rows of half that many
+// bytes for U and for V. Returns VPX_CODEC_OK on success and
+// VPX_CODEC_MEM_ERROR when |ref_buf| is NULL or the file cannot be opened.
+static int dump_one_image(VP10_COMMON *cm,
+                          const YV12_BUFFER_CONFIG *const ref_buf,
+                          char *file_name) {
+  const int chroma_rows = cm->height >> 1;
+  const int chroma_cols = cm->width >> 1;
+  FILE *out = NULL;
+  int row;
+
+  if (ref_buf == NULL) {
+    printf("Frame data buffer is NULL.\n");
+    return VPX_CODEC_MEM_ERROR;
+  }
+
+  out = fopen(file_name, "wb");
+  if (out == NULL) {
+    printf("Unable to open file %s to write.\n", file_name);
+    return VPX_CODEC_MEM_ERROR;
+  }
+
+  // --- Y ---
+  for (row = 0; row < cm->height; ++row)
+    fwrite(ref_buf->y_buffer + row * ref_buf->y_stride, 1, cm->width, out);
+
+  // --- U ---
+  for (row = 0; row < chroma_rows; ++row)
+    fwrite(ref_buf->u_buffer + row * ref_buf->uv_stride, 1, chroma_cols, out);
+
+  // --- V ---
+  for (row = 0; row < chroma_rows; ++row)
+    fwrite(ref_buf->v_buffer + row * ref_buf->uv_stride, 1, chroma_cols, out);
+
+  fclose(out);
+
+  return VPX_CODEC_OK;
+}
+
+// Debug helper: dumps the buffer of every reference frame from LAST_FRAME to
+// ALTREF_FRAME to its own file under /tmp, named after the current video frame
+// number and the reference index.
+static void dump_ref_frame_images(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  MV_REFERENCE_FRAME ref = LAST_FRAME;
+
+  while (ref <= ALTREF_FRAME) {
+    char path[256] = "";
+
+    snprintf(path, sizeof(path), "/tmp/enc_F%d_ref_%d.yuv",
+             cm->current_video_frame, ref);
+    dump_one_image(cm, get_ref_frame_buffer(cpi, ref), path);
+    ++ref;
+  }
+}
+#endif // DUMP_REF_FRAME_IMAGES == 1
+
+#if CONFIG_EXT_REFS
+// Shifts the virtual indices of the last reference frames one position down
+// the chain when LAST_FRAME is updated:
+//   LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME
+// The walk goes from the oldest position towards the newest so that no entry
+// is overwritten before it has been moved.
+static INLINE void shift_last_ref_frames(VP10_COMP *cpi) {
+  int dst = LAST_REF_FRAMES - 1;
+
+  while (dst > 0) {
+    const int src = dst - 1;
+
+    cpi->lst_fb_idxes[dst] = cpi->lst_fb_idxes[src];
+
+    // [0] is allocated to the current coded frame. The statistics for the
+    // reference frames start at [1], hence the +1 offset on both sides.
+    if (!cpi->rc.is_src_frame_alt_ref) {
+      memcpy(cpi->interp_filter_selected[dst + 1],
+             cpi->interp_filter_selected[src + 1],
+             sizeof(cpi->interp_filter_selected[src + 1]));
+    }
+    --dst;
+  }
+}
+#endif
+
+void vp10_update_reference_frames(VP10_COMP *cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ BufferPool *const pool = cm->buffer_pool;
+ const int use_upsampled_ref = cpi->sf.use_upsampled_references;
+ int new_uidx = 0;
+
+ // NOTE: Save the new show frame buffer index for --test-code=warn, i.e.,
+ // for the purpose to verify no mismatch between encoder and decoder.
+ if (cm->show_frame) cpi->last_show_frame_buf_idx = cm->new_fb_idx;
+
+ if (use_upsampled_ref) {
+#if CONFIG_EXT_REFS
+ if (cm->show_existing_frame) {
+ new_uidx = cpi->upsampled_ref_idx[cpi->existing_fb_idx_to_show];
+ // TODO(zoeliu): Once following is confirmed, remove it.
+ assert(cpi->upsampled_ref_bufs[new_uidx].ref_count > 0);
+ } else {
+#endif // CONFIG_EXT_REFS
+ // Up-sample the current encoded frame.
+ RefCntBuffer *bufs = pool->frame_bufs;
+ const YV12_BUFFER_CONFIG *const ref = &bufs[cm->new_fb_idx].buf;
+
+ new_uidx = upsample_ref_frame(cpi, ref);
+#if CONFIG_EXT_REFS
+ assert(new_uidx != INVALID_IDX);
+ }
+#endif // CONFIG_EXT_REFS
+ }
+ // At this point the new frame has been encoded.
+ // If any buffer copy / swapping is signaled it should be done here.
+ if (cm->frame_type == KEY_FRAME) {
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
+ cm->new_fb_idx);
+#if CONFIG_EXT_REFS
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->bwd_fb_idx],
+ cm->new_fb_idx);
+#endif // CONFIG_EXT_REFS
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
+ cm->new_fb_idx);
+
+ if (use_upsampled_ref) {
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
+#if CONFIG_EXT_REFS
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->bwd_fb_idx], new_uidx);
+#endif // CONFIG_EXT_REFS
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
+ }
+ } else if (vp10_preserve_existing_gf(cpi)) {
+ // We have decided to preserve the previously existing golden frame as our
+ // new ARF frame. However, in the short term in function
+ // vp10_bitstream.c::get_refresh_mask() we left it in the GF slot and, if
+ // we're updating the GF with the current decoded frame, we save it to the
+ // ARF slot instead.
+ // We now have to update the ARF with the current frame and swap gld_fb_idx
+ // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF
+ // slot and, if we're updating the GF, the current frame becomes the new GF.
+ int tmp;
+
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx],
+ cm->new_fb_idx);
+ if (use_upsampled_ref)
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->alt_fb_idx], new_uidx);
+
+ tmp = cpi->alt_fb_idx;
+ cpi->alt_fb_idx = cpi->gld_fb_idx;
+ cpi->gld_fb_idx = tmp;
+
+#if CONFIG_EXT_REFS
+ // We need to modify the mapping accordingly
+ cpi->arf_map[0] = cpi->alt_fb_idx;
+#endif
+// TODO(zoeliu): Do we need to copy cpi->interp_filter_selected[0] over to
+// cpi->interp_filter_selected[GOLDEN_FRAME]?
+#if CONFIG_EXT_REFS
+ } else if (cpi->rc.is_last_bipred_frame) {
+ // Refresh the LAST_FRAME with the BWDREF_FRAME and retire the LAST3_FRAME
+ // by updating the virtual indices. Note that the frame BWDREF_FRAME points
+ // to now should be retired, and it should not be used before refreshed.
+ int tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1];
+
+ shift_last_ref_frames(cpi);
+
+ cpi->lst_fb_idxes[0] = cpi->bwd_fb_idx;
+ if (!cpi->rc.is_src_frame_alt_ref) {
+ memcpy(cpi->interp_filter_selected[0],
+ cpi->interp_filter_selected[BWDREF_FRAME],
+ sizeof(cpi->interp_filter_selected[BWDREF_FRAME]));
+ }
+ cpi->bwd_fb_idx = tmp;
+#endif // CONFIG_EXT_REFS
+#if CONFIG_EXT_REFS
+ } else if (cpi->rc.is_src_frame_ext_arf && cm->show_existing_frame) {
+ // Deal with the special case for showing existing internal ALTREF_FRAME
+ // Refresh the LAST_FRAME with the ALTREF_FRAME and retire the LAST3_FRAME
+ // by updating the virtual indices.
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ int which_arf = gf_group->arf_ref_idx[gf_group->index];
+ int tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1];
+
+ shift_last_ref_frames(cpi);
+
+ cpi->lst_fb_idxes[0] = cpi->alt_fb_idx;
+ memcpy(cpi->interp_filter_selected[LAST_FRAME],
+ cpi->interp_filter_selected[ALTREF_FRAME + which_arf],
+ sizeof(cpi->interp_filter_selected[ALTREF_FRAME + which_arf]));
+
+ cpi->alt_fb_idx = tmp;
+ // We need to modify the mapping accordingly
+ cpi->arf_map[which_arf] = cpi->alt_fb_idx;
+#endif // CONFIG_EXT_REFS
+ } else { /* For non key/golden frames */
+ if (cpi->refresh_alt_ref_frame) {
+ int arf_idx = cpi->alt_fb_idx;
+ int which_arf = 0;
+#if CONFIG_EXT_REFS
+ if (cpi->oxcf.pass == 2) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ which_arf = gf_group->arf_update_idx[gf_group->index];
+ arf_idx = cpi->arf_map[which_arf];
+ }
+#else
+ if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ arf_idx = gf_group->arf_update_idx[gf_group->index];
+ }
+#endif // CONFIG_EXT_REFS
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
+ if (use_upsampled_ref)
+ uref_cnt_fb(cpi->upsampled_ref_bufs, &cpi->upsampled_ref_idx[arf_idx],
+ new_uidx);
+
+ memcpy(cpi->interp_filter_selected[ALTREF_FRAME + which_arf],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ }
+
+ if (cpi->refresh_golden_frame) {
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
+ cm->new_fb_idx);
+ if (use_upsampled_ref)
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->gld_fb_idx], new_uidx);
+
+ if (!cpi->rc.is_src_frame_alt_ref) {
+ memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ } else {
+ int which_arf = 0;
+#if CONFIG_EXT_REFS
+ if (cpi->oxcf.pass == 2) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ which_arf = gf_group->arf_update_idx[gf_group->index];
+ }
+#endif // CONFIG_EXT_REFS
+ memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
+ cpi->interp_filter_selected[ALTREF_FRAME + which_arf],
+ sizeof(cpi->interp_filter_selected[ALTREF_FRAME + which_arf]));
+ }
+ }
+
+#if CONFIG_EXT_REFS
+ if (cpi->refresh_bwd_ref_frame) {
+ if (cpi->rc.is_bwd_ref_frame && cpi->num_extra_arfs) {
+ // We have swapped the virtual indices to allow bwd_ref_frame to use
+ // ALT0 as reference frame. We need to swap them back.
+ // NOTE: The ALT_REFs' are indexed reversely, and ALT0 refers to the
+ // farthest ALT_REF from the first frame in the gf group.
+ int tmp = cpi->arf_map[0];
+ cpi->arf_map[0] = cpi->alt_fb_idx;
+ cpi->alt_fb_idx = cpi->bwd_fb_idx;
+ cpi->bwd_fb_idx = tmp;
+ }
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->bwd_fb_idx],
+ cm->new_fb_idx);
+ if (use_upsampled_ref)
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->bwd_fb_idx], new_uidx);
+
+ memcpy(cpi->interp_filter_selected[BWDREF_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ }
+#endif // CONFIG_EXT_REFS
+ }
+
+ if (cpi->refresh_last_frame) {
+#if CONFIG_EXT_REFS
+ // NOTE(zoeliu): We have two layers of mapping (1) from the per-frame
+ // reference to the reference frame buffer virtual index; and then (2) from
+ // the virtual index to the reference frame buffer physical index:
+ //
+ // LAST_FRAME, ..., LAST3_FRAME, ..., ALTREF_FRAME
+ // | | |
+ // v v v
+ // lst_fb_idxes[0], ..., lst_fb_idxes[2], ..., alt_fb_idx
+ // | | |
+ // v v v
+ // ref_frame_map[], ..., ref_frame_map[], ..., ref_frame_map[]
+ //
+ // When refresh_last_frame is set, it is intended to retire LAST3_FRAME,
+ // have the other 2 LAST reference frames shifted as follows:
+ // LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME
+ // , and then have LAST_FRAME refreshed by the newly coded frame.
+ //
+ // To fulfill it, the decoder will be notified to execute following 2 steps:
+ //
+ // (a) To change ref_frame_map[] and have the virtual index of LAST3_FRAME
+ // to point to the newly coded frame, i.e.
+ // ref_frame_map[lst_fb_idexes[2]] => new_fb_idx;
+ //
+ // (b) To change the 1st layer mapping to have LAST_FRAME mapped to the
+ // original virtual index of LAST3_FRAME and have the other mappings
+ // shifted as follows:
+ // LAST_FRAME, LAST2_FRAME, LAST3_FRAME
+ // | | |
+ // v v v
+ // lst_fb_idxes[2], lst_fb_idxes[0], lst_fb_idxes[1]
+ int ref_frame;
+ if (cpi->rc.is_bwd_ref_frame && cpi->num_extra_arfs) {
+ // We have swapped the virtual indices to use ALT0 as BWD_REF
+ // and we need to swap them back.
+ int tmp = cpi->arf_map[0];
+ cpi->arf_map[0] = cpi->alt_fb_idx;
+ cpi->alt_fb_idx = cpi->bwd_fb_idx;
+ cpi->bwd_fb_idx = tmp;
+ }
+ if (cm->frame_type == KEY_FRAME) {
+ for (ref_frame = 0; ref_frame < LAST_REF_FRAMES; ++ref_frame) {
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->lst_fb_idxes[ref_frame]],
+ cm->new_fb_idx);
+
+ if (use_upsampled_ref)
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->lst_fb_idxes[ref_frame]],
+ new_uidx);
+ }
+ } else {
+ int tmp;
+
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->lst_fb_idxes[LAST_REF_FRAMES - 1]],
+ cm->new_fb_idx);
+
+ if (use_upsampled_ref)
+ uref_cnt_fb(
+ cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->lst_fb_idxes[LAST_REF_FRAMES - 1]],
+ new_uidx);
+
+ tmp = cpi->lst_fb_idxes[LAST_REF_FRAMES - 1];
+
+ shift_last_ref_frames(cpi);
+
+ cpi->lst_fb_idxes[0] = tmp;
+
+ if (!cpi->rc.is_src_frame_alt_ref) {
+ if (cm->show_existing_frame) {
+ memcpy(cpi->interp_filter_selected[LAST_FRAME],
+ cpi->interp_filter_selected[BWDREF_FRAME],
+ sizeof(cpi->interp_filter_selected[BWDREF_FRAME]));
+ } else {
+ memcpy(cpi->interp_filter_selected[LAST_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ }
+ }
+ }
+#else
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
+ cm->new_fb_idx);
+ if (use_upsampled_ref)
+ uref_cnt_fb(cpi->upsampled_ref_bufs,
+ &cpi->upsampled_ref_idx[cpi->lst_fb_idx], new_uidx);
+ if (!cpi->rc.is_src_frame_alt_ref) {
+ memcpy(cpi->interp_filter_selected[LAST_FRAME],
+ cpi->interp_filter_selected[0],
+ sizeof(cpi->interp_filter_selected[0]));
+ }
+#endif // CONFIG_EXT_REFS
+ }
+
+#if DUMP_REF_FRAME_IMAGES == 1
+ // Dump out all reference frame images.
+ dump_ref_frame_images(cpi);
+#endif // DUMP_REF_FRAME_IMAGES
+}
+
+// Picks the loop filter parameters, applies the filters enabled by the
+// CONFIG_* flags to cm->frame_to_show, and extends its inner borders.
+static void loopfilter_frame(VP10_COMP *cpi, VP10_COMMON *cm) {
+ MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
+ struct loopfilter *lf = &cm->lf;
+ if (is_lossless_requested(&cpi->oxcf)) {
+ lf->filter_level = 0;  // Lossless: the loop filter below is skipped.
+ } else {
+ struct vpx_usec_timer timer;
+
+ vpx_clear_system_state();
+ vpx_usec_timer_start(&timer);
+#if CONFIG_LOOP_RESTORATION
+ vp10_pick_filter_restoration(cpi->Source, cpi, cpi->sf.lpf_pick);
+#else
+ vp10_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick);
+#endif // CONFIG_LOOP_RESTORATION
+ vpx_usec_timer_mark(&timer);
+ cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer);
+ }
+
+ if (lf->filter_level > 0) {
+#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION
+ // These experiments always take the single-threaded loop filter path.
+ vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
+#else
+ if (cpi->num_workers > 1)
+ vp10_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
+ lf->filter_level, 0, 0, cpi->workers,
+ cpi->num_workers, &cpi->lf_row_sync);
+ else
+ vp10_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
+#endif
+ }
+#if CONFIG_DERING
+ if (is_lossless_requested(&cpi->oxcf)) {
+ cm->dering_level = 0;
+ } else {
+ cm->dering_level =
+ vp10_dering_search(cm->frame_to_show, cpi->Source, cm, xd);
+ vp10_dering_frame(cm->frame_to_show, cm, xd, cm->dering_level);
+ }
+#endif // CONFIG_DERING
+
+#if CONFIG_CLPF
+ cm->clpf = 0;
+ if (!is_lossless_requested(&cpi->oxcf)) {
+ // Test CLPF: keep it only if it does not increase the SSE vs. the source.
+ int i, hq = 1;
+ uint64_t before, after;
+ // TODO(yaowu): investigate per-segment CLPF decision and
+ // an optimal threshold, use 80 for now.
+ for (i = 0; i < MAX_SEGMENTS; i++)
+ hq &= vp10_get_qindex(&cm->seg, i, cm->base_qindex) < 80;
+
+ if (!hq) { // Don't try filter if the entire image is nearly losslessly
+ // encoded
+#if CLPF_FILTER_ALL_PLANES
+ vpx_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf);
+ before = vpx_get_y_sse(cpi->Source, cm->frame_to_show) +
+ vpx_get_u_sse(cpi->Source, cm->frame_to_show) +
+ vpx_get_v_sse(cpi->Source, cm->frame_to_show);
+ vp10_clpf_frame(cm->frame_to_show, cm, xd);
+ after = vpx_get_y_sse(cpi->Source, cm->frame_to_show) +
+ vpx_get_u_sse(cpi->Source, cm->frame_to_show) +
+ vpx_get_v_sse(cpi->Source, cm->frame_to_show);
+#else
+ vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
+ before = vpx_get_y_sse(cpi->Source, cm->frame_to_show);
+ vp10_clpf_frame(cm->frame_to_show, cm, xd);
+ after = vpx_get_y_sse(cpi->Source, cm->frame_to_show);
+#endif
+ if (before < after) {
+// No improvement, restore the unfiltered copy saved in last_frame_uf
+#if CLPF_FILTER_ALL_PLANES
+ vpx_yv12_copy_frame(&cpi->last_frame_uf, cm->frame_to_show);
+#else
+ vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+#endif
+ } else {
+ cm->clpf = 1;
+ }
+ }
+ }
+#endif
+#if CONFIG_LOOP_RESTORATION
+ if (cm->rst_info.restoration_type != RESTORE_NONE) {
+ vp10_loop_restoration_init(&cm->rst_internal, &cm->rst_info,
+ cm->frame_type == KEY_FRAME, cm->width,
+ cm->height);
+ vp10_loop_restoration_rows(cm->frame_to_show, cm, 0, cm->mi_rows, 0);
+ }
+#endif // CONFIG_LOOP_RESTORATION
+
+ vpx_extend_frame_inner_borders(cm->frame_to_show);
+}
+
+// (Re)allocates frame buffer |buffer_idx|'s MV array if missing or too small.
+static INLINE void alloc_frame_mvs(VP10_COMMON *const cm, int buffer_idx) {
+  RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx];
+  if (new_fb_ptr->mvs != NULL && new_fb_ptr->mi_rows >= cm->mi_rows &&
+      new_fb_ptr->mi_cols >= cm->mi_cols) return;  // Already large enough.
+  vpx_free(new_fb_ptr->mvs);
+  CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
+                  (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
+                                       sizeof(*new_fb_ptr->mvs)));
+  new_fb_ptr->mi_rows = cm->mi_rows;
+  new_fb_ptr->mi_cols = cm->mi_cols;
+}
+
+// For each reference enabled in cpi->ref_frame_flags, stores in
+// cpi->scaled_ref_idx[] a frame-sized buffer index (INVALID_IDX if none).
+void vp10_scale_references(VP10_COMP *cpi) {
+ VP10_COMMON *cm = &cpi->common;
+ MV_REFERENCE_FRAME ref_frame;
+ const VPX_REFFRAME ref_mask[INTER_REFS_PER_FRAME] = {
+ VPX_LAST_FLAG,
+#if CONFIG_EXT_REFS
+ VPX_LAST2_FLAG,
+ VPX_LAST3_FLAG,
+#endif // CONFIG_EXT_REFS
+ VPX_GOLD_FLAG,
+#if CONFIG_EXT_REFS
+ VPX_BWD_FLAG,
+#endif // CONFIG_EXT_REFS
+ VPX_ALT_FLAG
+ };
+
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ // Need to convert from VPX_REFFRAME to index into ref_mask (subtract 1).
+ if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
+ BufferPool *const pool = cm->buffer_pool;
+ const YV12_BUFFER_CONFIG *const ref =
+ get_ref_frame_buffer(cpi, ref_frame);
+ if (ref == NULL) {
+ cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
+ continue;
+ }
+ // Size mismatch: scale the reference into a separate frame buffer.
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
+ RefCntBuffer *new_fb_ptr = NULL;
+ int force_scaling = 0;
+ int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
+ if (new_fb == INVALID_IDX) {
+ new_fb = get_free_fb(cm);
+ force_scaling = 1;
+ }
+ if (new_fb == INVALID_IDX) return;  // NOTE(review): gives up silently.
+ new_fb_ptr = &pool->frame_bufs[new_fb];
+ if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
+ new_fb_ptr->buf.y_crop_height != cm->height) {
+ if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ cm->use_highbitdepth,
+ VPX_ENC_BORDER_IN_PIXELS,
+ cm->byte_alignment, NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffer");
+ scale_and_extend_frame(ref, &new_fb_ptr->buf, MAX_MB_PLANE,
+ (int)cm->bit_depth);
+ cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
+ alloc_frame_mvs(cm, new_fb);
+ }
+#else
+ if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
+ RefCntBuffer *new_fb_ptr = NULL;
+ int force_scaling = 0;
+ int new_fb = cpi->scaled_ref_idx[ref_frame - 1];
+ if (new_fb == INVALID_IDX) {
+ new_fb = get_free_fb(cm);
+ force_scaling = 1;
+ }
+ if (new_fb == INVALID_IDX) return;  // NOTE(review): gives up silently.
+ new_fb_ptr = &pool->frame_bufs[new_fb];
+ if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
+ new_fb_ptr->buf.y_crop_height != cm->height) {
+ if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VPX_ENC_BORDER_IN_PIXELS,
+ cm->byte_alignment, NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffer");
+ scale_and_extend_frame(ref, &new_fb_ptr->buf, MAX_MB_PLANE);
+ cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
+ alloc_frame_mvs(cm, new_fb);
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // With up-sampled references, also rebuild the 8x copy of the new buffer.
+ if (cpi->sf.use_upsampled_references &&
+ (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width ||
+ new_fb_ptr->buf.y_crop_height != cm->height)) {
+ const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
+ EncRefCntBuffer *ubuf =
+ &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[map_idx]];
+ if (vpx_realloc_frame_buffer(&ubuf->buf, (cm->width << 3),
+ (cm->height << 3), cm->subsampling_x,
+ cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ (VPX_ENC_BORDER_IN_PIXELS << 3),
+ cm->byte_alignment, NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate up-sampled frame buffer");
+#if CONFIG_VP9_HIGHBITDEPTH
+ scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, 1,
+ (int)cm->bit_depth);
+#else
+ scale_and_extend_frame(&new_fb_ptr->buf, &ubuf->buf, 1);
+#endif
+ }
+ } else {
+ const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ RefCntBuffer *const buf = &pool->frame_bufs[buf_idx];
+ buf->buf.y_crop_width = ref->y_crop_width;
+ buf->buf.y_crop_height = ref->y_crop_height;
+ cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
+ ++buf->ref_count;  // The unscaled reference itself is held.
+ }
+ } else {
+ if (cpi->oxcf.pass != 0) cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
+ }
+ }
+}
+
+// Drops the references held on scaled reference buffers and marks the
+// corresponding cpi->scaled_ref_idx[] entries as INVALID_IDX.
+static void release_scaled_references(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  int refresh[INTER_REFS_PER_FRAME];
+  int i;
+  if (cpi->oxcf.pass != 0) {
+    // Outside one-pass mode every held scaled reference is released.
+    for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) {
+      const int idx = cpi->scaled_ref_idx[i];
+      if (idx == INVALID_IDX) continue;
+      --cm->buffer_pool->frame_bufs[idx].ref_count;
+      cpi->scaled_ref_idx[i] = INVALID_IDX;
+    }
+    return;
+  }
+
+  // One-pass: release only refreshed or same-resolution references.
+  refresh[0] = cpi->refresh_last_frame != 0;
+#if CONFIG_EXT_REFS
+  refresh[1] = refresh[2] = 0;
+  refresh[3] = cpi->refresh_golden_frame != 0;
+  refresh[4] = cpi->refresh_bwd_ref_frame != 0;
+  refresh[5] = cpi->refresh_alt_ref_frame != 0;
+#else
+  refresh[1] = cpi->refresh_golden_frame != 0;
+  refresh[2] = cpi->refresh_alt_ref_frame != 0;
+#endif  // CONFIG_EXT_REFS
+  for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
+    const int idx = cpi->scaled_ref_idx[i - 1];
+    const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
+    const YV12_BUFFER_CONFIG *scaled;
+    if (idx == INVALID_IDX) continue;
+    scaled = &cm->buffer_pool->frame_bufs[idx].buf;
+    if (!refresh[i - 1] && (scaled->y_crop_width != ref->y_crop_width ||
+                            scaled->y_crop_height != ref->y_crop_height))
+      continue;  // Still needed: kept for a later frame.
+    --cm->buffer_pool->frame_bufs[idx].ref_count;
+    cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
+  }
+}
+
+// Builds model counts: TWO_TOKEN also sums THREE_TOKEN up to EOB_TOKEN - 1.
+static void full_to_model_count(unsigned int *model_count,
+                                unsigned int *full_count) {
+  int tok = THREE_TOKEN;
+  model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN];
+  model_count[ONE_TOKEN] = full_count[ONE_TOKEN];
+  model_count[TWO_TOKEN] = full_count[TWO_TOKEN];
+  while (tok < EOB_TOKEN) model_count[TWO_TOKEN] += full_count[tok++];
+  model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN];
+}
+
+// Applies full_to_model_count() to every [plane][ref][band][context] entry.
+void vp10_full_to_model_counts(vp10_coeff_count_model *model_count,
+                               vp10_coeff_count *full_count) {
+  int p, r, b, c;
+  for (p = 0; p < PLANE_TYPES; ++p)
+    for (r = 0; r < REF_TYPES; ++r)
+      for (b = 0; b < COEF_BANDS; ++b)
+        for (c = 0; c < BAND_COEFF_CONTEXTS(b); ++c)
+          full_to_model_count(model_count[p][r][b][c], full_count[p][r][b][c]);
+}
+
+#if 0 && CONFIG_INTERNAL_STATS
+// Debug helper (compiled out): appends one line of per-frame statistics to
+// "tmp.stt", truncating that file first when current_video_frame is 0.
+static void output_frame_level_debug_stats(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
+  int64_t recon_err;
+  if (f == NULL) return;  // Could not open the log; nothing to write.
+
+  vpx_clear_system_state();
+  recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+
+  if (cpi->twopass.total_left_stats.coded_error != 0.0)
+    fprintf(f, "%10u %dx%d %10d %10d %d %d %10d %10d %10d %10d"
+            "%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" "
+            "%10"PRId64" %10"PRId64" %10d "
+            "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
+            "%6d %6d %5d %5d %5d "
+            "%10"PRId64" %10.3lf"
+            "%10lf %8u %10"PRId64" %10d %10d %10d\n",
+            cpi->common.current_video_frame,
+            cm->width, cm->height,
+            cpi->td.rd_counts.m_search_count,
+            cpi->td.rd_counts.ex_search_count,
+            cpi->rc.source_alt_ref_pending,
+            cpi->rc.source_alt_ref_active,
+            cpi->rc.this_frame_target,
+            cpi->rc.projected_frame_size,
+            cpi->rc.projected_frame_size / cpi->common.MBs,
+            (cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
+            cpi->rc.vbr_bits_off_target,
+            cpi->rc.vbr_bits_off_target_fast,
+            cpi->twopass.extend_minq,
+            cpi->twopass.extend_minq_fast,
+            cpi->rc.total_target_vs_actual,
+            (cpi->rc.starting_buffer_level - cpi->rc.bits_off_target),
+            cpi->rc.total_actual_bits, cm->base_qindex,
+            vp10_convert_qindex_to_q(cm->base_qindex, cm->bit_depth),
+            (double)vp10_dc_quant(cm->base_qindex, 0, cm->bit_depth) / 4.0,
+            vp10_convert_qindex_to_q(cpi->twopass.active_worst_quality,
+                                     cm->bit_depth),
+            cpi->rc.avg_q,
+            vp10_convert_qindex_to_q(cpi->oxcf.cq_level, cm->bit_depth),
+            cpi->refresh_last_frame, cpi->refresh_golden_frame,
+            cpi->refresh_alt_ref_frame, cm->frame_type, cpi->rc.gfu_boost,
+            cpi->twopass.bits_left,
+            cpi->twopass.total_left_stats.coded_error,
+            cpi->twopass.bits_left /
+                (1 + cpi->twopass.total_left_stats.coded_error),
+            cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
+            cpi->twopass.kf_zeromotion_pct,
+            cpi->twopass.fr_content_type);
+
+  fclose(f);
+
+  if (0) {
+    FILE *const fmodes = fopen("Modes.stt", "a");
+    int i;
+    if (fmodes == NULL) return;  // Same guard for the per-mode log.
+    fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame,
+            cm->frame_type, cpi->refresh_golden_frame,
+            cpi->refresh_alt_ref_frame);
+    for (i = 0; i < MAX_MODES; ++i)
+      fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
+    fprintf(fmodes, "\n");
+
+    fclose(fmodes);
+  }
+}
+#endif
+
+// Derives cpi->mv_step_param from the frame dimensions and, with automatic
+// step sizing, from the max MV magnitude found in the previous frame.
+static void set_mv_search_params(VP10_COMP *cpi) {
+  const VP10_COMMON *const cm = &cpi->common;
+  const unsigned int max_mv_def = VPXMIN(cm->width, cm->height);
+
+  // Resolution-based default.
+  cpi->mv_step_param = vp10_init_search_range(max_mv_def);
+  if (!cpi->sf.mv.auto_mv_step_size) return;
+
+  if (frame_is_intra_only(cm)) {
+    // Seed max_mv_magnitude for the first INTER frame that follows a
+    // key/intra-only frame.
+    cpi->max_mv_magnitude = max_mv_def;
+    return;
+  }
+  if (cm->show_frame) {
+    // Allow steps for up to twice the previous frame's max MV magnitude,
+    // capped by the resolution-based default.
+    cpi->mv_step_param = vp10_init_search_range(
+        VPXMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
+  }
+  cpi->max_mv_magnitude = 0;
+}
+// Refreshes the per-frame settings that do not depend on the frame size.
+static void set_size_independent_vars(VP10_COMP *cpi) {
+ vp10_set_speed_features_framesize_independent(cpi);
+ vp10_set_rd_speed_thresholds(cpi);
+ vp10_set_rd_speed_thresholds_sub8x8(cpi);
+ cpi->common.interp_filter = cpi->sf.default_interp_filter;
+}
+
+// Applies the frame-size dependent speed features, picks q and its bounds,
+// then sets MV precision and (two-pass only) static segmentation features.
+static void set_size_dependent_vars(VP10_COMP *cpi, int *q, int *bottom_index,
+                                    int *top_index) {
+  const VP10_COMMON *const cm = &cpi->common;
+
+  // Speed features that depend on the frame dimensions.
+  vp10_set_speed_features_framesize_dependent(cpi);
+
+  // Choose q together with its bounds.
+  *q = vp10_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
+
+  // Inter frames: request high-precision MVs only below the q threshold.
+  if (!frame_is_intra_only(cm))
+    vp10_set_high_precision_mv(cpi, *q < HIGH_PRECISION_MV_QTHRESH);
+
+  // Experimental segmentation for better coding of static regions. It needs
+  // lagged coding, so it is limited to the second pass of a two-pass encode
+  // and is further gated by the static_segmentation speed feature.
+  if (cpi->oxcf.pass == 2 && cpi->sf.static_segmentation)
+    configure_static_seg_features(cpi);
+}
+
+// Calls the cpi->ss_cfg init routine matching the configured MV search method.
+static void init_motion_estimation(VP10_COMP *cpi) {
+  const int stride = cpi->scaled_source.y_stride;
+  switch (cpi->sf.mv.search_method) {
+    case NSTEP: vp10_init3smotion_compensation(&cpi->ss_cfg, stride); break;
+    case DIAMOND: vp10_init_dsmotion_compensation(&cpi->ss_cfg, stride); break;
+    default: break;  // Other search methods need no set-up here.
+  }
+}
+
+// Applies any pending resize, (re)allocates the new frame buffer and its MV
+// array at the resulting size, and refreshes cm->frame_refs[] for this frame.
+static void set_frame_size(VP10_COMP *cpi) {
+ int ref_frame;
+ VP10_COMMON *const cm = &cpi->common;
+ VP10EncoderConfig *const oxcf = &cpi->oxcf;
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+
+ if (oxcf->pass == 2 && oxcf->rc_mode == VPX_VBR &&
+ ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) ||
+ (oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending))) {
+ vp10_calculate_coded_size(cpi, &oxcf->scaled_frame_width,
+ &oxcf->scaled_frame_height);
+
+ // There has been a change in frame size.
+ vp10_set_size_literal(cpi, oxcf->scaled_frame_width,
+ oxcf->scaled_frame_height);
+ }
+
+ if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&
+ oxcf->resize_mode == RESIZE_DYNAMIC) {
+ if (cpi->resize_pending == 1) {
+ oxcf->scaled_frame_width =
+ (cm->width * cpi->resize_scale_num) / cpi->resize_scale_den;
+ oxcf->scaled_frame_height =
+ (cm->height * cpi->resize_scale_num) / cpi->resize_scale_den;
+ } else if (cpi->resize_pending == -1) {
+ // Go back up to original size.
+ oxcf->scaled_frame_width = oxcf->width;
+ oxcf->scaled_frame_height = oxcf->height;
+ }
+ if (cpi->resize_pending != 0) {
+ // There has been a change in frame size.
+ vp10_set_size_literal(cpi, oxcf->scaled_frame_width,
+ oxcf->scaled_frame_height);
+
+ // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
+ set_mv_search_params(cpi);
+ }
+ }
+
+ if (oxcf->pass == 2) {
+ vp10_set_target_rate(cpi);
+ }
+
+ alloc_frame_mvs(cm, cm->new_fb_idx);
+
+ // Reset the frame pointers to the current frame size.
+ if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VPX_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
+ NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffer");
+
+ alloc_util_frame_buffers(cpi);
+ init_motion_estimation(cpi);
+ // Bind every cm->frame_refs[] entry to its buffer and scale factors.
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - LAST_FRAME];
+ const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ ref_buf->idx = buf_idx;
+ if (buf_idx != INVALID_IDX) {
+ YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
+ ref_buf->buf = buf;
+#if CONFIG_VP9_HIGHBITDEPTH
+ vp10_setup_scale_factors_for_frame(
+ &ref_buf->sf, buf->y_crop_width, buf->y_crop_height, cm->width,
+ cm->height, (buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0);
+#else
+ vp10_setup_scale_factors_for_frame(&ref_buf->sf, buf->y_crop_width,
+ buf->y_crop_height, cm->width,
+ cm->height);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (vp10_is_scaled(&ref_buf->sf)) vpx_extend_frame_borders(buf);
+ } else {
+ ref_buf->buf = NULL;
+ }
+ }
+
+ set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
+}
+
+// Rebuilds the up-sampled reference set from scratch: re-initialises the
+// buffers, then up-samples every reference from LAST_FRAME to ALTREF_FRAME.
+static void reset_use_upsampled_references(VP10_COMP *cpi) {
+  MV_REFERENCE_FRAME frame = LAST_FRAME;
+
+  init_upsampled_ref_frame_bufs(cpi);
+  while (frame <= ALTREF_FRAME) {
+    const int uidx = upsample_ref_frame(cpi, get_ref_frame_buffer(cpi, frame));
+    const int map_idx = get_ref_frame_map_idx(cpi, frame);
+    // Record the new up-sampled buffer for this reference and count the use.
+    cpi->upsampled_ref_idx[map_idx] = uidx;
+    ++cpi->upsampled_ref_bufs[uidx].ref_count;
+    ++frame;
+  }
+}
+
+// Encodes the frame in a single pass over the pipeline: sizes the frame,
+// scales source and references, picks q, sets up AQ, then encodes once.
+static void encode_without_recode_loop(VP10_COMP *cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ int q = 0, bottom_index = 0, top_index = 0; // Dummy variables.
+ const int use_upsampled_ref = cpi->sf.use_upsampled_references;
+
+ vpx_clear_system_state();
+ set_frame_size(cpi);
+
+ // For 1 pass CBR under dynamic resize mode: use faster scaling for source.
+ // Only for 2x2 scaling for now.
+ if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
+ cpi->oxcf.resize_mode == RESIZE_DYNAMIC &&
+ cpi->un_scaled_source->y_width == (cm->width << 1) &&
+ cpi->un_scaled_source->y_height == (cm->height << 1)) {
+ cpi->Source = vp10_scale_if_required_fast(cm, cpi->un_scaled_source,
+ &cpi->scaled_source);
+ if (cpi->unscaled_last_source != NULL)
+ cpi->Last_Source = vp10_scale_if_required_fast(
+ cm, cpi->unscaled_last_source, &cpi->scaled_last_source);
+ } else {
+ cpi->Source =
+ vp10_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source);
+ if (cpi->unscaled_last_source != NULL)
+ cpi->Last_Source = vp10_scale_if_required(cm, cpi->unscaled_last_source,
+ &cpi->scaled_last_source);
+ }
+
+ if (frame_is_intra_only(cm) == 0) {
+ vp10_scale_references(cpi);
+ }
+
+ set_size_independent_vars(cpi);
+ set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
+
+ // cpi->sf.use_upsampled_references can be different from frame to frame.
+ // Every time when cpi->sf.use_upsampled_references is changed from 0 to 1.
+ // The reference frames for this frame have to be up-sampled before encoding.
+ if (!use_upsampled_ref && cpi->sf.use_upsampled_references)
+ reset_use_upsampled_references(cpi);
+
+ vp10_set_quantizer(cm, q);
+ vp10_set_variance_partition_thresholds(cpi, q);
+ setup_frame(cpi);
+
+#if CONFIG_ENTROPY
+ cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1;
+ vp10_copy(cm->starting_coef_probs, cm->fc->coef_probs);
+ vp10_copy(cpi->subframe_stats.enc_starting_coef_probs, cm->fc->coef_probs);
+ cm->coef_probs_update_idx = 0;
+ vp10_copy(cpi->subframe_stats.coef_probs_buf[0], cm->fc->coef_probs);
+#endif // CONFIG_ENTROPY
+
+ suppress_active_map(cpi);
+ // Variance adaptive and in frame q adjustment experiments are mutually
+ // exclusive.
+ if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
+ vp10_vaq_frame_setup(cpi);
+ } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
+ vp10_setup_in_frame_q_adj(cpi);
+ } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+ vp10_cyclic_refresh_setup(cpi);
+ }
+ apply_active_map(cpi);
+
+ // transform / motion compensation build reconstruction frame
+ vp10_encode_frame(cpi);
+
+ // Update some stats from cyclic refresh, and check if we should not update
+ // golden reference, for 1 pass CBR.
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->frame_type != KEY_FRAME &&
+ (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR))
+ vp10_cyclic_refresh_check_golden_update(cpi);
+
+ // Update the skip mb flag probabilities based on the distribution
+ // seen in the last encoder iteration.
+ // update_base_skip_probs(cpi);
+ vpx_clear_system_state();
+}
+
+static void encode_with_recode_loop(VP10_COMP *cpi, size_t *size,
+ uint8_t *dest) {
+ VP10_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ int bottom_index, top_index;
+ int loop_count = 0;
+ int loop_at_this_size = 0;
+ int loop = 0;
+ int overshoot_seen = 0;
+ int undershoot_seen = 0;
+ int frame_over_shoot_limit;
+ int frame_under_shoot_limit;
+ int q = 0, q_low = 0, q_high = 0;
+ const int use_upsampled_ref = cpi->sf.use_upsampled_references;
+
+ set_size_independent_vars(cpi);
+
+ // cpi->sf.use_upsampled_references can be different from frame to frame.
+ // Every time when cpi->sf.use_upsampled_references is changed from 0 to 1.
+ // The reference frames for this frame have to be up-sampled before encoding.
+ if (!use_upsampled_ref && cpi->sf.use_upsampled_references)
+ reset_use_upsampled_references(cpi);
+
+ do {
+ vpx_clear_system_state();
+
+ set_frame_size(cpi);
+
+ if (loop_count == 0 || cpi->resize_pending != 0) {
+ set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
+
+ // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
+ set_mv_search_params(cpi);
+
+ // Reset the loop state for new frame size.
+ overshoot_seen = 0;
+ undershoot_seen = 0;
+
+ // Reconfiguration for change in frame size has concluded.
+ cpi->resize_pending = 0;
+
+ q_low = bottom_index;
+ q_high = top_index;
+
+ loop_at_this_size = 0;
+ }
+
+ // Decide frame size bounds first time through.
+ if (loop_count == 0) {
+ vp10_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
+ &frame_under_shoot_limit,
+ &frame_over_shoot_limit);
+ }
+
+ cpi->Source =
+ vp10_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source);
+
+ if (cpi->unscaled_last_source != NULL)
+ cpi->Last_Source = vp10_scale_if_required(cm, cpi->unscaled_last_source,
+ &cpi->scaled_last_source);
+
+ if (frame_is_intra_only(cm) == 0) {
+ if (loop_count > 0) {
+ release_scaled_references(cpi);
+ }
+ vp10_scale_references(cpi);
+ }
+
+ vp10_set_quantizer(cm, q);
+
+ if (loop_count == 0) setup_frame(cpi);
+
+#if CONFIG_ENTROPY
+ // Base q-index may have changed, so we need to assign proper default coef
+ // probs before every iteration.
+ if (frame_is_intra_only(cm) || cm->error_resilient_mode) {
+ int i;
+ vp10_default_coef_probs(cm);
+ if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
+ cm->reset_frame_context == RESET_FRAME_CONTEXT_ALL) {
+ for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc;
+ } else if (cm->reset_frame_context == RESET_FRAME_CONTEXT_CURRENT) {
+ cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+ }
+ }
+#endif // CONFIG_ENTROPY
+
+#if CONFIG_ENTROPY
+ cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1;
+ if (loop_count == 0 || frame_is_intra_only(cm) ||
+ cm->error_resilient_mode) {
+ vp10_copy(cm->starting_coef_probs, cm->fc->coef_probs);
+ vp10_copy(cpi->subframe_stats.enc_starting_coef_probs,
+ cm->fc->coef_probs);
+ } else {
+ if (cm->do_subframe_update) {
+ vp10_copy(cm->fc->coef_probs,
+ cpi->subframe_stats.enc_starting_coef_probs);
+ vp10_copy(cm->starting_coef_probs,
+ cpi->subframe_stats.enc_starting_coef_probs);
+ vp10_zero(cpi->subframe_stats.coef_counts_buf);
+ vp10_zero(cpi->subframe_stats.eob_counts_buf);
+ }
+ }
+ cm->coef_probs_update_idx = 0;
+ vp10_copy(cpi->subframe_stats.coef_probs_buf[0], cm->fc->coef_probs);
+#endif // CONFIG_ENTROPY
+
+ // Variance adaptive and in frame q adjustment experiments are mutually
+ // exclusive.
+ if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
+ vp10_vaq_frame_setup(cpi);
+ } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
+ vp10_setup_in_frame_q_adj(cpi);
+ }
+
+ // transform / motion compensation build reconstruction frame
+ vp10_encode_frame(cpi);
+
+ // Update the skip mb flag probabilities based on the distribution
+ // seen in the last encoder iteration.
+ // update_base_skip_probs(cpi);
+
+ vpx_clear_system_state();
+
+ // Dummy pack of the bitstream using up to date stats to get an
+ // accurate estimate of output frame size to determine if we need
+ // to recode.
+ if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
+ save_coding_context(cpi);
+
+ vp10_pack_bitstream(cpi, dest, size);
+
+ rc->projected_frame_size = (int)(*size) << 3;
+ restore_coding_context(cpi);
+
+ if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
+ }
+
+ if (cpi->oxcf.rc_mode == VPX_Q) {
+ loop = 0;
+ } else {
+ if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced &&
+ (rc->projected_frame_size < rc->max_frame_bandwidth)) {
+ int last_q = q;
+ int64_t kf_err;
+
+ int64_t high_err_target = cpi->ambient_err;
+ int64_t low_err_target = cpi->ambient_err >> 1;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ kf_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ } else {
+ kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ }
+#else
+ kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // Prevent possible divide by zero error below for perfect KF
+ kf_err += !kf_err;
+
+ // The key frame is not good enough or we can afford
+ // to make it better without undue risk of popping.
+ if ((kf_err > high_err_target &&
+ rc->projected_frame_size <= frame_over_shoot_limit) ||
+ (kf_err > low_err_target &&
+ rc->projected_frame_size <= frame_under_shoot_limit)) {
+ // Lower q_high
+ q_high = q > q_low ? q - 1 : q_low;
+
+ // Adjust Q
+ q = (int)((q * high_err_target) / kf_err);
+ q = VPXMIN(q, (q_high + q_low) >> 1);
+ } else if (kf_err < low_err_target &&
+ rc->projected_frame_size >= frame_under_shoot_limit) {
+ // The key frame is much better than the previous frame
+ // Raise q_low
+ q_low = q < q_high ? q + 1 : q_high;
+
+ // Adjust Q
+ q = (int)((q * low_err_target) / kf_err);
+ q = VPXMIN(q, (q_high + q_low + 1) >> 1);
+ }
+
+ // Clamp Q to upper and lower limits:
+ q = clamp(q, q_low, q_high);
+
+ loop = q != last_q;
+ } else if (recode_loop_test(cpi, frame_over_shoot_limit,
+ frame_under_shoot_limit, q,
+ VPXMAX(q_high, top_index), bottom_index)) {
+ // Is the projected frame size out of range and are we allowed
+ // to attempt to recode.
+ int last_q = q;
+ int retries = 0;
+
+ if (cpi->resize_pending == 1) {
+ // Change in frame size so go back around the recode loop.
+ cpi->rc.frame_size_selector =
+ SCALE_STEP1 - cpi->rc.frame_size_selector;
+ cpi->rc.next_frame_size_selector = cpi->rc.frame_size_selector;
+
+#if CONFIG_INTERNAL_STATS
+ ++cpi->tot_recode_hits;
+#endif
+ ++loop_count;
+ loop = 1;
+ continue;
+ }
+
+ // Frame size out of permitted range:
+ // Update correction factor & compute new Q to try...
+
+ // Frame is too large
+ if (rc->projected_frame_size > rc->this_frame_target) {
+ // Special case if the projected size is > the max allowed.
+ if (rc->projected_frame_size >= rc->max_frame_bandwidth)
+ q_high = rc->worst_quality;
+
+ // Raise q_low to at least the current value
+ q_low = q < q_high ? q + 1 : q_high;
+
+ if (undershoot_seen || loop_at_this_size > 1) {
+ // Update rate_correction_factor.
+ vp10_rc_update_rate_correction_factors(cpi);
+
+ q = (q_high + q_low + 1) / 2;
+ } else {
+ // Update rate_correction_factor.
+ vp10_rc_update_rate_correction_factors(cpi);
+
+ q = vp10_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
+ VPXMAX(q_high, top_index));
+
+ while (q < q_low && retries < 10) {
+ vp10_rc_update_rate_correction_factors(cpi);
+ q = vp10_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
+ VPXMAX(q_high, top_index));
+ retries++;
+ }
+ }
+
+ overshoot_seen = 1;
+ } else {
+ // Frame is too small
+ q_high = q > q_low ? q - 1 : q_low;
+
+ if (overshoot_seen || loop_at_this_size > 1) {
+ vp10_rc_update_rate_correction_factors(cpi);
+ q = (q_high + q_low) / 2;
+ } else {
+ vp10_rc_update_rate_correction_factors(cpi);
+ q = vp10_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
+ top_index);
+ // Special case reset for qlow for constrained quality.
+ // This should only trigger where there is very substantial
+ // undershoot on a frame and the auto cq level is above
+ // the user passed in value.
+ if (cpi->oxcf.rc_mode == VPX_CQ && q < q_low) {
+ q_low = q;
+ }
+
+ while (q > q_high && retries < 10) {
+ vp10_rc_update_rate_correction_factors(cpi);
+ q = vp10_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
+ top_index);
+ retries++;
+ }
+ }
+
+ undershoot_seen = 1;
+ }
+
+ // Clamp Q to upper and lower limits:
+ q = clamp(q, q_low, q_high);
+
+ loop = (q != last_q);
+ } else {
+ loop = 0;
+ }
+ }
+
+ // Special case for overlay frame.
+ if (rc->is_src_frame_alt_ref &&
+ rc->projected_frame_size < rc->max_frame_bandwidth)
+ loop = 0;
+
+ if (loop) {
+ ++loop_count;
+ ++loop_at_this_size;
+
+#if CONFIG_INTERNAL_STATS
+ ++cpi->tot_recode_hits;
+#endif
+ }
+ } while (loop);
+}
+
+// Returns the set of VPX_*_FLAG reference bits the current frame may use.
+// Starts from VPX_REFFRAME_ALL and clears the bit of every reference whose
+// cm->ref_frame_map entry equals that of another reference compared below
+// (i.e. the two names presumably resolve to the same frame buffer).
+static int get_ref_frame_flags(const VP10_COMP *cpi) {
+  const int *const ref_map = cpi->common.ref_frame_map;
+  // ref_frame_map entry behind each reference slot.
+  const int gld_buf = ref_map[cpi->gld_fb_idx];
+  const int alt_buf = ref_map[cpi->alt_fb_idx];
+#if CONFIG_EXT_REFS
+  const int last_buf = ref_map[cpi->lst_fb_idxes[0]];
+  const int last2_buf = ref_map[cpi->lst_fb_idxes[1]];
+  const int last3_buf = ref_map[cpi->lst_fb_idxes[2]];
+  const int bwd_buf = ref_map[cpi->bwd_fb_idx];
+#else
+  const int last_buf = ref_map[cpi->lst_fb_idx];
+#endif  // CONFIG_EXT_REFS
+  int ref_flags = VPX_REFFRAME_ALL;
+
+#if CONFIG_EXT_REFS
+  // BWDREF_FRAME is disabled unless this is a bi-predictive frame, the last
+  // bi-predictive frame, or a backward-reference frame with extra ARFs.
+  if (!cpi->rc.is_bipred_frame && !cpi->rc.is_last_bipred_frame &&
+      !(cpi->rc.is_bwd_ref_frame && cpi->num_extra_arfs)) {
+    ref_flags &= ~VPX_BWD_FLAG;
+  }
+#endif  // CONFIG_EXT_REFS
+
+  // GOLDEN is dropped when it matches LAST or ALTREF.
+  if (gld_buf == last_buf || gld_buf == alt_buf) {
+    ref_flags &= ~VPX_GOLD_FLAG;
+  }
+
+  // GOLDEN is also dropped when frames_till_gf_update_due is INT_MAX.
+  if (cpi->rc.frames_till_gf_update_due == INT_MAX) {
+    ref_flags &= ~VPX_GOLD_FLAG;
+  }
+
+  // ALTREF is dropped when it matches LAST.
+  if (alt_buf == last_buf) {
+    ref_flags &= ~VPX_ALT_FLAG;
+  }
+
+#if CONFIG_EXT_REFS
+  if (last2_buf == last_buf || last2_buf == alt_buf) {
+    ref_flags &= ~VPX_LAST2_FLAG;
+  }
+  if (last3_buf == last_buf || last3_buf == last2_buf ||
+      last3_buf == alt_buf) {
+    ref_flags &= ~VPX_LAST3_FLAG;
+  }
+  if (gld_buf == last2_buf || gld_buf == last3_buf) {
+    ref_flags &= ~VPX_GOLD_FLAG;
+  }
+  if ((ref_flags & VPX_BWD_FLAG) &&
+      (bwd_buf == last_buf || bwd_buf == last2_buf || bwd_buf == last3_buf ||
+       bwd_buf == gld_buf || bwd_buf == alt_buf)) {
+    ref_flags &= ~VPX_BWD_FLAG;
+  }
+#endif  // CONFIG_EXT_REFS
+  return ref_flags;
+}
+
+// One-shot external overrides: values supplied through
+// vp10_update_reference() and vp10_update_entropy() replace the defaults for
+// the next frame passed to encode_frame_to_data_rate() only.
+static void set_ext_overrides(VP10_COMP *cpi) {
+  if (cpi->ext_refresh_frame_flags_pending) {
+    cpi->ext_refresh_frame_flags_pending = 0;
+    cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame;
+    cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame;
+    cpi->refresh_last_frame = cpi->ext_refresh_last_frame;
+  }
+
+  if (cpi->ext_refresh_frame_context_pending) {
+    cpi->ext_refresh_frame_context_pending = 0;
+    cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context;
+  }
+}
+
+YV12_BUFFER_CONFIG *vp10_scale_if_required_fast(VP10_COMMON *cm,
+                                                YV12_BUFFER_CONFIG *unscaled,
+                                                YV12_BUFFER_CONFIG *scaled) {
+  // Source luma size already equals mi_cols/mi_rows * MI_SIZE: use it as is.
+  const int same_width = cm->mi_cols * MI_SIZE == unscaled->y_width;
+  const int same_height = cm->mi_rows * MI_SIZE == unscaled->y_height;
+  if (same_width && same_height) return unscaled;
+
+  // For 2x2 scaling down.
+  vpx_scale_frame(unscaled, scaled, unscaled->y_buffer, 9, 2, 1, 2, 1, 0);
+  vpx_extend_frame_borders(scaled);
+  return scaled;
+}
+
+YV12_BUFFER_CONFIG *vp10_scale_if_required(VP10_COMMON *cm,
+                                           YV12_BUFFER_CONFIG *unscaled,
+                                           YV12_BUFFER_CONFIG *scaled) {
+  // Return the source untouched when its luma size matches the MI grid size.
+  const int same_width = cm->mi_cols * MI_SIZE == unscaled->y_width;
+  const int same_height = cm->mi_rows * MI_SIZE == unscaled->y_height;
+  if (same_width && same_height) return unscaled;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
+#else
+  scale_and_extend_frame_nonnormative(unscaled, scaled);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  return scaled;
+}
+
+// Sets cm->ref_frame_sign_bias[ALTREF_FRAME] (mirrored to BWDREF_FRAME when
+// CONFIG_EXT_REFS is on) from the ARF state of the current frame.
+static void set_arf_sign_bias(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+#if CONFIG_EXT_REFS
+  const int check_rf_level = 1;
+#else
+  const int check_rf_level = (cpi->oxcf.pass == 2) && cpi->multi_arf_allowed;
+#endif  // CONFIG_EXT_REFS
+  int arf_sign_bias = 0;
+
+  if (cpi->rc.source_alt_ref_active) {
+    // The bias is one unless the ARF itself is being refreshed; a refreshed
+    // ARF still gets it when it is an internal (GF_ARF_LOW) ARF.
+    arf_sign_bias =
+        !cpi->refresh_alt_ref_frame ||
+        (check_rf_level && gf_group->rf_level[gf_group->index] == GF_ARF_LOW);
+  }
+
+  cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias;
+#if CONFIG_EXT_REFS
+  cm->ref_frame_sign_bias[BWDREF_FRAME] = arf_sign_bias;
+#endif  // CONFIG_EXT_REFS
+}
+
+// Returns a mask with bit `ifilter` set for each filter whose selection count
+// is zero for LAST_FRAME and below 1/50 of the total for every other reference
+// that has any selections; 0 after a key frame or when refreshing the ARF.
+static int setup_interp_filter_search_mask(VP10_COMP *cpi) {
+  int ref_total[TOTAL_REFS_PER_FRAME] = { 0 };
+  int arf_idx = ALTREF_FRAME;
+  int mask = 0;
+  MV_REFERENCE_FRAME ref;
+  INTERP_FILTER ifilter;
+
+#if CONFIG_EXT_REFS
+  // Get which arf used as ALTREF_FRAME
+  if (cpi->oxcf.pass == 2)
+    arf_idx += cpi->twopass.gf_group.arf_ref_idx[cpi->twopass.gf_group.index];
+#endif
+  if (cpi->common.last_frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame)
+    return mask;
+
+  // Total selections per reference; ALTREF_FRAME is counted from row arf_idx.
+  for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
+    const int row = (ref == ALTREF_FRAME) ? arf_idx : ref;
+    for (ifilter = EIGHTTAP_REGULAR; ifilter < SWITCHABLE_FILTERS; ++ifilter)
+      ref_total[ref] += cpi->interp_filter_selected[row][ifilter];
+  }
+
+  for (ifilter = EIGHTTAP_REGULAR; ifilter < SWITCHABLE_FILTERS; ++ifilter) {
+    if (!ref_total[LAST_FRAME] ||
+        cpi->interp_filter_selected[LAST_FRAME][ifilter] != 0)
+      continue;
+#if CONFIG_EXT_REFS
+    if (ref_total[LAST2_FRAME] != 0 &&
+        cpi->interp_filter_selected[LAST2_FRAME][ifilter] * 50 >=
+            ref_total[LAST2_FRAME])
+      continue;
+    if (ref_total[LAST3_FRAME] != 0 &&
+        cpi->interp_filter_selected[LAST3_FRAME][ifilter] * 50 >=
+            ref_total[LAST3_FRAME])
+      continue;
+    if (ref_total[BWDREF_FRAME] != 0 &&
+        cpi->interp_filter_selected[BWDREF_FRAME][ifilter] * 50 >=
+            ref_total[BWDREF_FRAME])
+      continue;
+#endif  // CONFIG_EXT_REFS
+    if (ref_total[GOLDEN_FRAME] != 0 &&
+        cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50 >=
+            ref_total[GOLDEN_FRAME])
+      continue;
+    if (ref_total[ALTREF_FRAME] != 0 &&
+        cpi->interp_filter_selected[arf_idx][ifilter] * 50 >=
+            ref_total[ALTREF_FRAME])
+      continue;
+    mask |= 1 << ifilter;
+  }
+  return mask;
+}
+
+#define DUMP_RECON_FRAMES 0
+
+#if DUMP_RECON_FRAMES == 1
+// NOTE(zoeliu): For debug - Output the filtered reconstructed video.
+// Writes cm->frame_to_show to /tmp/enc_filtered_recon.yuv: the file is opened
+// with "wb" for frame 0 and "ab" afterwards. Chroma planes are written at
+// half the luma width and height.
+static void dump_filtered_recon_frames(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  const YV12_BUFFER_CONFIG *recon_buf = cm->frame_to_show;
+  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+  char file_name[256] = "/tmp/enc_filtered_recon.yuv";
+  FILE *f_recon = NULL;
+  int row;
+
+  if (recon_buf == NULL || !cm->show_frame) {
+    printf("Frame %d is not ready or no show to dump.\n",
+           cm->current_video_frame);
+    return;
+  }
+
+  if (cm->current_video_frame == 0) {
+    f_recon = fopen(file_name, "wb");
+    if (f_recon == NULL) {
+      printf("Unable to open file %s to write.\n", file_name);
+      return;
+    }
+  } else {
+    f_recon = fopen(file_name, "ab");
+    if (f_recon == NULL) {
+      printf("Unable to open file %s to append.\n", file_name);
+      return;
+    }
+  }
+  printf(
+      "\nFrame=%5d, encode_update_type[%5d]=%1d, show_existing_frame=%d, "
+      "y_stride=%4d, uv_stride=%4d, width=%4d, height=%4d\n",
+      cm->current_video_frame, gf_group->index,
+      gf_group->update_type[gf_group->index], cm->show_existing_frame,
+      recon_buf->y_stride, recon_buf->uv_stride, cm->width, cm->height);
+  // --- Y ---
+  for (row = 0; row < cm->height; ++row)
+    fwrite(recon_buf->y_buffer + row * recon_buf->y_stride, 1, cm->width,
+           f_recon);
+  // --- U ---
+  for (row = 0; row < (cm->height >> 1); ++row)
+    fwrite(recon_buf->u_buffer + row * recon_buf->uv_stride, 1,
+           (cm->width >> 1), f_recon);
+  // --- V ---
+  for (row = 0; row < (cm->height >> 1); ++row)
+    fwrite(recon_buf->v_buffer + row * recon_buf->uv_stride, 1,
+           (cm->width >> 1), f_recon);
+  fclose(f_recon);
+}
+#endif  // DUMP_RECON_FRAMES
+// Encodes the current frame into dest/*size and updates post-encode state.
+static void encode_frame_to_data_rate(VP10_COMP *cpi, size_t *size,
+ uint8_t *dest,
+ unsigned int *frame_flags) {
+ VP10_COMMON *const cm = &cpi->common;
+ const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+ struct segmentation *const seg = &cm->seg;
+ TX_SIZE t;
+ set_ext_overrides(cpi);
+ vpx_clear_system_state();
+
+ // Set the arf sign bias for this frame.
+ set_arf_sign_bias(cpi);
+
+#if CONFIG_EXT_REFS
+ // NOTE:
+ // (1) Move the setup of the ref_frame_flags upfront as it would be
+ // determined by the current frame properties;
+ // (2) The setup of the ref_frame_flags applies to both show_existing_frame's
+ // and the other cases.
+ if (cm->current_video_frame > 0)
+ cpi->ref_frame_flags = get_ref_frame_flags(cpi);
+
+ if (cm->show_existing_frame) {
+ // NOTE(zoeliu): In BIDIR_PRED, the existing frame to show is the current
+ // BWDREF_FRAME in the reference frame buffer.
+ cm->frame_type = INTER_FRAME;
+ cm->show_frame = 1;
+ cpi->frame_flags = *frame_flags;
+
+ // In the case of show_existing frame, we will not send fresh flag
+ // to decoder. Any change in the reference frame buffer can be done by
+ // switching the virtual indices.
+
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+
+ cpi->rc.is_bwd_ref_frame = 0;
+ cpi->rc.is_last_bipred_frame = 0;
+ cpi->rc.is_bipred_frame = 0;
+
+ // Build the bitstream
+ vp10_pack_bitstream(cpi, dest, size);
+
+ // Set up frame to show to get ready for stats collection.
+ cm->frame_to_show = get_frame_new_buffer(cm);
+
+#if DUMP_RECON_FRAMES == 1
+ // NOTE(zoeliu): For debug - Output the filtered reconstructed video.
+ dump_filtered_recon_frames(cpi);
+#endif // DUMP_RECON_FRAMES
+
+ // Update the LAST_FRAME in the reference frame buffer.
+ vp10_update_reference_frames(cpi);
+
+ // Update frame flags
+ cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
+ cpi->frame_flags &= ~FRAMEFLAGS_BWDREF;
+ cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
+
+ *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
+
+ // Update the frame type
+ cm->last_frame_type = cm->frame_type;
+
+#if CONFIG_EXT_REFS // NOTE(review): redundant; already inside CONFIG_EXT_REFS
+ // Since we allocate a spot for the OVERLAY frame in the gf group, we need
+ // to do post-encoding update accordingly.
+ if (cpi->rc.is_src_frame_alt_ref) {
+ vp10_set_target_rate(cpi);
+ vp10_rc_postencode_update(cpi, *size);
+ }
+#endif
+
+ cm->last_width = cm->width;
+ cm->last_height = cm->height;
+
+ ++cm->current_video_frame;
+
+ return;
+ }
+#endif // CONFIG_EXT_REFS
+
+ // Set default state for segment based loop filter update flags.
+ cm->lf.mode_ref_delta_update = 0;
+
+ if (cpi->oxcf.pass == 2 && cpi->sf.adaptive_interp_filter_search)
+ cpi->sf.interp_filter_search_mask = setup_interp_filter_search_mask(cpi);
+
+ // Set various flags etc to special state if it is a key frame.
+ if (frame_is_intra_only(cm)) {
+ // Reset the loop filter deltas and segmentation map.
+ vp10_reset_segment_features(cm);
+
+ // If segmentation is enabled force a map update for key frames.
+ if (seg->enabled) {
+ seg->update_map = 1;
+ seg->update_data = 1;
+ }
+
+ // The alternate reference frame cannot be active for a key frame.
+ cpi->rc.source_alt_ref_active = 0;
+
+ cm->error_resilient_mode = oxcf->error_resilient_mode;
+
+ // By default, encoder assumes decoder can use prev_mi.
+ if (cm->error_resilient_mode) {
+ cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE;
+ cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_FORWARD;
+ } else if (cm->intra_only) {
+ // Only reset the current context.
+ cm->reset_frame_context = RESET_FRAME_CONTEXT_CURRENT;
+ }
+ }
+
+ // For 1 pass CBR, check if we are dropping this frame.
+ // Never drop on key frame.
+ if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&
+ cm->frame_type != KEY_FRAME) {
+ if (vp10_rc_drop_frame(cpi)) {
+ vp10_rc_postencode_update_drop_frame(cpi);
+ ++cm->current_video_frame;
+ return;
+ }
+ }
+
+ vpx_clear_system_state();
+
+#if CONFIG_INTERNAL_STATS
+ memset(cpi->mode_chosen_counts, 0,
+ MAX_MODES * sizeof(*cpi->mode_chosen_counts));
+#endif
+
+ if (cpi->sf.recode_loop == DISALLOW_RECODE) {
+ encode_without_recode_loop(cpi);
+ } else {
+ encode_with_recode_loop(cpi, size, dest);
+ }
+
+#ifdef OUTPUT_YUV_SKINMAP
+ if (cpi->common.current_video_frame > 1) {
+ vp10_compute_skin_map(cpi, yuv_skinmap_file);
+ }
+#endif // OUTPUT_YUV_SKINMAP
+
+ // Special case code to reduce pulsing when key frames are forced at a
+ // fixed interval. Note the reconstruction error if it is the frame before
+ // the force key frame
+ if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ cpi->ambient_err =
+ vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ } else {
+ cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+ }
+#else
+ cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+
+ // If the encoder forced a KEY_FRAME decision
+ if (cm->frame_type == KEY_FRAME) {
+ cpi->refresh_last_frame = 1;
+ }
+
+ cm->frame_to_show = get_frame_new_buffer(cm);
+ cm->frame_to_show->color_space = cm->color_space;
+ cm->frame_to_show->color_range = cm->color_range;
+ cm->frame_to_show->render_width = cm->render_width;
+ cm->frame_to_show->render_height = cm->render_height;
+
+#if CONFIG_EXT_REFS
+// TODO(zoeliu): For non-ref frames, loop filtering may need to be turned
+// off.
+#endif // CONFIG_EXT_REFS
+
+ // Pick the loop filter level for the frame.
+ loopfilter_frame(cpi, cm);
+
+ // Build the bitstream
+ vp10_pack_bitstream(cpi, dest, size);
+
+#if DUMP_RECON_FRAMES == 1
+ // NOTE(zoeliu): For debug - Output the filtered reconstructed video.
+ if (cm->show_frame) dump_filtered_recon_frames(cpi);
+#endif // DUMP_RECON_FRAMES
+
+ if (cm->seg.update_map) update_reference_segmentation_map(cpi);
+
+ if (frame_is_intra_only(cm) == 0) {
+ release_scaled_references(cpi);
+ }
+
+ vp10_update_reference_frames(cpi);
+
+ for (t = TX_4X4; t <= TX_32X32; t++)
+ vp10_full_to_model_counts(cpi->td.counts->coef[t],
+ cpi->td.rd_counts.coef_counts[t]);
+ // Probability adaptation below runs only for backward context refresh.
+ if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+#if CONFIG_ENTROPY
+ cm->partial_prob_update = 0;
+#endif // CONFIG_ENTROPY
+ vp10_adapt_coef_probs(cm);
+ vp10_adapt_intra_frame_probs(cm);
+ }
+
+ if (!frame_is_intra_only(cm)) {
+ if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
+ vp10_adapt_inter_frame_probs(cm);
+ vp10_adapt_mv_probs(cm, cm->allow_high_precision_mv);
+ }
+ }
+
+ if (cpi->refresh_golden_frame == 1)
+ cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
+ else
+ cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
+
+ if (cpi->refresh_alt_ref_frame == 1)
+ cpi->frame_flags |= FRAMEFLAGS_ALTREF;
+ else
+ cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
+
+#if CONFIG_EXT_REFS
+ if (cpi->refresh_bwd_ref_frame == 1)
+ cpi->frame_flags |= FRAMEFLAGS_BWDREF;
+ else
+ cpi->frame_flags &= ~FRAMEFLAGS_BWDREF;
+#endif // CONFIG_EXT_REFS
+ // With EXT_REFS, ref_frame_flags was already set at the top of this function.
+#if !CONFIG_EXT_REFS
+ cpi->ref_frame_flags = get_ref_frame_flags(cpi);
+#endif // !CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+ cm->last3_frame_type = cm->last2_frame_type;
+ cm->last2_frame_type = cm->last_frame_type;
+#endif // CONFIG_EXT_REFS
+ cm->last_frame_type = cm->frame_type;
+
+ vp10_rc_postencode_update(cpi, *size);
+
+#if 0
+ output_frame_level_debug_stats(cpi);
+#endif
+
+ if (cm->frame_type == KEY_FRAME) {
+ // Tell the caller that the frame was coded as a key frame
+ *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
+ } else {
+ *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
+ }
+
+ // Clear the one shot update flags for segmentation map and mode/ref loop
+ // filter deltas.
+ cm->seg.update_map = 0;
+ cm->seg.update_data = 0;
+ cm->lf.mode_ref_delta_update = 0;
+
+ // keep track of the last coded dimensions
+ cm->last_width = cm->width;
+ cm->last_height = cm->height;
+
+ // reset to normal state now that we are done.
+ if (!cm->show_existing_frame) cm->last_show_frame = cm->show_frame;
+
+ if (cm->show_frame) {
+#if CONFIG_EXT_REFS
+// TODO(zoeliu): We may only swap mi and prev_mi for those frames that are
+// being used as reference.
+#endif // CONFIG_EXT_REFS
+ vp10_swap_mi_and_prev_mi(cm);
+ // Don't increment frame counters if this was an altref buffer
+ // update not a real frame
+ ++cm->current_video_frame;
+ }
+
+#if CONFIG_EXT_REFS
+ // NOTE: Shall not refer to any frame not used as reference.
+ if (cm->is_reference_frame)
+#endif // CONFIG_EXT_REFS
+ cm->prev_frame = cm->cur_frame;
+}
+
+// One-pass encode: fetches the CBR or VBR rate-control parameters for this
+// frame according to oxcf.rc_mode, then encodes it.
+static void Pass0Encode(VP10_COMP *cpi, size_t *size, uint8_t *dest,
+                        unsigned int *frame_flags) {
+  const int is_cbr = (cpi->oxcf.rc_mode == VPX_CBR);
+  if (is_cbr) vp10_rc_get_one_pass_cbr_params(cpi);
+  if (!is_cbr) vp10_rc_get_one_pass_vbr_params(cpi);
+  encode_frame_to_data_rate(cpi, size, dest, frame_flags);
+}
+
+// Two-pass encode of one frame followed by the two-pass post-encode update.
+static void Pass2Encode(VP10_COMP *cpi, size_t *size, uint8_t *dest,
+                        unsigned int *frame_flags) {
+  cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
+  encode_frame_to_data_rate(cpi, size, dest, frame_flags);
+
+#if !CONFIG_EXT_REFS
+  vp10_twopass_postencode_update(cpi);
+#else
+  // Do not do post-encoding update for those frames that do not have a spot in
+  // a gf group, but note that an OVERLAY frame always has a spot in a gf group,
+  // even when show_existing_frame is used.
+  if (cpi->rc.is_src_frame_alt_ref || !cpi->common.show_existing_frame)
+    vp10_twopass_postencode_update(cpi);
+
+  check_show_existing_frame(cpi);
+#endif  // !CONFIG_EXT_REFS
+}
+
+// Invalidates new_fb_idx and all ref_frame_map slots; zeroes pool ref_counts.
+static void init_ref_frame_bufs(VP10_COMMON *cm) {
+  int slot;
+  for (slot = 0; slot < REF_FRAMES; ++slot) {
+    cm->buffer_pool->frame_bufs[slot].ref_count = 0;
+    cm->ref_frame_map[slot] = INVALID_IDX;
+  }
+  cm->new_fb_idx = INVALID_IDX;
+}
+
+// (Re)runs buffer setup on first use or when subsampling/bit depth changes.
+static void check_initial_width(VP10_COMP *cpi,
+#if CONFIG_VP9_HIGHBITDEPTH
+                                int use_highbitdepth,
+#endif
+                                int subsampling_x, int subsampling_y) {
+  VP10_COMMON *const cm = &cpi->common;
+  int needs_setup = !cpi->initial_width ||
+                    cm->subsampling_x != subsampling_x ||
+                    cm->subsampling_y != subsampling_y;
+#if CONFIG_VP9_HIGHBITDEPTH
+  needs_setup = needs_setup || cm->use_highbitdepth != use_highbitdepth;
+#endif
+  if (!needs_setup) return;
+
+  cm->subsampling_x = subsampling_x;
+  cm->subsampling_y = subsampling_y;
+#if CONFIG_VP9_HIGHBITDEPTH
+  cm->use_highbitdepth = use_highbitdepth;
+#endif
+
+  alloc_raw_frame_buffers(cpi);
+  init_ref_frame_bufs(cm);
+  alloc_util_frame_buffers(cpi);
+  init_motion_estimation(cpi);  // TODO(agrange) This can be removed.
+
+  cpi->initial_width = cm->width;
+  cpi->initial_height = cm->height;
+  cpi->initial_mbs = cm->MBs;
+}
+
+// Queues raw frame `sd` in the lookahead and validates its chroma subsampling
+// against the stream profile. Returns 0 on success, -1 on any failure.
+int vp10_receive_raw_frame(VP10_COMP *cpi, unsigned int frame_flags,
+                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
+                           int64_t end_time) {
+  VP10_COMMON *const cm = &cpi->common;
+  const int subsampling_x = sd->subsampling_x;
+  const int subsampling_y = sd->subsampling_y;
+  const int is_420 = (subsampling_x == 1) && (subsampling_y == 1);
+  struct vpx_usec_timer push_timer;
+  int status = 0;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
+  check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
+#else
+  check_initial_width(cpi, subsampling_x, subsampling_y);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  // Time only the lookahead push.
+  vpx_usec_timer_start(&push_timer);
+  if (vp10_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
+#if CONFIG_VP9_HIGHBITDEPTH
+                          use_highbitdepth,
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+                          frame_flags)) {
+    status = -1;
+  }
+  vpx_usec_timer_mark(&push_timer);
+  cpi->time_receive_data += vpx_usec_timer_elapsed(&push_timer);
+
+  // Profiles 0 and 2 take only 4:2:0 input; profiles 1 and 3 only non-4:2:0.
+  if (!is_420 && (cm->profile == PROFILE_0 || cm->profile == PROFILE_2)) {
+    vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
+                       "Non-4:2:0 color format requires profile 1 or 3");
+    status = -1;
+  }
+  if (is_420 && (cm->profile == PROFILE_1 || cm->profile == PROFILE_3)) {
+    vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM,
+                       "4:2:0 color format requires profile 0 or 2");
+    status = -1;
+  }
+  return status;
+}
+
+// Returns 1 for key frames, frames refreshing any reference buffer, frames
+// coded outside error-resilient mode, or frames with lf/seg updates; else 0.
+static int frame_is_reference(const VP10_COMP *cpi) {
+  const VP10_COMMON *cm = &cpi->common;
+  if (cm->frame_type == KEY_FRAME || !cm->error_resilient_mode) return 1;
+#if CONFIG_EXT_REFS
+  if (cpi->refresh_bwd_ref_frame) return 1;
+#endif  // CONFIG_EXT_REFS
+  if (cpi->refresh_last_frame || cpi->refresh_golden_frame) return 1;
+  if (cpi->refresh_alt_ref_frame || cm->lf.mode_ref_delta_update) return 1;
+  return cm->seg.update_map || cm->seg.update_data;
+}
+
+// Updates cpi->framerate (via vp10_new_framerate) from the timestamps of
+// `source`, then records them as the last timestamps seen.
+// NOTE(review): the 10000000.0 constant suggests 1e-7 s timestamp units.
+static void adjust_frame_rate(VP10_COMP *cpi,
+                              const struct lookahead_entry *source) {
+  const int is_first = (source->ts_start == cpi->first_time_stamp_ever);
+  int64_t this_duration;
+  int step = is_first;
+
+  if (is_first) {
+    this_duration = source->ts_end - source->ts_start;
+  } else {
+    const int64_t last_duration =
+        cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;
+    this_duration = source->ts_end - cpi->last_end_time_stamp_seen;
+    // do a step update if the duration changes by 10%
+    if (last_duration)
+      step = (int)((this_duration - last_duration) * 10 / last_duration);
+  }
+
+  if (this_duration && step) {
+    vp10_new_framerate(cpi, 10000000.0 / this_duration);
+  } else if (this_duration) {
+    // Average this frame's rate into the last second's average
+    // frame rate. If we haven't seen 1 second yet, then average
+    // over the whole interval seen.
+    const double interval = VPXMIN(
+        (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
+    const double prev_avg = 10000000.0 / cpi->framerate;
+    const double avg_duration =
+        prev_avg * (interval - prev_avg + this_duration) / interval;
+    vp10_new_framerate(cpi, 10000000.0 / avg_duration);
+  }
+
+  cpi->last_time_stamp_seen = source->ts_start;
+  cpi->last_end_time_stamp_seen = source->ts_end;
+}
+
+// Returns the offset of the source frame used as the ARF midpoint, or 0 when
+// the current frame is not an alt ref (or alt refs are disabled).
+static int get_arf_src_index(VP10_COMP *cpi) {
+  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+  RATE_CONTROL *const rc = &cpi->rc;
+
+  if (!is_altref_enabled(cpi)) return 0;
+
+  if (cpi->oxcf.pass == 2) {
+    // Two-pass: only an ARF_UPDATE slot of the gf group yields an offset.
+    if (gf_group->update_type[gf_group->index] != ARF_UPDATE) return 0;
+    return gf_group->arf_src_offset[gf_group->index];
+  }
+
+  // Otherwise the offset applies only while source_alt_ref_pending is set.
+  return rc->source_alt_ref_pending ? rc->frames_till_gf_update_due : 0;
+}
+
+#if CONFIG_EXT_REFS
+// Returns the backward-reference source offset, or 0 when bidir prediction is
+// off for the current gf group slot or (two-pass) it is not a BRF_UPDATE.
+static int get_brf_src_index(VP10_COMP *cpi) {
+  const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+  const int slot = gf_group->index;
+
+  // TODO(zoeliu): We need to add the check on the -bwd_ref command line setup
+  //               flag.
+  if (!gf_group->bidir_pred_enabled[slot]) return 0;
+
+  if (cpi->oxcf.pass != 2) {
+    // TODO(zoeliu): To re-visit the setup for this scenario
+    return cpi->rc.bipred_group_interval - 1;
+  }
+
+  if (gf_group->update_type[slot] != BRF_UPDATE) return 0;
+  return gf_group->brf_src_offset[slot];
+}
+#endif  // CONFIG_EXT_REFS
+
+// Sets rc->is_src_frame_alt_ref for the current source frame and, for an ARF
+// overlay frame, drops the stored ARF source and disables the LAST refresh.
+static void check_src_altref(VP10_COMP *cpi,
+                             const struct lookahead_entry *source) {
+  RATE_CONTROL *const rc = &cpi->rc;
+  int is_overlay;
+
+  // If pass == 2, the parameters set here will be reset in
+  // vp10_rc_get_second_pass_params()
+  if (cpi->oxcf.pass != 2) {
+    is_overlay = cpi->alt_ref_source && (source == cpi->alt_ref_source);
+  } else {
+    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+    is_overlay = (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
+#if CONFIG_EXT_REFS
+    if (gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE)
+      is_overlay = 1;
+#endif  // CONFIG_EXT_REFS
+  }
+  rc->is_src_frame_alt_ref = is_overlay;
+  if (!is_overlay) return;
+
+  // Current frame is an ARF overlay frame: forget the ARF source, and don't
+  // refresh the last buffer. It will become the GF so preserve last as an
+  // alternative prediction option.
+  cpi->alt_ref_source = NULL;
+  cpi->refresh_last_frame = 0;
+}
+
+#if CONFIG_INTERNAL_STATS
+extern double vp10_get_blockiness(const unsigned char *img1, int img1_pitch,
+ const unsigned char *img2, int img2_pitch,
+ int width, int height);
+
+static void adjust_image_stat(double y, double u, double v, double all,
+                              ImageStat *s) {
+  s->worst = VPXMIN(s->worst, all);  // lowest combined score seen so far
+  s->stat[ALL] += all;
+  s->stat[V] += v;
+  s->stat[U] += u;
+  s->stat[Y] += y;
+}
+// Accumulates PSNR/SSIM/blockiness/consistency stats for each shown frame.
+static void compute_internal_stats(VP10_COMP *cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ double samples = 0.0;
+ uint32_t in_bit_depth = 8;
+ uint32_t bit_depth = 8;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ in_bit_depth = cpi->oxcf.input_bit_depth;
+ bit_depth = cm->bit_depth;
+ }
+#endif
+ if (cm->show_frame) {
+ const YV12_BUFFER_CONFIG *orig = cpi->Source;
+ const YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show;
+ double y, u, v, frame_all;
+
+ cpi->count++;
+ if (cpi->b_calculate_psnr) {
+ PSNR_STATS psnr;
+ double frame_ssim2 = 0.0, weight = 0.0;
+ vpx_clear_system_state();
+// TODO(yaowu): unify these two versions into one.
+#if CONFIG_VP9_HIGHBITDEPTH
+ vpx_calc_highbd_psnr(orig, recon, &psnr, bit_depth, in_bit_depth);
+#else
+ vpx_calc_psnr(orig, recon, &psnr);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3], psnr.psnr[0],
+ &cpi->psnr);
+ cpi->total_sq_error += psnr.sse[0];
+ cpi->total_samples += psnr.samples[0];
+ samples = psnr.samples[0]; // also used by the consistency PSNR below
+// TODO(yaowu): unify these two versions into one.
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ frame_ssim2 =
+ vpx_highbd_calc_ssim(orig, recon, &weight, bit_depth, in_bit_depth);
+ else
+ frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
+#else
+ frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ cpi->worst_ssim = VPXMIN(cpi->worst_ssim, frame_ssim2);
+ cpi->summed_quality += frame_ssim2 * weight;
+ cpi->summed_weights += weight;
+
+#if 0
+ {
+ FILE *f = fopen("q_used.stt", "a");
+ fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
+ cpi->common.current_video_frame, y2, u2, v2,
+ frame_psnr2, frame_ssim2);
+ fclose(f);
+ }
+#endif
+ }
+ if (cpi->b_calculate_blockiness) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!cm->use_highbitdepth)
+#endif
+ {
+ const double frame_blockiness =
+ vp10_get_blockiness(orig->y_buffer, orig->y_stride, recon->y_buffer,
+ recon->y_stride, orig->y_width, orig->y_height);
+ cpi->worst_blockiness = VPXMAX(cpi->worst_blockiness, frame_blockiness);
+ cpi->total_blockiness += frame_blockiness;
+ }
+ // NOTE(review): nested in b_calculate_blockiness; confirm this is intended.
+ if (cpi->b_calculate_consistency) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!cm->use_highbitdepth)
+#endif
+ {
+ const double this_inconsistency = vpx_get_ssim_metrics(
+ orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride,
+ orig->y_width, orig->y_height, cpi->ssim_vars, &cpi->metrics, 1);
+
+ const double peak = (double)((1 << in_bit_depth) - 1);
+ const double consistency =
+ vpx_sse_to_psnr(samples, peak, cpi->total_inconsistency);
+ if (consistency > 0.0)
+ cpi->worst_consistency =
+ VPXMIN(cpi->worst_consistency, consistency);
+ cpi->total_inconsistency += this_inconsistency;
+ }
+ }
+ }
+
+ frame_all =
+ vpx_calc_fastssim(orig, recon, &y, &u, &v, bit_depth, in_bit_depth);
+ adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
+ frame_all = vpx_psnrhvs(orig, recon, &y, &u, &v, bit_depth, in_bit_depth);
+ adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
+ }
+}
+#endif // CONFIG_INTERNAL_STATS
+
+int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
+ size_t *size, uint8_t *dest, int64_t *time_stamp,
+ int64_t *time_end, int flush) {  // returns 0, or -1 when no source frame or free frame buffer is available
+ const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+ VP10_COMMON *const cm = &cpi->common;
+ BufferPool *const pool = cm->buffer_pool;
+ RATE_CONTROL *const rc = &cpi->rc;
+ struct vpx_usec_timer cmptimer;
+ YV12_BUFFER_CONFIG *force_src_buffer = NULL;  // set when the filtered ARF buffer replaces the source image
+ struct lookahead_entry *last_source = NULL;  // lookahead entry at index -1, when peeked below
+ struct lookahead_entry *source = NULL;
+ int arf_src_index;
+#if CONFIG_EXT_REFS
+ int brf_src_index;
+#endif // CONFIG_EXT_REFS
+ int i;
+
+#if CONFIG_BITSTREAM_DEBUG
+ assert(cpi->oxcf.max_threads == 0 &&
+ "bitstream debug tool does not support multithreading");
+ bitstream_queue_record_write();
+#endif
+
+ vpx_usec_timer_start(&cmptimer);  // elapsed time is added to time_compress_data near the end
+
+ vp10_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
+
+ // Is multi-arf enabled.
+ // Note that at the moment multi_arf is only configured for 2 pass VBR
+ if ((oxcf->pass == 2) && (cpi->oxcf.enable_auto_arf > 1))
+ cpi->multi_arf_allowed = 1;
+ else
+ cpi->multi_arf_allowed = 0;
+
+ // Normal defaults
+ cm->reset_frame_context = RESET_FRAME_CONTEXT_NONE;
+ cm->refresh_frame_context =
+ (oxcf->error_resilient_mode || oxcf->frame_parallel_decoding_mode)
+ ? REFRESH_FRAME_CONTEXT_FORWARD
+ : REFRESH_FRAME_CONTEXT_BACKWARD;
+
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+#if CONFIG_EXT_REFS
+ cpi->refresh_bwd_ref_frame = 0;
+#endif // CONFIG_EXT_REFS
+ cpi->refresh_alt_ref_frame = 0;
+
+#if CONFIG_EXT_REFS
+ if (oxcf->pass == 2 && cm->show_existing_frame) {  // show-existing path: returns from inside this block
+ // Manage the source buffer and flush out the source frame that has been
+ // coded already; Also get prepared for PSNR calculation if needed.
+ if ((source = vp10_lookahead_pop(cpi->lookahead, flush)) == NULL) {
+ *size = 0;
+ return -1;
+ }
+ cpi->Source = &source->img;
+ // TODO(zoeliu): To track down to determine whether it's needed to adjust
+ // the frame rate.
+ *time_stamp = source->ts_start;
+ *time_end = source->ts_end;
+
+ // We need to adjust frame rate for an overlay frame
+ if (cpi->rc.is_src_frame_alt_ref) {
+ adjust_frame_rate(cpi, source);
+ }
+
+ // Find a free buffer for the new frame, releasing the reference previously
+ // held.
+ if (cm->new_fb_idx != INVALID_IDX) {
+ --pool->frame_bufs[cm->new_fb_idx].ref_count;
+ }
+ cm->new_fb_idx = get_free_fb(cm);
+
+ if (cm->new_fb_idx == INVALID_IDX) return -1;
+
+ // Clear down mmx registers
+ vpx_clear_system_state();
+
+ // Start with a 0 size frame.
+ *size = 0;
+
+ // We need to update the gf_group for show_existing overlay frame
+ if (cpi->rc.is_src_frame_alt_ref) {
+ vp10_rc_get_second_pass_params(cpi);
+ }
+
+ Pass2Encode(cpi, size, dest, frame_flags);
+
+ if (cpi->b_calculate_psnr) generate_psnr_packet(cpi);
+
+#if CONFIG_INTERNAL_STATS
+ compute_internal_stats(cpi);
+ cpi->bytes += (int)(*size);
+#endif // CONFIG_INTERNAL_STATS
+
+ // Clear down mmx registers
+ vpx_clear_system_state();
+
+ cm->show_existing_frame = 0;
+ return 0;
+ }
+#endif // CONFIG_EXT_REFS
+
+ // Should we encode an arf frame.
+ arf_src_index = get_arf_src_index(cpi);
+ if (arf_src_index) {
+ for (i = 0; i <= arf_src_index; ++i) {
+ struct lookahead_entry *e = vp10_lookahead_peek(cpi->lookahead, i);
+ // Avoid creating an alt-ref if there's a forced keyframe pending.
+ if (e == NULL) {
+ break;
+ } else if (e->flags == VPX_EFLAG_FORCE_KF) {  // NOTE(review): exact match, unlike the '&' test on source->flags below - confirm
+ arf_src_index = 0;
+ flush = 1;
+ break;
+ }
+ }
+ }
+
+ if (arf_src_index) {
+ assert(arf_src_index <= rc->frames_to_key);
+
+ if ((source = vp10_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
+ cpi->alt_ref_source = source;
+
+ if (oxcf->arnr_max_frames > 0) {
+ // Produce the filtered ARF frame.
+ vp10_temporal_filter(cpi, arf_src_index);
+ vpx_extend_frame_borders(&cpi->alt_ref_buffer);
+ force_src_buffer = &cpi->alt_ref_buffer;
+ }
+
+ cm->show_frame = 0;
+ cm->intra_only = 0;
+ cpi->refresh_alt_ref_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_last_frame = 0;
+ rc->is_src_frame_alt_ref = 0;
+ }
+ rc->source_alt_ref_pending = 0;  // cleared even when the peek above returned NULL
+ }
+
+#if CONFIG_EXT_REFS
+ rc->is_bwd_ref_frame = 0;
+ brf_src_index = get_brf_src_index(cpi);
+ if (brf_src_index) {
+ assert(brf_src_index <= rc->frames_to_key);
+ if ((source = vp10_lookahead_peek(cpi->lookahead, brf_src_index)) != NULL) {
+ cm->show_frame = 0;
+ cm->intra_only = 0;
+
+ cpi->refresh_bwd_ref_frame = 1;
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+
+ rc->is_bwd_ref_frame = 1;
+ }
+ }
+#endif // CONFIG_EXT_REFS
+
+ if (!source) {  // no ARF (or BRF) source was selected above
+ // Get last frame source.
+ if (cm->current_video_frame > 0) {
+ if ((last_source = vp10_lookahead_peek(cpi->lookahead, -1)) == NULL)
+ return -1;
+ }
+
+ // Read in the source frame.
+ source = vp10_lookahead_pop(cpi->lookahead, flush);
+
+ if (source != NULL) {
+ cm->show_frame = 1;
+ cm->intra_only = 0;
+
+ // Check to see if the frame should be encoded as an arf overlay.
+ check_src_altref(cpi, source);
+ }
+ }
+
+ if (source) {
+ cpi->un_scaled_source = cpi->Source =
+ force_src_buffer ? force_src_buffer : &source->img;
+
+ cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
+
+ *time_stamp = source->ts_start;
+ *time_end = source->ts_end;
+ *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
+
+ } else {  // no source entry could be obtained
+ *size = 0;
+ if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) {
+ vp10_end_first_pass(cpi); /* get last stats packet */
+ cpi->twopass.first_pass_done = 1;
+ }
+ return -1;
+ }
+
+ if (source->ts_start < cpi->first_time_stamp_ever) {
+ cpi->first_time_stamp_ever = source->ts_start;
+ cpi->last_end_time_stamp_seen = source->ts_start;
+ }
+
+ // Clear down mmx registers
+ vpx_clear_system_state();
+
+ // adjust frame rates based on timestamps given
+ if (cm->show_frame) adjust_frame_rate(cpi, source);
+
+ // Find a free buffer for the new frame, releasing the reference previously
+ // held.
+ if (cm->new_fb_idx != INVALID_IDX) {
+ --pool->frame_bufs[cm->new_fb_idx].ref_count;
+ }
+ cm->new_fb_idx = get_free_fb(cm);
+
+ if (cm->new_fb_idx == INVALID_IDX) return -1;
+
+ cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
+
+#if CONFIG_EXT_REFS
+ if (oxcf->pass == 2) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ cpi->alt_fb_idx = cpi->arf_map[gf_group->arf_ref_idx[gf_group->index]];
+ }
+#else
+ if (cpi->multi_arf_allowed) {
+ if (cm->frame_type == KEY_FRAME) {
+ init_buffer_indices(cpi);
+ } else if (oxcf->pass == 2) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ cpi->alt_fb_idx = gf_group->arf_ref_idx[gf_group->index];
+ }
+ }
+#endif
+ // Start with a 0 size frame.
+ *size = 0;
+
+ cpi->frame_flags = *frame_flags;
+
+ if (oxcf->pass == 2) {
+ vp10_rc_get_second_pass_params(cpi);
+ } else if (oxcf->pass == 1) {
+ set_frame_size(cpi);
+ }
+
+ if (cpi->oxcf.pass != 0 || frame_is_intra_only(cm) == 1) {
+ for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i)
+ cpi->scaled_ref_idx[i] = INVALID_IDX;
+ }
+
+#if CONFIG_AOM_QM
+ cm->using_qmatrix = cpi->oxcf.using_qm;
+ cm->min_qmlevel = cpi->oxcf.qm_minlevel;
+ cm->max_qmlevel = cpi->oxcf.qm_maxlevel;
+#endif
+
+ if (oxcf->pass == 1) {
+ cpi->td.mb.e_mbd.lossless[0] = is_lossless_requested(oxcf);
+ vp10_first_pass(cpi, source);
+ } else if (oxcf->pass == 2) {
+ Pass2Encode(cpi, size, dest, frame_flags);
+ } else {
+ // One pass encode
+ Pass0Encode(cpi, size, dest, frame_flags);
+ }
+
+ if (!cm->error_resilient_mode)  // store *cm->fc back into the slot at frame_context_idx
+ cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
+
+ // No frame encoded, or frame was dropped, release scaled references.
+ if ((*size == 0) && (frame_is_intra_only(cm) == 0)) {
+ release_scaled_references(cpi);
+ }
+
+ if (*size > 0) {
+ cpi->droppable = !frame_is_reference(cpi);
+ }
+
+ vpx_usec_timer_mark(&cmptimer);
+ cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
+
+ if (cpi->b_calculate_psnr && oxcf->pass != 1 && cm->show_frame)
+ generate_psnr_packet(cpi);
+
+#if CONFIG_INTERNAL_STATS
+ if (oxcf->pass != 1) {  // internal stats are not gathered during the first pass
+ compute_internal_stats(cpi);
+ cpi->bytes += (int)(*size);
+ }
+#endif // CONFIG_INTERNAL_STATS
+
+ vpx_clear_system_state();
+
+ return 0;
+}
+
+// Fills *dest from cm->frame_to_show with plane sizes taken from the common
+// frame size. Returns 0 on success, -1 if nothing is available to preview.
+int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest) {
+  VP10_COMMON *const common = &cpi->common;
+  int status = -1;
+
+  // A frame that is not shown returns before the system state is cleared.
+  if (!common->show_frame) return -1;
+
+  if (common->frame_to_show) {
+    *dest = *common->frame_to_show;
+    dest->y_width = common->width;
+    dest->y_height = common->height;
+    dest->uv_width = common->width >> common->subsampling_x;
+    dest->uv_height = common->height >> common->subsampling_y;
+    status = 0;
+  }
+  vpx_clear_system_state();
+  return status;
+}
+
+// Copies the last shown frame buffer into *frame; returns -1 if there is none.
+int vp10_get_last_show_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *frame) {
+  const int shown_idx = cpi->last_show_frame_buf_idx;
+  if (shown_idx == INVALID_IDX) return -1;
+  *frame = cpi->common.buffer_pool->frame_bufs[shown_idx].buf;
+  return 0;
+}
+
+// Applies the given horizontal/vertical scaling modes to the configured
+// frame size. Returns -1 for a mode beyond ONETWO, otherwise 0.
+int vp10_set_internal_size(VP10_COMP *cpi, VPX_SCALING horiz_mode,
+                           VPX_SCALING vert_mode) {
+  VP10_COMMON *const common = &cpi->common;
+  int h_num = 0, h_den = 0, v_num = 0, v_den = 0;
+
+  if (!(horiz_mode <= ONETWO && vert_mode <= ONETWO)) return -1;
+  Scale2Ratio(horiz_mode, &h_num, &h_den);
+  Scale2Ratio(vert_mode, &v_num, &v_den);
+
+  // Ceiling division: round each scaled dimension up to a whole number.
+  common->width = (h_den - 1 + cpi->oxcf.width * h_num) / h_den;
+  common->height = (v_den - 1 + cpi->oxcf.height * v_num) / v_den;
+  assert(common->width <= cpi->initial_width);
+  assert(common->height <= cpi->initial_height);
+
+  update_frame_size(cpi);
+  return 0;
+}
+
+int vp10_set_size_literal(VP10_COMP *cpi, unsigned int width,
+ unsigned int height) {  // a zero width/height leaves that dimension untouched; always returns 0
+ VP10_COMMON *cm = &cpi->common;
+#if CONFIG_VP9_HIGHBITDEPTH
+ check_initial_width(cpi, cm->use_highbitdepth, 1, 1);
+#else
+ check_initial_width(cpi, 1, 1);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ if (width) {
+ cm->width = width;  // stored first, then compared against initial_width
+ if (cm->width > cpi->initial_width) {
+ cm->width = cpi->initial_width;  // clamp to cpi->initial_width
+ printf("Warning: Desired width too large, changed to %d\n", cm->width);
+ }
+ }
+
+ if (height) {
+ cm->height = height;
+ if (cm->height > cpi->initial_height) {
+ cm->height = cpi->initial_height;  // clamp to cpi->initial_height
+ printf("Warning: Desired height too large, changed to %d\n", cm->height);
+ }
+ }
+ assert(cm->width <= cpi->initial_width);
+ assert(cm->height <= cpi->initial_height);
+
+ update_frame_size(cpi);
+
+ return 0;
+}
+
+int vp10_get_quantizer(VP10_COMP *cpi) { return cpi->common.base_qindex; }  // reads common.base_qindex
+
+// Translates per-frame VP8_EFLAG_* bits into vp10_use_as_reference(),
+// vp10_update_reference() and vp10_update_entropy() calls on cpi.
+void vp10_apply_encoding_flags(VP10_COMP *cpi, vpx_enc_frame_flags_t flags) {
+  const int any_no_ref = (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF |
+                                   VP8_EFLAG_NO_REF_ARF)) != 0;
+  const int any_upd =
+      (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
+                VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF |
+                VP8_EFLAG_FORCE_ARF)) != 0;
+
+  if (any_no_ref) {
+    // Start from every reference and toggle each excluded one.
+    int usable_refs = VPX_REFFRAME_ALL;
+    if (flags & VP8_EFLAG_NO_REF_LAST) {
+      usable_refs ^= VPX_LAST_FLAG;
+#if CONFIG_EXT_REFS
+      usable_refs ^= VPX_LAST2_FLAG;
+      usable_refs ^= VPX_LAST3_FLAG;
+#endif  // CONFIG_EXT_REFS
+    }
+    if (flags & VP8_EFLAG_NO_REF_GF) usable_refs ^= VPX_GOLD_FLAG;
+    if (flags & VP8_EFLAG_NO_REF_ARF) usable_refs ^= VPX_ALT_FLAG;
+
+    vp10_use_as_reference(cpi, usable_refs);
+  }
+
+  if (any_upd) {
+    // Same toggling; FORCE_GF/FORCE_ARF enter this branch but toggle nothing.
+    int updated_refs = VPX_REFFRAME_ALL;
+    if (flags & VP8_EFLAG_NO_UPD_LAST) {
+      updated_refs ^= VPX_LAST_FLAG;
+#if CONFIG_EXT_REFS
+      updated_refs ^= VPX_LAST2_FLAG;
+      updated_refs ^= VPX_LAST3_FLAG;
+#endif  // CONFIG_EXT_REFS
+    }
+    if (flags & VP8_EFLAG_NO_UPD_GF) updated_refs ^= VPX_GOLD_FLAG;
+    if (flags & VP8_EFLAG_NO_UPD_ARF) updated_refs ^= VPX_ALT_FLAG;
+
+    vp10_update_reference(cpi, updated_refs);
+  }
+
+  if (flags & VP8_EFLAG_NO_UPD_ENTROPY) vp10_update_entropy(cpi, 0);
+}
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
new file mode 100644
index 0000000..719615b
--- /dev/null
+++ b/av1/encoder/encoder.h
@@ -0,0 +1,833 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_ENCODER_H_
+#define VP10_ENCODER_ENCODER_H_
+
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#include "aom/vp8cx.h"
+
+#include "av1/common/alloccommon.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/thread_common.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/encoder/aq_cyclicrefresh.h"
+#if CONFIG_ANS
+#include "av1/encoder/buf_ans.h"
+#endif
+#include "av1/encoder/context_tree.h"
+#include "av1/encoder/encodemb.h"
+#include "av1/encoder/firstpass.h"
+#include "av1/encoder/lookahead.h"
+#include "av1/encoder/mbgraph.h"
+#include "av1/encoder/mcomp.h"
+#include "av1/encoder/quantize.h"
+#include "av1/encoder/ratectrl.h"
+#include "av1/encoder/rd.h"
+#include "av1/encoder/speed_features.h"
+#include "av1/encoder/tokenize.h"
+#include "av1/encoder/variance_tree.h"
+
+#if CONFIG_INTERNAL_STATS
+#include "aom_dsp/ssim.h"
+#endif
+#include "aom_dsp/variance.h"
+#include "aom/internal/vpx_codec_internal.h"
+#include "aom_util/vpx_thread.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {  // type of VP10_COMP::coding_context
+ int nmvjointcost[MV_JOINTS];
+ int nmvcosts[2][MV_VALS];
+ int nmvcosts_hp[2][MV_VALS];
+
+#if CONFIG_REF_MV
+ int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
+ int nmv_costs[NMV_CONTEXTS][2][MV_VALS];
+ int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS];
+#endif
+
+ unsigned char *last_frame_seg_map_copy;
+
+ // 0 = Intra, Last, GF, ARF
+ signed char last_ref_lf_deltas[TOTAL_REFS_PER_FRAME];
+ // 0 = ZERO_MV, MV
+ signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
+
+ FRAME_CONTEXT fc;  // NOTE(review): presumably a saved copy of the frame context - confirm against users
+} CODING_CONTEXT;
+
+typedef enum {
+ // regular inter frame
+ REGULAR_FRAME = 0,
+ // alternate reference frame
+ ARF_FRAME = 1,
+ // overlay frame
+ OVERLAY_FRAME = 2,
+ // golden frame
+ GLD_FRAME = 3,
+#if CONFIG_EXT_REFS
+ // backward reference frame
+ BRF_FRAME = 4,
+ // extra alternate reference frame
+ EXT_ARF_FRAME = 5
+#endif
+} FRAME_CONTEXT_INDEX;  // NOTE(review): presumably selects a frame-context slot per frame kind - confirm
+
+typedef enum {
+ // encode_breakout is disabled.
+ ENCODE_BREAKOUT_DISABLED = 0,
+ // encode_breakout is enabled.
+ ENCODE_BREAKOUT_ENABLED = 1,
+ // encode_breakout is enabled with small max_thresh limit.
+ ENCODE_BREAKOUT_LIMITED = 2
+} ENCODE_BREAKOUT_TYPE;  // value type of VP10_COMP::allow_encode_breakout
+
+typedef enum {  // scaling modes taken by vp10_set_internal_size()
+ NORMAL = 0,
+ FOURFIVE = 1,  // NOTE(review): presumably a 4/5 ratio - confirm in Scale2Ratio()
+ THREEFIVE = 2,  // NOTE(review): presumably a 3/5 ratio - confirm in Scale2Ratio()
+ ONETWO = 3  // largest mode vp10_set_internal_size() accepts
+} VPX_SCALING;
+
+typedef enum {
+ // Good Quality Fast Encoding. The encoder balances quality with the amount of
+ // time it takes to encode the output. Speed setting controls how fast.
+ GOOD,
+
+ // The encoder places priority on the quality of the output over encoding
+ // speed. The output is compressed at the highest possible quality. This
+ // option takes the longest amount of time to encode. Speed setting ignored.
+ BEST,
+
+ // Realtime/Live Encoding. This mode is optimized for realtime encoding (for
+ // example, capturing a television signal or feed from a live camera). Speed
+ // setting controls how fast.
+ REALTIME
+} MODE;  // value type of VP10EncoderConfig::mode
+
+typedef enum {
+ FRAMEFLAGS_KEY = 1 << 0,  // reported when the source entry carries VPX_EFLAG_FORCE_KF
+ FRAMEFLAGS_GOLDEN = 1 << 1,
+#if CONFIG_EXT_REFS
+ FRAMEFLAGS_BWDREF = 1 << 2,
+ FRAMEFLAGS_ALTREF = 1 << 3,  // note: bit position differs from the non-EXT_REFS build
+#else
+ FRAMEFLAGS_ALTREF = 1 << 2,
+#endif // CONFIG_EXT_REFS
+} FRAMETYPE_FLAGS;
+
+typedef enum {  // value type of VP10EncoderConfig::aq_mode
+ NO_AQ = 0,
+ VARIANCE_AQ = 1,
+ COMPLEXITY_AQ = 2,
+ CYCLIC_REFRESH_AQ = 3,
+ AQ_MODE_COUNT // This should always be the last member of the enum
+} AQ_MODE;
+
+typedef enum {  // value type of VP10EncoderConfig::resize_mode
+ RESIZE_NONE = 0, // No frame resizing allowed.
+ RESIZE_FIXED = 1, // All frames are coded at the specified dimension.
+ RESIZE_DYNAMIC = 2 // Coded size of each frame is determined by the codec.
+} RESIZE_TYPE;
+
+typedef struct VP10EncoderConfig {
+ BITSTREAM_PROFILE profile;
+ vpx_bit_depth_t bit_depth; // Codec bit-depth.
+ int width; // width of data passed to the compressor
+ int height; // height of data passed to the compressor
+ unsigned int input_bit_depth; // Input bit depth.
+ double init_framerate; // set to passed in framerate
+ int64_t target_bandwidth; // bandwidth to be used in bits per second
+
+ int noise_sensitivity; // pre processing blur: recommendation 0
+ int sharpness; // sharpening output: recommendation 0
+ int speed;
+ // maximum allowed bitrate for any intra frame in % of bitrate target.
+ unsigned int rc_max_intra_bitrate_pct;
+ // maximum allowed bitrate for any inter frame in % of bitrate target.
+ unsigned int rc_max_inter_bitrate_pct;
+ // percent of rate boost for golden frame in CBR mode.
+ unsigned int gf_cbr_boost_pct;
+
+ MODE mode;
+ int pass;  // 1 = first pass, 2 = second pass, any other value = one-pass encode
+
+ // Key Framing Operations
+ int auto_key; // autodetect cut scenes and set the keyframes
+ int key_freq; // maximum distance to key frame.
+
+ int lag_in_frames; // how many frames lag before we start encoding
+
+ // ----------------------------------------------------------------
+ // DATARATE CONTROL OPTIONS
+
+ // vbr, cbr, constrained quality or constant quality
+ enum vpx_rc_mode rc_mode;
+
+ // buffer targeting aggressiveness
+ int under_shoot_pct;
+ int over_shoot_pct;
+
+ // buffering parameters
+ int64_t starting_buffer_level_ms;
+ int64_t optimal_buffer_level_ms;
+ int64_t maximum_buffer_size_ms;
+
+ // Frame drop threshold.
+ int drop_frames_water_mark;
+
+ // controlling quality
+ int fixed_q;
+ int worst_allowed_q;
+ int best_allowed_q;  // best == worst == 0 means lossless is requested
+ int cq_level;
+ AQ_MODE aq_mode; // Adaptive Quantization mode
+#if CONFIG_AOM_QM
+ int using_qm;
+ int qm_minlevel;
+ int qm_maxlevel;
+#endif
+
+ // Internal frame size scaling.
+ RESIZE_TYPE resize_mode;
+ int scaled_frame_width;
+ int scaled_frame_height;
+
+ // Enable feature to reduce the frame quantization every x frames.
+ int frame_periodic_boost;
+
+ // two pass datarate control
+ int two_pass_vbrbias; // two pass datarate control tweaks
+ int two_pass_vbrmin_section;
+ int two_pass_vbrmax_section;
+ // END DATARATE CONTROL OPTIONS
+ // ----------------------------------------------------------------
+
+ int enable_auto_arf;  // values > 1 allow multi-ARF when pass == 2
+#if CONFIG_EXT_REFS
+ int enable_auto_brf; // (b)ackward (r)ef (f)rame
+#endif // CONFIG_EXT_REFS
+
+ int encode_breakout; // early breakout : for video conf recommend 800
+
+ /* Bitfield defining the error resiliency features to enable.
+ * Can provide decodable frames after losses in previous
+ * frames and decodable partitions after losses in the same frame.
+ */
+ unsigned int error_resilient_mode;
+
+ /* Bitfield defining the parallel decoding mode where the
+ * decoding in successive frames may be conducted in parallel
+ * just by decoding the frame headers.
+ */
+ unsigned int frame_parallel_decoding_mode;
+
+ int arnr_max_frames;  // > 0 enables temporal filtering of the ARF source
+ int arnr_strength;
+
+ int min_gf_interval;
+ int max_gf_interval;
+
+ int tile_columns;
+ int tile_rows;
+
+ int max_threads;
+
+ vpx_fixed_buf_t two_pass_stats_in;
+ struct vpx_codec_pkt_list *output_pkt_list;
+
+#if CONFIG_FP_MB_STATS
+ vpx_fixed_buf_t firstpass_mb_stats_in;
+#endif
+
+ vpx_tune_metric tuning;
+ vpx_tune_content content;
+#if CONFIG_VP9_HIGHBITDEPTH
+ int use_highbitdepth;
+#endif
+ vpx_color_space_t color_space;
+ int color_range;
+ int render_width;
+ int render_height;
+
+#if CONFIG_EXT_PARTITION
+ vpx_superblock_size_t superblock_size;
+#endif // CONFIG_EXT_PARTITION
+} VP10EncoderConfig;
+
+static INLINE int is_lossless_requested(const VP10EncoderConfig *cfg) {
+  return (cfg->best_allowed_q | cfg->worst_allowed_q) == 0;  // both q limits 0
+}
+
+// TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
+typedef struct TileDataEnc {  // element type of VP10_COMP::tile_data
+ TileInfo tile_info;
+ int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];  // indexed [block size][mode]
+ int mode_map[BLOCK_SIZES][MAX_MODES];  // indexed [block size][mode]
+} TileDataEnc;
+
+typedef struct RD_COUNTS {  // type of ThreadData::rd_counts
+ vp10_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];  // indexed [tx size][plane type]
+ int64_t comp_pred_diff[REFERENCE_MODES];  // one entry per reference mode
+ int m_search_count;
+ int ex_search_count;
+} RD_COUNTS;
+
+typedef struct ThreadData {  // type of VP10_COMP::td
+ MACROBLOCK mb;
+ RD_COUNTS rd_counts;
+ FRAME_COUNTS *counts;
+
+ PICK_MODE_CONTEXT *leaf_tree;
+ PC_TREE *pc_tree;
+ PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];  // one slot per MIB size log2 in [MIN, MAX]
+
+ VAR_TREE *var_tree;
+ VAR_TREE *var_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];  // same sizing as pc_root
+} ThreadData;
+
+struct EncWorkerData;
+
+typedef struct ActiveMap {  // type of VP10_COMP::active_map
+ int enabled;
+ int update;
+ unsigned char *map;  // NOTE(review): layout not visible here - see vp10_set_active_map()
+} ActiveMap;
+
+typedef enum { Y, U, V, ALL } STAT_TYPE;  // indexes ImageStat::stat[]
+
+typedef struct IMAGE_STAT {
+ double stat[ALL + 1];  // running sums, indexed by STAT_TYPE
+ double worst;  // VPXMIN of the combined (ALL) scores added so far
+} ImageStat;
+
+typedef struct {  // element type of VP10_COMP::upsampled_ref_bufs
+ int ref_count;
+ YV12_BUFFER_CONFIG buf;
+} EncRefCntBuffer;
+
+#if CONFIG_ENTROPY
+typedef struct SUBFRAME_STATS {  // used for VP10_COMP::subframe_stats and wholeframe_stats
+ vp10_coeff_probs_model coef_probs_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES];
+ vp10_coeff_count coef_counts_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES];
+ unsigned int eob_counts_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES][REF_TYPES]
+ [COEF_BANDS][COEFF_CONTEXTS];
+ vp10_coeff_probs_model enc_starting_coef_probs[TX_SIZES][PLANE_TYPES];
+} SUBFRAME_STATS;
+#endif // CONFIG_ENTROPY
+
+typedef struct TileBufferEnc {  // element type of VP10_COMP::tile_buffers
+ uint8_t *data;
+ size_t size;
+} TileBufferEnc;
+
+typedef struct VP10_COMP {
+ QUANTS quants;
+ ThreadData td;
+ MB_MODE_INFO_EXT *mbmi_ext_base;
+ DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); // 8: SIMD width
+ DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); // 8: SIMD width
+#if CONFIG_NEW_QUANT
+ DECLARE_ALIGNED(16, dequant_val_type_nuq,
+ y_dequant_val_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS]);
+ DECLARE_ALIGNED(16, dequant_val_type_nuq,
+ uv_dequant_val_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS]);
+#endif // CONFIG_NEW_QUANT
+ VP10_COMMON common;
+ VP10EncoderConfig oxcf;
+ struct lookahead_ctx *lookahead;
+ struct lookahead_entry *alt_ref_source;  // lookahead entry picked as ARF source; reset to NULL for its overlay frame
+
+ YV12_BUFFER_CONFIG *Source;  // image being coded: the source image or the filtered ARF buffer
+ YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames
+ YV12_BUFFER_CONFIG *un_scaled_source;
+ YV12_BUFFER_CONFIG scaled_source;
+ YV12_BUFFER_CONFIG *unscaled_last_source;  // image of the lookahead entry at -1, or NULL
+ YV12_BUFFER_CONFIG scaled_last_source;
+
+ // Up-sampled reference buffers
+ // NOTE(zoeliu): It is needed to allocate sufficient space to the up-sampled
+ // reference buffers, which should include the up-sampled version of all the
+ // possibly stored references plus the currently coded frame itself.
+ EncRefCntBuffer upsampled_ref_bufs[REF_FRAMES + 1];
+ int upsampled_ref_idx[REF_FRAMES + 1];
+
+ // For a still frame, this flag is set to 1 to skip partition search.
+ int partition_search_skippable_frame;
+
+ int scaled_ref_idx[TOTAL_REFS_PER_FRAME];
+#if CONFIG_EXT_REFS
+ int lst_fb_idxes[LAST_REF_FRAMES];
+#else
+ int lst_fb_idx;
+#endif // CONFIG_EXT_REFS
+ int gld_fb_idx;
+#if CONFIG_EXT_REFS
+ int bwd_fb_idx; // BWD_REF_FRAME
+#endif // CONFIG_EXT_REFS
+ int alt_fb_idx;
+
+ int last_show_frame_buf_idx; // last show frame buffer index
+
+ int refresh_last_frame;
+ int refresh_golden_frame;
+#if CONFIG_EXT_REFS
+ int refresh_bwd_ref_frame;
+#endif // CONFIG_EXT_REFS
+ int refresh_alt_ref_frame;
+
+ int ext_refresh_frame_flags_pending;
+ int ext_refresh_last_frame;
+ int ext_refresh_golden_frame;
+ int ext_refresh_alt_ref_frame;
+
+ int ext_refresh_frame_context_pending;
+ int ext_refresh_frame_context;
+
+ YV12_BUFFER_CONFIG last_frame_uf;
+#if CONFIG_LOOP_RESTORATION
+ YV12_BUFFER_CONFIG last_frame_db;
+#endif // CONFIG_LOOP_RESTORATION
+
+ // Ambient reconstruction err target for force key frames
+ int64_t ambient_err;
+
+ RD_OPT rd;
+
+ CODING_CONTEXT coding_context;
+
+#if CONFIG_REF_MV
+ int *nmv_costs[NMV_CONTEXTS][2];
+ int *nmv_costs_hp[NMV_CONTEXTS][2];
+#endif
+
+ int *nmvcosts[2];
+ int *nmvcosts_hp[2];
+ int *nmvsadcosts[2];
+ int *nmvsadcosts_hp[2];
+
+ int64_t last_time_stamp_seen;
+ int64_t last_end_time_stamp_seen;
+ int64_t first_time_stamp_ever;
+
+ RATE_CONTROL rc;
+ double framerate;
+
+ // NOTE(zoeliu): Any inter frame allows maximum of REF_FRAMES inter
+ // references; Plus the currently coded frame itself, it is needed to allocate
+ // sufficient space to the size of the maximum possible number of frames.
+ int interp_filter_selected[REF_FRAMES + 1][SWITCHABLE];
+
+ struct vpx_codec_pkt_list *output_pkt_list;
+
+ MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
+ int mbgraph_n_frames; // number of frames filled in the above
+ int static_mb_pct; // % forced skip mbs by segmentation
+ int ref_frame_flags;
+
+ SPEED_FEATURES sf;
+
+ unsigned int max_mv_magnitude;
+ int mv_step_param;
+
+ int allow_comp_inter_inter;
+
+ // Default value is 1. From first pass stats, encode_breakout may be disabled.
+ ENCODE_BREAKOUT_TYPE allow_encode_breakout;
+
+ // Get threshold from external input. A suggested threshold is 800 for HD
+ // clips, and 300 for < HD clips.
+ int encode_breakout;
+
+ uint8_t *segmentation_map;
+
+ // segment threshold for encode breakout
+ int segment_encode_breakout[MAX_SEGMENTS];
+
+ CYCLIC_REFRESH *cyclic_refresh;
+ ActiveMap active_map;
+
+ fractional_mv_step_fp *find_fractional_mv_step;
+ vp10_full_search_fn_t full_search_sad; // It is currently unused.
+ vp10_diamond_search_fn_t diamond_search_sad;
+ vpx_variance_fn_ptr_t fn_ptr[BLOCK_SIZES];
+ uint64_t time_receive_data;
+ uint64_t time_compress_data;  // accumulated timer value from vp10_get_compressed_data()
+ uint64_t time_pick_lpf;
+ uint64_t time_encode_sb_row;
+
+#if CONFIG_FP_MB_STATS
+ int use_fp_mb_stats;
+#endif
+
+ TWO_PASS twopass;
+
+ YV12_BUFFER_CONFIG alt_ref_buffer;  // used as the ARF source after vp10_temporal_filter() runs
+
+#if CONFIG_INTERNAL_STATS
+ unsigned int mode_chosen_counts[MAX_MODES];
+
+ int count;  // shown frames seen by compute_internal_stats()
+ uint64_t total_sq_error;  // sum of psnr.sse[0] over those frames
+ uint64_t total_samples;  // sum of psnr.samples[0] over those frames
+ ImageStat psnr;
+
+ double total_blockiness;
+ double worst_blockiness;
+
+ int bytes;  // sum of the coded frame sizes
+ double summed_quality;  // sum of frame SSIM * weight
+ double summed_weights;  // sum of the SSIM weights
+ unsigned int tot_recode_hits;
+ double worst_ssim;  // VPXMIN of the per-frame SSIM values
+
+ ImageStat fastssim;
+ ImageStat psnrhvs;
+
+ int b_calculate_blockiness;
+ int b_calculate_consistency;
+
+ double total_inconsistency;
+ double worst_consistency;
+ Ssimv *ssim_vars;
+ Metrics metrics;
+#endif
+ int b_calculate_psnr;
+
+ int droppable;  // set to !frame_is_reference(cpi) after a non-empty encode
+
+ int initial_width;
+ int initial_height;
+ int initial_mbs; // Number of MBs in the full-size frame; to be used to
+ // normalize the firstpass stats. This will differ from the
+ // number of MBs in the current frame when the frame is
+ // scaled.
+
+ // Store frame variance info in SOURCE_VAR_BASED_PARTITION search type.
+ diff *source_diff_var;
+ // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
+ unsigned int source_var_thresh;
+ int frames_till_next_var_check;
+
+ int frame_flags;
+
+ search_site_config ss_cfg;
+
+ int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES];
+#if CONFIG_REF_MV
+ int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2];
+ int zeromv_mode_cost[ZEROMV_MODE_CONTEXTS][2];
+ int refmv_mode_cost[REFMV_MODE_CONTEXTS][2];
+ int drl_mode_cost0[DRL_MODE_CONTEXTS][2];
+#if CONFIG_EXT_INTER
+ int new2mv_mode_cost[2];
+#endif // CONFIG_EXT_INTER
+#endif
+
+ unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
+#if CONFIG_EXT_INTER
+ unsigned int
+ inter_compound_mode_cost[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
+ unsigned int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
+#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ int motvar_cost[BLOCK_SIZES][MOTION_VARIATIONS];
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ int intra_uv_mode_cost[INTRA_MODES][INTRA_MODES];
+ int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
+ int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
+#if CONFIG_EXT_PARTITION_TYPES
+ int partition_cost[PARTITION_CONTEXTS][EXT_PARTITION_TYPES];
+#else
+ int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
+#endif
+ int palette_y_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
+ int palette_uv_size_cost[PALETTE_BLOCK_SIZES][PALETTE_SIZES];
+ int palette_y_color_cost[PALETTE_MAX_SIZE -
+ 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS];
+ int palette_uv_color_cost[PALETTE_MAX_SIZE -
+ 1][PALETTE_COLOR_CONTEXTS][PALETTE_COLORS];
+ int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
+#if CONFIG_EXT_TX
+ int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
+ int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
+ [TX_TYPES];
+#else
+ int intra_tx_type_costs[EXT_TX_SIZES][TX_TYPES][TX_TYPES];
+ int inter_tx_type_costs[EXT_TX_SIZES][TX_TYPES];
+#endif // CONFIG_EXT_TX
+#if CONFIG_EXT_INTRA
+ int intra_filter_cost[INTRA_FILTERS + 1][INTRA_FILTERS];
+#endif // CONFIG_EXT_INTRA
+
+ int multi_arf_allowed;  // set to 1 only when pass == 2 and enable_auto_arf > 1
+ int multi_arf_enabled;
+ int multi_arf_last_grp_enabled;
+
+ TileDataEnc *tile_data;
+ int allocated_tiles; // Keep track of memory allocated for tiles.
+
+ TOKENEXTRA *tile_tok[MAX_TILE_ROWS][MAX_TILE_COLS];
+ unsigned int tok_count[MAX_TILE_ROWS][MAX_TILE_COLS];
+
+ TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
+
+ int resize_pending;
+ int resize_state;
+ int resize_scale_num;
+ int resize_scale_den;
+ int resize_avg_qp;
+ int resize_buffer_underflow;
+ int resize_count;
+
+ // VAR_BASED_PARTITION thresholds
+ // 0 - threshold_128x128;
+ // 1 - threshold_64x64;
+ // 2 - threshold_32x32;
+ // 3 - threshold_16x16;
+ // 4 - threshold_8x8;
+ int64_t vbp_thresholds[5];
+ int64_t vbp_threshold_minmax;
+ int64_t vbp_threshold_sad;
+ BLOCK_SIZE vbp_bsize_min;
+
+ // VARIANCE_AQ segment map refresh
+ int vaq_refresh;
+
+ // Multi-threading
+ int num_workers;
+ VPxWorker *workers;
+ struct EncWorkerData *tile_thr_data;
+ VP10LfSync lf_row_sync;
+#if CONFIG_ENTROPY
+ SUBFRAME_STATS subframe_stats;
+ // TODO(yaowu): minimize the size of count buffers
+ SUBFRAME_STATS wholeframe_stats;
+ vp10_coeff_stats branch_ct_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES];
+#endif // CONFIG_ENTROPY
+#if CONFIG_ANS
+ struct BufAnsCoder buf_ans;
+#endif
+#if CONFIG_EXT_REFS
+ int refresh_frame_mask;
+ int existing_fb_idx_to_show;
+ int is_arf_filter_off[MAX_EXT_ARFS + 1];
+ int num_extra_arfs;
+ int arf_map[MAX_EXT_ARFS + 1];
+#endif // CONFIG_EXT_REFS
+#if CONFIG_GLOBAL_MOTION
+ int global_motion_used[TOTAL_REFS_PER_FRAME];
+#endif
+} VP10_COMP;
+
+void vp10_initialize_enc(void);
+
+struct VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf,
+ BufferPool *const pool);
+void vp10_remove_compressor(VP10_COMP *cpi);
+
+void vp10_change_config(VP10_COMP *cpi, const VP10EncoderConfig *oxcf);
+
+// Receive a frame's worth of data. The caller can assume that a copy of this
+// frame is made, not just a copy of the pointer.
+int vp10_receive_raw_frame(VP10_COMP *cpi, unsigned int frame_flags,
+ YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
+ int64_t end_time_stamp);
+
+int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
+ size_t *size, uint8_t *dest, int64_t *time_stamp,
+ int64_t *time_end, int flush);
+
+int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest);
+
+int vp10_get_last_show_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *frame);
+
+int vp10_use_as_reference(VP10_COMP *cpi, int ref_frame_flags);
+
+void vp10_update_reference(VP10_COMP *cpi, int ref_frame_flags);
+
+int vp10_copy_reference_enc(VP10_COMP *cpi, VPX_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd);
+
+int vp10_set_reference_enc(VP10_COMP *cpi, VPX_REFFRAME ref_frame_flag,
+ YV12_BUFFER_CONFIG *sd);
+
+int vp10_update_entropy(VP10_COMP *cpi, int update);
+
+int vp10_set_active_map(VP10_COMP *cpi, unsigned char *map, int rows, int cols);
+
+int vp10_get_active_map(VP10_COMP *cpi, unsigned char *map, int rows, int cols);
+
+int vp10_set_internal_size(VP10_COMP *cpi, VPX_SCALING horiz_mode,
+ VPX_SCALING vert_mode);
+
+int vp10_set_size_literal(VP10_COMP *cpi, unsigned int width,
+ unsigned int height);
+
+int vp10_get_quantizer(struct VP10_COMP *cpi);
+
+void vp10_full_to_model_counts(vp10_coeff_count_model *model_count,
+ vp10_coeff_count *full_count);
+
+static INLINE int frame_is_kf_gf_arf(const VP10_COMP *cpi) {
+ return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
+ (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
+}
+
+static INLINE int get_ref_frame_map_idx(const VP10_COMP *cpi,
+ MV_REFERENCE_FRAME ref_frame) {
+#if CONFIG_EXT_REFS
+ if (ref_frame >= LAST_FRAME && ref_frame <= LAST3_FRAME)
+ return cpi->lst_fb_idxes[ref_frame - 1];
+#else
+ if (ref_frame == LAST_FRAME) return cpi->lst_fb_idx;
+#endif // CONFIG_EXT_REFS
+ else if (ref_frame == GOLDEN_FRAME)
+ return cpi->gld_fb_idx;
+#if CONFIG_EXT_REFS
+ else if (ref_frame == BWDREF_FRAME)
+ return cpi->bwd_fb_idx;
+#endif // CONFIG_EXT_REFS
+ else
+ return cpi->alt_fb_idx;
+}
+
+static INLINE int get_ref_frame_buf_idx(const VP10_COMP *const cpi,
+ MV_REFERENCE_FRAME ref_frame) {
+ const VP10_COMMON *const cm = &cpi->common;
+ const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
+ return (map_idx != INVALID_IDX) ? cm->ref_frame_map[map_idx] : INVALID_IDX;
+}
+
+static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
+ VP10_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
+ VP10_COMMON *const cm = &cpi->common;
+ const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ return buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf
+ : NULL;
+}
+
+static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(
+    VP10_COMP *cpi, const MV_REFERENCE_FRAME ref_frame) {
+  // Resolve ref_frame to its map slot, then to the up-sampled buffer recorded
+  // for that slot. NOTE(review): the slot is not checked against INVALID_IDX.
+  const int map_idx = get_ref_frame_map_idx(cpi, ref_frame);
+  return &cpi->upsampled_ref_bufs[cpi->upsampled_ref_idx[map_idx]].buf;
+}
+
+#if CONFIG_EXT_REFS
+static INLINE int enc_is_ref_frame_buf(VP10_COMP *cpi,
+ RefCntBuffer *frame_buf) {
+ MV_REFERENCE_FRAME ref_frame;
+ VP10_COMMON *const cm = &cpi->common;
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ if (buf_idx == INVALID_IDX) continue;
+ if (frame_buf == &cm->buffer_pool->frame_bufs[buf_idx]) break;
+ }
+ return (ref_frame <= ALTREF_FRAME);
+}
+#endif // CONFIG_EXT_REFS
+
+static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols) {
+ // TODO(JBB): double check we can't exceed this token count if we have a
+ // 32x32 transform crossing a boundary at a multiple of 16.
+ // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full
+ // resolution. We assume up to 1 token per pixel, and then allow
+ // a head room of 1 EOSB token per 8x8 block per plane.
+ return mb_rows * mb_cols * (16 * 16 + 4) * 3;
+}
+
+// Get the allocated token size for a tile. It does the same calculation as in
+// the frame token allocation.
+static INLINE unsigned int allocated_tokens(TileInfo tile) {
+ int tile_mb_rows = (tile.mi_row_end - tile.mi_row_start + 1) >> 1;
+ int tile_mb_cols = (tile.mi_col_end - tile.mi_col_start + 1) >> 1;
+
+ return get_token_alloc(tile_mb_rows, tile_mb_cols);
+}
+
+void vp10_alloc_compressor_data(VP10_COMP *cpi);
+
+void vp10_scale_references(VP10_COMP *cpi);
+
+void vp10_update_reference_frames(VP10_COMP *cpi);
+
+void vp10_set_high_precision_mv(VP10_COMP *cpi, int allow_high_precision_mv);
+
+YV12_BUFFER_CONFIG *vp10_scale_if_required_fast(VP10_COMMON *cm,
+ YV12_BUFFER_CONFIG *unscaled,
+ YV12_BUFFER_CONFIG *scaled);
+
+YV12_BUFFER_CONFIG *vp10_scale_if_required(VP10_COMMON *cm,
+ YV12_BUFFER_CONFIG *unscaled,
+ YV12_BUFFER_CONFIG *scaled);
+
+void vp10_apply_encoding_flags(VP10_COMP *cpi, vpx_enc_frame_flags_t flags);
+
+static INLINE int is_altref_enabled(const VP10_COMP *const cpi) {
+ return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 &&
+ cpi->oxcf.enable_auto_arf;
+}
+
+// TODO(zoeliu): To set up cpi->oxcf.enable_auto_brf
+#if 0 && CONFIG_EXT_REFS
+static INLINE int is_bwdref_enabled(const VP10_COMP *const cpi) {
+ // NOTE(zoeliu): The enabling of bi-predictive frames depends on the use of
+ // alt_ref, and now will be off when the alt_ref interval is
+ // not sufficiently large.
+ return is_altref_enabled(cpi) && cpi->oxcf.enable_auto_brf;
+}
+#endif // CONFIG_EXT_REFS
+
+static INLINE void set_ref_ptrs(VP10_COMMON *cm, MACROBLOCKD *xd,
+ MV_REFERENCE_FRAME ref0,
+ MV_REFERENCE_FRAME ref1) {
+ xd->block_refs[0] =
+ &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME : 0];
+ xd->block_refs[1] =
+ &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME : 0];
+}
+
+static INLINE int get_chessboard_index(const int frame_index) {
+ return frame_index & 0x1;
+}
+
+static INLINE int *cond_cost_list(const struct VP10_COMP *cpi, int *cost_list) {
+ return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL;
+}
+
+void vp10_new_framerate(VP10_COMP *cpi, double framerate);
+
+#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
+
+// Update up-sampled reference frame index, keeping buffer ref counts in step.
+static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx,
+ int new_uidx) {
+ const int ref_index = *uidx;
+ // Release the buffer *uidx pointed at, if it is valid and still referenced.
+ if (ref_index >= 0 && ubufs[ref_index].ref_count > 0)
+ ubufs[ref_index].ref_count--;
+ // Switch to new_uidx and count the new reference.
+ *uidx = new_uidx;
+ ubufs[new_uidx].ref_count++;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_ENCODER_H_
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
new file mode 100644
index 0000000..63d716c
--- /dev/null
+++ b/av1/encoder/ethread.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/encoder/encodeframe.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/ethread.h"
+#include "aom_dsp/vpx_dsp_common.h"
+
+static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
+ int i, j, k, l, m, n;
+ // Add the rd_counts gathered in td_t to the running totals held in td.
+ for (i = 0; i < REFERENCE_MODES; i++)
+ td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
+ // Coefficient counts are summed element by element.
+ for (i = 0; i < TX_SIZES; i++)
+ for (j = 0; j < PLANE_TYPES; j++)
+ for (k = 0; k < REF_TYPES; k++)
+ for (l = 0; l < COEF_BANDS; l++)
+ for (m = 0; m < COEFF_CONTEXTS; m++)
+ for (n = 0; n < ENTROPY_TOKENS; n++)
+ td->rd_counts.coef_counts[i][j][k][l][m][n] +=
+ td_t->rd_counts.coef_counts[i][j][k][l][m][n];
+
+ // Counts of all motion searches and exhaustive mesh searches.
+ td->rd_counts.m_search_count += td_t->rd_counts.m_search_count;
+ td->rd_counts.ex_search_count += td_t->rd_counts.ex_search_count;
+}
+
+static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
+ VP10_COMP *const cpi = thread_data->cpi;
+ const VP10_COMMON *const cm = &cpi->common;
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
+ int t;
+ // Worker hook: encodes this worker's share of the frame's tiles.
+ (void)unused;
+ // Visit tiles start, start + num_workers, ... counted row by row.
+ for (t = thread_data->start; t < tile_rows * tile_cols;
+ t += cpi->num_workers) {
+ int tile_row = t / tile_cols;
+ int tile_col = t % tile_cols;
+
+ vp10_encode_tile(cpi, thread_data->td, tile_row, tile_col);
+ }
+ // This hook always returns 0.
+ return 0;
+}
+
+void vp10_encode_tiles_mt(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  const int tile_cols = cm->tile_cols;
+  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
+  int i;
+  // Tiles are shared out over num_workers workers; see enc_worker_hook().
+  vp10_init_tile_data(cpi);
+
+  // Only run once to create threads and allocate thread data.
+  if (cpi->num_workers == 0) {
+    CHECK_MEM_ERROR(cm, cpi->workers,
+                    vpx_malloc(num_workers * sizeof(*cpi->workers)));
+
+    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
+                    vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
+
+    for (i = 0; i < num_workers; i++) {
+      VPxWorker *const worker = &cpi->workers[i];
+      EncWorkerData *const thread_data = &cpi->tile_thr_data[i];
+
+      ++cpi->num_workers;
+      winterface->init(worker);
+
+      thread_data->cpi = cpi;
+
+      if (i < num_workers - 1) {
+        // Allocate thread data.
+        CHECK_MEM_ERROR(cm, thread_data->td,
+                        vpx_memalign(32, sizeof(*thread_data->td)));
+        vp10_zero(*thread_data->td);
+
+        // Set up pc_tree.
+        thread_data->td->leaf_tree = NULL;
+        thread_data->td->pc_tree = NULL;
+        vp10_setup_pc_tree(cm, thread_data->td);
+
+        // Set up this worker's own variance tree (not cpi->td's) if needed.
+        if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
+          vp10_setup_var_tree(cm, thread_data->td);
+
+        // Allocate frame counters in thread data.
+        CHECK_MEM_ERROR(cm, thread_data->td->counts,
+                        vpx_calloc(1, sizeof(*thread_data->td->counts)));
+
+        // Create threads
+        if (!winterface->reset(worker))
+          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+                             "Tile encoder thread creation failed");
+      } else {
+        // Main thread acts as a worker and uses the thread data in cpi.
+        thread_data->td = &cpi->td;
+      }
+
+      winterface->sync(worker);
+    }
+  }
+
+  for (i = 0; i < num_workers; i++) {
+    VPxWorker *const worker = &cpi->workers[i];
+    EncWorkerData *thread_data;
+
+    worker->hook = (VPxWorkerHook)enc_worker_hook;
+    worker->data1 = &cpi->tile_thr_data[i];
+    worker->data2 = NULL;
+    thread_data = (EncWorkerData *)worker->data1;
+
+    // Before encoding a frame, copy the thread data from cpi.
+    if (thread_data->td != &cpi->td) {
+      thread_data->td->mb = cpi->td.mb;
+      thread_data->td->rd_counts = cpi->td.rd_counts;
+    }
+    if (thread_data->td->counts != &cpi->common.counts) {
+      memcpy(thread_data->td->counts, &cpi->common.counts,
+             sizeof(cpi->common.counts));
+    }
+    // Allocate palette buffers. NOTE(review): runs on every call, after the mb
+    // copy above replaced the old pointer; confirm that buffer is freed.
+    if (cpi->common.allow_screen_content_tools && i < num_workers - 1) {
+      MACROBLOCK *x = &thread_data->td->mb;
+      CHECK_MEM_ERROR(cm, x->palette_buffer,
+                      vpx_memalign(16, sizeof(*x->palette_buffer)));
+    }
+  }
+
+  // Encode a frame
+  for (i = 0; i < num_workers; i++) {
+    VPxWorker *const worker = &cpi->workers[i];
+    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
+
+    // Set the starting tile for each thread.
+    thread_data->start = i;
+
+    if (i == cpi->num_workers - 1)
+      winterface->execute(worker);
+    else
+      winterface->launch(worker);
+  }
+
+  // Encoding ends.
+  for (i = 0; i < num_workers; i++) {
+    VPxWorker *const worker = &cpi->workers[i];
+    winterface->sync(worker);
+  }
+
+  for (i = 0; i < num_workers; i++) {
+    VPxWorker *const worker = &cpi->workers[i];
+    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
+
+    // Accumulate counters.
+    if (i < cpi->num_workers - 1) {
+      vp10_accumulate_frame_counts(cm, thread_data->td->counts);
+      accumulate_rd_opt(&cpi->td, thread_data->td);
+    }
+  }
+}
diff --git a/av1/encoder/ethread.h b/av1/encoder/ethread.h
new file mode 100644
index 0000000..d72816cd
--- /dev/null
+++ b/av1/encoder/ethread.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_ETHREAD_H_
+#define VP10_ENCODER_ETHREAD_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10_COMP;
+struct ThreadData;
+
+typedef struct EncWorkerData {
+ struct VP10_COMP *cpi;  // Encoder instance the worker encodes tiles for.
+ struct ThreadData *td;  // Thread data passed to the tile encoder.
+ int start;  // Index of the first tile this worker encodes.
+} EncWorkerData;
+
+void vp10_encode_tiles_mt(struct VP10_COMP *cpi);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_ETHREAD_H_
diff --git a/av1/encoder/extend.c b/av1/encoder/extend.c
new file mode 100644
index 0000000..1b0c442
--- /dev/null
+++ b/av1/encoder/extend.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+
+#include "av1/common/common.h"
+#include "av1/encoder/extend.h"
+
+static void copy_and_extend_plane(const uint8_t *src, int src_pitch,
+ uint8_t *dst, int dst_pitch, int w, int h,
+ int extend_top, int extend_left,
+ int extend_bottom, int extend_right) {
+ int i, linesize;
+ // Copy a w x h plane from src into dst, then pad dst on all four sides.
+ // Row pass pointers: src left/right edge pixels, dst left/right borders.
+ const uint8_t *src_ptr1 = src;
+ const uint8_t *src_ptr2 = src + w - 1;
+ uint8_t *dst_ptr1 = dst - extend_left;
+ uint8_t *dst_ptr2 = dst + w;
+ // Per row: left border <- first pixel, w pixels copied, right <- last pixel.
+ for (i = 0; i < h; i++) {
+ memset(dst_ptr1, src_ptr1[0], extend_left);
+ memcpy(dst_ptr1 + extend_left, src_ptr1, w);
+ memset(dst_ptr2, src_ptr2[0], extend_right);
+ src_ptr1 += src_pitch;
+ src_ptr2 += src_pitch;
+ dst_ptr1 += dst_pitch;
+ dst_ptr2 += dst_pitch;
+ }
+
+ // Now copy the top and bottom lines of dst (borders included, see linesize)
+ // into each line of the respective borders.
+ src_ptr1 = dst - extend_left;
+ src_ptr2 = dst + dst_pitch * (h - 1) - extend_left;
+ dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left;
+ dst_ptr2 = dst + dst_pitch * (h)-extend_left;
+ linesize = extend_left + extend_right + w;
+
+ for (i = 0; i < extend_top; i++) {
+ memcpy(dst_ptr1, src_ptr1, linesize);
+ dst_ptr1 += dst_pitch;
+ }
+
+ for (i = 0; i < extend_bottom; i++) {
+ memcpy(dst_ptr2, src_ptr2, linesize);
+ dst_ptr2 += dst_pitch;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch,
+ uint8_t *dst8, int dst_pitch, int w,
+ int h, int extend_top, int extend_left,
+ int extend_bottom, int extend_right) {
+ int i, linesize;
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ // 16-bit counterpart of copy_and_extend_plane: same steps on uint16_t data.
+ // Row pass pointers: src left/right edge samples, dst left/right borders.
+ const uint16_t *src_ptr1 = src;
+ const uint16_t *src_ptr2 = src + w - 1;
+ uint16_t *dst_ptr1 = dst - extend_left;
+ uint16_t *dst_ptr2 = dst + w;
+ // Widths and extents count samples; the memcpy sizes are scaled to bytes.
+ for (i = 0; i < h; i++) {
+ vpx_memset16(dst_ptr1, src_ptr1[0], extend_left);
+ memcpy(dst_ptr1 + extend_left, src_ptr1, w * sizeof(src_ptr1[0]));
+ vpx_memset16(dst_ptr2, src_ptr2[0], extend_right);
+ src_ptr1 += src_pitch;
+ src_ptr2 += src_pitch;
+ dst_ptr1 += dst_pitch;
+ dst_ptr2 += dst_pitch;
+ }
+
+ // Now copy the top and bottom lines of dst (borders included, see linesize)
+ // into each line of the respective borders.
+ src_ptr1 = dst - extend_left;
+ src_ptr2 = dst + dst_pitch * (h - 1) - extend_left;
+ dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left;
+ dst_ptr2 = dst + dst_pitch * (h)-extend_left;
+ linesize = extend_left + extend_right + w;
+
+ for (i = 0; i < extend_top; i++) {
+ memcpy(dst_ptr1, src_ptr1, linesize * sizeof(src_ptr1[0]));
+ dst_ptr1 += dst_pitch;
+ }
+
+ for (i = 0; i < extend_bottom; i++) {
+ memcpy(dst_ptr2, src_ptr2, linesize * sizeof(src_ptr2[0]));
+ dst_ptr2 += dst_pitch;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+void vp10_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst) {
+ // Extend src frame in buffer
+ // Altref filtering assumes 16 pixel extension
+ const int et_y = 16;
+ const int el_y = 16;
+ // Motion estimation may use src block variance with the block size up
+ // to 64x64, so the right and bottom need to be extended to 64 multiple
+ // or up to 16, whichever is greater.
+ const int er_y =
+ VPXMAX(src->y_width + 16, ALIGN_POWER_OF_TWO(src->y_width, 6)) -
+ src->y_crop_width;
+ const int eb_y =
+ VPXMAX(src->y_height + 16, ALIGN_POWER_OF_TWO(src->y_height, 6)) -
+ src->y_crop_height;
+ const int uv_width_subsampling = (src->uv_width != src->y_width);
+ const int uv_height_subsampling = (src->uv_height != src->y_height);
+ const int et_uv = et_y >> uv_height_subsampling;
+ const int el_uv = el_y >> uv_width_subsampling;
+ const int eb_uv = eb_y >> uv_height_subsampling;
+ const int er_uv = er_y >> uv_width_subsampling;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
+ highbd_copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
+ dst->y_stride, src->y_crop_width,
+ src->y_crop_height, et_y, el_y, eb_y, er_y);
+
+ highbd_copy_and_extend_plane(
+ src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride,
+ src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv);
+
+ highbd_copy_and_extend_plane(
+ src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride,
+ src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv);
+ return;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
+ dst->y_stride, src->y_crop_width, src->y_crop_height,
+ et_y, el_y, eb_y, er_y);
+
+ copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer,
+ dst->uv_stride, src->uv_crop_width, src->uv_crop_height,
+ et_uv, el_uv, eb_uv, er_uv);
+
+ copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer,
+ dst->uv_stride, src->uv_crop_width, src->uv_crop_height,
+ et_uv, el_uv, eb_uv, er_uv);
+}
+
+void vp10_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst, int srcy,
+ int srcx, int srch, int srcw) {
+ // Only extend on the sides where the rectangle touches the frame boundary.
+ const int et_y = srcy ? 0 : dst->border;
+ const int el_y = srcx ? 0 : dst->border;
+ const int eb_y = srcy + srch != src->y_height
+ ? 0
+ : dst->border + dst->y_height - src->y_height;
+ const int er_y = srcx + srcw != src->y_width
+ ? 0
+ : dst->border + dst->y_width - src->y_width;
+ const int src_y_offset = srcy * src->y_stride + srcx;
+ const int dst_y_offset = srcy * dst->y_stride + srcx;
+ // NOTE(review): the hard-coded halving below presumes 2x2 chroma subsampling.
+ const int et_uv = ROUND_POWER_OF_TWO(et_y, 1);
+ const int el_uv = ROUND_POWER_OF_TWO(el_y, 1);
+ const int eb_uv = ROUND_POWER_OF_TWO(eb_y, 1);
+ const int er_uv = ROUND_POWER_OF_TWO(er_y, 1);
+ const int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
+ const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
+ const int srch_uv = ROUND_POWER_OF_TWO(srch, 1);
+ const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1);
+ // Copy and extend the rectangle plane by plane: Y, then U, then V.
+ copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride,
+ dst->y_buffer + dst_y_offset, dst->y_stride, srcw, srch,
+ et_y, el_y, eb_y, er_y);
+
+ copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride,
+ dst->u_buffer + dst_uv_offset, dst->uv_stride, srcw_uv,
+ srch_uv, et_uv, el_uv, eb_uv, er_uv);
+
+ copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride,
+ dst->v_buffer + dst_uv_offset, dst->uv_stride, srcw_uv,
+ srch_uv, et_uv, el_uv, eb_uv, er_uv);
+}
diff --git a/av1/encoder/extend.h b/av1/encoder/extend.h
new file mode 100644
index 0000000..1ad763e
--- /dev/null
+++ b/av1/encoder/extend.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_EXTEND_H_
+#define VP10_ENCODER_EXTEND_H_
+
+#include "aom_scale/yv12config.h"
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst);
+
+void vp10_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
+ YV12_BUFFER_CONFIG *dst, int srcy,
+ int srcx, int srch, int srcw);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_EXTEND_H_
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
new file mode 100644
index 0000000..b23b839
--- /dev/null
+++ b/av1/encoder/firstpass.c
@@ -0,0 +1,2924 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "./vpx_scale_rtcd.h"
+
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/system_state.h"
+#include "aom_scale/vpx_scale.h"
+#include "aom_scale/yv12config.h"
+
+#include "av1/common/entropymv.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/reconinter.h" // vp10_setup_dst_planes()
+#include "av1/encoder/aq_variance.h"
+#include "av1/encoder/block.h"
+#include "av1/encoder/encodeframe.h"
+#include "av1/encoder/encodemb.h"
+#include "av1/encoder/encodemv.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/extend.h"
+#include "av1/encoder/firstpass.h"
+#include "av1/encoder/mcomp.h"
+#include "av1/encoder/quantize.h"
+#include "av1/encoder/rd.h"
+#include "aom_dsp/variance.h"
+
+#define OUTPUT_FPF 0
+#define ARF_STATS_OUTPUT 0
+
+#define GROUP_ADAPTIVE_MAXQ 1
+
+#define BOOST_BREAKOUT 12.5
+#define BOOST_FACTOR 12.5
+#define FACTOR_PT_LOW 0.70
+#define FACTOR_PT_HIGH 0.90
+#define FIRST_PASS_Q 10.0
+#define GF_MAX_BOOST 96.0
+#define INTRA_MODE_PENALTY 1024
+#define KF_MAX_BOOST 128.0
+#define MIN_ARF_GF_BOOST 240
+#define MIN_DECAY_FACTOR 0.01
+#define MIN_KF_BOOST 300
+#define NEW_MV_MODE_PENALTY 32
+#define DARK_THRESH 64
+#define DEFAULT_GRP_WEIGHT 1.0
+#define RC_FACTOR_MIN 0.75
+#define RC_FACTOR_MAX 1.75
+
+#define NCOUNT_INTRA_THRESH 8192
+#define NCOUNT_INTRA_FACTOR 3
+#define NCOUNT_FRAME_II_THRESH 5.0
+
+#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x)-0.000001 : (x) + 0.000001)
+
+#if ARF_STATS_OUTPUT
+unsigned int arf_count = 0;
+#endif
+
+// Resets the first pass stats read position to the given entry. No file seek
+// is involved: this only repoints p->stats_in.
+static void reset_fpf_position(TWO_PASS *p, const FIRSTPASS_STATS *position) {
+ p->stats_in = position;
+}
+
+// Read frame stats at an offset from the current position (p->stats_in).
+static const FIRSTPASS_STATS *read_frame_stats(const TWO_PASS *p, int offset) {
+ if ((offset >= 0 && p->stats_in + offset >= p->stats_in_end) ||
+ (offset < 0 && p->stats_in + offset < p->stats_in_start)) {
+ return NULL;
+ }
+ // NULL above means the target is at/after stats_in_end or before the start.
+ return &p->stats_in[offset];
+}
+
+static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) {
+ if (p->stats_in >= p->stats_in_end) return EOF;
+ // Otherwise copy the current entry to *fps, advance by one and return 1.
+ *fps = *p->stats_in;
+ ++p->stats_in;
+ return 1;
+}
+
+static void output_stats(FIRSTPASS_STATS *stats,
+                         struct vpx_codec_pkt_list *pktlist) {
+  struct vpx_codec_cx_pkt pkt;
+  pkt.kind = VPX_CODEC_STATS_PKT;
+  pkt.data.twopass_stats.buf = stats;
+  pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
+  vpx_codec_pkt_list_add(pktlist, &pkt);  // Hand the stats on as a packet.
+
+// TEMP debug code: append the same stats as one text line to firstpass.stt.
+#if OUTPUT_FPF
+  {
+    FILE *const fpfile = fopen("firstpass.stt", "a");
+    // Best effort only; nothing follows this block, so just leave on failure.
+    if (fpfile == NULL) return;
+    fprintf(fpfile,
+            "%12.0lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf"
+            " %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf"
+            " %12.4lf %12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf\n",
+            stats->frame, stats->weight, stats->intra_error, stats->coded_error,
+            stats->sr_coded_error, stats->pcnt_inter, stats->pcnt_motion,
+            stats->pcnt_second_ref, stats->pcnt_neutral, stats->intra_skip_pct,
+            stats->inactive_zone_rows, stats->inactive_zone_cols, stats->MVr,
+            stats->mvr_abs, stats->MVc, stats->mvc_abs, stats->MVrv,
+            stats->MVcv, stats->mv_in_out_count, stats->new_mv_count,
+            stats->count, stats->duration);
+    fclose(fpfile);
+  }
+#endif
+}
+
+#if CONFIG_FP_MB_STATS
+static void output_fpmb_stats(uint8_t *this_frame_mb_stats, VP10_COMMON *cm,
+ struct vpx_codec_pkt_list *pktlist) {
+ struct vpx_codec_cx_pkt pkt;
+ pkt.kind = VPX_CODEC_FPMB_STATS_PKT;
+ pkt.data.firstpass_mb_stats.buf = this_frame_mb_stats;
+ pkt.data.firstpass_mb_stats.sz = cm->initial_mbs * sizeof(uint8_t);
+ vpx_codec_pkt_list_add(pktlist, &pkt);
+}
+#endif
+
+static void zero_stats(FIRSTPASS_STATS *section) {
+ section->frame = 0.0;
+ section->weight = 0.0;
+ section->intra_error = 0.0;
+ section->coded_error = 0.0;
+ section->sr_coded_error = 0.0;
+ section->pcnt_inter = 0.0;
+ section->pcnt_motion = 0.0;
+ section->pcnt_second_ref = 0.0;
+ section->pcnt_neutral = 0.0;
+ section->intra_skip_pct = 0.0;
+ section->inactive_zone_rows = 0.0;
+ section->inactive_zone_cols = 0.0;
+ section->MVr = 0.0;
+ section->mvr_abs = 0.0;
+ section->MVc = 0.0;
+ section->mvc_abs = 0.0;
+ section->MVrv = 0.0;
+ section->MVcv = 0.0;
+ section->mv_in_out_count = 0.0;
+ section->new_mv_count = 0.0;
+ section->count = 0.0;
+ section->duration = 1.0;
+}
+
+static void accumulate_stats(FIRSTPASS_STATS *section,
+ const FIRSTPASS_STATS *frame) {
+ section->frame += frame->frame;
+ section->weight += frame->weight;
+ section->intra_error += frame->intra_error;
+ section->coded_error += frame->coded_error;
+ section->sr_coded_error += frame->sr_coded_error;
+ section->pcnt_inter += frame->pcnt_inter;
+ section->pcnt_motion += frame->pcnt_motion;
+ section->pcnt_second_ref += frame->pcnt_second_ref;
+ section->pcnt_neutral += frame->pcnt_neutral;
+ section->intra_skip_pct += frame->intra_skip_pct;
+ section->inactive_zone_rows += frame->inactive_zone_rows;
+ section->inactive_zone_cols += frame->inactive_zone_cols;
+ section->MVr += frame->MVr;
+ section->mvr_abs += frame->mvr_abs;
+ section->MVc += frame->MVc;
+ section->mvc_abs += frame->mvc_abs;
+ section->MVrv += frame->MVrv;
+ section->MVcv += frame->MVcv;
+ section->mv_in_out_count += frame->mv_in_out_count;
+ section->new_mv_count += frame->new_mv_count;
+ section->count += frame->count;
+ section->duration += frame->duration;
+}
+
+static void subtract_stats(FIRSTPASS_STATS *section,
+ const FIRSTPASS_STATS *frame) {
+ section->frame -= frame->frame;
+ section->weight -= frame->weight;
+ section->intra_error -= frame->intra_error;
+ section->coded_error -= frame->coded_error;
+ section->sr_coded_error -= frame->sr_coded_error;
+ section->pcnt_inter -= frame->pcnt_inter;
+ section->pcnt_motion -= frame->pcnt_motion;
+ section->pcnt_second_ref -= frame->pcnt_second_ref;
+ section->pcnt_neutral -= frame->pcnt_neutral;
+ section->intra_skip_pct -= frame->intra_skip_pct;
+ section->inactive_zone_rows -= frame->inactive_zone_rows;
+ section->inactive_zone_cols -= frame->inactive_zone_cols;
+ section->MVr -= frame->MVr;
+ section->mvr_abs -= frame->mvr_abs;
+ section->MVc -= frame->MVc;
+ section->mvc_abs -= frame->mvc_abs;
+ section->MVrv -= frame->MVrv;
+ section->MVcv -= frame->MVcv;
+ section->mv_in_out_count -= frame->mv_in_out_count;
+ section->new_mv_count -= frame->new_mv_count;
+ section->count -= frame->count;
+ section->duration -= frame->duration;
+}
+
+// Calculate the linear size relative to a baseline of 1080P
+#define BASE_SIZE 2073600.0 // 1920x1080
+static double get_linear_size_factor(const VP10_COMP *cpi) {
+ const double this_area = cpi->initial_width * cpi->initial_height;
+ return pow(this_area / BASE_SIZE, 0.5);
+}
+
+// Calculate an active area of the image that discounts formatting
+// bars and partially discounts other 0 energy areas.
+#define MIN_ACTIVE_AREA 0.5
+#define MAX_ACTIVE_AREA 1.0
+static double calculate_active_area(const VP10_COMP *cpi,
+ const FIRSTPASS_STATS *this_frame) {
+ double active_pct;
+
+ active_pct =
+ 1.0 -
+ ((this_frame->intra_skip_pct / 2) +
+ ((this_frame->inactive_zone_rows * 2) / (double)cpi->common.mb_rows));
+ return fclamp(active_pct, MIN_ACTIVE_AREA, MAX_ACTIVE_AREA);
+}
+
+// Calculate a modified Error used in distributing bits between easier and
+// harder frames.
+#define ACT_AREA_CORRECTION 0.5
+static double calculate_modified_err(const VP10_COMP *cpi,
+ const TWO_PASS *twopass,
+ const VP10EncoderConfig *oxcf,
+ const FIRSTPASS_STATS *this_frame) {
+ const FIRSTPASS_STATS *const stats = &twopass->total_stats;
+ const double av_weight = stats->weight / stats->count;
+ const double av_err = (stats->coded_error * av_weight) / stats->count;
+ double modified_error =
+ av_err * pow(this_frame->coded_error * this_frame->weight /
+ DOUBLE_DIVIDE_CHECK(av_err),
+ oxcf->two_pass_vbrbias / 100.0);
+ // NOTE(review): stats->count divides above with no zero guard; confirm > 0.
+ // Correction for active area. Frames with a reduced active area
+ // (eg due to formatting bars) have a higher error per mb for the
+ // remaining active MBs. The correction here assumes that coding
+ // 0.5N blocks of complexity 2X is a little easier than coding N
+ // blocks of complexity X.
+ modified_error *=
+ pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION);
+
+ return fclamp(modified_error, twopass->modified_error_min,
+ twopass->modified_error_max);
+}
+
+// This function returns the maximum target rate per frame.
+static int frame_max_bits(const RATE_CONTROL *rc,
+ const VP10EncoderConfig *oxcf) {
+ int64_t max_bits = ((int64_t)rc->avg_frame_bandwidth *
+ (int64_t)oxcf->two_pass_vbrmax_section) /
+ 100;
+ if (max_bits < 0)
+ max_bits = 0;
+ else if (max_bits > rc->max_frame_bandwidth)
+ max_bits = rc->max_frame_bandwidth;
+
+ return (int)max_bits;
+}
+
+void vp10_init_first_pass(VP10_COMP *cpi) {
+ zero_stats(&cpi->twopass.total_stats);
+}
+
+void vp10_end_first_pass(VP10_COMP *cpi) {
+ output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
+}
+
+static vpx_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
+ switch (bsize) {
+ case BLOCK_8X8: return vpx_mse8x8;
+ case BLOCK_16X8: return vpx_mse16x8;
+ case BLOCK_8X16: return vpx_mse8x16;
+ default: return vpx_mse16x16;
+ }
+}
+
+static unsigned int get_prediction_error(BLOCK_SIZE bsize,
+ const struct buf_2d *src,
+ const struct buf_2d *ref) {
+ unsigned int sse;
+ const vpx_variance_fn_t fn = get_block_variance_fn(bsize);
+ fn(src->buf, src->stride, ref->buf, ref->stride, &sse);
+ return sse;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static vpx_variance_fn_t highbd_get_block_variance_fn(BLOCK_SIZE bsize,
+                                                      int bd) {
+  // Select the MSE kernel for this bit depth and block size. A bit depth
+  // other than 10 or 12 uses the 8-bit kernels, and a block size that is
+  // not listed uses the 16x16 kernel of the selected depth.
+  if (bd == 10) {
+    switch (bsize) {
+      case BLOCK_8X8: return vpx_highbd_10_mse8x8;
+      case BLOCK_16X8: return vpx_highbd_10_mse16x8;
+      case BLOCK_8X16: return vpx_highbd_10_mse8x16;
+      default: return vpx_highbd_10_mse16x16;
+    }
+  }
+  if (bd == 12) {
+    switch (bsize) {
+      case BLOCK_8X8: return vpx_highbd_12_mse8x8;
+      case BLOCK_16X8: return vpx_highbd_12_mse16x8;
+      case BLOCK_8X16: return vpx_highbd_12_mse8x16;
+      default: return vpx_highbd_12_mse16x16;
+    }
+  }
+  // Every remaining bit depth value takes the 8-bit kernels.
+  switch (bsize) {
+    case BLOCK_8X8: return vpx_highbd_8_mse8x8;
+    case BLOCK_16X8: return vpx_highbd_8_mse16x8;
+    case BLOCK_8X16: return vpx_highbd_8_mse8x16;
+    default: return vpx_highbd_8_mse16x16;
+  }
+}
+
+static unsigned int highbd_get_prediction_error(BLOCK_SIZE bsize,
+                                                const struct buf_2d *src,
+                                                const struct buf_2d *ref,
+                                                int bd) {
+  unsigned int sum_sq_err;  // Written by the MSE kernel below.
+  const vpx_variance_fn_t mse_fn = highbd_get_block_variance_fn(bsize, bd);
+  (void)mse_fn(src->buf, src->stride, ref->buf, ref->stride, &sum_sq_err);
+  return sum_sq_err;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// Returns the smallest shift sr for which (dim << sr) reaches
+// MAX_FULL_PEL_VAL, where dim is the smaller initial frame dimension.
+static int get_search_range(const VP10_COMP *cpi) {
+  const int dim = VPXMIN(cpi->initial_width, cpi->initial_height);
+  int sr = 0;
+
+  for (; (dim << sr) < MAX_FULL_PEL_VAL; ++sr) continue;
+  return sr;
+}
+
+// Diamond motion search for the first pass. Starts from ref_mv (shifted down
+// by 3 to full-pel), scores each candidate with an MSE based variance
+// function plus NEW_MV_MODE_PENALTY, and overwrites *best_mv and
+// *best_motion_err only when a candidate beats the error passed in.
+static void first_pass_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
+                                     const MV *ref_mv, MV *best_mv,
+                                     int *best_motion_err) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+  const int mv_penalty = NEW_MV_MODE_PENALTY;
+  const int sr = get_search_range(cpi);
+  // The search range raises the first step and trims the refinement count.
+  const int first_step = 3 + sr;
+  const int extra_steps = (MAX_MVSEARCH_STEPS - 1) - 3 - sr;
+  vpx_variance_fn_ptr_t fn_ptr = cpi->fn_ptr[bsize];
+  MV start_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
+  MV cand_mv = { 0, 0 };
+  int skip, cand_err, n;
+
+  // Override the default variance function to use MSE.
+  fn_ptr.vf = get_block_variance_fn(bsize);
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+    fn_ptr.vf = highbd_get_block_variance_fn(bsize, xd->bd);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  // Initial step/diamond search centred on the starting mv.
+  cand_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &start_mv, &cand_mv,
+                                     first_step, x->sadperbit16, &skip,
+                                     &fn_ptr, ref_mv);
+  if (cand_err < INT_MAX)
+    cand_err = vp10_get_mvpred_var(x, &cand_mv, ref_mv, &fn_ptr, 1);
+  if (cand_err < INT_MAX - mv_penalty) cand_err += mv_penalty;
+
+  if (cand_err < *best_motion_err) {
+    *best_motion_err = cand_err;
+    *best_mv = cand_mv;
+  }
+
+  // Further searches as necessary. Each search reports in `skip` how many
+  // of the following steps are passed over without searching again.
+  n = skip;
+  skip = 0;
+  while (n < extra_steps) {
+    ++n;
+    if (skip != 0) {
+      --skip;
+      continue;
+    }
+
+    cand_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &start_mv, &cand_mv,
+                                       first_step + n, x->sadperbit16, &skip,
+                                       &fn_ptr, ref_mv);
+    if (cand_err < INT_MAX)
+      cand_err = vp10_get_mvpred_var(x, &cand_mv, ref_mv, &fn_ptr, 1);
+    if (cand_err < INT_MAX - mv_penalty) cand_err += mv_penalty;
+
+    if (cand_err < *best_motion_err) {
+      *best_motion_err = cand_err;
+      *best_mv = cand_mv;
+    }
+  }
+}
+
+static BLOCK_SIZE get_bsize(const VP10_COMMON *cm, int mb_row, int mb_col) {
+  // A dimension is halved when its second mi unit lies outside the frame.
+  const int full_width = 2 * mb_col + 1 < cm->mi_cols;
+  const int full_height = 2 * mb_row + 1 < cm->mi_rows;
+  if (full_width) return full_height ? BLOCK_16X16 : BLOCK_16X8;
+  return full_height ? BLOCK_8X16 : BLOCK_8X8;
+}
+
+static int find_fp_qindex(vpx_bit_depth_t bit_depth) {
+  // Lowest qindex whose converted q reaches FIRST_PASS_Q; when none does,
+  // the last index (QINDEX_RANGE - 1) is returned instead.
+  int qindex = 0;
+
+  while (qindex < QINDEX_RANGE &&
+         !(vp10_convert_qindex_to_q(qindex, bit_depth) >= FIRST_PASS_Q))
+    ++qindex;
+  return (qindex == QINDEX_RANGE) ? qindex - 1 : qindex;
+}
+
+static void set_first_pass_params(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  // A key frame is coded for the very first frame or when the key flag is
+  // set, unless this frame refreshes the alt ref buffer.
+  const int key_requested =
+      cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY);
+  const int is_key = !cpi->refresh_alt_ref_frame && key_requested;
+
+  cm->frame_type = is_key ? KEY_FRAME : INTER_FRAME;
+  cpi->rc.frames_to_key = INT_MAX;  // Do not use periodic key frames.
+}
+
+#define UL_INTRA_THRESH 50
+#define INVALID_ROW -1
+// Runs the first pass over one source frame: for every 16x16 MB it measures
+// the intra error and, after the first frame, the best inter error, then
+// outputs the frame's FIRSTPASS_STATS and updates the last/golden ref map.
+void vp10_first_pass(VP10_COMP *cpi, const struct lookahead_entry *source) {
+ int i, mb_row, mb_col;
+ MACROBLOCK *const x = &cpi->td.mb;
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ TileInfo tile;
+ struct macroblock_plane *const p = x->plane;
+ struct macroblockd_plane *const pd = xd->plane;
+ const PICK_MODE_CONTEXT *ctx =
+ &cpi->td.pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2]->none;
+
+ int recon_yoffset, recon_uvoffset;
+ int64_t intra_error = 0, coded_error = 0, sr_coded_error = 0;
+
+ int sum_mvr = 0, sum_mvc = 0;
+ int sum_mvr_abs = 0, sum_mvc_abs = 0;
+ int64_t sum_mvrs = 0, sum_mvcs = 0;
+ int mvcount = 0;
+ int intercount = 0;
+ int second_ref_count = 0;
+ const int intrapenalty = INTRA_MODE_PENALTY;
+ double neutral_count;
+ int intra_skip_count = 0;
+ int image_data_start_row = INVALID_ROW;
+ int new_mv_count = 0;
+ int sum_in_vectors = 0;
+ MV lastmv = { 0, 0 };
+ TWO_PASS *twopass = &cpi->twopass;
+ const MV zero_mv = { 0, 0 };
+ int recon_y_stride, recon_uv_stride, uv_mb_height;
+
+ YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+ YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
+ const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
+ double intra_factor;
+ double brightness_factor;
+ BufferPool *const pool = cm->buffer_pool;
+
+ // First pass code requires valid last and new frame buffers.
+ assert(new_yv12 != NULL);
+ assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ vp10_zero_array(cpi->twopass.frame_mb_stats_buf, cpi->initial_mbs);
+ }
+#endif
+
+ vpx_clear_system_state();
+
+ intra_factor = 0.0;
+ brightness_factor = 0.0;
+ neutral_count = 0.0;
+
+ set_first_pass_params(cpi);
+ vp10_set_quantizer(cm, find_fp_qindex(cm->bit_depth));
+
+ vp10_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
+
+ vp10_setup_src_planes(x, cpi->Source, 0, 0);
+ vp10_setup_dst_planes(xd->plane, new_yv12, 0, 0);
+
+ if (!frame_is_intra_only(cm)) {
+ vp10_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL);
+ }
+
+ xd->mi = cm->mi_grid_visible;
+ xd->mi[0] = cm->mi;
+
+ vp10_frame_init_quantizer(cpi);
+
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ p[i].coeff = ctx->coeff[i][1];
+ p[i].qcoeff = ctx->qcoeff[i][1];
+ pd[i].dqcoeff = ctx->dqcoeff[i][1];
+ p[i].eobs = ctx->eobs[i][1];
+ }
+
+ vp10_init_mv_probs(cm);
+ vp10_initialize_rd_consts(cpi);
+
+ // Tiling is ignored in the first pass.
+ vp10_tile_init(&tile, cm, 0, 0);
+
+ recon_y_stride = new_yv12->y_stride;
+ recon_uv_stride = new_yv12->uv_stride;
+ uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height);
+
+ for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
+ MV best_ref_mv = { 0, 0 };
+
+ // Reset above block coeffs.
+ xd->up_available = (mb_row != 0);
+ recon_yoffset = (mb_row * recon_y_stride * 16);
+ recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height);
+
+ // Set up limit values for motion vectors to prevent them extending
+ // outside the UMV borders.
+ x->mv_row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16);
+ x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + BORDER_MV_PIXELS_B16;
+
+ for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
+ int this_error;
+ const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
+ const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col);
+ double log_intra;
+ int level_sample;
+
+#if CONFIG_FP_MB_STATS
+ const int mb_index = mb_row * cm->mb_cols + mb_col;
+#endif
+
+ vpx_clear_system_state();
+
+ xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
+ xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
+ xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset;
+ xd->left_available = (mb_col != 0);
+ xd->mi[0]->mbmi.sb_type = bsize;
+ xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
+ set_mi_row_col(xd, &tile, mb_row << 1, num_8x8_blocks_high_lookup[bsize],
+ mb_col << 1, num_8x8_blocks_wide_lookup[bsize],
+ cm->mi_rows, cm->mi_cols);
+
+ // Do intra 16x16 prediction.
+ xd->mi[0]->mbmi.segment_id = 0;
+#if CONFIG_SUPERTX
+ xd->mi[0]->mbmi.segment_id_supertx = 0;
+#endif // CONFIG_SUPERTX
+ xd->mi[0]->mbmi.mode = DC_PRED;
+ xd->mi[0]->mbmi.tx_size =
+ use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
+ vp10_encode_intra_block_plane(x, bsize, 0, 0);
+ this_error = vpx_get_mb_ss(x->plane[0].src_diff);
+
+ // Keep a record of blocks that have almost no intra error residual
+ // (i.e. are in effect completely flat and untextured in the intra
+ // domain). In natural videos this is uncommon, but it is much more
+ // common in animations, graphics and screen content, so may be used
+ // as a signal to detect these types of content.
+ if (this_error < UL_INTRA_THRESH) {
+ ++intra_skip_count;
+ } else if ((mb_col > 0) && (image_data_start_row == INVALID_ROW)) {
+ image_data_start_row = mb_row;
+ }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ switch (cm->bit_depth) {
+ case VPX_BITS_8: break;
+ case VPX_BITS_10: this_error >>= 4; break;
+ case VPX_BITS_12: this_error >>= 8; break;
+ default:
+ assert(0 &&
+ "cm->bit_depth should be VPX_BITS_8, "
+ "VPX_BITS_10 or VPX_BITS_12");
+ return;
+ }
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ vpx_clear_system_state();
+ log_intra = log(this_error + 1.0);
+ if (log_intra < 10.0)
+ intra_factor += 1.0 + ((10.0 - log_intra) * 0.05);
+ else
+ intra_factor += 1.0;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ level_sample = CONVERT_TO_SHORTPTR(x->plane[0].src.buf)[0];
+ else
+ level_sample = x->plane[0].src.buf[0];
+#else
+ level_sample = x->plane[0].src.buf[0];
+#endif
+ if ((level_sample < DARK_THRESH) && (log_intra < 9.0))
+ brightness_factor += 1.0 + (0.01 * (DARK_THRESH - level_sample));
+ else
+ brightness_factor += 1.0;
+
+ // Intrapenalty below deals with situations where the intra and inter
+ // error scores are very low (e.g. a plain black frame).
+ // We do not have special cases in first pass for 0,0 and nearest etc so
+ // all inter modes carry an overhead cost estimate for the mv.
+ // When the error score is very low this causes us to pick all or lots of
+ // INTRA modes and throw lots of key frames.
+ // This penalty adds a cost matching that of a 0,0 mv to the intra case.
+ this_error += intrapenalty;
+
+ // Accumulate the intra error.
+ intra_error += (int64_t)this_error;
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ // initialization
+ cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
+ }
+#endif
+
+ // Set up limit values for motion vectors to prevent them extending
+ // outside the UMV borders.
+ x->mv_col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
+ x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;
+
+ // Other than for the first frame do a motion search.
+ if (cm->current_video_frame > 0) {
+ int tmp_err, motion_error, raw_motion_error;
+ // Assume 0,0 motion with no mv overhead.
+ MV mv = { 0, 0 }, tmp_mv = { 0, 0 };
+ struct buf_2d unscaled_last_source_buf_2d;
+
+ xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ motion_error = highbd_get_prediction_error(
+ bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
+ } else {
+ motion_error = get_prediction_error(bsize, &x->plane[0].src,
+ &xd->plane[0].pre[0]);
+ }
+#else
+ motion_error =
+ get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // Compute the motion error of the 0,0 motion using the last source
+ // frame as the reference. Skip the further motion search on
+ // reconstructed frame if this error is small.
+ unscaled_last_source_buf_2d.buf =
+ cpi->unscaled_last_source->y_buffer + recon_yoffset;
+ unscaled_last_source_buf_2d.stride =
+ cpi->unscaled_last_source->y_stride;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ raw_motion_error = highbd_get_prediction_error(
+ bsize, &x->plane[0].src, &unscaled_last_source_buf_2d, xd->bd);
+ } else {
+ raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
+ &unscaled_last_source_buf_2d);
+ }
+#else
+ raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
+ &unscaled_last_source_buf_2d);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // TODO(pengchong): Replace the hard-coded threshold
+ if (raw_motion_error > 25) {
+ // Test last reference frame using the previous best mv as the
+ // starting point (best reference) for the search.
+ first_pass_motion_search(cpi, x, &best_ref_mv, &mv, &motion_error);
+
+ // If the current best reference mv is not centered on 0,0 then do a
+ // 0,0 based search as well.
+ if (!is_zero_mv(&best_ref_mv)) {
+ tmp_err = INT_MAX;
+ first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &tmp_err);
+
+ if (tmp_err < motion_error) {
+ motion_error = tmp_err;
+ mv = tmp_mv;
+ }
+ }
+
+ // Search in an older reference frame.
+ if ((cm->current_video_frame > 1) && gld_yv12 != NULL) {
+ // Assume 0,0 motion with no mv overhead.
+ int gf_motion_error;
+
+ xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ gf_motion_error = highbd_get_prediction_error(
+ bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
+ } else {
+ gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
+ &xd->plane[0].pre[0]);
+ }
+#else
+ gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
+ &xd->plane[0].pre[0]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv,
+ &gf_motion_error);
+
+ if (gf_motion_error < motion_error && gf_motion_error < this_error)
+ ++second_ref_count;
+
+ // Reset to last frame as reference buffer.
+ xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
+ xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
+ xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;
+
+ // In accumulating a score for the older reference frame take the
+ // best of the motion predicted score and the intra coded error
+ // (just as will be done for) accumulation of "coded_error" for
+ // the last frame.
+ if (gf_motion_error < this_error)
+ sr_coded_error += gf_motion_error;
+ else
+ sr_coded_error += this_error;
+ } else {
+ sr_coded_error += motion_error;
+ }
+ } else {
+ sr_coded_error += motion_error;
+ }
+
+ // Start by assuming that intra mode is best.
+ best_ref_mv.row = 0;
+ best_ref_mv.col = 0;
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ // intra prediction statistics
+ cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
+ cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_DCINTRA_MASK;
+ cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK;
+ if (this_error > FPMB_ERROR_LARGE_TH) {
+ cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK;
+ } else if (this_error < FPMB_ERROR_SMALL_TH) {
+ cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK;
+ }
+ }
+#endif
+
+ if (motion_error <= this_error) {
+ vpx_clear_system_state();
+
+ // Keep a count of cases where the inter and intra were very close
+ // and very low. This helps with scene cut detection for example in
+ // cropped clips with black bars at the sides or top and bottom.
+ if (((this_error - intrapenalty) * 9 <= motion_error * 10) &&
+ (this_error < (2 * intrapenalty))) {
+ neutral_count += 1.0;
+ // Also track cases where the intra is not much worse than the inter
+ // and use this in limiting the GF/arf group length.
+ } else if ((this_error > NCOUNT_INTRA_THRESH) &&
+ (this_error < (NCOUNT_INTRA_FACTOR * motion_error))) {
+ neutral_count +=
+ (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error);
+ }
+
+ mv.row *= 8;
+ mv.col *= 8;
+ this_error = motion_error;
+ xd->mi[0]->mbmi.mode = NEWMV;
+ xd->mi[0]->mbmi.mv[0].as_mv = mv;
+ xd->mi[0]->mbmi.tx_size = TX_4X4;
+ xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME;
+ xd->mi[0]->mbmi.ref_frame[1] = NONE;
+ vp10_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize);
+ vp10_encode_sby_pass1(x, bsize);
+ sum_mvr += mv.row;
+ sum_mvr_abs += abs(mv.row);
+ sum_mvc += mv.col;
+ sum_mvc_abs += abs(mv.col);
+ sum_mvrs += mv.row * mv.row;
+ sum_mvcs += mv.col * mv.col;
+ ++intercount;
+
+ best_ref_mv = mv;
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ // inter prediction statistics
+ cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
+ cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_DCINTRA_MASK;
+ cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK;
+ if (this_error > FPMB_ERROR_LARGE_TH) {
+ cpi->twopass.frame_mb_stats_buf[mb_index] |=
+ FPMB_ERROR_LARGE_MASK;
+ } else if (this_error < FPMB_ERROR_SMALL_TH) {
+ cpi->twopass.frame_mb_stats_buf[mb_index] |=
+ FPMB_ERROR_SMALL_MASK;
+ }
+ }
+#endif
+
+ if (!is_zero_mv(&mv)) {
+ ++mvcount;
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ cpi->twopass.frame_mb_stats_buf[mb_index] &=
+ ~FPMB_MOTION_ZERO_MASK;
+ // check estimated motion direction (mv is a plain MV: row/col)
+ if (mv.col > 0 && mv.col >= abs(mv.row)) {
+ // right direction
+ cpi->twopass.frame_mb_stats_buf[mb_index] |=
+ FPMB_MOTION_RIGHT_MASK;
+ } else if (mv.row < 0 &&
+ abs(mv.row) >= abs(mv.col)) {
+ // up direction
+ cpi->twopass.frame_mb_stats_buf[mb_index] |=
+ FPMB_MOTION_UP_MASK;
+ } else if (mv.col < 0 &&
+ abs(mv.col) >= abs(mv.row)) {
+ // left direction
+ cpi->twopass.frame_mb_stats_buf[mb_index] |=
+ FPMB_MOTION_LEFT_MASK;
+ } else {
+ // down direction
+ cpi->twopass.frame_mb_stats_buf[mb_index] |=
+ FPMB_MOTION_DOWN_MASK;
+ }
+ }
+#endif
+
+ // Non-zero vector, was it different from the last non zero vector?
+ if (!is_equal_mv(&mv, &lastmv)) ++new_mv_count;
+ lastmv = mv;
+
+ // Does the row vector point inwards or outwards?
+ if (mb_row < cm->mb_rows / 2) {
+ if (mv.row > 0)
+ --sum_in_vectors;
+ else if (mv.row < 0)
+ ++sum_in_vectors;
+ } else if (mb_row > cm->mb_rows / 2) {
+ if (mv.row > 0)
+ ++sum_in_vectors;
+ else if (mv.row < 0)
+ --sum_in_vectors;
+ }
+
+ // Does the col vector point inwards or outwards?
+ if (mb_col < cm->mb_cols / 2) {
+ if (mv.col > 0)
+ --sum_in_vectors;
+ else if (mv.col < 0)
+ ++sum_in_vectors;
+ } else if (mb_col > cm->mb_cols / 2) {
+ if (mv.col > 0)
+ ++sum_in_vectors;
+ else if (mv.col < 0)
+ --sum_in_vectors;
+ }
+ }
+ }
+ } else {
+ sr_coded_error += (int64_t)this_error;
+ }
+ coded_error += (int64_t)this_error;
+
+ // Adjust to the next column of MBs.
+ x->plane[0].src.buf += 16;
+ x->plane[1].src.buf += uv_mb_height;
+ x->plane[2].src.buf += uv_mb_height;
+
+ recon_yoffset += 16;
+ recon_uvoffset += uv_mb_height;
+ }
+
+ // Adjust to the next row of MBs.
+ x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols;
+ x->plane[1].src.buf +=
+ uv_mb_height * x->plane[1].src.stride - uv_mb_height * cm->mb_cols;
+ x->plane[2].src.buf +=
+ uv_mb_height * x->plane[1].src.stride - uv_mb_height * cm->mb_cols;
+
+ vpx_clear_system_state();
+ }
+
+ // Clamp the image start to rows/2. This number of rows is discarded top
+ // and bottom as dead data so rows / 2 means the frame is blank.
+ if ((image_data_start_row > cm->mb_rows / 2) ||
+ (image_data_start_row == INVALID_ROW)) {
+ image_data_start_row = cm->mb_rows / 2;
+ }
+ // Exclude any image dead zone
+ if (image_data_start_row > 0) {
+ intra_skip_count =
+ VPXMAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2));
+ }
+
+ {
+ FIRSTPASS_STATS fps;
+ // The minimum error here ensures some bit allocation to frames even
+ // in static regions. The allocation per MB declines for larger formats
+ // where the typical "real" energy per MB also falls.
+ // Initial estimate here uses sqrt(mbs) to define the min_err, where the
+ // number of mbs is proportional to the image area.
+ const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
+ ? cpi->initial_mbs
+ : cpi->common.MBs;
+ const double min_err = 200 * sqrt(num_mbs);
+
+ intra_factor = intra_factor / (double)num_mbs;
+ brightness_factor = brightness_factor / (double)num_mbs;
+ fps.weight = intra_factor * brightness_factor;
+
+ fps.frame = cm->current_video_frame;
+ fps.coded_error = (double)(coded_error >> 8) + min_err;
+ fps.sr_coded_error = (double)(sr_coded_error >> 8) + min_err;
+ fps.intra_error = (double)(intra_error >> 8) + min_err;
+ fps.count = 1.0;
+ fps.pcnt_inter = (double)intercount / num_mbs;
+ fps.pcnt_second_ref = (double)second_ref_count / num_mbs;
+ fps.pcnt_neutral = (double)neutral_count / num_mbs;
+ fps.intra_skip_pct = (double)intra_skip_count / num_mbs;
+ fps.inactive_zone_rows = (double)image_data_start_row;
+ fps.inactive_zone_cols = (double)0; // TODO(paulwilkins): fix
+
+ if (mvcount > 0) {
+ fps.MVr = (double)sum_mvr / mvcount;
+ fps.mvr_abs = (double)sum_mvr_abs / mvcount;
+ fps.MVc = (double)sum_mvc / mvcount;
+ fps.mvc_abs = (double)sum_mvc_abs / mvcount;
+ fps.MVrv =
+ ((double)sum_mvrs - ((double)sum_mvr * sum_mvr / mvcount)) / mvcount;
+ fps.MVcv =
+ ((double)sum_mvcs - ((double)sum_mvc * sum_mvc / mvcount)) / mvcount;
+ fps.mv_in_out_count = (double)sum_in_vectors / (mvcount * 2);
+ fps.new_mv_count = new_mv_count;
+ fps.pcnt_motion = (double)mvcount / num_mbs;
+ } else {
+ fps.MVr = 0.0;
+ fps.mvr_abs = 0.0;
+ fps.MVc = 0.0;
+ fps.mvc_abs = 0.0;
+ fps.MVrv = 0.0;
+ fps.MVcv = 0.0;
+ fps.mv_in_out_count = 0.0;
+ fps.new_mv_count = 0.0;
+ fps.pcnt_motion = 0.0;
+ }
+
+ // TODO(paulwilkins): Handle the case when duration is set to 0, or
+ // something less than the full time between subsequent values of
+ // cpi->source_time_stamp.
+ fps.duration = (double)(source->ts_end - source->ts_start);
+
+ // Don't want to do output stats with a stack variable!
+ twopass->this_frame_stats = fps;
+ output_stats(&twopass->this_frame_stats, cpi->output_pkt_list);
+ accumulate_stats(&twopass->total_stats, &fps);
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ output_fpmb_stats(twopass->frame_mb_stats_buf, cm, cpi->output_pkt_list);
+ }
+#endif
+ }
+
+ // Copy the previous Last Frame back into gf and arf buffers if
+ // the prediction is good enough... but also don't allow it to lag too far.
+ if ((twopass->sr_update_lag > 3) ||
+ ((cm->current_video_frame > 0) &&
+ (twopass->this_frame_stats.pcnt_inter > 0.20) &&
+ ((twopass->this_frame_stats.intra_error /
+ DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) {
+ if (gld_yv12 != NULL) {
+#if CONFIG_EXT_REFS
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
+ cm->ref_frame_map[cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]]);
+#else
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
+ cm->ref_frame_map[cpi->lst_fb_idx]);
+#endif // CONFIG_EXT_REFS
+ }
+ twopass->sr_update_lag = 1;
+ } else {
+ ++twopass->sr_update_lag;
+ }
+
+ vpx_extend_frame_borders(new_yv12);
+
+// The frame we just compressed now becomes the last frame.
+#if CONFIG_EXT_REFS
+ ref_cnt_fb(pool->frame_bufs,
+ &cm->ref_frame_map[cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]],
+ cm->new_fb_idx);
+#else
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
+ cm->new_fb_idx);
+#endif // CONFIG_EXT_REFS
+
+ // Special case for the first frame. Copy into the GF buffer as a second
+ // reference.
+ if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX) {
+#if CONFIG_EXT_REFS
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
+ cm->ref_frame_map[cpi->lst_fb_idxes[LAST_FRAME - LAST_FRAME]]);
+#else
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
+ cm->ref_frame_map[cpi->lst_fb_idx]);
+#endif // CONFIG_EXT_REFS
+ }
+
+ // Use this to see what the first pass reconstruction looks like.
+ if (0) {
+ char filename[512];
+ FILE *recon_file;
+ snprintf(filename, sizeof(filename), "enc%04d.yuv",
+ (int)cm->current_video_frame);
+
+ recon_file = fopen(filename, cm->current_video_frame == 0 ? "wb" : "ab");
+
+ // fopen() can fail; never pass a NULL stream to fwrite()/fclose().
+ if (recon_file != NULL) {
+ (void)fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file);
+ fclose(recon_file);
+ }
+ }
+
+ ++cm->current_video_frame;
+}
+
+static double calc_correction_factor(double err_per_mb, double err_divisor,
+                                     double pt_low, double pt_high, int q,
+                                     vpx_bit_depth_t bit_depth) {
+  // Raises (err_per_mb / err_divisor) to a quantizer dependent exponent that
+  // is capped at pt_high, then passes the result to fclamp(.., 0.05, 5.0).
+  const double base = err_per_mb / err_divisor;
+  const double exponent =
+      VPXMIN(vp10_convert_qindex_to_q(q, bit_depth) * 0.01 + pt_low, pt_high);
+
+  // Debug check: a negative base is rejected when the exponent is below 1.
+  assert(!(exponent < 1.0) || base >= 0.0);
+
+  return fclamp(pow(base, exponent), 0.05, 5.0);
+}
+
+#define ERR_DIVISOR 100.0
+static int get_twopass_worst_quality(const VP10_COMP *cpi,
+                                     const double section_err,
+                                     double inactive_zone,
+                                     int section_target_bandwidth,
+                                     double group_weight_factor) {
+  // Picks the lowest q in [best_quality, worst_quality) whose estimated bits
+  // per MB fit the section target; worst_quality is used when none does.
+  const RATE_CONTROL *const rc = &cpi->rc;
+  const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+  int num_mbs, active_mbs, target_norm_bits_per_mb, q;
+  double av_err_per_mb, speed_term, ediv_size_correction;
+
+  inactive_zone = fclamp(inactive_zone, 0.0, 1.0);
+
+  // No bits to spend: return the highest value allowed.
+  if (section_target_bandwidth <= 0) return rc->worst_quality;
+
+  num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
+                                                   : cpi->common.MBs;
+  active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
+  av_err_per_mb = section_err / active_mbs;
+  speed_term = 1.0 + 0.04 * oxcf->speed;
+  target_norm_bits_per_mb =
+      ((uint64_t)section_target_bandwidth << BPER_MB_NORMBITS) / active_mbs;
+
+  // Larger image formats are expected to be a little harder to code
+  // relatively given the same prediction error score. This in part at
+  // least relates to the increased size and hence coding overheads of
+  // motion vectors. Some account of this is made through adjustment of
+  // the error divisor.
+  ediv_size_correction =
+      VPXMAX(0.2, VPXMIN(5.0, get_linear_size_factor(cpi)));
+  if (ediv_size_correction < 1.0)
+    ediv_size_correction = -(1.0 / ediv_size_correction);
+  ediv_size_correction *= 4.0;
+
+  // Try and pick a max Q that will be high enough to encode the
+  // content at the given rate.
+  for (q = rc->best_quality; q < rc->worst_quality; ++q) {
+    const double factor = calc_correction_factor(
+        av_err_per_mb, ERR_DIVISOR - ediv_size_correction, FACTOR_PT_LOW,
+        FACTOR_PT_HIGH, q, cpi->common.bit_depth);
+    const int bits_per_mb = vp10_rc_bits_per_mb(
+        INTER_FRAME, q, factor * speed_term * group_weight_factor,
+        cpi->common.bit_depth);
+    if (bits_per_mb <= target_norm_bits_per_mb) break;
+  }
+
+  // Restriction on active max q for constrained quality mode.
+  if (cpi->oxcf.rc_mode == VPX_CQ) q = VPXMAX(q, oxcf->cq_level);
+  return q;
+}
+
+static void setup_rf_level_maxq(VP10_COMP *cpi) {
+  RATE_CONTROL *const rc = &cpi->rc;
+  int lvl;  // Max q per level: worst_quality + delta, floored at best.
+  for (lvl = INTER_NORMAL; lvl < RATE_FACTOR_LEVELS; ++lvl) {
+    const int dq = vp10_frame_type_qdelta(cpi, lvl, rc->worst_quality);
+    rc->rf_level_maxq[lvl] = VPXMAX(rc->worst_quality + dq, rc->best_quality);
+  }
+}
+
+void vp10_init_subsampling(VP10_COMP *cpi) {
+  const VP10_COMMON *const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
+  const int width_x16 = cm->width * 16;
+  const int height_x16 = cm->height * 16;
+  int step;
+
+  // Note: Frames with odd-sized dimensions may result from this scaling.
+  for (step = 0; step < FRAME_SCALE_STEPS; ++step) {
+    rc->frame_width[step] = width_x16 / frame_scale_factor[step];
+    rc->frame_height[step] = height_x16 / frame_scale_factor[step];
+  }
+
+  setup_rf_level_maxq(cpi);
+}
+
+void vp10_calculate_coded_size(VP10_COMP *cpi, int *scaled_frame_width,
+                               int *scaled_frame_height) {
+  const int selected = cpi->rc.frame_size_selector;  // Current size index.
+  *scaled_frame_width = cpi->rc.frame_width[selected];
+  *scaled_frame_height = cpi->rc.frame_height[selected];
+}
+
+// Prepares the two-pass state for the second pass: loads the totals entry
+// found at stats_in_end, derives the initial frame rate and bit budget from
+// it, sums the modified error over every first pass entry and resets the
+// rate control monitors.
+void vp10_init_second_pass(VP10_COMP *cpi) {
+  const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+  TWO_PASS *const twopass = &cpi->twopass;
+  FIRSTPASS_STATS *const totals = &twopass->total_stats;
+  const FIRSTPASS_STATS *s;
+  double frame_rate, avg_error, modified_error_total = 0.0;
+
+  zero_stats(totals);
+  zero_stats(&twopass->total_left_stats);
+
+  // Without a stats_in_end entry there is nothing to initialise from.
+  if (!twopass->stats_in_end) return;
+
+  // The entry at stats_in_end is copied in as the running totals.
+  *totals = *twopass->stats_in_end;
+  twopass->total_left_stats = *totals;
+
+  // Each frame can have a different duration, as the frame rate in the source
+  // isn't guaranteed to be constant. The frame rate prior to the first frame
+  // encoded in the second pass is a guess. However, the sum duration is not.
+  // It is calculated based on the actual durations of all frames from the
+  // first pass.
+  // NOTE(review): totals->duration is an unguarded divisor here; a zero
+  // duration would produce a non-finite frame rate.
+  frame_rate = 10000000.0 * totals->count / totals->duration;
+  vp10_new_framerate(cpi, frame_rate);
+  twopass->bits_left =
+      (int64_t)(totals->duration * oxcf->target_bandwidth / 10000000.0);
+
+  // This variable monitors how far behind the second ref update is lagging.
+  twopass->sr_update_lag = 1;
+
+  // Scan the first pass file and calculate a modified total error based upon
+  // the bias/power function used to allocate bits.
+  avg_error = totals->coded_error / DOUBLE_DIVIDE_CHECK(totals->count);
+  twopass->modified_error_min =
+      (avg_error * oxcf->two_pass_vbrmin_section) / 100;
+  twopass->modified_error_max =
+      (avg_error * oxcf->two_pass_vbrmax_section) / 100;
+  for (s = twopass->stats_in; s < twopass->stats_in_end; ++s)
+    modified_error_total += calculate_modified_err(cpi, twopass, oxcf, s);
+  twopass->modified_error_left = modified_error_total;
+
+  // Reset the vbr bits off target counters
+  cpi->rc.vbr_bits_off_target = 0;
+  cpi->rc.vbr_bits_off_target_fast = 0;
+
+  cpi->rc.rate_error_estimate = 0;
+
+  // Static sequence monitor variables.
+  twopass->kf_zeromotion_pct = 100;
+  twopass->last_kfgroup_zeromotion_pct = 100;
+
+  if (oxcf->resize_mode != RESIZE_NONE) {
+    vp10_init_subsampling(cpi);
+  }
+}
+
+#define SR_DIFF_PART 0.0015
+#define MOTION_AMP_PART 0.003
+#define INTRA_PART 0.005
+#define DEFAULT_DECAY_LIMIT 0.75
+#define LOW_SR_DIFF_TRHESH 0.1
+#define SR_DIFF_MAX 128.0
+
+static double get_sr_decay_rate(const VP10_COMP *cpi,
+                                const FIRSTPASS_STATS *frame) {
+  // Decay is 1.0 unless sr_coded_error exceeds coded_error by more than
+  // LOW_SR_DIFF_TRHESH per MB; it is then reduced by weighted error gap,
+  // motion amplitude and intra terms. The result never drops below
+  // min(DEFAULT_DECAY_LIMIT, modified inter fraction).
+  const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
+                                                             : cpi->common.MBs;
+  const double motion_amplitude =
+      frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2);
+  double gap = (frame->sr_coded_error - frame->coded_error) / num_mbs;
+  double inter_pct = frame->pcnt_inter;
+  double intra_pct, decay = 1.0;
+
+  if ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <
+      (double)NCOUNT_FRAME_II_THRESH)
+    inter_pct = frame->pcnt_inter - frame->pcnt_neutral;
+  intra_pct = 100 * (1.0 - inter_pct);
+
+  if (gap > LOW_SR_DIFF_TRHESH) {
+    gap = VPXMIN(gap, SR_DIFF_MAX);
+    decay = 1.0 - (SR_DIFF_PART * gap) - (MOTION_AMP_PART * motion_amplitude) -
+            (INTRA_PART * intra_pct);
+  }
+  return VPXMAX(decay, VPXMIN(DEFAULT_DECAY_LIMIT, inter_pct));
+}
+
+// Returns the smaller of the get_sr_decay_rate() result and the share of
+// inter coded blocks without motion (pcnt_inter - pcnt_motion).
+static double get_zero_motion_factor(const VP10_COMP *cpi,
+                                     const FIRSTPASS_STATS *frame) {
+  const double decay = get_sr_decay_rate(cpi, frame);
+  const double still_pct = frame->pcnt_inter - frame->pcnt_motion;
+  return VPXMIN(decay, still_pct);
+}
+
+#define ZM_POWER_FACTOR 0.75
+
+static double get_prediction_decay_rate(const VP10_COMP *cpi,
+                                        const FIRSTPASS_STATS *next_frame) {
+  const double decay = get_sr_decay_rate(cpi, next_frame);
+  const double still_pct = next_frame->pcnt_inter - next_frame->pcnt_motion;
+  const double zm_factor = 0.95 * pow(still_pct, ZM_POWER_FACTOR);
+  // Move the decay rate towards 1.0 in proportion to the zero motion factor.
+  const double blended = decay + ((1.0 - decay) * zm_factor);
+
+  return VPXMAX(zm_factor, blended);
+}
+
+// Tests for a complex transition that is followed by a static section, for
+// example a slide show with a fade between slides. Used to help with more
+// optimal kf and gf positioning. Returns 1 when detected, 0 otherwise.
+static int detect_transition_to_still(VP10_COMP *cpi, int frame_interval,
+                                      int still_interval,
+                                      double loop_decay_rate,
+                                      double last_decay_rate) {
+  const TWO_PASS *const twopass = &cpi->twopass;
+  const RATE_CONTROL *const rc = &cpi->rc;
+  int j;
+
+  // Only a candidate when the interval exceeds min_gf_interval and a very
+  // still frame (decay >= 0.999) follows motion (previous decay < 0.9),
+  // e.g. a static image after a fade or other transition instead of a
+  // clean scene cut.
+  if (!(frame_interval > rc->min_gf_interval)) return 0;
+  if (!(loop_decay_rate >= 0.999) || !(last_decay_rate < 0.9)) return 0;
+
+  // Look ahead a few frames to see if static condition persists...
+  // The scan stops early at the end of the stats or at the first frame
+  // whose zero motion share (pcnt_inter - pcnt_motion) is below 0.999.
+  for (j = 0; j < still_interval; ++j) {
+    const FIRSTPASS_STATS *const stats = &twopass->stats_in[j];
+    if (stats >= twopass->stats_in_end) break;
+
+    if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break;
+  }
+
+  // Only an uninterrupted run of still_interval frames signals a transition.
+  return j == still_interval;
+}
+
+// Detects a flash by looking at the frame that follows it: such a frame
+// shows a high pcnt_second_ref score relative to pcnt_inter. The offset
+// passed in should therefore point at the frame after the suspected flash.
+// Returns 1 when a flash is indicated, 0 otherwise (including when no stats
+// are available at that offset).
+static int detect_flash(const TWO_PASS *twopass, int offset) {
+  const FIRSTPASS_STATS *const stats = read_frame_stats(twopass, offset);
+
+  if (stats == NULL) return 0;
+
+  // A brief break in prediction (such as a flash) leaves later frames
+  // reasonably well predicted by an earlier, pre flash, frame. That recovery
+  // shows up as second reference usage that beats inter usage...
+  if (!(stats->pcnt_second_ref > stats->pcnt_inter)) return 0;
+
+  // ...and that covers at least half of the frame.
+  return stats->pcnt_second_ref >= 0.5;
+}
+
+// Folds one frame's first pass motion statistics into the running totals
+// used by the GF / arf boost calculation.
+//   mv_in_out:                 receives this frame's motion in/out value.
+//   mv_in_out_accumulator:     signed running sum of that value.
+//   abs_mv_in_out_accumulator: running sum of its magnitude.
+//   mv_ratio_accumulator:      running measure of how uniform (or
+//                              conversely how random) the motion field is.
+static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats,
+                                          double *mv_in_out,
+                                          double *mv_in_out_accumulator,
+                                          double *abs_mv_in_out_accumulator,
+                                          double *mv_ratio_accumulator) {
+  const double motion_pct = stats->pcnt_motion;
+  double row_term;
+  double col_term;
+
+  // Motion in / out of frame, weighted by the share of moving blocks.
+  *mv_in_out = stats->mv_in_out_count * motion_pct;
+  *mv_in_out_accumulator += *mv_in_out;
+  *abs_mv_in_out_accumulator += fabs(*mv_in_out);
+
+  // Frames with very little motion do not contribute to the ratio measure.
+  if (!(motion_pct > 0.05)) return;
+
+  // Ratio of abs(mv) to mv for each component, capped at the abs value.
+  row_term = fabs(stats->mvr_abs) / DOUBLE_DIVIDE_CHECK(fabs(stats->MVr));
+  if (!(row_term < stats->mvr_abs)) row_term = stats->mvr_abs;
+
+  col_term = fabs(stats->mvc_abs) / DOUBLE_DIVIDE_CHECK(fabs(stats->MVc));
+  if (!(col_term < stats->mvc_abs)) col_term = stats->mvc_abs;
+
+  *mv_ratio_accumulator += motion_pct * row_term;
+  *mv_ratio_accumulator += motion_pct * col_term;
+}
+
+#define BASELINE_ERR_PER_MB 1000.0
+// Computes the boost contribution of a single frame.
+//   this_frame:           first pass stats of the frame being scored.
+//   this_frame_mv_in_out: net motion into (+) or out of (-) the frame; the
+//                         expected range is -1.0 to +1.0.
+//   max_boost:            cap on the result before Q correction.
+// Returns the boost, limited to max_boost scaled by the Q correction.
+static double calc_frame_boost(VP10_COMP *cpi,
+                               const FIRSTPASS_STATS *this_frame,
+                               double this_frame_mv_in_out, double max_boost) {
+  const double q = vp10_convert_qindex_to_q(
+      cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth);
+  const double q_correction = VPXMIN((0.5 + (q * 0.015)), 1.5);
+  double boost;
+  int active_mbs;
+
+  // When resizing is in use the initial macroblock count is the reference.
+  if (cpi->oxcf.resize_mode != RESIZE_NONE)
+    active_mbs = cpi->initial_mbs;
+  else
+    active_mbs = cpi->common.MBs;
+
+  // Correct for any inactive region in the image.
+  active_mbs =
+      (int)VPXMAX(1, active_mbs * calculate_active_area(cpi, this_frame));
+
+  // The underlying boost factor is based on the inter error ratio.
+  boost = (BASELINE_ERR_PER_MB * active_mbs) /
+          DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
+  boost = boost * BOOST_FACTOR * q_correction;
+
+  // New data coming into the frame (e.g. zoom out) raises the boost. A net
+  // balance of motion out of the frame (zoom in) lowers it slightly; in the
+  // extreme case the boost is halved.
+  boost += boost * ((this_frame_mv_in_out > 0.0)
+                        ? (this_frame_mv_in_out * 2.0)
+                        : (this_frame_mv_in_out / 2.0));
+
+  return VPXMIN(boost, max_boost * q_correction);
+}
+
+// Accumulates the decayed frame boost over up to num_frames first pass
+// stats entries. Entry n is read at index first_index + n * step, so a step
+// of 1 scans forward and a step of -1 scans backward. The scan stops early
+// if read_frame_stats() has no entry for an index.
+static double accumulate_arf_boost_score(VP10_COMP *cpi, int first_index,
+                                         int step, int num_frames) {
+  const TWO_PASS *const twopass = &cpi->twopass;
+  double boost_score = 0.0;
+  double decay_accumulator = 1.0;
+  double this_frame_mv_in_out = 0.0;
+  // Running totals required by accumulate_frame_motion_stats(). Only
+  // this_frame_mv_in_out feeds the boost computed here.
+  double mv_ratio_accumulator = 0.0;
+  double mv_in_out_accumulator = 0.0;
+  double abs_mv_in_out_accumulator = 0.0;
+  int n;
+
+  for (n = 0; n < num_frames; ++n) {
+    const int index = first_index + n * step;
+    const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, index);
+    int flash_detected;
+    if (this_frame == NULL) break;
+
+    // Update the motion related elements to the boost calculation.
+    accumulate_frame_motion_stats(
+        this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
+        &abs_mv_in_out_accumulator, &mv_ratio_accumulator);
+
+    // We want to discount the flash frame itself and the recovery
+    // frame that follows as both will have poor scores.
+    flash_detected =
+        detect_flash(twopass, index) || detect_flash(twopass, index + 1);
+
+    // Accumulate the effect of prediction quality decay.
+    if (!flash_detected) {
+      decay_accumulator *= get_prediction_decay_rate(cpi, this_frame);
+      decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
+                              ? MIN_DECAY_FACTOR
+                              : decay_accumulator;
+    }
+
+    boost_score +=
+        decay_accumulator *
+        calc_frame_boost(cpi, this_frame, this_frame_mv_in_out, GF_MAX_BOOST);
+  }
+
+  return boost_score;
+}
+
+// Calculates the boost for an arf / golden frame at the given stats offset.
+//   offset:   stats index of the proposed arf / next gf position.
+//   f_frames: maximum number of frames to scan forward from offset.
+//   b_frames: maximum number of frames to scan backward from offset - 1.
+//   f_boost:  receives the (truncated) forward boost score.
+//   b_boost:  receives the (truncated) backward boost score.
+// Returns the sum of both scores, raised if necessary to at least
+// 20 per scanned frame and to at least MIN_ARF_GF_BOOST.
+static int calc_arf_boost(VP10_COMP *cpi, int offset, int f_frames,
+                          int b_frames, int *f_boost, int *b_boost) {
+  int arf_boost;
+
+  // Search forward from the proposed arf/next gf position.
+  *f_boost = (int)accumulate_arf_boost_score(cpi, offset, 1, f_frames);
+
+  // Search backward towards last gf position.
+  *b_boost = (int)accumulate_arf_boost_score(cpi, offset - 1, -1, b_frames);
+
+  arf_boost = (*f_boost + *b_boost);
+  if (arf_boost < ((b_frames + f_frames) * 20))
+    arf_boost = ((b_frames + f_frames) * 20);
+  arf_boost = VPXMAX(arf_boost, MIN_ARF_GF_BOOST);
+
+  return arf_boost;
+}
+
+// Calculate a section intra ratio used in setting max loop filter.
+static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin,
+ const FIRSTPASS_STATS *end,
+ int section_length) {
+ const FIRSTPASS_STATS *s = begin;
+ double intra_error = 0.0;
+ double coded_error = 0.0;
+ int i = 0;
+
+ while (s < end && i < section_length) {
+ intra_error += s->intra_error;
+ coded_error += s->coded_error;
+ ++s;
+ ++i;
+ }
+
+ return (int)(intra_error / DOUBLE_DIVIDE_CHECK(coded_error));
+}
+
+// Works out how many bits this GF/ARF group receives as a whole.
+// The group gets the share of the remaining key frame group bits that
+// matches its share (gf_group_err) of the remaining key frame group error.
+// The result is kept within [0, kf_group_bits] and is further limited to
+// the per frame maximum multiplied by rc->baseline_gf_interval.
+static int64_t calculate_total_gf_group_bits(VP10_COMP *cpi,
+                                             double gf_group_err) {
+  const RATE_CONTROL *const rc = &cpi->rc;
+  const TWO_PASS *const twopass = &cpi->twopass;
+  const int max_bits = frame_max_bits(rc, &cpi->oxcf);
+  int64_t group_bits = 0;
+  int64_t variability_cap;
+
+  // Only apportion bits when there are both bits and error left to share.
+  if ((twopass->kf_group_bits > 0) && (twopass->kf_group_error_left > 0)) {
+    group_bits = (int64_t)(twopass->kf_group_bits *
+                           (gf_group_err / twopass->kf_group_error_left));
+  }
+
+  // Clamp odd edge cases.
+  if (group_bits < 0) {
+    group_bits = 0;
+  } else if (group_bits > twopass->kf_group_bits) {
+    group_bits = twopass->kf_group_bits;
+  }
+
+  // Clip based on the user supplied data rate variability limit.
+  variability_cap = (int64_t)max_bits * rc->baseline_gf_interval;
+  if (group_bits > variability_cap) group_bits = variability_cap;
+
+  return group_bits;
+}
+
+// Calculate the number of extra bits to assign to boosted frames in a group.
+//   frame_count:      number of frames in the group; each frame counts as
+//                     100 allocation units.
+//   boost:            boost value, counted as that many allocation units.
+//   total_group_bits: bits available to the whole group.
+// Returns boost / (frame_count * 100 + boost) of total_group_bits, or 0 when
+// any input is zero or negative.
+static int calculate_boost_bits(int frame_count, int boost,
+                                int64_t total_group_bits) {
+  int allocation_chunks;
+
+  // return 0 for invalid inputs (could arise e.g. through rounding errors).
+  // A negative boost is rejected as well: it could otherwise drive
+  // allocation_chunks to zero (division by zero) or below zero.
+  if ((boost <= 0) || (total_group_bits <= 0) || (frame_count <= 0)) return 0;
+
+  allocation_chunks = (frame_count * 100) + boost;
+
+  // Prevent overflow in the product below by scaling both terms of the
+  // ratio down by the same divisor when the boost is large.
+  if (boost > 1023) {
+    int divisor = boost >> 10;
+    boost /= divisor;
+    allocation_chunks /= divisor;
+  }
+
+  // Calculate the number of extra bits for use in the boosted frame or frames.
+  return VPXMAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks),
+                0);
+}
+
+#if !CONFIG_EXT_REFS
+// Current limit on maximum number of active arfs in a GF/ARF group.
+#define MAX_ACTIVE_ARFS 2
+#define ARF_SLOT1 2
+#define ARF_SLOT2 3
+// Supplies the buffer slot to use for each active arf. The choice is routed
+// through this function because, although the slots are fixed at present,
+// that may change as part of the integration with other codec features that
+// swap buffers around. arf_buffer_indices must have room for
+// MAX_ACTIVE_ARFS entries.
+static void get_arf_buffer_indices(unsigned char *arf_buffer_indices) {
+  static const unsigned char arf_slots[MAX_ACTIVE_ARFS] = { ARF_SLOT1,
+                                                            ARF_SLOT2 };
+  int slot;
+
+  for (slot = 0; slot < MAX_ACTIVE_ARFS; ++slot)
+    arf_buffer_indices[slot] = arf_slots[slot];
+}
+#endif
+/* Fills in the schedule for the coming GF/ARF group in cpi->twopass.gf_group:
+ * update_type, rf_level, arf_update_idx / arf_ref_idx and bit_allocation for
+ * each slot, plus the bi-predictive fields when CONFIG_EXT_REFS is set.
+ *   gf_group_bits: total bits available to the group.
+ *   group_error:   summed modified error used to apportion bits per frame.
+ *   gf_arf_bits:   bits set aside for the boosted golden / alt-ref frame.
+ * First pass stats are consumed through input_stats() while the schedule is
+ * built, so the stats read position has moved when this function returns.
+ * It also records cpi->multi_arf_enabled in cpi->multi_arf_last_grp_enabled.
+ */
+static void allocate_gf_group_bits(VP10_COMP *cpi, int64_t gf_group_bits,
+ double group_error, int gf_arf_bits) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+ TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
+ FIRSTPASS_STATS frame_stats;
+ int i;
+ int frame_index = 0;
+ int target_frame_size;
+ int key_frame;
+ const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
+ int64_t total_group_bits = gf_group_bits;
+ double modified_err = 0.0;
+ double err_fraction;
+ int mid_boost_bits = 0;
+#if !CONFIG_EXT_REFS
+ int mid_frame_idx;
+ unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS];
+#endif
+#if CONFIG_EXT_REFS
+ // The use of bi-predictive frames is only enabled when the following 3
+ // conditions are met:
+ // (1) Alt-ref is enabled;
+ // (2) The bi-predictive group interval is at least 2; and
+ // (3) The bi-predictive group interval is strictly smaller than the
+ // golden group interval.
+ const int is_bipred_enabled =
+ rc->source_alt_ref_pending && rc->bipred_group_interval &&
+ rc->bipred_group_interval <=
+ (rc->baseline_gf_interval - rc->source_alt_ref_pending);
+ int bipred_group_end = 0;
+ int bipred_frame_index = 0;
+ int arf_pos[MAX_EXT_ARFS + 1];
+ const unsigned char ext_arf_interval =
+ (unsigned char)(rc->baseline_gf_interval / (cpi->num_extra_arfs + 1) - 1);
+ int which_arf = cpi->num_extra_arfs;
+ int subgroup_interval[MAX_EXT_ARFS + 1];
+ int ext_arf_boost[MAX_EXT_ARFS];
+ int is_sg_bipred_enabled = is_bipred_enabled;
+ int accumulative_subgroup_interval = 0;
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+ vp10_zero_array(ext_arf_boost, MAX_EXT_ARFS);
+#endif
+
+ key_frame = cpi->common.frame_type == KEY_FRAME;
+
+#if !CONFIG_EXT_REFS
+ get_arf_buffer_indices(arf_buffer_indices);
+#endif
+
+ // For key frames the frame target rate is already set and it
+ // is also the golden frame.
+ if (!key_frame) {
+ if (rc->source_alt_ref_active) {
+ gf_group->update_type[frame_index] = OVERLAY_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ gf_group->bit_allocation[frame_index] = 0;
+ } else {
+ gf_group->update_type[frame_index] = GF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
+ gf_group->bit_allocation[frame_index] = gf_arf_bits;
+ }
+#if CONFIG_EXT_REFS
+ gf_group->arf_update_idx[frame_index] = 0;
+ gf_group->arf_ref_idx[frame_index] = 0;
+#else
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
+ gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
+#endif
+ // Step over the golden frame / overlay frame
+ if (EOF == input_stats(twopass, &frame_stats)) return;
+ }
+
+#if CONFIG_EXT_REFS
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+#endif // CONFIG_EXT_REFS
+
+ // Deduct the boost bits for arf (or gf if it is not a key frame)
+ // from the group total.
+ if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits;
+
+ frame_index++;
+
+#if CONFIG_EXT_REFS
+ bipred_frame_index++;
+#endif // CONFIG_EXT_REFS
+
+ // Store the bits to spend on the ARF if there is one.
+ if (rc->source_alt_ref_pending) {
+ gf_group->update_type[frame_index] = ARF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
+ gf_group->bit_allocation[frame_index] = gf_arf_bits;
+
+ gf_group->arf_src_offset[frame_index] =
+ (unsigned char)(rc->baseline_gf_interval - 1);
+
+#if CONFIG_EXT_REFS
+ gf_group->arf_update_idx[frame_index] = 0;
+ gf_group->arf_ref_idx[frame_index] = 0;
+#else
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
+ gf_group->arf_ref_idx[frame_index] =
+ arf_buffer_indices[cpi->multi_arf_last_grp_enabled &&
+ rc->source_alt_ref_active];
+#endif // CONFIG_EXT_REFS
+#if CONFIG_EXT_REFS
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+// NOTE: "bipred_frame_index" stays unchanged for ARF_UPDATE frames.
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+ // Work out the ARFs' positions in this gf group
+ // NOTE(weitinglin): ALT_REFs' are indexed inversely, but coded in display
+ // order (except for the original ARF). In the example of three ALT_REF's,
+ // We index ALTREF's as: KEY ----- ALT2 ----- ALT1 ----- ALT0
+ // but code them in the following order:
+ // KEY-ALT0-ALT2 ----- OVERLAY2-ALT1 ----- OVERLAY1 ----- OVERLAY0
+ arf_pos[0] =
+ frame_index + cpi->num_extra_arfs + gf_group->arf_src_offset[1] + 1;
+ for (i = 0; i < cpi->num_extra_arfs; ++i) {
+ arf_pos[i + 1] =
+ frame_index + (cpi->num_extra_arfs - i) * (ext_arf_interval + 2);
+ subgroup_interval[i] = arf_pos[i] - arf_pos[i + 1] - (i == 0 ? 1 : 2);
+ }
+ subgroup_interval[cpi->num_extra_arfs] = arf_pos[cpi->num_extra_arfs] -
+ frame_index -
+ (cpi->num_extra_arfs == 0 ? 1 : 2);
+#endif // CONFIG_EXT_REFS
+
+ ++frame_index;
+
+#if CONFIG_EXT_REFS
+ // Insert an extra ARF
+ if (cpi->num_extra_arfs) {
+ gf_group->update_type[frame_index] = ARF_UPDATE;
+ // Note (weitinglin): GF_ARF_LOW is also used as an identifier
+ // for internal ALT_REF's:
+ gf_group->rf_level[frame_index] = GF_ARF_LOW;
+ gf_group->arf_src_offset[frame_index] = ext_arf_interval;
+ gf_group->arf_update_idx[frame_index] = which_arf;
+ gf_group->arf_ref_idx[frame_index] = 0;
+ ++frame_index;
+ }
+ accumulative_subgroup_interval += subgroup_interval[cpi->num_extra_arfs];
+#else
+ if (cpi->multi_arf_enabled) {
+ // Set aside a slot for a level 1 arf.
+ gf_group->update_type[frame_index] = ARF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_LOW;
+ gf_group->arf_src_offset[frame_index] =
+ (unsigned char)((rc->baseline_gf_interval >> 1) - 1);
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[1];
+ gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
+ ++frame_index;
+ }
+#endif // CONFIG_EXT_REFS
+ }
+
+#if !CONFIG_EXT_REFS
+ // Define middle frame
+ mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
+#endif
+
+ // Allocate bits to the other frames in the group.
+ for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) {
+#if !CONFIG_EXT_REFS
+ int arf_idx = 0;
+#endif
+ if (EOF == input_stats(twopass, &frame_stats)) break;
+
+ modified_err = calculate_modified_err(cpi, twopass, oxcf, &frame_stats);
+
+ if (group_error > 0)
+ err_fraction = modified_err / DOUBLE_DIVIDE_CHECK(group_error);
+ else
+ err_fraction = 0.0;
+
+ target_frame_size = (int)((double)total_group_bits * err_fraction);
+
+ if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) {
+ mid_boost_bits += (target_frame_size >> 4); // Pool 1/16 of the target.
+ target_frame_size -= (target_frame_size >> 4);
+#if !CONFIG_EXT_REFS
+ if (frame_index <= mid_frame_idx) arf_idx = 1;
+#endif
+ }
+#if CONFIG_EXT_REFS
+ gf_group->arf_update_idx[frame_index] = which_arf;
+ gf_group->arf_ref_idx[frame_index] = which_arf;
+#else
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[arf_idx];
+ gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
+#endif // CONFIG_EXT_REFS
+ target_frame_size =
+ clamp(target_frame_size, 0, VPXMIN(max_bits, (int)total_group_bits));
+
+#if CONFIG_EXT_REFS
+ // If we are going to have ARFs, check if we can have BWDREF in this
+ // subgroup.
+ if (rc->source_alt_ref_pending) {
+ is_sg_bipred_enabled =
+ is_bipred_enabled &&
+ (subgroup_interval[which_arf] > rc->bipred_group_interval);
+ }
+ // NOTE: BIDIR_PRED is only enabled when the length of the bi-predictive
+ // frame group interval is strictly smaller than that of the GOLDEN
+ // FRAME group interval.
+ // TODO(zoeliu): Currently BIDIR_PRED is only enabled when alt-ref is on.
+ if (is_sg_bipred_enabled && !bipred_group_end) {
+ const int cur_brf_src_offset = rc->bipred_group_interval - 1;
+
+ // --- BRF_UPDATE ---
+ if (bipred_frame_index == 1) {
+ gf_group->update_type[frame_index] = BRF_UPDATE;
+ gf_group->bidir_pred_enabled[frame_index] = 1;
+ gf_group->brf_src_offset[frame_index] = cur_brf_src_offset;
+ // --- LAST_BIPRED_UPDATE ---
+ } else if (bipred_frame_index == rc->bipred_group_interval) {
+ gf_group->update_type[frame_index] = LAST_BIPRED_UPDATE;
+ gf_group->bidir_pred_enabled[frame_index] = 1;
+ gf_group->brf_src_offset[frame_index] = 0;
+ // Reset the bi-predictive frame index.
+ bipred_frame_index = 0;
+ // --- BIPRED_UPDATE ---
+ } else {
+ gf_group->update_type[frame_index] = BIPRED_UPDATE;
+ gf_group->bidir_pred_enabled[frame_index] = 1;
+ gf_group->brf_src_offset[frame_index] = 0;
+ }
+
+ bipred_frame_index++;
+ // Check whether the next bi-predictive frame group would entirely be
+ // included within the current golden frame group.
+ // In addition, we need to avoid coding a BRF right before an ARF.
+ if (bipred_frame_index == 1 &&
+ (i + 2 + cur_brf_src_offset) >= accumulative_subgroup_interval) {
+ bipred_group_end = 1;
+ }
+ } else {
+#endif // CONFIG_EXT_REFS
+ gf_group->update_type[frame_index] = LF_UPDATE;
+#if CONFIG_EXT_REFS
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+ }
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+ if (gf_group->update_type[frame_index] == BRF_UPDATE) {
+ // Boost up the allocated bits on BWDREF_FRAME
+ gf_group->rf_level[frame_index] = INTER_HIGH;
+ gf_group->bit_allocation[frame_index] =
+ target_frame_size + (target_frame_size >> 2);
+ } else if (gf_group->update_type[frame_index] == LAST_BIPRED_UPDATE) {
+ // Press down the allocated bits on LAST_BIPRED_UPDATE frames
+ gf_group->rf_level[frame_index] = INTER_LOW;
+ gf_group->bit_allocation[frame_index] =
+ target_frame_size - (target_frame_size >> 1);
+ } else if (gf_group->update_type[frame_index] == BIPRED_UPDATE) {
+ // TODO(zoeliu): To investigate whether the allocated bits on
+ // BIPRED_UPDATE frames need to be further adjusted.
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ gf_group->bit_allocation[frame_index] = target_frame_size;
+ } else {
+#endif // CONFIG_EXT_REFS
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ gf_group->bit_allocation[frame_index] = target_frame_size;
+#if CONFIG_EXT_REFS
+ }
+#endif // CONFIG_EXT_REFS
+
+ ++frame_index;
+#if CONFIG_EXT_REFS
+ // Check if we need to update the ARF
+ if (cpi->num_extra_arfs && frame_index > arf_pos[which_arf]) {
+ --which_arf;
+ accumulative_subgroup_interval += subgroup_interval[which_arf] + 1;
+ // Meet the new subgroup. Reset the bipred_group_end flag;
+ bipred_group_end = 0;
+ // Insert another extra ARF after the overlay frame
+ if (which_arf) {
+ gf_group->update_type[frame_index] = ARF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_LOW;
+ gf_group->arf_src_offset[frame_index] = ext_arf_interval;
+ gf_group->arf_update_idx[frame_index] = which_arf;
+ gf_group->arf_ref_idx[frame_index] = 0;
+ ++frame_index;
+ }
+ }
+#endif // CONFIG_EXT_REFS
+ }
+
+// Note:
+// We need to configure the frame at the end of the sequence + 1 that will be
+// the start frame for the next group. Otherwise prior to the call to
+// vp10_rc_get_second_pass_params() the data will be undefined.
+#if CONFIG_EXT_REFS
+ gf_group->arf_update_idx[frame_index] = 0;
+ gf_group->arf_ref_idx[frame_index] = 0;
+#else
+ gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
+ gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
+#endif
+ if (rc->source_alt_ref_pending) {
+ gf_group->update_type[frame_index] = OVERLAY_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+
+#if CONFIG_EXT_REFS
+ if (cpi->num_extra_arfs) {
+ // Move each internal overlay slot's bits to an extra arf slot and mark
+ // the emptied slot as an internal overlay.
+ for (i = cpi->num_extra_arfs; i > 0; --i) {
+ int arf_pos_in_gf = (i == cpi->num_extra_arfs ? 2 : arf_pos[i + 1] + 1);
+ gf_group->bit_allocation[arf_pos_in_gf] =
+ gf_group->bit_allocation[arf_pos[i]];
+ gf_group->update_type[arf_pos[i]] = INTNL_OVERLAY_UPDATE;
+ gf_group->bit_allocation[arf_pos[i]] = 0;
+ gf_group->rf_level[arf_pos[i]] = INTER_LOW;
+ }
+ }
+#endif
+#if !CONFIG_EXT_REFS
+ // Final setup for second arf and its overlay.
+ if (cpi->multi_arf_enabled) {
+ gf_group->bit_allocation[2] =
+ gf_group->bit_allocation[mid_frame_idx] + mid_boost_bits;
+ gf_group->update_type[mid_frame_idx] = OVERLAY_UPDATE;
+ gf_group->bit_allocation[mid_frame_idx] = 0;
+ }
+#endif
+ } else {
+ gf_group->update_type[frame_index] = GF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
+ }
+#if CONFIG_EXT_REFS
+ gf_group->bidir_pred_enabled[frame_index] = 0;
+ gf_group->brf_src_offset[frame_index] = 0;
+#endif // CONFIG_EXT_REFS
+
+ // Note whether multi-arf was enabled this group for next time.
+ cpi->multi_arf_last_grp_enabled = cpi->multi_arf_enabled;
+}
+/* Analyse and define a gf/arf group.
+ * Scans forward through the first pass stats, starting from this_frame, to
+ * choose the group length (rc->baseline_gf_interval), decides whether an
+ * alt-ref frame will be used, derives the group boost (rc->gfu_boost) and
+ * bit budget, and then calls allocate_gf_group_bits() to fill in
+ * twopass->gf_group.
+ * *this_frame is overwritten with later frames' stats during the scan. The
+ * stats read position is reset to its value on entry before returning.
+ */
+static void define_gf_group(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
+ VP10_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ VP10EncoderConfig *const oxcf = &cpi->oxcf;
+ TWO_PASS *const twopass = &cpi->twopass;
+ FIRSTPASS_STATS next_frame;
+ const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
+ int i;
+
+ double boost_score = 0.0;
+ double old_boost_score = 0.0;
+ double gf_group_err = 0.0;
+#if GROUP_ADAPTIVE_MAXQ
+ double gf_group_raw_error = 0.0;
+#endif
+ double gf_group_skip_pct = 0.0;
+ double gf_group_inactive_zone_rows = 0.0;
+ double gf_first_frame_err = 0.0;
+ double mod_frame_err = 0.0;
+
+ double mv_ratio_accumulator = 0.0;
+ double decay_accumulator = 1.0;
+ double zero_motion_accumulator = 1.0;
+
+ double loop_decay_rate = 1.00;
+ double last_loop_decay_rate = 1.00;
+
+ double this_frame_mv_in_out = 0.0;
+ double mv_in_out_accumulator = 0.0;
+ double abs_mv_in_out_accumulator = 0.0;
+ double mv_ratio_accumulator_thresh;
+ unsigned int allow_alt_ref = is_altref_enabled(cpi);
+
+ int f_boost = 0;
+ int b_boost = 0;
+ int flash_detected;
+ int active_max_gf_interval;
+ int active_min_gf_interval;
+ int64_t gf_group_bits;
+ double gf_group_error_left;
+ int gf_arf_bits;
+ const int is_key_frame = frame_is_intra_only(cm);
+ const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
+
+ // Reset the GF group data structures unless this is a key
+ // frame in which case it will already have been done.
+ if (is_key_frame == 0) {
+ vp10_zero(twopass->gf_group);
+ }
+
+ vpx_clear_system_state();
+ vp10_zero(next_frame);
+
+ // Load stats for the current frame.
+ mod_frame_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
+
+ // Note the error of the frame at the start of the group. This will be
+ // the GF frame error if we code a normal gf.
+ gf_first_frame_err = mod_frame_err;
+
+ // If this is a key frame or the overlay from a previous arf then
+ // the error score / cost of this frame has already been accounted for.
+ if (arf_active_or_kf) {
+ gf_group_err -= gf_first_frame_err;
+#if GROUP_ADAPTIVE_MAXQ
+ gf_group_raw_error -= this_frame->coded_error;
+#endif
+ gf_group_skip_pct -= this_frame->intra_skip_pct;
+ gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows;
+ }
+
+ // Motion breakout threshold for loop below depends on image size.
+ mv_ratio_accumulator_thresh =
+ (cpi->initial_height + cpi->initial_width) / 4.0;
+
+ // Set a maximum and minimum interval for the GF group.
+ // If the image appears almost completely static we can extend beyond this.
+ {
+ int int_max_q = (int)(vp10_convert_qindex_to_q(
+ twopass->active_worst_quality, cpi->common.bit_depth));
+ int int_lbq = (int)(vp10_convert_qindex_to_q(rc->last_boosted_qindex,
+ cpi->common.bit_depth));
+
+ active_min_gf_interval = rc->min_gf_interval + VPXMIN(2, int_max_q / 200);
+ if (active_min_gf_interval > rc->max_gf_interval)
+ active_min_gf_interval = rc->max_gf_interval;
+
+ if (cpi->multi_arf_allowed) {
+ active_max_gf_interval = rc->max_gf_interval;
+ } else {
+ // The value chosen depends on the active Q range. At low Q we have
+ // bits to spare and are better with a smaller interval and smaller boost.
+ // At high Q when there are few bits to spare we are better with a longer
+ // interval to spread the cost of the GF.
+ active_max_gf_interval = 12 + VPXMIN(4, (int_lbq / 6));
+
+ // We have: active_min_gf_interval <= rc->max_gf_interval
+ if (active_max_gf_interval < active_min_gf_interval)
+ active_max_gf_interval = active_min_gf_interval;
+ else if (active_max_gf_interval > rc->max_gf_interval)
+ active_max_gf_interval = rc->max_gf_interval;
+ }
+ }
+
+ i = 0;
+ while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) {
+ ++i;
+
+ // Accumulate error score of frames in this gf group.
+ mod_frame_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
+ gf_group_err += mod_frame_err;
+#if GROUP_ADAPTIVE_MAXQ
+ gf_group_raw_error += this_frame->coded_error;
+#endif
+ gf_group_skip_pct += this_frame->intra_skip_pct;
+ gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
+
+ if (EOF == input_stats(twopass, &next_frame)) break;
+
+ // Test for the case where there is a brief flash but the prediction
+ // quality back to an earlier frame is then restored.
+ flash_detected = detect_flash(twopass, 0);
+
+ // Update the motion related elements to the boost calculation.
+ accumulate_frame_motion_stats(
+ &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
+ &abs_mv_in_out_accumulator, &mv_ratio_accumulator);
+
+ // Accumulate the effect of prediction quality decay.
+ if (!flash_detected) {
+ last_loop_decay_rate = loop_decay_rate;
+ loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
+
+ decay_accumulator = decay_accumulator * loop_decay_rate;
+
+ // Monitor for static sections.
+ zero_motion_accumulator = VPXMIN(
+ zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+
+ // Break clause to detect very still sections after motion. For example,
+ // a static image after a fade or other transition.
+ if (detect_transition_to_still(cpi, i, 5, loop_decay_rate,
+ last_loop_decay_rate)) {
+ allow_alt_ref = 0;
+ break;
+ }
+ }
+
+ // Calculate a boost number for this frame.
+ boost_score +=
+ decay_accumulator *
+ calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out, GF_MAX_BOOST);
+
+ // Break out conditions.
+ if (
+ // Break at active_max_gf_interval unless almost totally static.
+ (i >= (active_max_gf_interval + arf_active_or_kf) &&
+ zero_motion_accumulator < 0.995) ||
+ (
+ // Don't break out with a very short interval.
+ (i >= active_min_gf_interval + arf_active_or_kf) &&
+ (!flash_detected) &&
+ ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
+ (abs_mv_in_out_accumulator > 3.0) ||
+ (mv_in_out_accumulator < -2.0) ||
+ ((boost_score - old_boost_score) < BOOST_BREAKOUT)))) {
+ boost_score = old_boost_score;
+ break;
+ }
+
+ *this_frame = next_frame; // The caller's stats now hold the scanned frame.
+ old_boost_score = boost_score;
+ }
+
+ twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
+
+ // Was the group length constrained by the requirement for a new KF?
+ rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
+
+ // Should we use the alternate reference frame.
+ if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) &&
+ (i >= rc->min_gf_interval)) {
+ // Calculate the boost for alt ref.
+ rc->gfu_boost =
+ calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost, &b_boost);
+ rc->source_alt_ref_pending = 1;
+
+ /* Test to see if multi arf is appropriate.
+ * NOTE(review): rc->baseline_gf_interval is only assigned further down in
+ * this function, so the value tested here is whatever was stored before
+ * this call. Confirm that this is intended. */
+ cpi->multi_arf_enabled =
+ (cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) &&
+ (zero_motion_accumulator < 0.995))
+ ? 1
+ : 0;
+ } else {
+ rc->gfu_boost = VPXMAX((int)boost_score, MIN_ARF_GF_BOOST);
+ rc->source_alt_ref_pending = 0;
+ }
+
+ // Set the interval until the next gf.
+ rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
+
+#if CONFIG_EXT_REFS
+ // Compute how many extra alt_refs we can have
+ cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval,
+ rc->source_alt_ref_pending);
+ // Currently at maximum two extra ARFs' are allowed
+ assert(cpi->num_extra_arfs <= 2);
+#endif // CONFIG_EXT_REFS
+
+ rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+
+#if CONFIG_EXT_REFS
+ rc->bipred_group_interval = BFG_INTERVAL;
+ // The minimum bi-predictive frame group interval is 2.
+ if (rc->bipred_group_interval < 2) rc->bipred_group_interval = 0;
+#endif // CONFIG_EXT_REFS
+
+ // Reset the file position.
+ reset_fpf_position(twopass, start_pos);
+
+ // Calculate the bits to be allocated to the gf/arf group as a whole
+ gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);
+
+#if GROUP_ADAPTIVE_MAXQ
+ // Calculate an estimate of the maxq needed for the group.
+ // We are more aggressive about correcting for sections
+ // where there could be significant overshoot than for easier
+ // sections where we do not wish to risk creating an overshoot
+ // of the allocated bit budget.
+ if ((cpi->oxcf.rc_mode != VPX_Q) && (rc->baseline_gf_interval > 1)) {
+ const int vbr_group_bits_per_frame =
+ (int)(gf_group_bits / rc->baseline_gf_interval);
+ const double group_av_err = gf_group_raw_error / rc->baseline_gf_interval;
+ const double group_av_skip_pct =
+ gf_group_skip_pct / rc->baseline_gf_interval;
+ const double group_av_inactive_zone =
+ ((gf_group_inactive_zone_rows * 2) /
+ (rc->baseline_gf_interval * (double)cm->mb_rows));
+
+ int tmp_q;
+ // rc factor is a weight factor that corrects for local rate control drift.
+ double rc_factor = 1.0;
+ if (rc->rate_error_estimate > 0) {
+ rc_factor = VPXMAX(RC_FACTOR_MIN,
+ (double)(100 - rc->rate_error_estimate) / 100.0);
+ } else {
+ rc_factor = VPXMIN(RC_FACTOR_MAX,
+ (double)(100 - rc->rate_error_estimate) / 100.0);
+ }
+ tmp_q = get_twopass_worst_quality(
+ cpi, group_av_err, (group_av_skip_pct + group_av_inactive_zone),
+ vbr_group_bits_per_frame, twopass->kfgroup_inter_fraction * rc_factor);
+ twopass->active_worst_quality =
+ VPXMAX(tmp_q, twopass->active_worst_quality >> 1);
+ }
+#endif // GROUP_ADAPTIVE_MAXQ
+
+ // Calculate the extra bits to be used for boosted frame(s)
+ gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, rc->gfu_boost,
+ gf_group_bits);
+
+ // Adjust KF group bits and error remaining.
+ twopass->kf_group_error_left -= (int64_t)gf_group_err;
+
+ // If this is an arf update we want to remove the score for the overlay
+ // frame at the end which will usually be very cheap to code.
+ // The overlay frame has already, in effect, been coded so we want to spread
+ // the remaining bits among the other frames.
+ // For normal GFs remove the score for the GF itself unless this is
+ // also a key frame in which case it has already been accounted for.
+ if (rc->source_alt_ref_pending) {
+ gf_group_error_left = gf_group_err - mod_frame_err;
+ } else if (is_key_frame == 0) {
+ gf_group_error_left = gf_group_err - gf_first_frame_err;
+ } else {
+ gf_group_error_left = gf_group_err;
+ }
+
+ // Allocate bits to each of the frames in the GF group.
+ allocate_gf_group_bits(cpi, gf_group_bits, gf_group_error_left, gf_arf_bits);
+
+ // Reset the file position.
+ reset_fpf_position(twopass, start_pos);
+
+ // Calculate a section intra ratio used in setting max loop filter.
+ if (cpi->common.frame_type != KEY_FRAME) {
+ twopass->section_intra_rating = calculate_section_intra_ratio(
+ start_pos, twopass->stats_in_end, rc->baseline_gf_interval);
+ }
+
+ if (oxcf->resize_mode == RESIZE_DYNAMIC) {
+ // Default to starting GF groups at normal frame size.
+ cpi->rc.next_frame_size_selector = UNSCALED;
+ }
+}
+
+// Thresholds used by test_candidate_kf() below when deciding whether a frame
+// is a plausible key frame (scene cut) candidate.
+//
+// Threshold for use of the lagging second reference frame. High second ref
+// usage may point to a transient event like a flash or occlusion rather than
+// a real scene cut.
+#define SECOND_REF_USEAGE_THRESH 0.1
+// Minimum % intra coding observed in first pass (1.0 = 100%)
+#define MIN_INTRA_LEVEL 0.25
+// Minimum ratio between the % of intra coding and inter coding in the first
+// pass after discounting neutral blocks (discounting neutral blocks in this
+// way helps catch scene cuts in clips with very flat areas or letter box
+// format clips with image padding).
+#define INTRA_VS_INTER_THRESH 2.0
+// Hard threshold where the first pass chooses intra for almost all blocks.
+// In such a case even if the frame is not a scene cut coding a key frame
+// may be a good option.
+#define VERY_LOW_INTER_THRESH 0.05
+// Maximum threshold for the relative ratio of intra error score vs best
+// inter error score.
+#define KF_II_ERR_THRESHOLD 2.5
+// In real scene cuts there is almost always a sharp change in the intra
+// or inter error score.
+#define ERR_CHANGE_THRESHOLD 0.4
+// For real scene cuts we expect an improvement in the intra inter error
+// ratio in the next frame.
+#define II_IMPROVEMENT_THRESHOLD 3.5
+// Upper bound applied to the (boosted) intra / coded error ratio of each
+// frame examined in the look-ahead loop of test_candidate_kf().
+#define KF_II_MAX 128.0
+
+// Decides whether |this_frame| is a viable key frame candidate.
+//
+// The first pass stats of |last_frame|, |this_frame| and |next_frame| are
+// first run through the primary tests built from the thresholds defined
+// above. If they pass, up to 16 following frames are examined (starting from
+// a copy of |next_frame|, then reading further stats with input_stats()) and
+// a decayed boost score is accumulated to judge how well the candidate
+// predicts the frames that follow it.
+//
+// Returns 1 if the candidate is accepted and 0 otherwise. The stats read
+// position is restored with reset_fpf_position() only when a candidate that
+// passed the primary tests is subsequently rejected.
+// NOTE(review): on acceptance the read position is left wherever the
+// look-ahead loop stopped; the caller appears to reset it itself - confirm.
+static int test_candidate_kf(TWO_PASS *twopass,
+ const FIRSTPASS_STATS *last_frame,
+ const FIRSTPASS_STATS *this_frame,
+ const FIRSTPASS_STATS *next_frame) {
+ int is_viable_kf = 0;
+ // Fraction of the frame that was not coded inter in the first pass.
+ double pcnt_intra = 1.0 - this_frame->pcnt_inter;
+ // Inter fraction after discounting neutral blocks.
+ double modified_pcnt_inter =
+ this_frame->pcnt_inter - this_frame->pcnt_neutral;
+
+ // Does the frame satisfy the primary criteria of a key frame?
+ // See above for an explanation of the test criteria.
+ // If so, then examine how well it predicts subsequent frames.
+ //
+ // Shape of the test: low second ref usage in both this and the next frame,
+ // AND (almost no inter coding, OR (enough intra coding relative to inter,
+ // a bounded intra/coded error ratio, and at least one of: a sharp change in
+ // coded error, a sharp change in intra error, or a high intra/coded error
+ // ratio in the next frame)).
+ if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
+ (next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
+ ((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) ||
+ ((pcnt_intra > MIN_INTRA_LEVEL) &&
+ (pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) &&
+ ((this_frame->intra_error /
+ DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) <
+ KF_II_ERR_THRESHOLD) &&
+ ((fabs(last_frame->coded_error - this_frame->coded_error) /
+ DOUBLE_DIVIDE_CHECK(this_frame->coded_error) >
+ ERR_CHANGE_THRESHOLD) ||
+ (fabs(last_frame->intra_error - this_frame->intra_error) /
+ DOUBLE_DIVIDE_CHECK(this_frame->intra_error) >
+ ERR_CHANGE_THRESHOLD) ||
+ ((next_frame->intra_error /
+ DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) >
+ II_IMPROVEMENT_THRESHOLD))))) {
+ int i;
+ // Remember the read position so it can be restored if we reject.
+ const FIRSTPASS_STATS *start_pos = twopass->stats_in;
+ FIRSTPASS_STATS local_next_frame = *next_frame;
+ double boost_score = 0.0;
+ double old_boost_score = 0.0;
+ double decay_accumulator = 1.0;
+
+ // Examine how well the key frame predicts subsequent frames.
+ for (i = 0; i < 16; ++i) {
+ double next_iiratio = (BOOST_FACTOR * local_next_frame.intra_error /
+ DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error));
+
+ if (next_iiratio > KF_II_MAX) next_iiratio = KF_II_MAX;
+
+ // Cumulative effect of decay in prediction quality.
+ if (local_next_frame.pcnt_inter > 0.85)
+ decay_accumulator *= local_next_frame.pcnt_inter;
+ else
+ decay_accumulator *= (0.85 + local_next_frame.pcnt_inter) / 2.0;
+
+ // Keep a running total.
+ boost_score += (decay_accumulator * next_iiratio);
+
+ // Test various breakout clauses.
+ if ((local_next_frame.pcnt_inter < 0.05) || (next_iiratio < 1.5) ||
+ (((local_next_frame.pcnt_inter - local_next_frame.pcnt_neutral) <
+ 0.20) &&
+ (next_iiratio < 3.0)) ||
+ ((boost_score - old_boost_score) < 3.0) ||
+ (local_next_frame.intra_error < 200)) {
+ break;
+ }
+
+ old_boost_score = boost_score;
+
+ // Get the next frame details
+ if (EOF == input_stats(twopass, &local_next_frame)) break;
+ }
+
+ // If there is tolerable prediction for at least the next 3 frames then
+ // break out else discard this potential key frame and move on
+ // NOTE(review): i only advances when an iteration finishes without a
+ // break, so (i > 3) needs four such iterations - confirm this matches the
+ // "at least 3 frames" wording above.
+ if (boost_score > 30.0 && (i > 3)) {
+ is_viable_kf = 1;
+ } else {
+ // Reset the file position
+ reset_fpf_position(twopass, start_pos);
+
+ is_viable_kf = 0;
+ }
+ }
+
+ return is_viable_kf;
+}
+
+#define FRAMES_TO_CHECK_DECAY 8
+
+// Determines the extent of the next key frame group and its bit budget.
+//
+// Starting from |this_frame| (the stats of the frame that is about to be
+// coded as a key frame) the first pass stats are scanned forward to find
+// where the following key frame should fall. The function then:
+//  - sets rc->frames_to_key and rc->next_key_frame_forced,
+//  - sets twopass->kf_group_bits from the group's share of the remaining
+//    modified error, clipped to the per frame maximum,
+//  - derives rc->kf_boost and the bits for the key frame itself, which are
+//    stored in gf_group->bit_allocation[0],
+//  - updates kf_group_error_left, kfgroup_inter_fraction,
+//    modified_error_left, kf_zeromotion_pct and section_intra_rating.
+//
+// The GF group structure is zeroed, and the stats read position is put back
+// to where it was on entry before returning. |*this_frame| is overwritten
+// while scanning, so callers that still need it must keep their own copy.
+static void find_next_key_frame(VP10_COMP *cpi, FIRSTPASS_STATS *this_frame) {
+  int i, j;
+  RATE_CONTROL *const rc = &cpi->rc;
+  TWO_PASS *const twopass = &cpi->twopass;
+  GF_GROUP *const gf_group = &twopass->gf_group;
+  const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+  const FIRSTPASS_STATS first_frame = *this_frame;
+  const FIRSTPASS_STATS *const start_position = twopass->stats_in;
+  FIRSTPASS_STATS next_frame;
+  FIRSTPASS_STATS last_frame;
+  int kf_bits = 0;
+  int loop_decay_counter = 0;
+  double decay_accumulator = 1.0;
+  double av_decay_accumulator = 0.0;
+  double zero_motion_accumulator = 1.0;
+  double boost_score = 0.0;
+  double kf_mod_err = 0.0;
+  double kf_group_err = 0.0;
+  double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
+
+  vp10_zero(next_frame);
+
+  cpi->common.frame_type = KEY_FRAME;
+
+  // Reset the GF group data structures.
+  vp10_zero(*gf_group);
+
+  // Is this a forced key frame by interval.
+  rc->this_key_frame_forced = rc->next_key_frame_forced;
+
+  // Clear the alt ref active flag and last group multi arf flags as they
+  // can never be set for a key frame.
+  rc->source_alt_ref_active = 0;
+  cpi->multi_arf_last_grp_enabled = 0;
+
+  // KF is always a GF so clear frames till next gf counter.
+  rc->frames_till_gf_update_due = 0;
+
+  rc->frames_to_key = 1;
+
+  twopass->kf_group_bits = 0;        // Total bits available to kf group
+  twopass->kf_group_error_left = 0;  // Group modified error score.
+
+  kf_mod_err = calculate_modified_err(cpi, twopass, oxcf, this_frame);
+
+  // Initialize the decay rates for the recent frames to check
+  for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0;
+
+  // Find the next keyframe.
+  i = 0;
+  while (twopass->stats_in < twopass->stats_in_end &&
+         rc->frames_to_key < cpi->oxcf.key_freq) {
+    // Accumulate kf group error.
+    kf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame);
+
+    // Load the next frame's stats.
+    last_frame = *this_frame;
+    input_stats(twopass, this_frame);
+
+    // Provided that we are not at the end of the file...
+    if (cpi->oxcf.auto_key && twopass->stats_in < twopass->stats_in_end) {
+      double loop_decay_rate;
+
+      // Check for a scene cut.
+      if (test_candidate_kf(twopass, &last_frame, this_frame,
+                            twopass->stats_in))
+        break;
+
+      // How fast is the prediction quality decaying?
+      loop_decay_rate = get_prediction_decay_rate(cpi, twopass->stats_in);
+
+      // We want to know something about the recent past... rather than
+      // as used elsewhere where we are concerned with decay in prediction
+      // quality since the last GF or KF.
+      recent_loop_decay[i % FRAMES_TO_CHECK_DECAY] = loop_decay_rate;
+      decay_accumulator = 1.0;
+      for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j)
+        decay_accumulator *= recent_loop_decay[j];
+
+      // Special check for transition or high motion followed by a
+      // static scene.
+      if (detect_transition_to_still(cpi, i, cpi->oxcf.key_freq - i,
+                                     loop_decay_rate, decay_accumulator))
+        break;
+
+      // Step on to the next frame.
+      ++rc->frames_to_key;
+
+      // If we don't have a real key frame within the next two
+      // key_freq intervals then break out of the loop.
+      if (rc->frames_to_key >= 2 * cpi->oxcf.key_freq) break;
+    } else {
+      ++rc->frames_to_key;
+    }
+    ++i;
+  }
+
+  // If there is a max kf interval set by the user we must obey it.
+  // We already breakout of the loop above at 2x max.
+  // This code centers the extra kf if the actual natural interval
+  // is between 1x and 2x.
+  if (cpi->oxcf.auto_key && rc->frames_to_key > cpi->oxcf.key_freq) {
+    FIRSTPASS_STATS tmp_frame = first_frame;
+
+    rc->frames_to_key /= 2;
+
+    // Reset to the start of the group.
+    reset_fpf_position(twopass, start_position);
+
+    kf_group_err = 0.0;
+
+    // Rescan to get the correct error data for the forced kf group.
+    for (i = 0; i < rc->frames_to_key; ++i) {
+      kf_group_err += calculate_modified_err(cpi, twopass, oxcf, &tmp_frame);
+      input_stats(twopass, &tmp_frame);
+    }
+    rc->next_key_frame_forced = 1;
+  } else if (twopass->stats_in == twopass->stats_in_end ||
+             rc->frames_to_key >= cpi->oxcf.key_freq) {
+    rc->next_key_frame_forced = 1;
+  } else {
+    rc->next_key_frame_forced = 0;
+  }
+
+  // Special case for the last key frame of the file.
+  if (twopass->stats_in >= twopass->stats_in_end) {
+    // Accumulate kf group error.
+    kf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame);
+  }
+
+  // Calculate the number of bits that should be assigned to the kf group.
+  if (twopass->bits_left > 0 && twopass->modified_error_left > 0.0) {
+    // Maximum number of bits for a single normal frame (not key frame).
+    const int max_bits = frame_max_bits(rc, &cpi->oxcf);
+
+    // Maximum number of bits allocated to the key frame group.
+    int64_t max_grp_bits;
+
+    // Default allocation based on bits left and relative
+    // complexity of the section.
+    twopass->kf_group_bits = (int64_t)(
+        twopass->bits_left * (kf_group_err / twopass->modified_error_left));
+
+    // Clip based on maximum per frame rate defined by the user.
+    max_grp_bits = (int64_t)max_bits * (int64_t)rc->frames_to_key;
+    if (twopass->kf_group_bits > max_grp_bits)
+      twopass->kf_group_bits = max_grp_bits;
+  } else {
+    twopass->kf_group_bits = 0;
+  }
+  twopass->kf_group_bits = VPXMAX(0, twopass->kf_group_bits);
+
+  // Reset the first pass file position.
+  reset_fpf_position(twopass, start_position);
+
+  // Scan through the kf group collating various stats used to determine
+  // how many bits to spend on it.
+  decay_accumulator = 1.0;
+  boost_score = 0.0;
+  for (i = 0; i < (rc->frames_to_key - 1); ++i) {
+    if (EOF == input_stats(twopass, &next_frame)) break;
+
+    // Monitor for static sections.
+    zero_motion_accumulator = VPXMIN(zero_motion_accumulator,
+                                     get_zero_motion_factor(cpi, &next_frame));
+
+    // Not all frames in the group are necessarily used in calculating boost.
+    if ((i <= rc->max_gf_interval) ||
+        ((i <= (rc->max_gf_interval * 4)) && (decay_accumulator > 0.5))) {
+      // NOTE(review): the boost is computed from |this_frame| rather than
+      // from |next_frame| on every iteration. Left unchanged here because
+      // altering it would change rate allocation - confirm intent.
+      const double frame_boost =
+          calc_frame_boost(cpi, this_frame, 0, KF_MAX_BOOST);
+
+      // How fast is prediction quality decaying.
+      if (!detect_flash(twopass, 0)) {
+        const double loop_decay_rate =
+            get_prediction_decay_rate(cpi, &next_frame);
+        decay_accumulator *= loop_decay_rate;
+        decay_accumulator = VPXMAX(decay_accumulator, MIN_DECAY_FACTOR);
+        av_decay_accumulator += decay_accumulator;
+        ++loop_decay_counter;
+      }
+      boost_score += (decay_accumulator * frame_boost);
+    }
+  }
+  // Average the accumulated decay. The counter stays at zero when the group
+  // holds a single frame or every scanned frame was classed as a flash;
+  // dividing then would produce NaN and make the int conversion of kf_boost
+  // below undefined, so the accumulator is left at 0.0 in that case and the
+  // minimum boost clamps apply.
+  if (loop_decay_counter > 0)
+    av_decay_accumulator /= (double)loop_decay_counter;
+
+  reset_fpf_position(twopass, start_position);
+
+  // Store the zero motion percentage
+  twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0);
+
+  // Calculate a section intra ratio used in setting max loop filter.
+  twopass->section_intra_rating = calculate_section_intra_ratio(
+      start_position, twopass->stats_in_end, rc->frames_to_key);
+
+  // Apply various clamps for min and max boost
+  rc->kf_boost = (int)(av_decay_accumulator * boost_score);
+  rc->kf_boost = VPXMAX(rc->kf_boost, (rc->frames_to_key * 3));
+  rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_BOOST);
+
+  // Work out how many bits to allocate for the key frame itself.
+  kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost,
+                                 twopass->kf_group_bits);
+
+  // Work out the fraction of the kf group bits reserved for the inter frames
+  // within the group after discounting the bits for the kf itself.
+  if (twopass->kf_group_bits) {
+    twopass->kfgroup_inter_fraction =
+        (double)(twopass->kf_group_bits - kf_bits) /
+        (double)twopass->kf_group_bits;
+  } else {
+    twopass->kfgroup_inter_fraction = 1.0;
+  }
+
+  twopass->kf_group_bits -= kf_bits;
+
+  // Save the bits to spend on the key frame.
+  gf_group->bit_allocation[0] = kf_bits;
+  gf_group->update_type[0] = KF_UPDATE;
+  gf_group->rf_level[0] = KF_STD;
+
+  // Note the total error score of the kf group minus the key frame itself.
+  // kf_group_error_left is a 64 bit field, so convert at full width rather
+  // than through int.
+  twopass->kf_group_error_left = (int64_t)(kf_group_err - kf_mod_err);
+
+  // Adjust the count of total modified error left.
+  // The count of bits left is adjusted elsewhere based on real coded frame
+  // sizes.
+  twopass->modified_error_left -= kf_group_err;
+
+  if (oxcf->resize_mode == RESIZE_DYNAMIC) {
+    // Default to normal-sized frame on keyframes.
+    cpi->rc.next_frame_size_selector = UNSCALED;
+  }
+}
+
+// Define the reference buffers that will be updated post encode.
+//
+// Reads the update type of the current entry of the GF group
+// (gf_group.update_type[gf_group.index]) and sets the cpi->refresh_*_frame
+// flags accordingly. The source-type flags in cpi->rc (is_src_frame_alt_ref
+// and, with CONFIG_EXT_REFS, is_bwd_ref_frame, is_last_bipred_frame,
+// is_bipred_frame, is_src_frame_ext_arf) are cleared first and then set for
+// the cases that need them. An unknown update type trips assert(0).
+static void configure_buffer_updates(VP10_COMP *cpi) {
+ TWO_PASS *const twopass = &cpi->twopass;
+
+ // Wei-Ting: Should we define another function to take care of
+ // cpi->rc.is_$Source_Type to make this function as it is in the comment?
+
+ cpi->rc.is_src_frame_alt_ref = 0;
+#if CONFIG_EXT_REFS
+ cpi->rc.is_bwd_ref_frame = 0;
+ cpi->rc.is_last_bipred_frame = 0;
+ cpi->rc.is_bipred_frame = 0;
+ cpi->rc.is_src_frame_ext_arf = 0;
+#endif // CONFIG_EXT_REFS
+
+ switch (twopass->gf_group.update_type[twopass->gf_group.index]) {
+ // Key frame: refresh every reference buffer.
+ case KF_UPDATE:
+#if CONFIG_EXT_REFS
+ cpi->refresh_bwd_ref_frame = 1;
+#endif // CONFIG_EXT_REFS
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
+ break;
+
+ // Ordinary inter frame: only LAST is refreshed.
+ // NOTE(review): with CONFIG_EXT_REFS this case does not assign
+ // cpi->refresh_bwd_ref_frame, so whatever value it already holds is kept -
+ // confirm that is intended.
+ case LF_UPDATE:
+#if CONFIG_EXT_REFS
+ // If we have extra ALT_REFs, we can use the farthest ALT (ALT0) as
+ // the BWD_REF.
+ if (cpi->num_extra_arfs) {
+ int tmp = cpi->bwd_fb_idx;
+
+ cpi->rc.is_bwd_ref_frame = 1;
+ cpi->bwd_fb_idx = cpi->alt_fb_idx;
+ cpi->alt_fb_idx = cpi->arf_map[0];
+ cpi->arf_map[0] = tmp;
+ } else {
+ cpi->rc.is_bwd_ref_frame = 0;
+ }
+#endif // CONFIG_EXT_REFS
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+
+ // Golden frame: refresh LAST and GOLDEN.
+ case GF_UPDATE:
+#if CONFIG_EXT_REFS
+ cpi->refresh_bwd_ref_frame = 0;
+#endif // CONFIG_EXT_REFS
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+
+ // Overlay of an alt ref: refresh GOLDEN only and flag the source as being
+ // the alt ref frame.
+ case OVERLAY_UPDATE:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 1;
+#if CONFIG_EXT_REFS
+ cpi->refresh_bwd_ref_frame = 0;
+#endif // CONFIG_EXT_REFS
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_src_frame_alt_ref = 1;
+ break;
+
+ // Alt ref frame: refresh ALT_REF (and BWD_REF with CONFIG_EXT_REFS).
+ case ARF_UPDATE:
+#if CONFIG_EXT_REFS
+ cpi->refresh_bwd_ref_frame = 1;
+#endif // CONFIG_EXT_REFS
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 1;
+ break;
+
+#if CONFIG_EXT_REFS
+ // Backward reference frame: refresh BWD_REF only.
+ case BRF_UPDATE:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_bwd_ref_frame = 1;
+ if (cpi->num_extra_arfs) {
+ // Allow BRF use the farthest ALT_REF (ALT0) as BWD_REF by swapping
+ // the virtual indices.
+ // NOTE: The indices will be swapped back after this frame is encoded
+ // (in vp10_update_reference_frames()).
+ int tmp = cpi->bwd_fb_idx;
+ cpi->bwd_fb_idx = cpi->alt_fb_idx;
+ cpi->alt_fb_idx = cpi->arf_map[0];
+ cpi->arf_map[0] = tmp;
+ }
+ break;
+ // Last bi-predictive frame of a group: no reference buffer is refreshed.
+ case LAST_BIPRED_UPDATE:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_last_bipred_frame = 1;
+ break;
+
+ // Bi-predictive frame (not the last one): refresh LAST only.
+ case BIPRED_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_bipred_frame = 1;
+ break;
+
+ // Internal overlay: refresh LAST only and flag the source as both an alt
+ // ref and an extra alt ref.
+ case INTNL_OVERLAY_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_bwd_ref_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_src_frame_alt_ref = 1;
+ cpi->rc.is_src_frame_ext_arf = 1;
+ break;
+#endif // CONFIG_EXT_REFS
+
+ default: assert(0); break;
+ }
+}
+
+// Reports whether the partition search may be skipped for the current frame.
+//
+// That is the case when the frame is not intra-only and the first pass found
+// no non-zero motion vectors (pcnt_inter - pcnt_motion == 1) in the stats
+// entry at the read position and in the two entries before it. When it
+// returns 1 the partition size can be assigned according to the variance
+// instead.
+static int is_skippable_frame(const VP10_COMP *cpi) {
+  // Offsets from the stats read position, in the order they are examined.
+  static const int stats_offsets[3] = { -1, -2, 0 };
+  const TWO_PASS *const twopass = &cpi->twopass;
+  int k;
+
+  if (frame_is_intra_only(&cpi->common)) return 0;
+
+  // All three entries must lie inside the stats buffer.
+  if (!(twopass->stats_in - 2 > twopass->stats_in_start) ||
+      !(twopass->stats_in < twopass->stats_in_end)) {
+    return 0;
+  }
+
+  for (k = 0; k < 3; ++k) {
+    const FIRSTPASS_STATS *const stats = twopass->stats_in + stats_offsets[k];
+    if (stats->pcnt_inter - stats->pcnt_motion != 1) return 0;
+  }
+
+  return 1;
+}
+
+// Sets up the second pass rate control parameters for the next frame.
+//
+// Does nothing if no first pass stats are attached (twopass->stats_in is
+// NULL). For an ARF_UPDATE entry of the GF group no stats are consumed: the
+// buffer update flags and rc->base_frame_target are set from the existing
+// group data and the function returns. Otherwise one stats record is read
+// and, as needed, a new key frame group and/or GF group is defined. Finally
+// the buffer update flags, rc->base_frame_target, twopass->mb_av_energy and
+// twopass->total_left_stats are updated for the frame.
+void vp10_rc_get_second_pass_params(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
+  TWO_PASS *const twopass = &cpi->twopass;
+  GF_GROUP *const gf_group = &twopass->gf_group;
+  int frames_left;
+  FIRSTPASS_STATS this_frame;
+
+  int target_rate;
+
+  frames_left = (int)(twopass->total_stats.count - cm->current_video_frame);
+
+  if (!twopass->stats_in) return;
+
+  // If this is an arf frame then we dont want to read the stats file or
+  // advance the input pointer as we already have what we need.
+  if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
+    // Uses the function-scope target_rate; a second declaration here would
+    // only shadow it.
+    configure_buffer_updates(cpi);
+    target_rate = gf_group->bit_allocation[gf_group->index];
+    target_rate = vp10_rc_clamp_pframe_target_size(cpi, target_rate);
+    rc->base_frame_target = target_rate;
+
+    cm->frame_type = INTER_FRAME;
+
+    // Do the firstpass stats indicate that this frame is skippable for the
+    // partition search?
+    if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2) {
+      cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
+    }
+
+    return;
+  }
+
+  vpx_clear_system_state();
+
+  if (cpi->oxcf.rc_mode == VPX_Q) {
+    twopass->active_worst_quality = cpi->oxcf.cq_level;
+  } else if (cm->current_video_frame == 0) {
+    // Special case code for first frame.
+    // NOTE(review): frames_left and section_length are used as divisors and
+    // are assumed to be non-zero here (i.e. the stats describe at least one
+    // frame) - confirm against the callers.
+    const int section_target_bandwidth =
+        (int)(twopass->bits_left / frames_left);
+    const double section_length = twopass->total_left_stats.count;
+    const double section_error =
+        twopass->total_left_stats.coded_error / section_length;
+    const double section_intra_skip =
+        twopass->total_left_stats.intra_skip_pct / section_length;
+    const double section_inactive_zone =
+        (twopass->total_left_stats.inactive_zone_rows * 2) /
+        ((double)cm->mb_rows * section_length);
+    const int tmp_q = get_twopass_worst_quality(
+        cpi, section_error, section_intra_skip + section_inactive_zone,
+        section_target_bandwidth, DEFAULT_GRP_WEIGHT);
+
+    twopass->active_worst_quality = tmp_q;
+    twopass->baseline_active_worst_quality = tmp_q;
+    rc->ni_av_qi = tmp_q;
+    rc->last_q[INTER_FRAME] = tmp_q;
+    rc->avg_q = vp10_convert_qindex_to_q(tmp_q, cm->bit_depth);
+    rc->avg_frame_qindex[INTER_FRAME] = tmp_q;
+    rc->last_q[KEY_FRAME] = (tmp_q + cpi->oxcf.best_allowed_q) / 2;
+    rc->avg_frame_qindex[KEY_FRAME] = rc->last_q[KEY_FRAME];
+  }
+
+  vp10_zero(this_frame);
+  if (EOF == input_stats(twopass, &this_frame)) return;
+
+  // Set the frame content type flag.
+  if (this_frame.intra_skip_pct >= FC_ANIMATION_THRESH)
+    twopass->fr_content_type = FC_GRAPHICS_ANIMATION;
+  else
+    twopass->fr_content_type = FC_NORMAL;
+
+  // Keyframe and section processing.
+  if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) {
+    // find_next_key_frame() overwrites the stats it is given while scanning,
+    // so keep a copy and restore it afterwards.
+    FIRSTPASS_STATS this_frame_copy;
+    this_frame_copy = this_frame;
+    // Define next KF group and assign bits to it.
+    find_next_key_frame(cpi, &this_frame);
+    this_frame = this_frame_copy;
+  } else {
+    cm->frame_type = INTER_FRAME;
+  }
+
+  // Define a new GF/ARF group. (Should always enter here for key frames).
+  if (rc->frames_till_gf_update_due == 0) {
+    define_gf_group(cpi, &this_frame);
+
+    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+
+#if ARF_STATS_OUTPUT
+    {
+      FILE *fpfile;
+      fpfile = fopen("arf.stt", "a");
+      ++arf_count;
+      // Debug output only: skip it quietly if the file cannot be opened
+      // instead of handing a NULL stream to fprintf()/fclose().
+      if (fpfile != NULL) {
+        fprintf(fpfile, "%10d %10ld %10d %10d %10ld\n",
+                cm->current_video_frame, rc->frames_till_gf_update_due,
+                rc->kf_boost, arf_count, rc->gfu_boost);
+
+        fclose(fpfile);
+      }
+    }
+#endif
+  }
+
+  configure_buffer_updates(cpi);
+
+  // Do the firstpass stats indicate that this frame is skippable for the
+  // partition search?
+  if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2) {
+    cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
+  }
+
+  target_rate = gf_group->bit_allocation[gf_group->index];
+
+  if (cpi->common.frame_type == KEY_FRAME)
+    target_rate = vp10_rc_clamp_iframe_target_size(cpi, target_rate);
+  else
+    target_rate = vp10_rc_clamp_pframe_target_size(cpi, target_rate);
+
+  rc->base_frame_target = target_rate;
+
+  {
+    const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
+                            ? cpi->initial_mbs
+                            : cpi->common.MBs;
+    // The multiplication by 256 reverses a scaling factor of (>> 8)
+    // applied when combining MB error values for the frame.
+    twopass->mb_av_energy =
+        log(((this_frame.intra_error * 256.0) / num_mbs) + 1.0);
+  }
+
+  // Update the total stats remaining structure.
+  subtract_stats(&twopass->total_left_stats, &this_frame);
+}
+
+// Upper limits for twopass->extend_minq (see the clamp below); the CQ value
+// is used when the rate control mode is VPX_CQ.
+#define MINQ_ADJ_LIMIT 48
+#define MINQ_ADJ_LIMIT_CQ 20
+// A frame whose projected size is below base_frame_target divided by this
+// ratio is treated as a large undershoot.
+#define HIGH_UNDERSHOOT_RATIO 2
+// Post encode update of the two pass rate control state.
+//
+// Accounts for the bits of the frame just coded (bits_left, kf_group_bits,
+// vbr_bits_off_target), recomputes rc->rate_error_estimate, advances the GF
+// group index and, unless the mode is VPX_Q, adapts the extend_minq /
+// extend_maxq / extend_minq_fast adjustments according to the observed rate
+// drift.
+void vp10_twopass_postencode_update(VP10_COMP *cpi) {
+ TWO_PASS *const twopass = &cpi->twopass;
+ RATE_CONTROL *const rc = &cpi->rc;
+ // The budget is charged with the frame's target, not its actual size.
+ const int bits_used = rc->base_frame_target;
+
+ // VBR correction is done through rc->vbr_bits_off_target. Based on the
+ // sign of this value, a limited % adjustment is made to the target rate
+ // of subsequent frames, to try and push it back towards 0. This method
+ // is designed to prevent extreme behaviour at the end of a clip
+ // or group of frames.
+ rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size;
+ twopass->bits_left = VPXMAX(twopass->bits_left - bits_used, 0);
+
+ // Calculate the pct rc error.
+ if (rc->total_actual_bits) {
+ rc->rate_error_estimate =
+ (int)((rc->vbr_bits_off_target * 100) / rc->total_actual_bits);
+ rc->rate_error_estimate = clamp(rc->rate_error_estimate, -100, 100);
+ } else {
+ rc->rate_error_estimate = 0;
+ }
+
+ // Key frame bits are not deducted from the group total here.
+ if (cpi->common.frame_type != KEY_FRAME) {
+ twopass->kf_group_bits -= bits_used;
+ twopass->last_kfgroup_zeromotion_pct = twopass->kf_zeromotion_pct;
+ }
+ twopass->kf_group_bits = VPXMAX(twopass->kf_group_bits, 0);
+
+ // Increment the gf group index ready for the next frame.
+ ++twopass->gf_group.index;
+
+ // If the rate control is drifting consider adjustment to min or maxq.
+ if ((cpi->oxcf.rc_mode != VPX_Q) &&
+ (cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD) &&
+ !cpi->rc.is_src_frame_alt_ref) {
+ const int maxq_adj_limit =
+ rc->worst_quality - twopass->active_worst_quality;
+ const int minq_adj_limit =
+ (cpi->oxcf.rc_mode == VPX_CQ ? MINQ_ADJ_LIMIT_CQ : MINQ_ADJ_LIMIT);
+
+ // Undershoot.
+ if (rc->rate_error_estimate > cpi->oxcf.under_shoot_pct) {
+ --twopass->extend_maxq;
+ if (rc->rolling_target_bits >= rc->rolling_actual_bits)
+ ++twopass->extend_minq;
+ // Overshoot.
+ } else if (rc->rate_error_estimate < -cpi->oxcf.over_shoot_pct) {
+ --twopass->extend_minq;
+ if (rc->rolling_target_bits < rc->rolling_actual_bits)
+ ++twopass->extend_maxq;
+ } else {
+ // Adjustment for extreme local overshoot.
+ if (rc->projected_frame_size > (2 * rc->base_frame_target) &&
+ rc->projected_frame_size > (2 * rc->avg_frame_bandwidth))
+ ++twopass->extend_maxq;
+
+ // Unwind undershoot or overshoot adjustment.
+ if (rc->rolling_target_bits < rc->rolling_actual_bits)
+ --twopass->extend_minq;
+ else if (rc->rolling_target_bits > rc->rolling_actual_bits)
+ --twopass->extend_maxq;
+ }
+
+ // Keep both adjustments within [0, limit].
+ twopass->extend_minq = clamp(twopass->extend_minq, 0, minq_adj_limit);
+ twopass->extend_maxq = clamp(twopass->extend_maxq, 0, maxq_adj_limit);
+
+ // If there is a big and unexpected undershoot then feed the extra
+ // bits back in quickly. One situation where this may happen is if a
+ // frame is unexpectedly almost perfectly predicted by the ARF or GF
+ // but not very well predicted by the previous frame.
+ if (!frame_is_kf_gf_arf(cpi) && !cpi->rc.is_src_frame_alt_ref) {
+ int fast_extra_thresh = rc->base_frame_target / HIGH_UNDERSHOOT_RATIO;
+ if (rc->projected_frame_size < fast_extra_thresh) {
+ rc->vbr_bits_off_target_fast +=
+ fast_extra_thresh - rc->projected_frame_size;
+ // Cap the fast correction at four average frames' worth of bits.
+ rc->vbr_bits_off_target_fast =
+ VPXMIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth));
+
+ // Fast adaptation of minQ if necessary to use up the extra bits.
+ if (rc->avg_frame_bandwidth) {
+ twopass->extend_minq_fast =
+ (int)(rc->vbr_bits_off_target_fast * 8 / rc->avg_frame_bandwidth);
+ }
+ // The slow and fast adjustments together must not exceed the limit.
+ twopass->extend_minq_fast = VPXMIN(
+ twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq);
+ } else if (rc->vbr_bits_off_target_fast) {
+ twopass->extend_minq_fast = VPXMIN(
+ twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq);
+ } else {
+ twopass->extend_minq_fast = 0;
+ }
+ }
+ }
+}
diff --git a/av1/encoder/firstpass.h b/av1/encoder/firstpass.h
new file mode 100644
index 0000000..5623540
--- /dev/null
+++ b/av1/encoder/firstpass.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_FIRSTPASS_H_
+#define VP10_ENCODER_FIRSTPASS_H_
+
+#include "av1/encoder/lookahead.h"
+#include "av1/encoder/ratectrl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Per macroblock first pass statistics; only built with CONFIG_FP_MB_STATS.
+// Each mask below is a distinct single bit, so the whole set fits in one
+// uint8_t (presumably one such byte is stored per macroblock - confirm at
+// the point of use).
+#if CONFIG_FP_MB_STATS
+
+#define FPMB_DCINTRA_MASK 0x01
+
+#define FPMB_MOTION_ZERO_MASK 0x02
+#define FPMB_MOTION_LEFT_MASK 0x04
+#define FPMB_MOTION_RIGHT_MASK 0x08
+#define FPMB_MOTION_UP_MASK 0x10
+#define FPMB_MOTION_DOWN_MASK 0x20
+
+#define FPMB_ERROR_SMALL_MASK 0x40
+#define FPMB_ERROR_LARGE_MASK 0x80
+// NOTE(review): presumably the error values below/above which the SMALL and
+// LARGE error flags are set - confirm where they are used.
+#define FPMB_ERROR_SMALL_TH 2000
+#define FPMB_ERROR_LARGE_TH 48000
+
+// Byte range holding the macroblock stats.
+// NOTE(review): whether mb_stats_end is inclusive or one-past-the-end cannot
+// be told from this header - confirm.
+typedef struct {
+ uint8_t *mb_stats_start;
+ uint8_t *mb_stats_end;
+} FIRSTPASS_MB_STATS;
+#endif
+
+#if CONFIG_EXT_REFS
+// Length of the bi-predictive frame group (BFG)
+// NOTE: Currently each BFG contains one backward ref (BWF) frame plus a certain
+// number of bi-predictive frames.
+#define BFG_INTERVAL 2
+// The maximum number of extra ALT_REF's
+// NOTE: This number cannot be greater than 2 or the reference frame buffer will
+// overflow.
+#define MAX_EXT_ARFS 2
+// Scales the interval thresholds in get_number_of_extra_arfs() below: the
+// interval must reach a multiple of this value before extra ALT_REFs are
+// granted.
+#define MIN_EXT_ARF_INTERVAL 4
+#endif // CONFIG_EXT_REFS
+
+// Compared against twopass->gf_zeromotion_pct in
+// vp10_twopass_postencode_update(): the min/max Q drift adjustment is only
+// made while that value is below this threshold.
+// NOTE(review): the magnitude suggests gf_zeromotion_pct is scaled to
+// 0..1000 rather than 0..100 - confirm where it is computed.
+#define VLOW_MOTION_THRESHOLD 950
+
+// One record of first pass statistics. Every field is stored as a double.
+// The same type is also used for accumulated totals (see total_stats and
+// total_left_stats in TWO_PASS).
+typedef struct {
+ double frame;
+ double weight;
+ // Intra and coded (presumably best inter/intra) error scores - the exact
+ // definition is set by the first pass code, confirm there.
+ double intra_error;
+ double coded_error;
+ double sr_coded_error;
+ // Fractions of the frame (1.0 = 100%): coded inter, coded with motion,
+ // using the second reference, and neutral blocks.
+ double pcnt_inter;
+ double pcnt_motion;
+ double pcnt_second_ref;
+ double pcnt_neutral;
+ // Compared against FC_ANIMATION_THRESH to classify the frame content.
+ double intra_skip_pct;
+ double inactive_zone_rows; // Image mask rows top and bottom.
+ double inactive_zone_cols; // Image mask columns at left and right edges.
+ // Motion vector statistics (row / column components).
+ double MVr;
+ double mvr_abs;
+ double MVc;
+ double mvc_abs;
+ double MVrv;
+ double MVcv;
+ double mv_in_out_count;
+ double new_mv_count;
+ double duration;
+ // Number of frames this record covers; the totals use it as a frame count.
+ double count;
+} FIRSTPASS_STATS;
+
+// Kind of update a frame within a GF group performs. It is stored per frame
+// in GF_GROUP::update_type and selects which reference buffers are refreshed
+// in configure_buffer_updates(). FRAME_UPDATE_TYPES is the number of types
+// for the current build configuration.
+typedef enum {
+ KF_UPDATE = 0,
+ LF_UPDATE = 1,
+ GF_UPDATE = 2,
+ ARF_UPDATE = 3,
+ OVERLAY_UPDATE = 4,
+#if CONFIG_EXT_REFS
+ BRF_UPDATE = 5, // Backward Reference Frame
+ LAST_BIPRED_UPDATE = 6, // Last Bi-predictive Frame
+ BIPRED_UPDATE = 7, // Bi-predictive Frame, but not the last one
+ INTNL_OVERLAY_UPDATE = 8, // Internal Overlay Frame
+ FRAME_UPDATE_TYPES = 9
+#else
+ FRAME_UPDATE_TYPES = 5
+#endif // CONFIG_EXT_REFS
+} FRAME_UPDATE_TYPE;
+
+// A frame whose first pass intra_skip_pct is at or above this value is
+// classified as FC_GRAPHICS_ANIMATION (see vp10_rc_get_second_pass_params()).
+#define FC_ANIMATION_THRESH 0.15
+// Content classification of the current frame, stored in
+// TWO_PASS::fr_content_type. FRAME_CONTENT_TYPES is the number of classes.
+typedef enum {
+ FC_NORMAL = 0,
+ FC_GRAPHICS_ANIMATION = 1,
+ FRAME_CONTENT_TYPES = 2
+} FRAME_CONTENT_TYPE;
+
+// Per frame plan for one golden frame / alt ref group. All arrays have
+// (MAX_LAG_BUFFERS * 2) + 1 entries and are indexed by position in the group.
+typedef struct {
+ // Position of the current frame in the group; advanced after each frame
+ // in vp10_twopass_postencode_update().
+ unsigned char index;
+ RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1];
+ // Update type for each frame; drives configure_buffer_updates().
+ FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1];
+ unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
+ unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1];
+ unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1];
+#if CONFIG_EXT_REFS
+ unsigned char brf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
+ unsigned char bidir_pred_enabled[(MAX_LAG_BUFFERS * 2) + 1];
+#endif // CONFIG_EXT_REFS
+ // Bits allocated to each frame; read as the frame's target rate before
+ // clamping.
+ int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1];
+} GF_GROUP;
+
+// State of the two pass rate control: the first pass stats buffer and read
+// cursor, the remaining bit / error budgets, and the current GF group plan.
+typedef struct {
+ unsigned int section_intra_rating;
+ FIRSTPASS_STATS total_stats;
+ FIRSTPASS_STATS this_frame_stats;
+ // Read cursor into the first pass stats, and the bounds of that buffer.
+ // Reading stops once stats_in reaches stats_in_end.
+ const FIRSTPASS_STATS *stats_in;
+ const FIRSTPASS_STATS *stats_in_start;
+ const FIRSTPASS_STATS *stats_in_end;
+ // Totals for the frames not yet consumed; each frame's stats are
+ // subtracted as it is processed.
+ FIRSTPASS_STATS total_left_stats;
+ int first_pass_done;
+ // Bits remaining for the rest of the clip (never allowed below zero).
+ int64_t bits_left;
+ double modified_error_min;
+ double modified_error_max;
+ // Modified error of the frames not yet assigned to a key frame group.
+ double modified_error_left;
+ double mb_av_energy;
+
+#if CONFIG_FP_MB_STATS
+ uint8_t *frame_mb_stats_buf;
+ uint8_t *this_frame_mb_stats;
+ FIRSTPASS_MB_STATS firstpass_mb_stats;
+#endif
+ // An indication of the content type of the current frame
+ FRAME_CONTENT_TYPE fr_content_type;
+
+ // Projected total bits available for a key frame group of frames
+ int64_t kf_group_bits;
+
+ // Error score of frames still to be coded in kf group
+ int64_t kf_group_error_left;
+
+ // The fraction for a kf groups total bits allocated to the inter frames
+ double kfgroup_inter_fraction;
+
+ int sr_update_lag;
+
+ int kf_zeromotion_pct;
+ int last_kfgroup_zeromotion_pct;
+ int gf_zeromotion_pct;
+ int active_worst_quality;
+ int baseline_active_worst_quality;
+ // Adjustments adapted after each frame in
+ // vp10_twopass_postencode_update() in response to rate drift.
+ int extend_minq;
+ int extend_maxq;
+ int extend_minq_fast;
+
+ GF_GROUP gf_group;
+} TWO_PASS;
+
+struct VP10_COMP;
+
+// First pass entry points.
+void vp10_init_first_pass(struct VP10_COMP *cpi);
+void vp10_rc_get_first_pass_params(struct VP10_COMP *cpi);
+void vp10_first_pass(struct VP10_COMP *cpi,
+                     const struct lookahead_entry *source);
+void vp10_end_first_pass(struct VP10_COMP *cpi);
+
+// Second pass entry points.
+void vp10_init_second_pass(struct VP10_COMP *cpi);
+// Sets up the rate control parameters for the next frame from the first pass
+// stats (frame type, buffer updates and base frame target).
+void vp10_rc_get_second_pass_params(struct VP10_COMP *cpi);
+
+// Post encode update of the rate control parameters for 2-pass
+void vp10_twopass_postencode_update(struct VP10_COMP *cpi);
+
+void vp10_init_subsampling(struct VP10_COMP *cpi);
+
+void vp10_calculate_coded_size(struct VP10_COMP *cpi, int *scaled_frame_width,
+                               int *scaled_frame_height);
+
+#if CONFIG_EXT_REFS
+// Returns how many extra ALT_REF frames to use for a GF group of |interval|
+// frames. The answer is 0 unless |arf_pending| is non-zero and the build
+// allows extra ARFs; otherwise it grows with the interval, in steps of
+// MIN_EXT_ARF_INTERVAL, up to MAX_EXT_ARFS.
+static inline int get_number_of_extra_arfs(int interval, int arf_pending) {
+  if (!arf_pending || MAX_EXT_ARFS <= 0) return 0;
+
+  // Long enough for the full complement of extra ARFs.
+  if (interval >= MIN_EXT_ARF_INTERVAL * (MAX_EXT_ARFS + 1))
+    return MAX_EXT_ARFS;
+
+  // Long enough for one fewer.
+  if (interval >= MIN_EXT_ARF_INTERVAL * MAX_EXT_ARFS)
+    return MAX_EXT_ARFS - 1;
+
+  return 0;
+}
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_FIRSTPASS_H_
diff --git a/av1/encoder/global_motion.c b/av1/encoder/global_motion.c
new file mode 100644
index 0000000..cda6aaf
--- /dev/null
+++ b/av1/encoder/global_motion.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+#include <assert.h>
+
+#include "av1/common/warped_motion.h"
+
+#include "av1/encoder/segmentation.h"
+#include "av1/encoder/global_motion.h"
+#include "av1/encoder/corner_detect.h"
+#include "av1/encoder/corner_match.h"
+#include "av1/encoder/ransac.h"
+
+#define MAX_CORNERS 4096
+#define MIN_INLIER_PROB 0.1
+
+static INLINE RansacType get_ransac_type(TransformationType type) {
+  switch (type) {  // pick the RANSAC fitter that matches the motion model
+    case HOMOGRAPHY: return ransacHomography;
+    case AFFINE: return ransacAffine;
+    case ROTZOOM: return ransacRotZoom;
+    case TRANSLATION: return ransacTranslation;
+    default: assert(0); return NULL;  // unknown model: no fitter
+  }
+}
+
+// Fits a global motion model of the requested type to the correspondences
+// with RANSAC and returns the inlier count (0 when no fitter is available).
+static int compute_global_motion_params(TransformationType type,
+                                        double *correspondences,
+                                        int num_correspondences, double *params,
+                                        int *inlier_map) {
+  int status, inlier_count = 0;
+  const RansacType fit_model = get_ransac_type(type);
+  if (fit_model == NULL) return 0;
+
+  // A zero status with too small an inlier share is reported as no inliers;
+  // a non-zero status leaves the count exactly as the fitter wrote it.
+  status = fit_model(correspondences, num_correspondences, &inlier_count,
+                     inlier_map, params);
+  if (status == 0 && inlier_count < MIN_INLIER_PROB * num_correspondences)
+    inlier_count = 0;
+  return inlier_count;
+}
+
+// Fits a `type` motion model between frm and ref; returns 1 if inliers exist.
+int compute_global_motion_feature_based(TransformationType type,
+                                        YV12_BUFFER_CONFIG *frm,
+                                        YV12_BUFFER_CONFIG *ref,
+                                        double *params) {
+  int num_frm_corners, num_ref_corners, num_correspondences;
+  int num_inliers = 0;
+  int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS];
+  double *correspondences = NULL;
+  int *inlier_map = NULL;
+
+  // compute interest points in images using FAST features
+  num_frm_corners = FastCornerDetect(frm->y_buffer, frm->y_width, frm->y_height,
+                                     frm->y_stride, frm_corners, MAX_CORNERS);
+  num_ref_corners = FastCornerDetect(ref->y_buffer, ref->y_width, ref->y_height,
+                                     ref->y_stride, ref_corners, MAX_CORNERS);
+
+  // find correspondences; without a buffer to hold them there is no model
+  correspondences = malloc(num_frm_corners * 4 * sizeof(*correspondences));
+  if (correspondences == NULL) return 0;
+  num_correspondences = determine_correspondence(
+      frm->y_buffer, (int *)frm_corners, num_frm_corners, ref->y_buffer,
+      (int *)ref_corners, num_ref_corners, frm->y_width, frm->y_height,
+      frm->y_stride, ref->y_stride, correspondences);
+  inlier_map = malloc(num_correspondences * sizeof(*inlier_map));
+  if (inlier_map != NULL)  // skip the fit (0 inliers) if the map is missing
+    num_inliers = compute_global_motion_params(
+        type, correspondences, num_correspondences, params, inlier_map);
+  free(correspondences);
+  free(inlier_map);
+  return (num_inliers > 0);
+}
diff --git a/av1/encoder/global_motion.h b/av1/encoder/global_motion.h
new file mode 100644
index 0000000..ed088d6
--- /dev/null
+++ b/av1/encoder/global_motion.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_GLOBAL_MOTION_H_
+#define VP10_ENCODER_GLOBAL_MOTION_H_
+
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// compute global motion parameters between two frames
+int compute_global_motion_feature_based(TransformationType type,
+ YV12_BUFFER_CONFIG *frm,
+ YV12_BUFFER_CONFIG *ref,
+ double *params);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif // VP10_ENCODER_GLOBAL_MOTION_H_
diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c
new file mode 100644
index 0000000..ccfab0a
--- /dev/null
+++ b/av1/encoder/hybrid_fwd_txfm.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "av1/common/idct.h"
+#include "av1/encoder/hybrid_fwd_txfm.h"
+
+static INLINE void fdct32x32(int rd_transform, const int16_t *src,
+                             tran_low_t *dst, int src_stride) {
+  if (!rd_transform)  // zero: regular 32x32 forward DCT
+    vpx_fdct32x32(src, dst, src_stride);
+  else  // non-zero: the _rd variant
+    vpx_fdct32x32_rd(src, dst, src_stride);
+}
+
+static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
+                         int diff_stride, TX_TYPE tx_type, int lossless) {
+  // Lossless blocks bypass the switch and must carry DCT_DCT.
+  if (lossless) {
+    assert(tx_type == DCT_DCT);
+    vp10_fwht4x4(src_diff, coeff, diff_stride);
+    return;
+  }
+  switch (tx_type) {
+#if CONFIG_EXT_TX
+    case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type); break;
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+#endif  // CONFIG_EXT_TX
+    case DCT_DCT:
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST: vp10_fht4x4(src_diff, coeff, diff_stride, tx_type); break;
+    default: assert(0);
+  }
+}
+
+#if CONFIG_EXT_TX
+static void fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void)fwd_txfm_opt;
+ vp10_fht4x8(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void)fwd_txfm_opt;
+ vp10_fht8x4(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void)fwd_txfm_opt;
+ vp10_fht8x16(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void)fwd_txfm_opt;
+ vp10_fht16x8(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void)fwd_txfm_opt;
+ vp10_fht16x32(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ (void)fwd_txfm_opt;
+ vp10_fht32x16(src_diff, coeff, diff_stride, tx_type);
+}
+#endif // CONFIG_EXT_TX
+
+static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+ vp10_fht8x8(src_diff, coeff, diff_stride, tx_type);
+ else // FWD_TXFM_OPT_DC
+ vpx_fdct8x8_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST: vp10_fht8x8(src_diff, coeff, diff_stride, tx_type); break;
+ case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type); break;
+#endif // CONFIG_EXT_TX
+ default: assert(0);
+ }
+}
+
+static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt) {
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
+ vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ else // FWD_TXFM_OPT_DC
+ vpx_fdct16x16_1(src_diff, coeff, diff_stride);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
+ vp10_fht16x16(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0);
+ }
+}
+
+static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
+                           tran_low_t *coeff, int diff_stride, TX_TYPE tx_type,
+                           FWD_TXFM_OPT fwd_txfm_opt) {
+  switch (tx_type) {
+    case DCT_DCT:
+      // Only DCT_DCT honours fwd_txfm_opt; anything but NORMAL takes the
+      // vpx_fdct32x32_1 path.
+      if (fwd_txfm_opt != FWD_TXFM_OPT_NORMAL)
+        vpx_fdct32x32_1(src_diff, coeff, diff_stride);
+      else
+        fdct32x32(rd_transform, src_diff, coeff, diff_stride);
+      break;
+#if CONFIG_EXT_TX
+    case ADST_DCT:
+    case DCT_ADST:
+    case ADST_ADST:
+    case FLIPADST_DCT:
+    case DCT_FLIPADST:
+    case FLIPADST_FLIPADST:
+    case ADST_FLIPADST:
+    case FLIPADST_ADST:
+    case V_DCT:
+    case H_DCT:
+    case V_ADST:
+    case H_ADST:
+    case V_FLIPADST:
+    case H_FLIPADST:
+      vp10_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
+      break;
+    case IDTX:
+      vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type, int lossless,
+ const int bd) {
+ if (lossless) {
+ assert(tx_type == DCT_DCT);
+ vp10_highbd_fwht4x4(src_diff, coeff, diff_stride);
+ return;
+ }
+
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd);
+ break;
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
+ vp10_highbd_fht4x4_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type); break;
+#endif // CONFIG_EXT_TX
+ default: assert(0);
+ }
+}
+
+#if CONFIG_EXT_TX
+static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
+ (void)fwd_txfm_opt;
+ (void)bd;
+ vp10_highbd_fht4x8(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
+ (void)fwd_txfm_opt;
+ (void)bd;
+ vp10_highbd_fht8x4(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void highbd_fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
+ (void)fwd_txfm_opt;
+ (void)bd;
+ vp10_highbd_fht8x16(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void highbd_fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
+ (void)fwd_txfm_opt;
+ (void)bd;
+ vp10_highbd_fht16x8(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void highbd_fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
+ (void)fwd_txfm_opt;
+ (void)bd;
+ vp10_highbd_fht16x32(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
+ (void)fwd_txfm_opt;
+ (void)bd;
+ vp10_highbd_fht32x16(src_diff, coeff, diff_stride, tx_type);
+}
+#endif // CONFIG_EXT_TX
+
+static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
+ (void)fwd_txfm_opt;
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd);
+ break;
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht8x8_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX: vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type); break;
+#endif // CONFIG_EXT_TX
+ default: assert(0);
+ }
+}
+
+static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, TX_TYPE tx_type,
+ FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
+ (void)fwd_txfm_opt;
+ switch (tx_type) {
+ case DCT_DCT:
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ vp10_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd);
+ break;
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
+ // Use C version since DST exists only in C
+ vp10_highbd_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0);
+ }
+}
+
+static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
+ tran_low_t *coeff, int diff_stride,
+ TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt,
+ const int bd) {
+ (void)rd_transform;
+ (void)fwd_txfm_opt;
+ switch (tx_type) {
+ case DCT_DCT:
+ vp10_fwd_txfm2d_32x32(src_diff, coeff, diff_stride, tx_type, bd);
+ break;
+#if CONFIG_EXT_TX
+ case ADST_DCT:
+ case DCT_ADST:
+ case ADST_ADST:
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case V_DCT:
+ case H_DCT:
+ case V_ADST:
+ case H_ADST:
+ case V_FLIPADST:
+ case H_FLIPADST:
+ vp10_highbd_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
+ break;
+ case IDTX:
+ vp10_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
+              FWD_TXFM_PARAM *fwd_txfm_param) {
+  // Per-size dispatch; lossless feeds 4x4 only, rd_transform 32x32 only.
+  const TX_SIZE size = fwd_txfm_param->tx_size;
+  const TX_TYPE type = fwd_txfm_param->tx_type;
+  const int opt = fwd_txfm_param->fwd_txfm_opt;
+  switch (size) {
+    case TX_4X4:
+      fwd_txfm_4x4(src_diff, coeff, diff_stride, type,
+                   fwd_txfm_param->lossless);
+      break;
+    case TX_8X8:
+      fwd_txfm_8x8(src_diff, coeff, diff_stride, type, opt);
+      break;
+    case TX_16X16:
+      fwd_txfm_16x16(src_diff, coeff, diff_stride, type, opt);
+      break;
+    case TX_32X32:
+      fwd_txfm_32x32(fwd_txfm_param->rd_transform, src_diff, coeff,
+                     diff_stride, type, opt);
+      break;
+#if CONFIG_EXT_TX
+    case TX_4X8:
+      fwd_txfm_4x8(src_diff, coeff, diff_stride, type, opt);
+      break;
+    case TX_8X4:
+      fwd_txfm_8x4(src_diff, coeff, diff_stride, type, opt);
+      break;
+    case TX_8X16:
+      fwd_txfm_8x16(src_diff, coeff, diff_stride, type, opt);
+      break;
+    case TX_16X8:
+      fwd_txfm_16x8(src_diff, coeff, diff_stride, type, opt);
+      break;
+    case TX_16X32:
+      fwd_txfm_16x32(src_diff, coeff, diff_stride, type, opt);
+      break;
+    case TX_32X16:
+      fwd_txfm_32x16(src_diff, coeff, diff_stride, type, opt);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); break;
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param) {
+ const int fwd_txfm_opt = fwd_txfm_param->fwd_txfm_opt;
+ const TX_TYPE tx_type = fwd_txfm_param->tx_type;
+ const TX_SIZE tx_size = fwd_txfm_param->tx_size;
+ const int rd_transform = fwd_txfm_param->rd_transform;
+ const int lossless = fwd_txfm_param->lossless;
+ const int bd = fwd_txfm_param->bd;
+ switch (tx_size) {
+ case TX_32X32:
+ highbd_fwd_txfm_32x32(rd_transform, src_diff, coeff, diff_stride, tx_type,
+ fwd_txfm_opt, bd);
+ break;
+ case TX_16X16:
+ highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
+ bd);
+ break;
+ case TX_8X8:
+ highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
+ bd);
+ break;
+#if CONFIG_EXT_TX
+ case TX_4X8:
+ highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
+ bd);
+ break;
+ case TX_8X4:
+ highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
+ bd);
+ break;
+ case TX_8X16:
+ highbd_fwd_txfm_8x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
+ bd);
+ break;
+ case TX_16X8:
+ highbd_fwd_txfm_16x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
+ bd);
+ break;
+ case TX_16X32:
+ highbd_fwd_txfm_16x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
+ bd);
+ break;
+ case TX_32X16:
+ highbd_fwd_txfm_32x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
+ bd);
+ break;
+#endif // CONFIG_EXT_TX
+ case TX_4X4:
+ highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless, bd);
+ break;
+ default: assert(0); break;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/encoder/hybrid_fwd_txfm.h b/av1/encoder/hybrid_fwd_txfm.h
new file mode 100644
index 0000000..07b832c
--- /dev/null
+++ b/av1/encoder/hybrid_fwd_txfm.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_HYBRID_FWD_TXFM_H_
+#define VP10_ENCODER_HYBRID_FWD_TXFM_H_
+
+#include "./vpx_config.h"
+
+typedef enum FWD_TXFM_OPT { FWD_TXFM_OPT_NORMAL, FWD_TXFM_OPT_DC } FWD_TXFM_OPT;
+
+typedef struct FWD_TXFM_PARAM {
+ TX_TYPE tx_type;
+ TX_SIZE tx_size;
+ FWD_TXFM_OPT fwd_txfm_opt;
+ int rd_transform;
+ int lossless;
+#if CONFIG_VP9_HIGHBITDEPTH
+ int bd;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+} FWD_TXFM_PARAM;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
+ FWD_TXFM_PARAM *fwd_txfm_param);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
+ int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_HYBRID_FWD_TXFM_H_
diff --git a/av1/encoder/lookahead.c b/av1/encoder/lookahead.c
new file mode 100644
index 0000000..3c4ff7d
--- /dev/null
+++ b/av1/encoder/lookahead.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <assert.h>
+#include <stdlib.h>
+
+#include "./vpx_config.h"
+
+#include "av1/common/common.h"
+
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/extend.h"
+#include "av1/encoder/lookahead.h"
+
+/* Hand back the entry at *idx, then step *idx forward, wrapping at max_sz */
+static struct lookahead_entry *pop(struct lookahead_ctx *ctx, int *idx) {
+  const int slot = *idx;
+  int next = slot + 1;
+
+  assert(slot < ctx->max_sz);
+  if (next >= ctx->max_sz) next -= ctx->max_sz;
+  *idx = next;
+  return &ctx->buf[slot];
+}
+
+void vp10_lookahead_destroy(struct lookahead_ctx *ctx) {
+  int slot;
+  if (ctx == NULL) return;
+  // Release each slot's frame buffer before the slot array, then the context.
+  if (ctx->buf != NULL) {
+    for (slot = 0; slot < ctx->max_sz; ++slot)
+      vpx_free_frame_buffer(&ctx->buf[slot].img);
+    free(ctx->buf);
+  }
+  free(ctx);
+}
+
+struct lookahead_ctx *vp10_lookahead_init(unsigned int width,
+ unsigned int height,
+ unsigned int subsampling_x,
+ unsigned int subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ int use_highbitdepth,
+#endif
+ unsigned int depth) {
+ struct lookahead_ctx *ctx = NULL;
+
+ // Clamp the lookahead queue depth
+ depth = clamp(depth, 1, MAX_LAG_BUFFERS);
+
+ // Allocate memory to keep previous source frames available.
+ depth += MAX_PRE_FRAMES;
+
+ // Allocate the lookahead structures
+ ctx = calloc(1, sizeof(*ctx));
+ if (ctx) {
+ const int legacy_byte_alignment = 0;
+ unsigned int i;
+ ctx->max_sz = depth;
+ ctx->buf = calloc(depth, sizeof(*ctx->buf));
+ if (!ctx->buf) goto bail;
+ for (i = 0; i < depth; i++)
+ if (vpx_alloc_frame_buffer(
+ &ctx->buf[i].img, width, height, subsampling_x, subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ use_highbitdepth,
+#endif
+ VPX_ENC_BORDER_IN_PIXELS, legacy_byte_alignment))
+ goto bail;
+ }
+ return ctx;
+bail:
+ vp10_lookahead_destroy(ctx);
+ return NULL;
+}
+
+#define USE_PARTIAL_COPY 0
+
+int vp10_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
+ int64_t ts_start, int64_t ts_end,
+#if CONFIG_VP9_HIGHBITDEPTH
+ int use_highbitdepth,
+#endif
+ unsigned int flags) {
+ struct lookahead_entry *buf;
+#if USE_PARTIAL_COPY
+ int row, col, active_end;
+ int mb_rows = (src->y_height + 15) >> 4;
+ int mb_cols = (src->y_width + 15) >> 4;
+#endif
+ int width = src->y_crop_width;
+ int height = src->y_crop_height;
+ int uv_width = src->uv_crop_width;
+ int uv_height = src->uv_crop_height;
+ int subsampling_x = src->subsampling_x;
+ int subsampling_y = src->subsampling_y;
+ int larger_dimensions, new_dimensions;
+
+ if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1;
+ ctx->sz++;
+ buf = pop(ctx, &ctx->write_idx);
+
+ new_dimensions = width != buf->img.y_crop_width ||
+ height != buf->img.y_crop_height ||
+ uv_width != buf->img.uv_crop_width ||
+ uv_height != buf->img.uv_crop_height;
+ larger_dimensions = width > buf->img.y_width || height > buf->img.y_height ||
+ uv_width > buf->img.uv_width ||
+ uv_height > buf->img.uv_height;
+ assert(!larger_dimensions || new_dimensions);
+
+#if USE_PARTIAL_COPY
+ // TODO(jkoleszar): This is disabled for now, as
+ // vp10_copy_and_extend_frame_with_rect is not subsampling/alpha aware.
+
+ // Only do this partial copy if the following conditions are all met:
+  // 1. Lookahead queue has a size of 1.
+ // 2. Active map is provided.
+ // 3. This is not a key frame, golden nor altref frame.
+ if (!new_dimensions && ctx->max_sz == 1 && active_map && !flags) {
+ for (row = 0; row < mb_rows; ++row) {
+ col = 0;
+
+ while (1) {
+ // Find the first active macroblock in this row.
+ for (; col < mb_cols; ++col) {
+ if (active_map[col]) break;
+ }
+
+ // No more active macroblock in this row.
+ if (col == mb_cols) break;
+
+ // Find the end of active region in this row.
+ active_end = col;
+
+ for (; active_end < mb_cols; ++active_end) {
+ if (!active_map[active_end]) break;
+ }
+
+ // Only copy this active region.
+ vp10_copy_and_extend_frame_with_rect(src, &buf->img, row << 4, col << 4,
+ 16, (active_end - col) << 4);
+
+ // Start again from the end of this active region.
+ col = active_end;
+ }
+
+ active_map += mb_cols;
+ }
+ } else {
+#endif
+ if (larger_dimensions) {
+ YV12_BUFFER_CONFIG new_img;
+ memset(&new_img, 0, sizeof(new_img));
+ if (vpx_alloc_frame_buffer(&new_img, width, height, subsampling_x,
+ subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ use_highbitdepth,
+#endif
+ VPX_ENC_BORDER_IN_PIXELS, 0))
+ return 1;
+ vpx_free_frame_buffer(&buf->img);
+ buf->img = new_img;
+ } else if (new_dimensions) {
+ buf->img.y_crop_width = src->y_crop_width;
+ buf->img.y_crop_height = src->y_crop_height;
+ buf->img.uv_crop_width = src->uv_crop_width;
+ buf->img.uv_crop_height = src->uv_crop_height;
+ buf->img.subsampling_x = src->subsampling_x;
+ buf->img.subsampling_y = src->subsampling_y;
+ }
+ // Partial copy not implemented yet
+ vp10_copy_and_extend_frame(src, &buf->img);
+#if USE_PARTIAL_COPY
+ }
+#endif
+
+ buf->ts_start = ts_start;
+ buf->ts_end = ts_end;
+ buf->flags = flags;
+ return 0;
+}
+
+struct lookahead_entry *vp10_lookahead_pop(struct lookahead_ctx *ctx,
+                                           int drain) {
+  struct lookahead_entry *entry;
+  if (ctx == NULL || ctx->sz == 0) return NULL;
+  // Unless draining, only release a frame once the queue is at full depth.
+  if (!drain && ctx->sz != ctx->max_sz - MAX_PRE_FRAMES) return NULL;
+  entry = pop(ctx, &ctx->read_idx);
+  ctx->sz--;
+  return entry;
+}
+
+// Looks at a queue entry without removing it.
+// A non-negative index counts forward from the read position (0 == next
+// frame); a negative index counts backward from it.
+// Returns NULL when the index is out of reach.
+struct lookahead_entry *vp10_lookahead_peek(struct lookahead_ctx *ctx,
+                                            int index) {
+  int slot;
+
+  if (index >= 0) {
+    // Forward peek: only frames currently queued are visible.
+    if (index >= ctx->sz) return NULL;
+    slot = ctx->read_idx + index;
+    if (slot >= ctx->max_sz) slot -= ctx->max_sz;
+  } else {
+    // Backward peek: reaches at most MAX_PRE_FRAMES slots behind read_idx.
+    if (-index > MAX_PRE_FRAMES) return NULL;
+    slot = ctx->read_idx + index;
+    if (slot < 0) slot += ctx->max_sz;
+  }
+
+  return &ctx->buf[slot];
+}
+
+unsigned int vp10_lookahead_depth(struct lookahead_ctx *ctx) { return ctx->sz; }
diff --git a/av1/encoder/lookahead.h b/av1/encoder/lookahead.h
new file mode 100644
index 0000000..4b26068
--- /dev/null
+++ b/av1/encoder/lookahead.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_LOOKAHEAD_H_
+#define VP10_ENCODER_LOOKAHEAD_H_
+
+#include "aom_scale/yv12config.h"
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_LAG_BUFFERS 25
+
+struct lookahead_entry {
+ YV12_BUFFER_CONFIG img;
+ int64_t ts_start;
+ int64_t ts_end;
+ unsigned int flags;
+};
+
+// The max of past frames we want to keep in the queue.
+#define MAX_PRE_FRAMES 1
+
+struct lookahead_ctx {
+ int max_sz; /* Absolute size of the queue */
+ int sz; /* Number of buffers currently in the queue */
+ int read_idx; /* Read index */
+ int write_idx; /* Write index */
+ struct lookahead_entry *buf; /* Buffer list */
+};
+
+/**\brief Initializes the lookahead stage
+ *
+ * The lookahead stage is a queue of frame buffers on which some analysis
+ * may be done when buffers are enqueued.
+ */
+struct lookahead_ctx *vp10_lookahead_init(unsigned int width,
+ unsigned int height,
+ unsigned int subsampling_x,
+ unsigned int subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ int use_highbitdepth,
+#endif
+ unsigned int depth);
+
+/**\brief Destroys the lookahead stage
+ */
+void vp10_lookahead_destroy(struct lookahead_ctx *ctx);
+
+/**\brief Enqueue a source buffer
+ *
+ * This function will copy the source image into a new framebuffer with
+ * the expected stride/border.
+ *
+ * The whole frame is always copied: the active-map based partial copy is
+ * compiled out (USE_PARTIAL_COPY is 0) and no active_map is passed in.
+ *
+ * \param[in] ctx Pointer to the lookahead context
+ * \param[in] src Pointer to the image to enqueue
+ * \param[in] ts_start Timestamp for the start of this frame
+ * \param[in] ts_end Timestamp for the end of this frame
+ * \param[in] flags Flags set on this frame
+ * \return 0 on success, 1 if the queue is full or reallocation fails
+ */
+int vp10_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
+ int64_t ts_start, int64_t ts_end,
+#if CONFIG_VP9_HIGHBITDEPTH
+ int use_highbitdepth,
+#endif
+ unsigned int flags);
+
+/**\brief Get the next source buffer to encode
+ *
+ *
+ * \param[in] ctx Pointer to the lookahead context
+ * \param[in] drain Flag indicating the buffer should be drained
+ * (return a buffer regardless of the current queue depth)
+ *
+ * \retval NULL, if drain set and queue is empty
+ * \retval NULL, if drain not set and queue not of the configured depth
+ */
+struct lookahead_entry *vp10_lookahead_pop(struct lookahead_ctx *ctx,
+ int drain);
+
+/**\brief Get a future source buffer to encode
+ *
+ * \param[in] ctx Pointer to the lookahead context
+ * \param[in] index Index of the frame to be returned, 0 == next frame
+ *
+ * \retval NULL, if no buffer exists at the specified index
+ */
+struct lookahead_entry *vp10_lookahead_peek(struct lookahead_ctx *ctx,
+ int index);
+
+/**\brief Get the number of frames currently in the lookahead queue
+ *
+ * \param[in] ctx Pointer to the lookahead context
+ */
+unsigned int vp10_lookahead_depth(struct lookahead_ctx *ctx);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_LOOKAHEAD_H_
diff --git a/av1/encoder/mbgraph.c b/av1/encoder/mbgraph.c
new file mode 100644
index 0000000..43f0f87
--- /dev/null
+++ b/av1/encoder/mbgraph.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/system_state.h"
+#include "av1/encoder/segmentation.h"
+#include "av1/encoder/mcomp.h"
+#include "av1/common/blockd.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/reconintra.h"
+
+static unsigned int do_16x16_motion_iteration(VP10_COMP *cpi, const MV *ref_mv,
+ int mb_row, int mb_col) {
+ MACROBLOCK *const x = &cpi->td.mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
+ const vpx_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
+
+ const int tmp_col_min = x->mv_col_min;
+ const int tmp_col_max = x->mv_col_max;
+ const int tmp_row_min = x->mv_row_min;
+ const int tmp_row_max = x->mv_row_max;
+ MV ref_full;
+ int cost_list[5];
+
+ // Further step/diamond searches as necessary
+ int step_param = mv_sf->reduce_first_step_size;
+ step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
+
+ vp10_set_mv_search_range(x, ref_mv);
+
+ ref_full.col = ref_mv->col >> 3;
+ ref_full.row = ref_mv->row >> 3;
+
+ /*cpi->sf.search_method == HEX*/
+ vp10_hex_search(x, &ref_full, step_param, x->errorperbit, 0,
+ cond_cost_list(cpi, cost_list), &v_fn_ptr, 0, ref_mv);
+
+ // Try sub-pixel MC
+ // if (bestsme > error_thresh && bestsme < INT_MAX)
+ {
+ int distortion;
+ unsigned int sse;
+ cpi->find_fractional_mv_step(x, ref_mv, cpi->common.allow_high_precision_mv,
+ x->errorperbit, &v_fn_ptr, 0,
+ mv_sf->subpel_iters_per_step,
+ cond_cost_list(cpi, cost_list), NULL, NULL,
+ &distortion, &sse, NULL, 0, 0, 0);
+ }
+
+#if CONFIG_EXT_INTER
+ if (has_second_ref(&xd->mi[0]->mbmi))
+ xd->mi[0]->mbmi.mode = NEW_NEWMV;
+ else
+#endif // CONFIG_EXT_INTER
+ xd->mi[0]->mbmi.mode = NEWMV;
+
+ xd->mi[0]->mbmi.mv[0] = x->best_mv;
+#if CONFIG_EXT_INTER
+ xd->mi[0]->mbmi.ref_frame[1] = NONE;
+#endif // CONFIG_EXT_INTER
+
+ vp10_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16);
+
+ /* restore UMV window */
+ x->mv_col_min = tmp_col_min;
+ x->mv_col_max = tmp_col_max;
+ x->mv_row_min = tmp_row_min;
+ x->mv_row_max = tmp_row_max;
+
+ return vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+ xd->plane[0].dst.buf, xd->plane[0].dst.stride);
+}
+
+static int do_16x16_motion_search(VP10_COMP *cpi, const MV *ref_mv, int mb_row,
+ int mb_col) {
+ MACROBLOCK *const x = &cpi->td.mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ unsigned int err, tmp_err;
+ MV best_mv;
+
+ // Try zero MV first
+ // FIXME should really use something like near/nearest MV and/or MV prediction
+ err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+ xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
+ best_mv.col = best_mv.row = 0;
+
+ // Test last reference frame using the previous best mv as the
+ // starting point (best reference) for the search
+ tmp_err = do_16x16_motion_iteration(cpi, ref_mv, mb_row, mb_col);
+ if (tmp_err < err) {
+ err = tmp_err;
+ best_mv = x->best_mv.as_mv;
+ }
+
+ // If the current best reference mv is not centered on 0,0 then do a 0,0
+ // based search as well.
+ if (ref_mv->row != 0 || ref_mv->col != 0) {
+ unsigned int tmp_err;
+ MV zero_ref_mv = { 0, 0 };
+
+ tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, mb_row, mb_col);
+ if (tmp_err < err) {
+ err = tmp_err;
+ best_mv = x->best_mv.as_mv;
+ }
+ }
+
+ x->best_mv.as_mv = best_mv;
+ return err;
+}
+
+static int do_16x16_zerozero_search(VP10_COMP *cpi, int_mv *dst_mv) {
+ MACROBLOCK *const x = &cpi->td.mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ unsigned int err;
+
+ // Only the zero MV is evaluated here: 16x16 SAD between the block at
+ // plane[0].src and the block at plane[0].pre[0]; no motion search is run.
+ err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+ xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride);
+
+ dst_mv->as_int = 0;  // report the zero vector to the caller
+
+ return err;
+}
+static int find_best_16x16_intra(VP10_COMP *cpi, PREDICTION_MODE *pbest_mode) {
+ MACROBLOCK *const x = &cpi->td.mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ PREDICTION_MODE best_mode = -1, mode;
+ unsigned int best_err = INT_MAX;
+
+ // calculate the 16x16 SAD for each intra mode from DC_PRED to TM_PRED;
+ // we're intentionally not doing 4x4, we just want a rough estimate
+ for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+ unsigned int err;
+
+ xd->mi[0]->mbmi.mode = mode;
+ vp10_predict_intra_block(xd, 2, 2, TX_16X16, mode, x->plane[0].src.buf,
+ x->plane[0].src.stride, xd->plane[0].dst.buf,
+ xd->plane[0].dst.stride, 0, 0, 0);
+ err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
+ xd->plane[0].dst.buf, xd->plane[0].dst.stride);
+
+ // keep the mode with the lowest SAD seen so far
+ if (err < best_err) {
+ best_err = err;
+ best_mode = mode;
+ }
+ }
+
+ if (pbest_mode) *pbest_mode = best_mode;  // output pointer is optional
+
+ return best_err;
+}
+
+static void update_mbgraph_mb_stats(VP10_COMP *cpi, MBGRAPH_MB_STATS *stats,
+ YV12_BUFFER_CONFIG *buf, int mb_y_offset,
+ YV12_BUFFER_CONFIG *golden_ref,
+ const MV *prev_golden_ref_mv,
+ YV12_BUFFER_CONFIG *alt_ref, int mb_row,
+ int mb_col) {
+ MACROBLOCK *const x = &cpi->td.mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int intra_error;
+ VP10_COMMON *cm = &cpi->common;
+
+ // FIXME in practice we're completely ignoring chroma here
+ x->plane[0].src.buf = buf->y_buffer + mb_y_offset;
+ x->plane[0].src.stride = buf->y_stride;
+
+ xd->plane[0].dst.buf = get_frame_new_buffer(cm)->y_buffer + mb_y_offset;
+ xd->plane[0].dst.stride = get_frame_new_buffer(cm)->y_stride;
+
+ // do intra 16x16 prediction
+ intra_error = find_best_16x16_intra(cpi, &stats->ref[INTRA_FRAME].m.mode);
+ if (intra_error <= 0) intra_error = 1;  // stored intra error is always >= 1
+ stats->ref[INTRA_FRAME].err = intra_error;
+
+ // Golden frame MV search, only if a golden reference buffer was supplied
+ if (golden_ref) {
+ int g_motion_error;
+ xd->plane[0].pre[0].buf = golden_ref->y_buffer + mb_y_offset;
+ xd->plane[0].pre[0].stride = golden_ref->y_stride;
+ g_motion_error =
+ do_16x16_motion_search(cpi, prev_golden_ref_mv, mb_row, mb_col);
+ stats->ref[GOLDEN_FRAME].m.mv = x->best_mv;
+ stats->ref[GOLDEN_FRAME].err = g_motion_error;
+ } else {
+ stats->ref[GOLDEN_FRAME].err = INT_MAX;
+ stats->ref[GOLDEN_FRAME].m.mv.as_int = 0;
+ }
+
+ // Alt-ref frame, if supplied: no search is run, only the zero-MV error is
+ // measured via do_16x16_zerozero_search().
+ if (alt_ref) {
+ int a_motion_error;
+ xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset;
+ xd->plane[0].pre[0].stride = alt_ref->y_stride;
+ a_motion_error =
+ do_16x16_zerozero_search(cpi, &stats->ref[ALTREF_FRAME].m.mv);
+
+ stats->ref[ALTREF_FRAME].err = a_motion_error;
+ } else {
+ stats->ref[ALTREF_FRAME].err = INT_MAX;
+ stats->ref[ALTREF_FRAME].m.mv.as_int = 0;
+ }
+}
+
+// Runs update_mbgraph_mb_stats() on every 16x16 macroblock of buf in raster
+// order, seeding each golden search with the left (column 0: upper) MB's MV.
+static void update_mbgraph_frame_stats(
+ VP10_COMP *cpi, MBGRAPH_FRAME_STATS *stats, YV12_BUFFER_CONFIG *buf,
+ YV12_BUFFER_CONFIG *golden_ref, YV12_BUFFER_CONFIG *alt_ref) {
+ MACROBLOCK *const x = &cpi->td.mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ VP10_COMMON *const cm = &cpi->common;
+
+ int mb_col, mb_row, offset = 0;
+ int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
+ MV gld_top_mv = { 0, 0 };
+ MODE_INFO mi_local;
+
+ vp10_zero(mi_local);
+ // Set up limit values for motion vectors to prevent them extending outside
+ // the UMV borders.
+ x->mv_row_min = -BORDER_MV_PIXELS_B16;
+ x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
+ xd->up_available = 0;
+ xd->plane[0].dst.stride = buf->y_stride;
+ xd->plane[0].pre[0].stride = buf->y_stride;
+ xd->plane[1].dst.stride = buf->uv_stride;
+ xd->mi[0] = &mi_local;  // NOTE(review): not restored before returning
+ mi_local.mbmi.sb_type = BLOCK_16X16;
+ mi_local.mbmi.ref_frame[0] = LAST_FRAME;
+ mi_local.mbmi.ref_frame[1] = NONE;
+
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
+ MV gld_left_mv = gld_top_mv;
+ int mb_y_in_offset = mb_y_offset;
+ int arf_y_in_offset = arf_y_offset;
+ int gld_y_in_offset = gld_y_offset;
+
+ // Set up limit values for motion vectors to prevent them extending outside
+ // the UMV borders.
+ x->mv_col_min = -BORDER_MV_PIXELS_B16;
+ x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
+ xd->left_available = 0;
+
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+ MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col];
+
+ update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset, golden_ref,
+ &gld_left_mv, alt_ref, mb_row, mb_col);
+ gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv;
+ if (mb_col == 0) {
+ gld_top_mv = gld_left_mv;
+ }
+ xd->left_available = 1;
+ mb_y_in_offset += 16;
+ gld_y_in_offset += 16;
+ arf_y_in_offset += 16;
+ x->mv_col_min -= 16;
+ x->mv_col_max -= 16;
+ }
+ xd->up_available = 1;
+ mb_y_offset += buf->y_stride * 16;
+ if (golden_ref) gld_y_offset += golden_ref->y_stride * 16;  // may be NULL
+ if (alt_ref) arf_y_offset += alt_ref->y_stride * 16;
+ x->mv_row_min -= 16;
+ x->mv_row_max -= 16;
+ offset += cm->mb_cols;
+ }
+}
+
+// Labels each MI as segment 1 (static) or 0 using the per-MB mbgraph errors.
+static void separate_arf_mbs(VP10_COMP *cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ int mb_col, mb_row, offset, i;
+ int mi_row, mi_col;
+ int ncnt[4] = { 0 };
+ int n_frames = cpi->mbgraph_n_frames;
+
+ int *arf_not_zz;
+
+ CHECK_MEM_ERROR(
+ cm, arf_not_zz,
+ vpx_calloc(cm->mb_rows * cm->mb_cols * sizeof(*arf_not_zz), 1));
+
+ // We are not interested in results beyond the alt ref itself.
+ if (n_frames > cpi->rc.frames_till_gf_update_due)
+ n_frames = cpi->rc.frames_till_gf_update_due;
+
+ // defer cost to reference frames
+ for (i = n_frames - 1; i >= 0; i--) {
+ MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
+
+ for (offset = 0, mb_row = 0; mb_row < cm->mb_rows;
+ offset += cm->mb_cols, mb_row++) {
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
+ MBGRAPH_MB_STATS *mb_stats = &frame_stats->mb_stats[offset + mb_col];
+
+ int altref_err = mb_stats->ref[ALTREF_FRAME].err;
+ int intra_err = mb_stats->ref[INTRA_FRAME].err;
+ int golden_err = mb_stats->ref[GOLDEN_FRAME].err;
+
+ // Count the MB when its altref error is above 1000, intra or golden.
+ if (altref_err > 1000 || altref_err > intra_err ||
+ altref_err > golden_err) {
+ arf_not_zz[offset + mb_col]++;
+ }
+ }
+ }
+ }
+
+ // arf_not_zz is indexed by MB, but this loop is indexed by MI to avoid out
+ // of bound access in segmentation_map
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
+ // If any of the blocks in the sequence failed then the MB
+ // goes in segment 0
+ if (arf_not_zz[mi_row / 2 * cm->mb_cols + mi_col / 2]) {
+ ncnt[0]++;
+ cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 0;
+ } else {
+ cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 1;
+ ncnt[1]++;
+ }
+ }
+ }
+
+ // Intended gate, currently disabled by the if (1) below: only segment when
+ // if ( ncnt[1] && (ncnt[0] / ncnt[1] < 10) )
+ if (1) {
+ // Note % of blocks that are marked as static
+ if (cm->MBs)
+ cpi->static_mb_pct = (ncnt[1] * 100) / (cm->mi_rows * cm->mi_cols);
+
+ // This error case should not be reachable as this function should
+ // never be called with the common data structure uninitialized.
+ else
+ cpi->static_mb_pct = 0;
+
+ vp10_enable_segmentation(&cm->seg);
+ } else {
+ cpi->static_mb_pct = 0;
+ vp10_disable_segmentation(&cm->seg);
+ }
+
+ // Free locally allocated storage
+ vpx_free(arf_not_zz);
+}
+
+void vp10_update_mbgraph_stats(VP10_COMP *cpi) {
+ VP10_COMMON *const cm = &cpi->common;
+ int i, n_frames = vp10_lookahead_depth(cpi->lookahead);
+ YV12_BUFFER_CONFIG *golden_ref = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+
+ assert(golden_ref != NULL);
+
+ // we need to look ahead beyond where the ARF transitions into
+ // being a GF - so exit if we don't look ahead beyond that
+ if (n_frames <= cpi->rc.frames_till_gf_update_due) return;
+
+ if (n_frames > MAX_LAG_BUFFERS) n_frames = MAX_LAG_BUFFERS;
+
+ cpi->mbgraph_n_frames = n_frames;
+ for (i = 0; i < n_frames; i++) {
+ MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
+ memset(frame_stats->mb_stats, 0,
+ cm->mb_rows * cm->mb_cols * sizeof(*cpi->mbgraph_stats[i].mb_stats));
+ }
+
+ // do motion search to find contribution of each reference to data
+ // later on in this GF group
+ // FIXME really, the GF/last MC search should be done forward, and
+ // the ARF MC search backwards, to get optimal results for MV caching
+ for (i = 0; i < n_frames; i++) {
+ MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i];
+ struct lookahead_entry *q_cur = vp10_lookahead_peek(cpi->lookahead, i);
+
+ assert(q_cur != NULL);
+
+ update_mbgraph_frame_stats(cpi, frame_stats, &q_cur->img, golden_ref,
+ cpi->Source);
+ }
+
+ vpx_clear_system_state();
+
+ separate_arf_mbs(cpi);
+}
diff --git a/av1/encoder/mbgraph.h b/av1/encoder/mbgraph.h
new file mode 100644
index 0000000..0b056af
--- /dev/null
+++ b/av1/encoder/mbgraph.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_MBGRAPH_H_
+#define VP10_ENCODER_MBGRAPH_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ struct {
+ int err;
+ union {
+ int_mv mv;
+ PREDICTION_MODE mode;
+ } m;
+ } ref[TOTAL_REFS_PER_FRAME];
+} MBGRAPH_MB_STATS;
+
+typedef struct { MBGRAPH_MB_STATS *mb_stats; } MBGRAPH_FRAME_STATS;
+
+struct VP10_COMP;
+
+void vp10_update_mbgraph_stats(struct VP10_COMP *cpi);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_MBGRAPH_H_
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
new file mode 100644
index 0000000..9ee06e9
--- /dev/null
+++ b/av1/encoder/mcomp.c
@@ -0,0 +1,3417 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+
+#include "av1/common/common.h"
+#include "av1/common/reconinter.h"
+
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/mcomp.h"
+#include "av1/encoder/rdopt.h"
+
+// #define NEW_DIAMOND_SEARCH
+
+static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
+ const MV *mv) {
+ return &buf->buf[mv->row * buf->stride + mv->col];
+}
+
+void vp10_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
+ int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
+ int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
+ int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
+ int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
+
+ col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1);
+ row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1);
+ col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1);
+ row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1);
+
+ // Get intersection of UMV window and valid MV window to reduce # of checks
+ // in diamond search.
+ if (x->mv_col_min < col_min) x->mv_col_min = col_min;
+ if (x->mv_col_max > col_max) x->mv_col_max = col_max;
+ if (x->mv_row_min < row_min) x->mv_row_min = row_min;
+ if (x->mv_row_max > row_max) x->mv_row_max = row_max;
+}
+
+int vp10_init_search_range(int size) {
+ int sr = 0;
+ // Minimum search size no matter what the passed in value.
+ size = VPXMAX(16, size);
+
+ while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
+
+ sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2);
+ return sr;
+}
+
+static INLINE int mv_cost(const MV *mv, const int *joint_cost,
+ int *const comp_cost[2]) {
+ return joint_cost[vp10_get_mv_joint(mv)] + comp_cost[0][mv->row] +
+ comp_cost[1][mv->col];
+}
+
+int vp10_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
+ int *mvcost[2], int weight) {
+ const MV diff = { mv->row - ref->row, mv->col - ref->col };
+ return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
+}
+
+#define PIXEL_TRANSFORM_ERROR_SCALE 4
+static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
+ int *mvcost[2], int error_per_bit) {
+ if (mvcost) {
+ const MV diff = { mv->row - ref->row, mv->col - ref->col };
+ // This product sits at a 32-bit ceiling right now and any additional
+ // accuracy in either bit cost or error cost will cause it to overflow.
+ return ROUND_POWER_OF_TWO(
+ (unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
+ RDDIV_BITS + VP10_PROB_COST_SHIFT - RD_EPB_SHIFT +
+ PIXEL_TRANSFORM_ERROR_SCALE);
+ }
+ return 0;
+}
+
+static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
+ int sad_per_bit) {
+ const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 };
+ return ROUND_POWER_OF_TWO(
+ (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->mvsadcost) * sad_per_bit,
+ VP10_PROB_COST_SHIFT);
+}
+
+void vp10_init_dsmotion_compensation(search_site_config *cfg, int stride) {
+ int len, ss_count = 1;
+
+ cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
+ cfg->ss[0].offset = 0;
+
+ for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
+ // Generate offsets for 4 search sites per step.
+ const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
+ int i;
+ for (i = 0; i < 4; ++i) {
+ search_site *const ss = &cfg->ss[ss_count++];
+ ss->mv = ss_mvs[i];
+ ss->offset = ss->mv.row * stride + ss->mv.col;
+ }
+ }
+
+ cfg->ss_count = ss_count;
+ cfg->searches_per_step = 4;
+}
+
+void vp10_init3smotion_compensation(search_site_config *cfg, int stride) {
+ int len, ss_count = 1;
+
+ cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
+ cfg->ss[0].offset = 0;
+
+ for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
+ // Generate offsets for 8 search sites per step.
+ const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len },
+ { 0, len }, { -len, -len }, { -len, len },
+ { len, -len }, { len, len } };
+ int i;
+ for (i = 0; i < 8; ++i) {
+ search_site *const ss = &cfg->ss[ss_count++];
+ ss->mv = ss_mvs[i];
+ ss->offset = ss->mv.row * stride + ss->mv.col;
+ }
+ }
+
+ cfg->ss_count = ss_count;
+ cfg->searches_per_step = 8;
+}
+
+/*
+ * To avoid the penalty for crossing cache-line read, preload the reference
+ * area in a small buffer, which is aligned to make sure there won't be crossing
+ * cache-line read while reading from this buffer. This reduced the cpu
+ * cycles spent on reading ref data in sub-pixel filter functions.
+ * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
+ * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
+ * could reduce the area.
+ */
+
+// convert motion vector component to offset for sv[a]f calc
+static INLINE int sp(int x) { return x & 7; }
+
+static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
+ return &buf[(r >> 3) * stride + (c >> 3)];
+}
+
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ MV this_mv = { r, c }; \
+ v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
+ if (second_pred == NULL) \
+ thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, &sse); \
+ else \
+ thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
+ src_address, src_stride, &sse, second_pred); \
+ v += thismse; \
+ if (v < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+
+#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
+
+static INLINE const uint8_t *upre(const uint8_t *buf, int stride, int r,
+ int c) {
+ return &buf[(r)*stride + (c)];
+}
+
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ MV this_mv = { r, c }; \
+ thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, \
+ upre(y, y_stride, r, c), y_stride, \
+ second_pred, w, h, &sse); \
+ v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
+ v += thismse; \
+ if (v < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+
+#define FIRST_LEVEL_CHECKS \
+ { \
+ unsigned int left, right, up, down, diag; \
+ CHECK_BETTER(left, tr, tc - hstep); \
+ CHECK_BETTER(right, tr, tc + hstep); \
+ CHECK_BETTER(up, tr - hstep, tc); \
+ CHECK_BETTER(down, tr + hstep, tc); \
+ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \
+ switch (whichdir) { \
+ case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
+ case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
+ case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
+ case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
+ } \
+ }
+
+#define SECOND_LEVEL_CHECKS \
+ { \
+ int kr, kc; \
+ unsigned int second; \
+ if (tr != br && tc != bc) { \
+ kr = br - tr; \
+ kc = bc - tc; \
+ CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
+ CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
+ } else if (tr == br && tc != bc) { \
+ kc = bc - tc; \
+ CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
+ CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
+ switch (whichdir) { \
+ case 0: \
+ case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
+ case 2: \
+ case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
+ } \
+ } else if (tr != br && tc == bc) { \
+ kr = br - tr; \
+ CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
+ CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
+ switch (whichdir) { \
+ case 0: \
+ case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
+ case 1: \
+ case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
+ } \
+ } \
+ }
+
+// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST is a rewrite of
+// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
+// later in the same way.
+#define SECOND_LEVEL_CHECKS_BEST(k) \
+ { \
+ unsigned int second; \
+ int br0 = br; \
+ int bc0 = bc; \
+ assert(tr == br || tc == bc); \
+ if (tr == br && tc != bc) { \
+ kc = bc - tc; \
+ } else if (tr != br && tc == bc) { \
+ kr = br - tr; \
+ } \
+ CHECK_BETTER##k(second, br0 + kr, bc0); \
+ CHECK_BETTER##k(second, br0, bc0 + kc); \
+ if (br0 != br || bc0 != bc) { \
+ CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \
+ } \
+ }
+
+#define SETUP_SUBPEL_SEARCH \
+ const uint8_t *const src_address = x->plane[0].src.buf; \
+ const int src_stride = x->plane[0].src.stride; \
+ const MACROBLOCKD *xd = &x->e_mbd; \
+ unsigned int besterr = INT_MAX; \
+ unsigned int sse; \
+ unsigned int whichdir; \
+ int thismse; \
+ MV *bestmv = &x->best_mv.as_mv; \
+ const unsigned int halfiters = iters_per_step; \
+ const unsigned int quarteriters = iters_per_step; \
+ const unsigned int eighthiters = iters_per_step; \
+ const int y_stride = xd->plane[0].pre[0].stride; \
+ const int offset = bestmv->row * y_stride + bestmv->col; \
+ const uint8_t *const y = xd->plane[0].pre[0].buf; \
+ \
+ int br = bestmv->row * 8; \
+ int bc = bestmv->col * 8; \
+ int hstep = 4; \
+ const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); \
+ const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); \
+ const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); \
+ const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); \
+ int tr = br; \
+ int tc = bc; \
+ \
+ bestmv->row *= 8; \
+ bestmv->col *= 8;
+
+static unsigned int setup_center_error(
+ const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
+ int error_per_bit, const vpx_variance_fn_ptr_t *vfp,
+ const uint8_t *const src, const int src_stride, const uint8_t *const y,
+ int y_stride, const uint8_t *second_pred, int w, int h, int offset,
+ int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion) {
+ unsigned int besterr;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (second_pred != NULL) {
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
+ vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
+ y_stride);
+ besterr =
+ vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
+ } else {
+ DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
+ vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
+ }
+ } else {
+ besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
+ }
+ *distortion = besterr;
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+#else
+ (void)xd;
+ if (second_pred != NULL) {
+ DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
+ vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
+ } else {
+ besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
+ }
+ *distortion = besterr;
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ return besterr;
+}
+
+static INLINE int divide_and_round(const int n, const int d) {
+ return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
+}
+
+static INLINE int is_cost_list_wellbehaved(int *cost_list) {
+ return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
+ cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
+}
+
+// Returns surface minima estimate at given precision in 1/2^n bits.
+// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
+// For a given set of costs S0, S1, S2, S3, S4 at points
+// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
+// the solution for the location of the minima (x0, y0) is given by:
+// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
+// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
+// The code below is an integerized version of that.
+static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
+ *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
+ (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
+ *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
+ (cost_list[4] - 2 * cost_list[0] + cost_list[2]));
+}
+
+int vp10_find_best_sub_pixel_tree_pruned_evenmore(
+ MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
+ const vpx_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
+ int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1, const uint8_t *second_pred, int w, int h,
+ int use_upsampled_ref) {
+ SETUP_SUBPEL_SEARCH;
+ besterr = setup_center_error(
+ xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
+ y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion);
+ (void)halfiters;
+ (void)quarteriters;
+ (void)eighthiters;
+ (void)whichdir;
+ (void)allow_hp;
+ (void)forced_stop;
+ (void)hstep;
+ (void)use_upsampled_ref;
+
+ if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
+ cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
+ cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
+ int ir, ic;
+ unsigned int minpt;
+ get_cost_surf_min(cost_list, &ir, &ic, 2);
+ if (ir != 0 || ic != 0) {
+ CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
+ }
+ } else {
+ FIRST_LEVEL_CHECKS;
+ if (halfiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+
+ tr = br;
+ tc = bc;
+
+ // Each subsequent iteration checks at least one point in common with
+ // the last iteration could be 2 ( if diag selected) 1/4 pel
+ // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+ if (forced_stop != 2) {
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (quarteriters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ }
+ }
+
+ tr = br;
+ tc = bc;
+
+ if (allow_hp && vp10_use_mv_hp(ref_mv) && forced_stop == 0) {
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (eighthiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ }
+
+ bestmv->row = br;
+ bestmv->col = bc;
+
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+
+int vp10_find_best_sub_pixel_tree_pruned_more(
+ MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
+ const vpx_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
+ int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1, const uint8_t *second_pred, int w, int h,
+ int use_upsampled_ref) {
+ SETUP_SUBPEL_SEARCH;
+ (void)use_upsampled_ref;
+
+ besterr = setup_center_error(
+ xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
+ y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion);
+ if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
+ cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
+ cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
+ unsigned int minpt;
+ int ir, ic;
+ get_cost_surf_min(cost_list, &ir, &ic, 1);
+ if (ir != 0 || ic != 0) {
+ CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
+ }
+ } else {
+ FIRST_LEVEL_CHECKS;
+ if (halfiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ }
+
+ // Each subsequent iteration checks at least one point in common with
+ // the last iteration could be 2 ( if diag selected) 1/4 pel
+
+ // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+ if (forced_stop != 2) {
+ tr = br;
+ tc = bc;
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (quarteriters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ }
+
+ if (allow_hp && vp10_use_mv_hp(ref_mv) && forced_stop == 0) {
+ tr = br;
+ tc = bc;
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (eighthiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ }
+ // These lines ensure static analysis doesn't warn that
+ // tr and tc aren't used after the above point.
+ (void)tr;
+ (void)tc;
+
+ bestmv->row = br;
+ bestmv->col = bc;
+
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+// Sub-pixel MV refinement that prunes the half-pel stage with the integer-pel cost_list; writes the result to *bestmv and returns its cost (INT_MAX if too far from ref_mv).
+int vp10_find_best_sub_pixel_tree_pruned(
+ MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
+ const vpx_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
+ int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1, const uint8_t *second_pred, int w, int h,
+ int use_upsampled_ref) {
+ SETUP_SUBPEL_SEARCH;
+ (void)use_upsampled_ref;  // accepted but not used by this variant
+ // NOTE(review): SETUP_SUBPEL_SEARCH is defined outside this view; presumably it declares besterr, br/bc, tr/tc, hstep, whichdir and the *iters counts -- confirm.
+ besterr = setup_center_error(
+ xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
+ y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion);
+ if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
+ cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
+ cost_list[4] != INT_MAX) {  // all five integer-pel costs are available
+ unsigned int left, right, up, down, diag;
+ whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
+ (cost_list[2] < cost_list[4] ? 0 : 2);  // +1 selects right over left, +2 selects up over down
+ switch (whichdir) {
+ case 0:
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(down, tr + hstep, tc);
+ CHECK_BETTER(diag, tr + hstep, tc - hstep);
+ break;
+ case 1:
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(down, tr + hstep, tc);
+ CHECK_BETTER(diag, tr + hstep, tc + hstep);
+ break;
+ case 2:
+ CHECK_BETTER(left, tr, tc - hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(diag, tr - hstep, tc - hstep);
+ break;
+ case 3:
+ CHECK_BETTER(right, tr, tc + hstep);
+ CHECK_BETTER(up, tr - hstep, tc);
+ CHECK_BETTER(diag, tr - hstep, tc + hstep);
+ break;
+ }
+ } else {  // no usable cost_list: fall back to the unpruned half-pel checks
+ FIRST_LEVEL_CHECKS;
+ if (halfiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ }
+ // Re-center the trial position on the best position found so far.
+ tr = br;
+ tc = bc;
+
+ // 1/4 pel stage. Each subsequent iteration checks at least one point in
+ // common with the last iteration (possibly 2 if the diagonal was selected).
+
+ // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+ if (forced_stop != 2) {
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (quarteriters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
+ }
+ // 1/8 pel stage: only when allow_hp is set, vp10_use_mv_hp(ref_mv) holds and forced_stop == 0.
+ if (allow_hp && vp10_use_mv_hp(ref_mv) && forced_stop == 0) {
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (eighthiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
+ }
+ // These lines ensure static analysis doesn't warn that
+ // tr and tc aren't used after the above point.
+ (void)tr;
+ (void)tc;
+
+ bestmv->row = br;
+ bestmv->col = bc;
+ // Reject a result that differs from ref_mv by more than (MAX_FULL_PEL_VAL << 3) in either component.
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+
+/* clang-format off */
+static const MV search_step_table[12] = {
+ // { row, col } offsets in 1/8-pel units, ordered left, right, up, down; one row per level: 1/2, 1/4, 1/8 pel
+ { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
+ { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
+ { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
+};
+/* clang-format on */
+// Builds the prediction from the upsampled reference y (averaged with second_pred when it is non-NULL) and returns vfp->vf() of it against src; *sse is filled by vf.
+static int upsampled_pref_error(const MACROBLOCKD *xd,
+ const vpx_variance_fn_ptr_t *vfp,
+ const uint8_t *const src, const int src_stride,
+ const uint8_t *const y, int y_stride,
+ const uint8_t *second_pred, int w, int h,
+ unsigned int *sse) {
+ unsigned int besterr;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {  // high bit depth frame: 16-bit prediction buffer
+ DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
+ if (second_pred != NULL)
+ vpx_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y,
+ y_stride);
+ else
+ vpx_highbd_upsampled_pred(pred16, w, h, y, y_stride);
+ // The local prediction buffer is handed to vf with a stride of w.
+ besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, sse);
+ } else {
+ DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
+ (void)xd;  // xd is only read in the high bit depth build
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (second_pred != NULL)
+ vpx_comp_avg_upsampled_pred(pred, second_pred, w, h, y, y_stride);
+ else
+ vpx_upsampled_pred(pred, w, h, y, y_stride);
+ // The local prediction buffer is handed to vf with a stride of w.
+ besterr = vfp->vf(pred, w, src, src_stride, sse);
+#if CONFIG_VP9_HIGHBITDEPTH
+ }  // closes the 8-bit else branch opened inside the #if above
+#endif
+ return besterr;
+}
+// Error at the search center for an upsampled reference: stores the prediction error at y + offset in *distortion and returns it plus the MV cost of bestmv relative to ref_mv.
+static unsigned int upsampled_setup_center_error(
+ const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
+ int error_per_bit, const vpx_variance_fn_ptr_t *vfp,
+ const uint8_t *const src, const int src_stride, const uint8_t *const y,
+ int y_stride, const uint8_t *second_pred, int w, int h, int offset,
+ int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion) {
+ unsigned int besterr = upsampled_pref_error(
+ xd, vfp, src, src_stride, y + offset, y_stride, second_pred, w, h, sse1);
+ *distortion = besterr;  // distortion excludes the MV cost added below
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+ return besterr;
+}
+// Sub-pixel MV refinement around x->best_mv (full-pel on entry, 1/8-pel units on return); updates *distortion and *sse1 and returns the best cost, or INT_MAX if the result is too far from ref_mv.
+int vp10_find_best_sub_pixel_tree(MACROBLOCK *x, const MV *ref_mv, int allow_hp,
+ int error_per_bit,
+ const vpx_variance_fn_ptr_t *vfp,
+ int forced_stop, int iters_per_step,
+ int *cost_list, int *mvjcost, int *mvcost[2],
+ int *distortion, unsigned int *sse1,
+ const uint8_t *second_pred, int w, int h,
+ int use_upsampled_ref) {
+ const uint8_t *const src_address = x->plane[0].src.buf;
+ const int src_stride = x->plane[0].src.stride;
+ const MACROBLOCKD *xd = &x->e_mbd;
+ unsigned int besterr = INT_MAX;
+ unsigned int sse;
+ unsigned int thismse;
+ const int y_stride = xd->plane[0].pre[0].stride;
+ MV *bestmv = &x->best_mv.as_mv;
+ const int offset = bestmv->row * y_stride + bestmv->col;
+ const uint8_t *const y = xd->plane[0].pre[0].buf;
+ // Positions below are in 1/8-pel units, so hstep = 4 is a half-pel step.
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
+ int hstep = 4;
+ int iter, round = 3 - forced_stop;  // number of precision levels to search
+ const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
+ int tr = br;
+ int tc = bc;
+ const MV *search_step = search_step_table;
+ int idx, best_idx = -1;
+ unsigned int cost_array[5];  // [0..3]: left, right, up, down; [4]: diagonal
+ int kr, kc;
+ // Without usable high-precision MVs the third (1/8 pel) level is skipped.
+ if (!(allow_hp && vp10_use_mv_hp(ref_mv)))
+ if (round == 3) round = 2;
+ // Convert the stored best MV itself to 1/8-pel units.
+ bestmv->row *= 8;
+ bestmv->col *= 8;
+
+ // use_upsampled_ref can be 0 or 1
+ if (use_upsampled_ref)
+ besterr = upsampled_setup_center_error(
+ xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
+ y_stride, second_pred, w, h, (offset * 8), mvjcost, mvcost, sse1,
+ distortion);
+ else
+ besterr = setup_center_error(
+ xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
+ y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion);
+
+ (void)cost_list; // to silence compiler warning
+
+ for (iter = 0; iter < round; ++iter) {
+ // Check vertical and horizontal sub-pixel positions.
+ for (idx = 0; idx < 4; ++idx) {
+ tr = br + search_step[idx].row;
+ tc = bc + search_step[idx].col;
+ if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
+ MV this_mv = { tr, tc };
+
+ if (use_upsampled_ref) {
+ const uint8_t *const pre_address = y + tr * y_stride + tc;  // tr/tc index the upsampled reference directly
+
+ thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
+ pre_address, y_stride, second_pred, w,
+ h, &sse);
+ } else {
+ const uint8_t *const pre_address =
+ y + (tr >> 3) * y_stride + (tc >> 3);
+ if (second_pred == NULL)
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse);
+ else
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+ }
+
+ cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
+ mvcost, error_per_bit);
+
+ if (cost_array[idx] < besterr) {
+ best_idx = idx;
+ besterr = cost_array[idx];
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+ } else {
+ cost_array[idx] = INT_MAX;  // out of range: never selected
+ }
+ }
+
+ // Check diagonal sub-pixel position
+ kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
+ kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
+
+ tc = bc + kc;
+ tr = br + kr;
+ if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
+ MV this_mv = { tr, tc };
+
+ if (use_upsampled_ref) {
+ const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+ thismse =
+ upsampled_pref_error(xd, vfp, src_address, src_stride, pre_address,
+ y_stride, second_pred, w, h, &sse);
+ } else {
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+
+ if (second_pred == NULL)
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
+ src_stride, &sse);
+ else
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+ }
+
+ cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
+
+ if (cost_array[4] < besterr) {
+ best_idx = 4;
+ besterr = cost_array[4];
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+ } else {
+ cost_array[4] = INT_MAX;  // explicit diagonal slot; previously written via idx, which is 4 only because the loop above has finished
+ }
+ // Move the best position: an axis step (0..3) or the diagonal (4, held in tr/tc).
+ if (best_idx < 4 && best_idx >= 0) {
+ br += search_step[best_idx].row;
+ bc += search_step[best_idx].col;
+ } else if (best_idx == 4) {
+ br = tr;
+ bc = tc;
+ }
+ // Optional second-level checks around the new best (macro defined outside this view).
+ if (iters_per_step > 1 && best_idx != -1) {
+ if (use_upsampled_ref) {
+ SECOND_LEVEL_CHECKS_BEST(1);
+ } else {
+ SECOND_LEVEL_CHECKS_BEST(0);
+ }
+ }
+ // Next precision level: next row of search_step_table and half the step.
+ search_step += 4;
+ hstep >>= 1;
+ best_idx = -1;
+ }
+
+ // These lines ensure static analysis doesn't warn that
+ // tr and tc aren't used after the above point.
+ (void)tr;
+ (void)tc;
+
+ bestmv->row = br;
+ bestmv->col = bc;
+ // Reject a result that differs from ref_mv by more than (MAX_FULL_PEL_VAL << 3) in either component.
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+
+#undef PRE
+#undef CHECK_BETTER
+// Nonzero iff (row, col) stays inside x's MV limits when moved by up to range in each direction.
+static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
+ int range) {
+ return ((row - range) >= x->mv_row_min) & ((row + range) <= x->mv_row_max) &
+ ((col - range) >= x->mv_col_min) & ((col + range) <= x->mv_col_max);  // bitwise &: all four comparisons are evaluated
+}
+// Nonzero iff mv lies within x's MV row and column limits (both ends inclusive).
+static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
+ return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
+ (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
+}
+// Accepts candidate i if thissad (plus the MV cost when use_mvcost) beats bestsad; uses thissad, bestsad, best_site, i, this_mv, fcenter_mv, sad_per_bit, use_mvcost and x from the expanding scope.
+#define CHECK_BETTER \
+ { \
+ if (thissad < bestsad) { \
+ if (use_mvcost) \
+ thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
+ if (thissad < bestsad) { \
+ bestsad = thissad; \
+ best_site = i; \
+ } \
+ } \
+ }
+
+#define MAX_PATTERN_SCALES 11  // number of scales in a pattern table
+#define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale
+#define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
+
+// Fills cost_list[0..4] with variance + MV cost at the best integer pel and its four neighbors; neighbors outside the MV limits get INT_MAX.
+static INLINE void calc_int_cost_list(const MACROBLOCK *x,
+ const MV *const ref_mv, int sadpb,
+ const vpx_variance_fn_ptr_t *fn_ptr,
+ const MV *best_mv, int *cost_list) {
+ static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
+ const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
+ const int br = best_mv->row;
+ const int bc = best_mv->col;
+ MV this_mv;
+ int i;
+ unsigned int sse;
+ // NOTE(review): the center entry adds mvsad_err_cost() while the neighbors add mv_err_cost() -- confirm the mix is intended.
+ this_mv.row = br;
+ this_mv.col = bc;
+ cost_list[0] =
+ fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
+ in_what->stride, &sse) +
+ mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
+ if (check_bounds(x, br, bc, 1)) {  // all four neighbors are in range: no per-point check
+ for (i = 0; i < 4; i++) {
+ const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };  // shadows the outer this_mv
+ cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv),
+ in_what->stride, &sse) +
+ mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
+ x->mvcost, x->errorperbit);
+ }
+ } else {
+ for (i = 0; i < 4; i++) {
+ const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };  // shadows the outer this_mv
+ if (!is_mv_in(x, &this_mv))
+ cost_list[i + 1] = INT_MAX;
+ else
+ cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv),
+ in_what->stride, &sse) +
+ mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
+ x->mvcost, x->errorperbit);
+ }
+ }
+}
+// Completes cost_list[0..4] with SAD-based values around best_mv: computes them if cost_list[0] is still INT_MAX, otherwise only adds the MV cost to the entries already stored (when use_mvcost).
+static INLINE void calc_int_sad_list(const MACROBLOCK *x,
+ const MV *const ref_mv, int sadpb,
+ const vpx_variance_fn_ptr_t *fn_ptr,
+ const MV *best_mv, int *cost_list,
+ const int use_mvcost, const int bestsad) {
+ static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
+ const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
+ int i;
+ const int br = best_mv->row;
+ const int bc = best_mv->col;
+ // cost_list[0] == INT_MAX means no values were stored yet: compute the raw SADs here (no MV cost is added on this path).
+ if (cost_list[0] == INT_MAX) {
+ cost_list[0] = bestsad;
+ if (check_bounds(x, br, bc, 1)) {
+ for (i = 0; i < 4; i++) {
+ const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
+ cost_list[i + 1] =
+ fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv), in_what->stride);
+ }
+ } else {
+ for (i = 0; i < 4; i++) {
+ const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
+ if (!is_mv_in(x, &this_mv))
+ cost_list[i + 1] = INT_MAX;
+ else
+ cost_list[i + 1] =
+ fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv), in_what->stride);
+ }
+ }
+ } else {
+ if (use_mvcost) {  // stored entries are raw SADs: add the MV cost to each valid one
+ for (i = 0; i < 4; i++) {
+ const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
+ if (cost_list[i + 1] != INT_MAX) {
+ cost_list[i + 1] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
+ }
+ }
+ }
+ }
+}
+
+// Generic pattern search function that searches over multiple scales.
+// Each scale can have a different number of candidates and shape of
+// candidates as indicated in the num_candidates and candidates arrays
+// passed into this function. Clamps *start_mv, stores the best full-pel MV in
+// x->best_mv, fills cost_list[0..4] when it is non-NULL, and returns bestsad.
+static int pattern_search(
+ MACROBLOCK *x, MV *start_mv, int search_param, int sad_per_bit,
+ int do_init_search, int *cost_list, const vpx_variance_fn_ptr_t *vfp,
+ int use_mvcost, const MV *center_mv,
+ const int num_candidates[MAX_PATTERN_SCALES],
+ const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
+ 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+ };
+ int i, s, t;
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+ const int last_is_4 = num_candidates[0] == 4;  // the scale-0 pattern has exactly the 4 axis neighbors
+ int br, bc;
+ int bestsad = INT_MAX;
+ int thissad;
+ int k = -1;  // index of the last winning candidate (direction of travel)
+ const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
+ int best_init_s = search_param_to_steps[search_param];
+ // Clamp start_mv so that it lies within the MV range
+ clamp_mv(start_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
+ x->mv_row_max);
+ br = start_mv->row;
+ bc = start_mv->col;
+ if (cost_list != NULL) {
+ cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
+ INT_MAX;
+ }
+
+ // Work out the start point for the search
+ bestsad = vfp->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, start_mv), in_what->stride) +
+ mvsad_err_cost(x, start_mv, &fcenter_mv, sad_per_bit);
+
+ // Search all possible scales up to the search param around the center point
+ // pick the scale of the point that is best as the starting scale of
+ // further steps around it.
+ if (do_init_search) {
+ s = best_init_s;
+ best_init_s = -1;
+ for (t = 0; t <= s; ++t) {
+ int best_site = -1;
+ if (check_bounds(x, br, bc, 1 << t)) {
+ for (i = 0; i < num_candidates[t]; i++) {
+ const MV this_mv = { br + candidates[t][i].row,
+ bc + candidates[t][i].col };
+ thissad =
+ vfp->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv), in_what->stride);
+ CHECK_BETTER
+ }
+ } else {
+ for (i = 0; i < num_candidates[t]; i++) {
+ const MV this_mv = { br + candidates[t][i].row,
+ bc + candidates[t][i].col };
+ if (!is_mv_in(x, &this_mv)) continue;
+ thissad =
+ vfp->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv), in_what->stride);
+ CHECK_BETTER
+ }
+ }
+ if (best_site == -1) {
+ continue;
+ } else {
+ best_init_s = t;
+ k = best_site;
+ }
+ }
+ if (best_init_s != -1) {
+ br += candidates[best_init_s][k].row;
+ bc += candidates[best_init_s][k].col;
+ }
+ }
+
+ // If the center point is still the best, just skip this and move to
+ // the refinement step.
+ if (best_init_s != -1) {
+ const int last_s = (last_is_4 && cost_list != NULL);  // 1: scale 0 is handled separately below so its SADs can be recorded
+ int best_site = -1;
+ s = best_init_s;
+ // Walk from the starting scale down to last_s.
+ for (; s >= last_s; s--) {
+ // No need to search all points the 1st time if initial search was used
+ if (!do_init_search || s != best_init_s) {
+ if (check_bounds(x, br, bc, 1 << s)) {
+ for (i = 0; i < num_candidates[s]; i++) {
+ const MV this_mv = { br + candidates[s][i].row,
+ bc + candidates[s][i].col };
+ thissad =
+ vfp->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv), in_what->stride);
+ CHECK_BETTER
+ }
+ } else {
+ for (i = 0; i < num_candidates[s]; i++) {
+ const MV this_mv = { br + candidates[s][i].row,
+ bc + candidates[s][i].col };
+ if (!is_mv_in(x, &this_mv)) continue;
+ thissad =
+ vfp->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv), in_what->stride);
+ CHECK_BETTER
+ }
+ }
+ // Nothing at this scale improved on the center: go to the next smaller scale.
+ if (best_site == -1) {
+ continue;
+ } else {
+ br += candidates[s][best_site].row;
+ bc += candidates[s][best_site].col;
+ k = best_site;
+ }
+ }
+ // Keep moving at this scale, testing only candidate k and its two cyclic neighbors, until none improves.
+ do {
+ int next_chkpts_indices[PATTERN_CANDIDATES_REF];
+ best_site = -1;
+ next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
+ next_chkpts_indices[1] = k;
+ next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
+
+ if (check_bounds(x, br, bc, 1 << s)) {
+ for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
+ const MV this_mv = {
+ br + candidates[s][next_chkpts_indices[i]].row,
+ bc + candidates[s][next_chkpts_indices[i]].col
+ };
+ thissad =
+ vfp->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv), in_what->stride);
+ CHECK_BETTER
+ }
+ } else {
+ for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
+ const MV this_mv = {
+ br + candidates[s][next_chkpts_indices[i]].row,
+ bc + candidates[s][next_chkpts_indices[i]].col
+ };
+ if (!is_mv_in(x, &this_mv)) continue;
+ thissad =
+ vfp->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv), in_what->stride);
+ CHECK_BETTER
+ }
+ }
+ // best_site indexes next_chkpts_indices here, not the candidate table.
+ if (best_site != -1) {
+ k = next_chkpts_indices[best_site];
+ br += candidates[s][k].row;
+ bc += candidates[s][k].col;
+ }
+ } while (best_site != -1);
+ }
+
+ // Note: If we enter the if below, then cost_list must be non-NULL.
+ if (s == 0) {
+ cost_list[0] = bestsad;
+ if (!do_init_search || s != best_init_s) {
+ if (check_bounds(x, br, bc, 1 << s)) {
+ for (i = 0; i < num_candidates[s]; i++) {
+ const MV this_mv = { br + candidates[s][i].row,
+ bc + candidates[s][i].col };
+ cost_list[i + 1] = thissad =
+ vfp->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv), in_what->stride);
+ CHECK_BETTER
+ }
+ } else {
+ for (i = 0; i < num_candidates[s]; i++) {
+ const MV this_mv = { br + candidates[s][i].row,
+ bc + candidates[s][i].col };
+ if (!is_mv_in(x, &this_mv)) continue;
+ cost_list[i + 1] = thissad =
+ vfp->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv), in_what->stride);
+ CHECK_BETTER
+ }
+ }
+
+ if (best_site != -1) {
+ br += candidates[s][best_site].row;
+ bc += candidates[s][best_site].col;
+ k = best_site;
+ }
+ }
+ while (best_site != -1) {
+ int next_chkpts_indices[PATTERN_CANDIDATES_REF];
+ best_site = -1;
+ next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
+ next_chkpts_indices[1] = k;
+ next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
+ cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
+ cost_list[((k + 2) % 4) + 1] = cost_list[0];  // the old center is now the neighbor opposite to direction k
+ cost_list[0] = bestsad;
+
+ if (check_bounds(x, br, bc, 1 << s)) {
+ for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
+ const MV this_mv = {
+ br + candidates[s][next_chkpts_indices[i]].row,
+ bc + candidates[s][next_chkpts_indices[i]].col
+ };
+ cost_list[next_chkpts_indices[i] + 1] = thissad =
+ vfp->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv), in_what->stride);
+ CHECK_BETTER
+ }
+ } else {
+ for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
+ const MV this_mv = {
+ br + candidates[s][next_chkpts_indices[i]].row,
+ bc + candidates[s][next_chkpts_indices[i]].col
+ };
+ if (!is_mv_in(x, &this_mv)) {
+ cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
+ continue;
+ }
+ cost_list[next_chkpts_indices[i] + 1] = thissad =
+ vfp->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &this_mv), in_what->stride);
+ CHECK_BETTER
+ }
+ }
+
+ if (best_site != -1) {
+ k = next_chkpts_indices[best_site];
+ br += candidates[s][k].row;
+ bc += candidates[s][k].col;
+ }
+ }
+ }
+ }
+
+ // Returns the one-away integer pel cost/sad around the best as follows:
+ // cost_list[0]: cost/sad at the best integer pel
+ // cost_list[1]: cost/sad at delta {0, -1} (left) from the best integer pel
+ // cost_list[2]: cost/sad at delta { 1, 0} (bottom) from the best integer pel
+ // cost_list[3]: cost/sad at delta { 0, 1} (right) from the best integer pel
+ // cost_list[4]: cost/sad at delta {-1, 0} (top) from the best integer pel
+ if (cost_list) {
+ const MV best_mv = { br, bc };
+ if (last_is_4) {
+ calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_mv, cost_list,
+ use_mvcost, bestsad);
+ } else {
+ calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_mv, cost_list);
+ }
+ }
+ x->best_mv.as_mv.row = br;
+ x->best_mv.as_mv.col = bc;
+ return bestsad;
+}
+// Variance of the source block against the reference block at full-pel best_mv, plus (when use_mvcost) the MV cost of best_mv * 8 relative to center_mv.
+int vp10_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
+ const MV *center_mv, const vpx_variance_fn_ptr_t *vfp,
+ int use_mvcost) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+ const MV mv = { best_mv->row * 8, best_mv->col * 8 };
+ unsigned int unused;  // receives the sse output of vf, which is discarded
+
+ return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
+ in_what->stride, &unused) +
+ (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
+ x->errorperbit)
+ : 0);
+}
+// Like vp10_get_mvpred_var() but calls vfp->svaf() with zero sub-pixel offsets and second_pred instead of vfp->vf().
+int vp10_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
+ const MV *center_mv, const uint8_t *second_pred,
+ const vpx_variance_fn_ptr_t *vfp, int use_mvcost) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+ const MV mv = { best_mv->row * 8, best_mv->col * 8 };
+ unsigned int unused;  // receives the sse output of svaf, which is discarded
+
+ return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
+ what->buf, what->stride, &unused, second_pred) +
+ (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
+ x->errorperbit)
+ : 0);
+}
+// Hexagon pattern search: forwards all arguments to pattern_search() together with the hex candidate tables defined below.
+int vp10_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
+ int sad_per_bit, int do_init_search, int *cost_list,
+ const vpx_variance_fn_ptr_t *vfp, int use_mvcost,
+ const MV *center_mv) {
+ // First scale has 8-closest points, the rest have 6 points in hex shape
+ // at increasing scales
+ static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6 };
+ // Note that the largest candidate step at each scale is 2^scale
+ /* clang-format off */
+ static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
+ { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
+ { -1, 0 } },
+ { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
+ { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
+ { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
+ { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
+ { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
+ { -32, 0 } },
+ { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
+ { -64, 0 } },
+ { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
+ { -128, 0 } },
+ { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
+ { -256, 0 } },
+ { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
+ { -512, 0 } },
+ { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
+ { -512, 1024 }, { -1024, 0 } },
+ };
+ /* clang-format on */
+ return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
+ cost_list, vfp, use_mvcost, center_mv,
+ hex_num_candidates, hex_candidates);
+}
+// Big-diamond pattern search: forwards all arguments to pattern_search() together with the diamond candidate tables defined below.
+static int bigdia_search(MACROBLOCK *x, MV *start_mv, int search_param,
+ int sad_per_bit, int do_init_search, int *cost_list,
+ const vpx_variance_fn_ptr_t *vfp, int use_mvcost,
+ const MV *center_mv) {
+ // First scale has 4-closest points, the rest have 8 points in diamond
+ // shape at increasing scales
+ static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
+ 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ };
+ // Note that the largest candidate step at each scale is 2^scale
+ /* clang-format off */
+ static const MV
+ bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
+ { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
+ { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
+ { -1, 1 }, { -2, 0 } },
+ { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
+ { -2, 2 }, { -4, 0 } },
+ { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
+ { -4, 4 }, { -8, 0 } },
+ { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
+ { -8, 8 }, { -16, 0 } },
+ { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
+ { 0, 32 }, { -16, 16 }, { -32, 0 } },
+ { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
+ { 0, 64 }, { -32, 32 }, { -64, 0 } },
+ { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
+ { 0, 128 }, { -64, 64 }, { -128, 0 } },
+ { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
+ { 0, 256 }, { -128, 128 }, { -256, 0 } },
+ { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
+ { 0, 512 }, { -256, 256 }, { -512, 0 } },
+ { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
+ { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } },
+ };
+ /* clang-format on */
+ return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
+ cost_list, vfp, use_mvcost, center_mv,
+ bigdia_num_candidates, bigdia_candidates);
+}
+// Square pattern search: forwards all arguments to pattern_search() together with the square candidate tables defined below.
+static int square_search(MACROBLOCK *x, MV *start_mv, int search_param,
+ int sad_per_bit, int do_init_search, int *cost_list,
+ const vpx_variance_fn_ptr_t *vfp, int use_mvcost,
+ const MV *center_mv) {
+ // All scales have 8 closest points in square shape
+ static const int square_num_candidates[MAX_PATTERN_SCALES] = {
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ };
+ // Note that the largest candidate step at each scale is 2^scale
+ /* clang-format off */
+ static const MV
+ square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
+ { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
+ { -1, 1 }, { -1, 0 } },
+ { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
+ { -2, 2 }, { -2, 0 } },
+ { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
+ { -4, 4 }, { -4, 0 } },
+ { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
+ { -8, 8 }, { -8, 0 } },
+ { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
+ { 0, 16 }, { -16, 16 }, { -16, 0 } },
+ { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
+ { 0, 32 }, { -32, 32 }, { -32, 0 } },
+ { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
+ { 0, 64 }, { -64, 64 }, { -64, 0 } },
+ { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
+ { 0, 128 }, { -128, 128 }, { -128, 0 } },
+ { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
+ { 0, 256 }, { -256, 256 }, { -256, 0 } },
+ { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
+ { 0, 512 }, { -512, 512 }, { -512, 0 } },
+ { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
+ { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } },
+ };
+ /* clang-format on */
+ return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
+ cost_list, vfp, use_mvcost, center_mv,
+ square_num_candidates, square_candidates);
+}
+// vp10_hex_search() with search_param raised to at least MAX_MVSEARCH_STEPS - 2; all other arguments are forwarded unchanged.
+static int fast_hex_search(MACROBLOCK *x, MV *ref_mv, int search_param,
+ int sad_per_bit,
+ int do_init_search, // must be zero for fast_hex
+ int *cost_list, const vpx_variance_fn_ptr_t *vfp,
+ int use_mvcost, const MV *center_mv) {
+ return vp10_hex_search(
+ x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), sad_per_bit,
+ do_init_search, cost_list, vfp, use_mvcost, center_mv);
+}
+// bigdia_search() with search_param raised to at least MAX_MVSEARCH_STEPS - 2; all other arguments are forwarded unchanged.
+static int fast_dia_search(MACROBLOCK *x, MV *ref_mv, int search_param,
+ int sad_per_bit, int do_init_search, int *cost_list,
+ const vpx_variance_fn_ptr_t *vfp, int use_mvcost,
+ const MV *center_mv) {
+ return bigdia_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
+ sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
+ center_mv);
+}
+
+#undef CHECK_BETTER
+
+// Exhaustive motion search within +/-range of center_mv (clamped to the MV limits) on a grid with the given
+// step size; stores the winner in *best_mv (the previous best in x->second_best_mv) and returns its SAD + MV cost.
+static int exhuastive_mesh_search(MACROBLOCK *x, MV *ref_mv, MV *best_mv,
+ int range, int step, int sad_per_bit,
+ const vpx_variance_fn_ptr_t *fn_ptr,
+ const MV *center_mv) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+ MV fcenter_mv = { center_mv->row, center_mv->col };
+ unsigned int best_sad = INT_MAX;
+ int r, c, i;
+ int start_col, end_col, start_row, end_row;
+ int col_step = (step > 1) ? step : 4;  // step == 1 walks columns 4 at a time for the batched SAD call
+
+ assert(step >= 1);
+
+ clamp_mv(&fcenter_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
+ x->mv_row_max);
+ *best_mv = fcenter_mv;
+ best_sad =
+ fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
+ mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
+ start_row = VPXMAX(-range, x->mv_row_min - fcenter_mv.row);
+ start_col = VPXMAX(-range, x->mv_col_min - fcenter_mv.col);
+ end_row = VPXMIN(range, x->mv_row_max - fcenter_mv.row);
+ end_col = VPXMIN(range, x->mv_col_max - fcenter_mv.col);
+ // r and c are offsets from fcenter_mv; both end_row and end_col are inclusive.
+ for (r = start_row; r <= end_row; r += step) {
+ for (c = start_col; c <= end_col; c += col_step) {
+ // Step > 1 means we are not checking every location in this pass.
+ if (step > 1) {
+ const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
+ unsigned int sad =
+ fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
+ in_what->stride);
+ if (sad < best_sad) {
+ sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ x->second_best_mv.as_mv = *best_mv;
+ *best_mv = mv;
+ }
+ }
+ } else {
+ // 4 sads in a single call if we are checking every location
+ if (c + 3 <= end_col) {
+ unsigned int sads[4];
+ const uint8_t *addrs[4];
+ for (i = 0; i < 4; ++i) {
+ const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
+ addrs[i] = get_buf_from_mv(in_what, &mv);
+ }
+ fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
+
+ for (i = 0; i < 4; ++i) {
+ if (sads[i] < best_sad) {
+ const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
+ const unsigned int sad =
+ sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ x->second_best_mv.as_mv = *best_mv;
+ *best_mv = mv;
+ }
+ }
+ }
+ } else {
+ for (i = 0; i <= end_col - c; ++i) {  // inclusive: columns c..end_col remain (fewer than 4); '<' skipped end_col
+ const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
+ unsigned int sad =
+ fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride);
+ if (sad < best_sad) {
+ sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ x->second_best_mv.as_mv = *best_mv;
+ *best_mv = mv;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return best_sad;
+}
+/*
+ * Diamond-pattern full-pixel motion search driven by the site table in cfg.
+ *
+ * ref_mv is passed through clamp_mv() with the x->mv_* limits (presumably
+ * clamping it in place) and then used as the starting position. Each step
+ * evaluates cfg->searches_per_step candidate sites around the current best
+ * position; a candidate is scored as its SAD (fn_ptr->sdf or fn_ptr->sdx4df)
+ * plus mvsad_err_cost() measured against center_mv >> 3.
+ *
+ * x            - macroblock state; x->second_best_mv receives the previous
+ *                best vector each time the best position moves.
+ * cfg          - search-site table (ss, ss_count, searches_per_step).
+ * ref_mv       - starting vector.
+ * best_mv      - output: best vector found.
+ * search_param - number of leading (largest) steps of the table to skip.
+ * sad_per_bit  - weight handed to mvsad_err_cost().
+ * num00        - output: count of steps that found no better site while the
+ *                best position was still the starting position.
+ * fn_ptr       - SAD function table.
+ * center_mv    - vector the motion-vector cost is measured against.
+ *
+ * Returns the best SAD-plus-cost score.
+ */
+int vp10_diamond_search_sad_c(MACROBLOCK *x, const search_site_config *cfg,
+ MV *ref_mv, MV *best_mv, int search_param,
+ int sad_per_bit, int *num00,
+ const vpx_variance_fn_ptr_t *fn_ptr,
+ const MV *center_mv) {
+ int i, j, step;
+
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ uint8_t *what = x->plane[0].src.buf;
+ const int what_stride = x->plane[0].src.stride;
+ const uint8_t *in_what;
+ const int in_what_stride = xd->plane[0].pre[0].stride;
+ const uint8_t *best_address;
+
+ unsigned int bestsad = INT_MAX;
+ int best_site = 0;
+ int last_site = 0;
+
+ int ref_row;
+ int ref_col;
+
+ // search_param determines the length of the initial step and hence the number
+ // of iterations.
+ // 0 = initial step (MAX_FIRST_STEP) pel
+ // 1 = (MAX_FIRST_STEP/2) pel,
+ // 2 = (MAX_FIRST_STEP/4) pel...
+ const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
+ const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
+
+ const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
+ clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ ref_row = ref_mv->row;
+ ref_col = ref_mv->col;
+ *num00 = 0;
+ best_mv->row = ref_row;
+ best_mv->col = ref_col;
+
+ // Work out the start point for the search
+ in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
+ best_address = in_what;
+
+ // Check the starting position
+ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
+ mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
+ // i starts at 1, so site index 0 is never stored in best_site by the loops.
+ i = 1;
+
+ for (step = 0; step < tot_steps; step++) {
+ int all_in = 1, t;
+
+ // all_in is set when the first four sites of this step lie strictly inside
+ // the mv limits (rows of sites i and i+1, columns of sites i+2 and i+3).
+ all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min);
+ all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max);
+ all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min);
+ all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max);
+
+ // If all the pixels are within the bounds we don't check whether the
+ // search point is valid in this loop, otherwise we check each point
+ // for validity..
+ if (all_in) {
+ unsigned int sad_array[4];
+
+ for (j = 0; j < cfg->searches_per_step; j += 4) {
+ unsigned char const *block_offset[4];
+
+ for (t = 0; t < 4; t++)
+ block_offset[t] = ss[i + t].offset + best_address;
+
+ fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
+ sad_array);
+
+ for (t = 0; t < 4; t++, i++) {
+ if (sad_array[t] < bestsad) {
+ const MV this_mv = { best_mv->row + ss[i].mv.row,
+ best_mv->col + ss[i].mv.col };
+ sad_array[t] +=
+ mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
+ if (sad_array[t] < bestsad) {
+ bestsad = sad_array[t];
+ best_site = i;
+ }
+ }
+ }
+ }
+ } else {
+ for (j = 0; j < cfg->searches_per_step; j++) {
+ // Trap illegal vectors
+ const MV this_mv = { best_mv->row + ss[i].mv.row,
+ best_mv->col + ss[i].mv.col };
+
+ if (is_mv_in(x, &this_mv)) {
+ const uint8_t *const check_here = ss[i].offset + best_address;
+ unsigned int thissad =
+ fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
+
+ if (thissad < bestsad) {
+ thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_site = i;
+ }
+ }
+ }
+ i++;
+ }
+ }
+ if (best_site != last_site) {  // this step found a better site: move there
+ x->second_best_mv.as_mv = *best_mv;
+ best_mv->row += ss[best_site].mv.row;
+ best_mv->col += ss[best_site].mv.col;
+ best_address += ss[best_site].offset;
+ last_site = best_site;
+#if defined(NEW_DIAMOND_SEARCH)
+ while (1) {  // keep moving by the winning site's offset while it improves
+ const MV this_mv = { best_mv->row + ss[best_site].mv.row,
+ best_mv->col + ss[best_site].mv.col };
+ if (is_mv_in(x, &this_mv)) {
+ const uint8_t *const check_here = ss[best_site].offset + best_address;
+ unsigned int thissad =
+ fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
+ if (thissad < bestsad) {
+ thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_mv->row += ss[best_site].mv.row;
+ best_mv->col += ss[best_site].mv.col;
+ best_address += ss[best_site].offset;
+ continue;
+ }
+ }
+ }
+ break;
+ }
+#endif
+ } else if (best_address == in_what) {  // no move, still at the start point
+ (*num00)++;
+ }
+ }
+ return bestsad;
+}
+
+/*
+ * Coarse-to-fine 1-D match of src against ref.
+ *
+ * Candidate offsets lie in [0, 4 << bwl]. Every 16th offset is scored with
+ * vpx_vector_var() first; the winner is then refined by probing the positions
+ * one step below and one step above it, for steps of 8, 4, 2 and 1. Both
+ * probes of a round are taken around the winner of the previous round.
+ *
+ * Returns the best offset relative to the middle of the range, i.e.
+ * best - ((4 << bwl) >> 1).
+ */
+static int vector_match(int16_t *ref, int16_t *src, int bwl) {
+  const int span = 4 << bwl;
+  int lowest = INT_MAX;
+  int anchor = 0;
+  int winner;
+  int pos;
+  int step;
+
+  // Coarse pass over every 16th offset.
+  for (pos = 0; pos <= span; pos += 16) {
+    const int score = vpx_vector_var(&ref[pos], src, bwl);
+    if (score < lowest) {
+      lowest = score;
+      anchor = pos;
+    }
+  }
+  winner = anchor;
+
+  // Refinement passes with a halving step.
+  for (step = 8; step >= 1; step >>= 1) {
+    int side;
+    for (side = -1; side <= 1; side += 2) {
+      const int probe = anchor + side * step;
+      int score;
+      // Skip probes that fall outside the candidate range.
+      if (probe < 0 || probe > span) continue;
+      score = vpx_vector_var(&ref[probe], src, bwl);
+      if (score < lowest) {
+        lowest = score;
+        winner = probe;
+      }
+    }
+    anchor = winner;
+  }
+
+  return winner - (span >> 1);
+}
+/* (row, col) offsets of the four 1-away neighbours: row-1, col-1, col+1,
+ * row+1. vp10_int_pro_motion_estimation() indexes this table with the same
+ * index it uses for its pos[] candidate pointers. */
+static const MV search_pos[4] = {
+ { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
+};
+/*
+ * Quick integer-pel motion estimate for one block.
+ *
+ * Row and column vectors are produced with vpx_int_pro_row() and
+ * vpx_int_pro_col() for the reference (over twice the block width and height,
+ * starting half a block before the co-located position) and for the source
+ * block. vector_match() picks the column and row displacement separately; the
+ * result is then refined with the SAD of the four 1-away neighbours and of
+ * one diagonal position.
+ *
+ * The chosen vector is written, multiplied by 8, to xd->mi[0]->mbmi.mv[0] and
+ * its SAD is returned. If vp10_get_scaled_ref_frame() returns a frame, the
+ * plane[].pre[0] buffers are swapped to it for the search and restored before
+ * returning.
+ *
+ * In CONFIG_VP9_HIGHBITDEPTH builds the function returns the SAD at the zero
+ * vector and none of the search below the #endif is reached.
+ *
+ * NOTE(review): src_vbuf is only written at indices below bh, and src_hbuf is
+ * filled in bw-bounded steps, yet both are sized MAX_SB_SQUARE on the stack;
+ * this looks oversized - confirm against vpx_int_pro_row() before shrinking.
+ */
+unsigned int vp10_int_pro_motion_estimation(const VP10_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, int mi_row,
+ int mi_col) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
+ DECLARE_ALIGNED(16, int16_t, hbuf[2 * MAX_SB_SIZE]);
+ DECLARE_ALIGNED(16, int16_t, vbuf[2 * MAX_SB_SIZE]);
+ DECLARE_ALIGNED(16, int16_t, src_hbuf[MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(16, int16_t, src_vbuf[MAX_SB_SQUARE]);
+ int idx;
+ const int bw = 4 << b_width_log2_lookup[bsize];
+ const int bh = 4 << b_height_log2_lookup[bsize];
+ const int search_width = bw << 1;
+ const int search_height = bh << 1;
+ const int src_stride = x->plane[0].src.stride;
+ const int ref_stride = xd->plane[0].pre[0].stride;
+ uint8_t const *ref_buf, *src_buf;
+ MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
+ unsigned int best_sad, tmp_sad, this_sad[4];
+ MV this_mv;
+ const int norm_factor = 3 + (bw >> 5);
+ const YV12_BUFFER_CONFIG *scaled_ref_frame =
+ vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
+
+ if (scaled_ref_frame) {
+ int i;
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
+ vp10_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
+ }
+ // High-bitdepth builds: only the zero-vector SAD is computed and returned.
+#if CONFIG_VP9_HIGHBITDEPTH
+ {
+ unsigned int this_sad;  // shadows the outer this_sad[4] array
+ tmp_mv->row = 0;
+ tmp_mv->col = 0;
+ this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
+ xd->plane[0].pre[0].buf, ref_stride);
+
+ if (scaled_ref_frame) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
+ }
+ return this_sad;
+ }
+#endif
+
+ // Set up prediction 1-D reference set
+ ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
+ for (idx = 0; idx < search_width; idx += 16) {
+ vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
+ ref_buf += 16;
+ }
+
+ ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
+ for (idx = 0; idx < search_height; ++idx) {
+ vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor;
+ ref_buf += ref_stride;
+ }
+
+ // Set up src 1-D reference set
+ for (idx = 0; idx < bw; idx += 16) {
+ src_buf = x->plane[0].src.buf + idx;
+ vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
+ }
+
+ src_buf = x->plane[0].src.buf;
+ for (idx = 0; idx < bh; ++idx) {
+ src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor;
+ src_buf += src_stride;
+ }
+
+ // Find the best match per 1-D search
+ tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
+ tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
+
+ this_mv = *tmp_mv;
+ src_buf = x->plane[0].src.buf;
+ ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
+ best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
+
+ {
+ const uint8_t *const pos[4] = {
+ ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
+ };
+
+ cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
+ }
+
+ for (idx = 0; idx < 4; ++idx) {
+ if (this_sad[idx] < best_sad) {
+ best_sad = this_sad[idx];
+ tmp_mv->row = search_pos[idx].row + this_mv.row;
+ tmp_mv->col = search_pos[idx].col + this_mv.col;
+ }
+ }
+ // Build one diagonal candidate from the cheaper neighbour on each axis.
+ if (this_sad[0] < this_sad[3])
+ this_mv.row -= 1;
+ else
+ this_mv.row += 1;
+
+ if (this_sad[1] < this_sad[2])
+ this_mv.col -= 1;
+ else
+ this_mv.col += 1;
+
+ ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
+
+ tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
+ if (best_sad > tmp_sad) {
+ *tmp_mv = this_mv;
+ best_sad = tmp_sad;
+ }
+ // Scale the full-pel result by 8 (presumably to 1/8-pel mv units).
+ tmp_mv->row *= 8;
+ tmp_mv->col *= 8;
+
+ if (scaled_ref_frame) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
+ }
+
+ return best_sad;
+}
+
+/*
+ * Multi-pass diamond search at full-pixel precision.
+ *
+ * cpi->diamond_search_sad() is run from mvp_full with step_param and then,
+ * for up to further_steps more passes, with step_param + pass number. Each
+ * result below INT_MAX is rescored with vp10_get_mvpred_var(); the lowest
+ * score wins and its vector is kept in x->best_mv. A pass is skipped while
+ * the num00 count reported by the previous search is still non-zero.
+ *
+ * do_refine: if the last (1-away) step of the n-step search did not pick the
+ *            centre point as the best match, a final 1-away refining search
+ *            (vp10_refining_search_sad) is run as well.
+ * cost_list: when non-NULL it is filled in by calc_int_cost_list().
+ *
+ * Returns the best score found.
+ */
+static int full_pixel_diamond(VP10_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
+                              int step_param, int sadpb, int further_steps,
+                              int do_refine, int *cost_list,
+                              const vpx_variance_fn_ptr_t *fn_ptr,
+                              const MV *ref_mv) {
+  MV trial_mv;
+  int trial_err;
+  int pass;
+  int skip_passes = 0;
+  int best_err =
+      cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &trial_mv, step_param,
+                              sadpb, &pass, fn_ptr, ref_mv);
+
+  if (best_err < INT_MAX) {
+    best_err = vp10_get_mvpred_var(x, &trial_mv, ref_mv, fn_ptr, 1);
+  }
+  x->best_mv.as_mv = trial_mv;
+
+  // No further n-step passes will run, so no refining search is needed.
+  if (pass > further_steps) do_refine = 0;
+
+  while (pass < further_steps) {
+    ++pass;
+
+    if (skip_passes) {
+      --skip_passes;
+      continue;
+    }
+
+    trial_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &trial_mv,
+                                        step_param + pass, sadpb, &skip_passes,
+                                        fn_ptr, ref_mv);
+    if (trial_err < INT_MAX) {
+      trial_err = vp10_get_mvpred_var(x, &trial_mv, ref_mv, fn_ptr, 1);
+    }
+
+    // The remaining passes would all be skipped: refinement is not needed.
+    if (skip_passes > further_steps - pass) do_refine = 0;
+
+    if (trial_err < best_err) {
+      best_err = trial_err;
+      x->best_mv.as_mv = trial_mv;
+    }
+  }
+
+  // Final 1-away diamond refining search.
+  if (do_refine) {
+    const int refine_range = 8;
+    MV refined_mv = x->best_mv.as_mv;
+
+    trial_err = vp10_refining_search_sad(x, &refined_mv, sadpb, refine_range,
+                                         fn_ptr, ref_mv);
+    if (trial_err < INT_MAX) {
+      trial_err = vp10_get_mvpred_var(x, &refined_mv, ref_mv, fn_ptr, 1);
+    }
+    if (trial_err < best_err) {
+      best_err = trial_err;
+      x->best_mv.as_mv = refined_mv;
+    }
+  }
+
+  if (cost_list) {
+    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list);
+  }
+  return best_err;
+}
+
+#define MIN_RANGE 7
+#define MAX_RANGE 256
+#define MIN_INTERVAL 1
+/*
+ * Runs a limited-range exhaustive mesh search using the pattern set in
+ * cpi->sf.mesh_patterns.
+ *
+ * The first pattern's range is widened to at least 5/4 of the larger
+ * component magnitude of centre_mv_full (capped at MAX_RANGE) and its
+ * interval is raised to keep the original range/interval ratio. If that first
+ * search used an interval and range above the minimums, the remaining
+ * patterns are run in order, stopping after the first one whose interval
+ * is 1.
+ *
+ * The per-thread exhaustive-search counter is incremented on every call.
+ * Returns INT_MAX, leaving dst_mv untouched, when the first pattern's range
+ * or interval is out of bounds. Otherwise the best vector is stored in
+ * dst_mv, cost_list (if non-NULL) is filled, and the score is returned.
+ */
+static int full_pixel_exhaustive(VP10_COMP *cpi, MACROBLOCK *x,
+                                 const MV *centre_mv_full, int sadpb,
+                                 int *cost_list,
+                                 const vpx_variance_fn_ptr_t *fn_ptr,
+                                 const MV *ref_mv, MV *dst_mv) {
+  const SPEED_FEATURES *const sf = &cpi->sf;
+  MV cur_mv = { centre_mv_full->row, centre_mv_full->col };
+  MV full_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
+  int mesh_interval = sf->mesh_patterns[0].interval;
+  int mesh_range = sf->mesh_patterns[0].range;
+  int ratio;
+  int err;
+  int pat;
+
+  // Keep track of number of exhaustive calls (this frame in this thread).
+  ++(*x->ex_search_count_ptr);
+
+  // Reject illegal range / interval settings.
+  if (mesh_range < MIN_RANGE || mesh_range > MAX_RANGE) return INT_MAX;
+  if (mesh_interval < MIN_INTERVAL || mesh_interval > mesh_range)
+    return INT_MAX;
+
+  ratio = mesh_range / mesh_interval;
+
+  // Grow the first range with the magnitude of the starting vector.
+  mesh_range = VPXMAX(mesh_range,
+                      (5 * VPXMAX(abs(cur_mv.row), abs(cur_mv.col))) / 4);
+  mesh_range = VPXMIN(mesh_range, MAX_RANGE);
+  mesh_interval = VPXMAX(mesh_interval, mesh_range / ratio);
+
+  // Initial search.
+  err = exhuastive_mesh_search(x, &full_ref_mv, &cur_mv, mesh_range,
+                               mesh_interval, sadpb, fn_ptr, &cur_mv);
+
+  // Follow-up searches with the remaining patterns, until one has used a
+  // step size of 1.
+  if (mesh_interval > MIN_INTERVAL && mesh_range > MIN_RANGE) {
+    for (pat = 1; pat < MAX_MESH_STEP; ++pat) {
+      err = exhuastive_mesh_search(x, &full_ref_mv, &cur_mv,
+                                   sf->mesh_patterns[pat].range,
+                                   sf->mesh_patterns[pat].interval, sadpb,
+                                   fn_ptr, &cur_mv);
+
+      if (sf->mesh_patterns[pat].interval == 1) break;
+    }
+  }
+
+  if (err < INT_MAX) err = vp10_get_mvpred_var(x, &cur_mv, ref_mv, fn_ptr, 1);
+  *dst_mv = cur_mv;
+
+  if (cost_list) {
+    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
+  }
+  return err;
+}
+
+/*
+ * Exhaustive full-pixel search over a square window around ref_mv.
+ *
+ * The window is ref_mv +/- distance, clipped to the x->mv_* limits; rows and
+ * columns are scanned from the lower bound up to, but not including, the
+ * upper bound. Each position is scored as fn_ptr->sdf() SAD plus
+ * mvsad_err_cost() against center_mv >> 3. ref_mv itself is the initial
+ * best.
+ *
+ * Stores the winning vector in best_mv and returns its score.
+ */
+int vp10_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
+                           int sad_per_bit, int distance,
+                           const vpx_variance_fn_ptr_t *fn_ptr,
+                           const MV *center_mv, MV *best_mv) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const src = &x->plane[0].src;
+  const struct buf_2d *const ref = &xd->plane[0].pre[0];
+  const int row_lo = VPXMAX(ref_mv->row - distance, x->mv_row_min);
+  const int row_hi = VPXMIN(ref_mv->row + distance, x->mv_row_max);
+  const int col_lo = VPXMAX(ref_mv->col - distance, x->mv_col_min);
+  const int col_hi = VPXMIN(ref_mv->col + distance, x->mv_col_max);
+  const MV cost_center = { center_mv->row >> 3, center_mv->col >> 3 };
+  int row;
+  int lowest =
+      fn_ptr->sdf(src->buf, src->stride, get_buf_from_mv(ref, ref_mv),
+                  ref->stride) +
+      mvsad_err_cost(x, ref_mv, &cost_center, sad_per_bit);
+
+  *best_mv = *ref_mv;
+
+  for (row = row_lo; row < row_hi; ++row) {
+    int col;
+    for (col = col_lo; col < col_hi; ++col) {
+      const MV cand = { row, col };
+      const int score =
+          fn_ptr->sdf(src->buf, src->stride, get_buf_from_mv(ref, &cand),
+                      ref->stride) +
+          mvsad_err_cost(x, &cand, &cost_center, sad_per_bit);
+      if (score >= lowest) continue;
+      lowest = score;
+      *best_mv = cand;
+    }
+  }
+  return lowest;
+}
+
+/*
+ * Exhaustive full-pixel search that scores three columns per call when
+ * fn_ptr->sdx3f is available.
+ *
+ * The window is ref_mv +/- distance clipped to the x->mv_* limits, with the
+ * upper bounds excluded. Within a row, columns are consumed three at a time
+ * through sdx3f while at least three remain; the rest go through
+ * fn_ptr->sdf one by one. The motion-vector cost is added only to candidates
+ * whose raw SAD already beats the current best.
+ *
+ * Stores the winning vector in best_mv and returns its score.
+ */
+int vp10_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
+                           int sad_per_bit, int distance,
+                           const vpx_variance_fn_ptr_t *fn_ptr,
+                           const MV *center_mv, MV *best_mv) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const src = &x->plane[0].src;
+  const struct buf_2d *const ref = &xd->plane[0].pre[0];
+  const int row_lo = VPXMAX(ref_mv->row - distance, x->mv_row_min);
+  const int row_hi = VPXMIN(ref_mv->row + distance, x->mv_row_max);
+  const int col_lo = VPXMAX(ref_mv->col - distance, x->mv_col_min);
+  const int col_hi = VPXMIN(ref_mv->col + distance, x->mv_col_max);
+  const MV cost_center = { center_mv->row >> 3, center_mv->col >> 3 };
+  int row;
+  unsigned int best_sad =
+      fn_ptr->sdf(src->buf, src->stride, get_buf_from_mv(ref, ref_mv),
+                  ref->stride) +
+      mvsad_err_cost(x, ref_mv, &cost_center, sad_per_bit);
+
+  *best_mv = *ref_mv;
+
+  for (row = row_lo; row < row_hi; ++row) {
+    int col = col_lo;
+    const uint8_t *cand_ptr = &ref->buf[row * ref->stride + col];
+
+    // Batches of three adjacent columns.
+    if (fn_ptr->sdx3f != NULL) {
+      while (col + 2 < col_hi) {
+        DECLARE_ALIGNED(16, uint32_t, batch[3]);
+        int k;
+
+        fn_ptr->sdx3f(src->buf, src->stride, cand_ptr, ref->stride, batch);
+
+        for (k = 0; k < 3; ++k, ++cand_ptr, ++col) {
+          unsigned int score = batch[k];
+          if (score < best_sad) {
+            const MV cand = { row, col };
+            score += mvsad_err_cost(x, &cand, &cost_center, sad_per_bit);
+            if (score < best_sad) {
+              best_sad = score;
+              *best_mv = cand;
+            }
+          }
+        }
+      }
+    }
+
+    // Leftover columns, one at a time.
+    for (; col < col_hi; ++col, ++cand_ptr) {
+      unsigned int score =
+          fn_ptr->sdf(src->buf, src->stride, cand_ptr, ref->stride);
+      if (score < best_sad) {
+        const MV cand = { row, col };
+        score += mvsad_err_cost(x, &cand, &cost_center, sad_per_bit);
+        if (score < best_sad) {
+          best_sad = score;
+          *best_mv = cand;
+        }
+      }
+    }
+  }
+
+  return best_sad;
+}
+
+/*
+ * Exhaustive full-pixel search that scores eight, then three, columns per
+ * call when fn_ptr->sdx8f / fn_ptr->sdx3f are available.
+ *
+ * The window is ref_mv +/- distance clipped to the x->mv_* limits, with the
+ * upper bounds excluded. Within a row, columns are consumed eight at a time
+ * while at least eight remain, then three at a time while at least three
+ * remain, and finally one by one through fn_ptr->sdf. The motion-vector cost
+ * is added only to candidates whose raw SAD already beats the current best.
+ *
+ * Stores the winning vector in best_mv and returns its score.
+ */
+int vp10_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
+                           int sad_per_bit, int distance,
+                           const vpx_variance_fn_ptr_t *fn_ptr,
+                           const MV *center_mv, MV *best_mv) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const src = &x->plane[0].src;
+  const struct buf_2d *const ref = &xd->plane[0].pre[0];
+  const int row_lo = VPXMAX(ref_mv->row - distance, x->mv_row_min);
+  const int row_hi = VPXMIN(ref_mv->row + distance, x->mv_row_max);
+  const int col_lo = VPXMAX(ref_mv->col - distance, x->mv_col_min);
+  const int col_hi = VPXMIN(ref_mv->col + distance, x->mv_col_max);
+  const MV cost_center = { center_mv->row >> 3, center_mv->col >> 3 };
+  int row;
+  unsigned int best_sad =
+      fn_ptr->sdf(src->buf, src->stride, get_buf_from_mv(ref, ref_mv),
+                  ref->stride) +
+      mvsad_err_cost(x, ref_mv, &cost_center, sad_per_bit);
+
+  *best_mv = *ref_mv;
+
+  for (row = row_lo; row < row_hi; ++row) {
+    int col = col_lo;
+    const uint8_t *cand_ptr = &ref->buf[row * ref->stride + col];
+
+    // Batches of eight adjacent columns.
+    if (fn_ptr->sdx8f != NULL) {
+      while (col + 7 < col_hi) {
+        DECLARE_ALIGNED(16, uint32_t, batch8[8]);
+        int k;
+
+        fn_ptr->sdx8f(src->buf, src->stride, cand_ptr, ref->stride, batch8);
+
+        for (k = 0; k < 8; ++k, ++cand_ptr, ++col) {
+          unsigned int score = batch8[k];
+          if (score < best_sad) {
+            const MV cand = { row, col };
+            score += mvsad_err_cost(x, &cand, &cost_center, sad_per_bit);
+            if (score < best_sad) {
+              best_sad = score;
+              *best_mv = cand;
+            }
+          }
+        }
+      }
+    }
+
+    // Batches of three adjacent columns.
+    if (fn_ptr->sdx3f != NULL) {
+      while (col + 2 < col_hi) {
+        DECLARE_ALIGNED(16, uint32_t, batch3[3]);
+        int k;
+
+        fn_ptr->sdx3f(src->buf, src->stride, cand_ptr, ref->stride, batch3);
+
+        for (k = 0; k < 3; ++k, ++cand_ptr, ++col) {
+          unsigned int score = batch3[k];
+          if (score < best_sad) {
+            const MV cand = { row, col };
+            score += mvsad_err_cost(x, &cand, &cost_center, sad_per_bit);
+            if (score < best_sad) {
+              best_sad = score;
+              *best_mv = cand;
+            }
+          }
+        }
+      }
+    }
+
+    // Leftover columns, one at a time.
+    for (; col < col_hi; ++col, ++cand_ptr) {
+      unsigned int score =
+          fn_ptr->sdf(src->buf, src->stride, cand_ptr, ref->stride);
+      if (score < best_sad) {
+        const MV cand = { row, col };
+        score += mvsad_err_cost(x, &cand, &cost_center, sad_per_bit);
+        if (score < best_sad) {
+          best_sad = score;
+          *best_mv = cand;
+        }
+      }
+    }
+  }
+
+  return best_sad;
+}
+
+/*
+ * Iterative 1-away refinement of ref_mv over its four direct neighbours.
+ *
+ * For at most search_range rounds the neighbours at row-1, col-1, col+1 and
+ * row+1 are scored (SAD plus mvsad_err_cost() against center_mv >> 3). When
+ * all four lie strictly inside the x->mv_* limits they are scored with one
+ * fn_ptr->sdx4df call; otherwise each is tested with is_mv_in() and scored
+ * with fn_ptr->sdf. ref_mv moves to the best improving neighbour and the
+ * vector it moved from is saved in x->second_best_mv; the search ends early
+ * when no neighbour improves.
+ *
+ * ref_mv is updated in place. Returns the best score.
+ */
+int vp10_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit,
+                             int search_range,
+                             const vpx_variance_fn_ptr_t *fn_ptr,
+                             const MV *center_mv) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const MV steps[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
+  const struct buf_2d *const src = &x->plane[0].src;
+  const struct buf_2d *const ref = &xd->plane[0].pre[0];
+  const MV cost_center = { center_mv->row >> 3, center_mv->col >> 3 };
+  const uint8_t *cur_ptr = get_buf_from_mv(ref, ref_mv);
+  unsigned int best_sad =
+      fn_ptr->sdf(src->buf, src->stride, cur_ptr, ref->stride) +
+      mvsad_err_cost(x, ref_mv, &cost_center, error_per_bit);
+  int round;
+
+  for (round = 0; round < search_range; round++) {
+    int winner = -1;
+    int k;
+    const int interior = (ref_mv->row - 1 > x->mv_row_min) &&
+                         (ref_mv->row + 1 < x->mv_row_max) &&
+                         (ref_mv->col - 1 > x->mv_col_min) &&
+                         (ref_mv->col + 1 < x->mv_col_max);
+
+    if (interior) {
+      // All four neighbours are legal: score them in one call.
+      unsigned int quad[4];
+      const uint8_t *const around[4] = { cur_ptr - ref->stride, cur_ptr - 1,
+                                         cur_ptr + 1, cur_ptr + ref->stride };
+
+      fn_ptr->sdx4df(src->buf, src->stride, around, ref->stride, quad);
+
+      for (k = 0; k < 4; ++k) {
+        if (quad[k] < best_sad) {
+          const MV cand = { ref_mv->row + steps[k].row,
+                            ref_mv->col + steps[k].col };
+          quad[k] += mvsad_err_cost(x, &cand, &cost_center, error_per_bit);
+          if (quad[k] < best_sad) {
+            best_sad = quad[k];
+            winner = k;
+          }
+        }
+      }
+    } else {
+      // Near the limits: validate and score each neighbour separately.
+      for (k = 0; k < 4; ++k) {
+        const MV cand = { ref_mv->row + steps[k].row,
+                          ref_mv->col + steps[k].col };
+        unsigned int score;
+
+        if (!is_mv_in(x, &cand)) continue;
+
+        score = fn_ptr->sdf(src->buf, src->stride,
+                            get_buf_from_mv(ref, &cand), ref->stride);
+        if (score < best_sad) {
+          score += mvsad_err_cost(x, &cand, &cost_center, error_per_bit);
+          if (score < best_sad) {
+            best_sad = score;
+            winner = k;
+          }
+        }
+      }
+    }
+
+    // No neighbour improved on the current position: done.
+    if (winner == -1) break;
+
+    x->second_best_mv.as_mv = *ref_mv;
+    ref_mv->row += steps[winner].row;
+    ref_mv->col += steps[winner].col;
+    cur_ptr = get_buf_from_mv(ref, ref_mv);
+  }
+
+  return best_sad;
+}
+
+// Called for joint motion search in comp_inter_inter mode.
+//
+// Refines x->best_mv in place over its eight surrounding positions for at most
+// search_range rounds. A position is scored with fn_ptr->sdaf(), which also
+// receives second_pred, plus mvsad_err_cost() against center_mv >> 3; only
+// positions accepted by is_mv_in() are tried. The search stops early when no
+// neighbour improves. Returns the best score.
+int vp10_refining_search_8p_c(MACROBLOCK *x, int error_per_bit,
+                              int search_range,
+                              const vpx_variance_fn_ptr_t *fn_ptr,
+                              const MV *center_mv, const uint8_t *second_pred) {
+  const MV steps[8] = { { -1, 0 },  { 0, -1 }, { 0, 1 },  { 1, 0 },
+                        { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const src = &x->plane[0].src;
+  const struct buf_2d *const ref = &xd->plane[0].pre[0];
+  const MV cost_center = { center_mv->row >> 3, center_mv->col >> 3 };
+  MV *best_mv = &x->best_mv.as_mv;
+  unsigned int best_sad =
+      fn_ptr->sdaf(src->buf, src->stride, get_buf_from_mv(ref, best_mv),
+                   ref->stride, second_pred) +
+      mvsad_err_cost(x, best_mv, &cost_center, error_per_bit);
+  int round;
+
+  for (round = 0; round < search_range; ++round) {
+    int winner = -1;
+    int k;
+
+    for (k = 0; k < 8; ++k) {
+      const MV cand = { best_mv->row + steps[k].row,
+                        best_mv->col + steps[k].col };
+      unsigned int score;
+
+      if (!is_mv_in(x, &cand)) continue;
+
+      score = fn_ptr->sdaf(src->buf, src->stride, get_buf_from_mv(ref, &cand),
+                           ref->stride, second_pred);
+      if (score < best_sad) {
+        score += mvsad_err_cost(x, &cand, &cost_center, error_per_bit);
+        if (score < best_sad) {
+          best_sad = score;
+          winner = k;
+        }
+      }
+    }
+
+    if (winner == -1) break;
+
+    best_mv->row += steps[winner].row;
+    best_mv->col += steps[winner].col;
+  }
+  return best_sad;
+}
+
+#define MIN_EX_SEARCH_LIMIT 128
+// Returns 1 when a follow-on exhaustive search may run, 0 otherwise.
+//
+// The budget is max_exaustive_pct percent of the motion searches counted so
+// far, but never less than MIN_EX_SEARCH_LIMIT. The search is allowed only if
+// the speed features enable it, the threshold is below INT_MAX, the number of
+// exhaustive searches so far is within the budget, and the frame is not
+// flagged as rc.is_src_frame_alt_ref.
+static int is_exhaustive_allowed(VP10_COMP *cpi, MACROBLOCK *x) {
+  const SPEED_FEATURES *const sf = &cpi->sf;
+  const int budget =
+      VPXMAX(MIN_EX_SEARCH_LIMIT,
+             (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
+
+  if (!sf->allow_exhaustive_searches) return 0;
+  if (!(sf->exhaustive_searches_thresh < INT_MAX)) return 0;
+  if (!(*x->ex_search_count_ptr <= budget)) return 0;
+  return !cpi->rc.is_src_frame_alt_ref;
+}
+
+/*
+ * Full-pixel motion search entry point: dispatches on
+ * cpi->sf.mv.search_method.
+ *
+ * cpi           - encoder instance (speed features, SAD function tables).
+ * x             - macroblock state; the per-thread motion-search counter is
+ *                 incremented, and for NSTEP the result is stored in
+ *                 x->best_mv.
+ * bsize         - block size, selects cpi->fn_ptr[bsize].
+ * mvp_full      - starting vector for the search.
+ * step_param    - initial step selector handed to the search routine.
+ * error_per_bit - weight of the motion-vector cost.
+ * cost_list     - optional 5-entry array; reset to INT_MAX here and then
+ *                 passed to the search routine.
+ * ref_mv        - vector the motion-vector cost is measured against.
+ * var_max, rd   - for methods other than NSTEP: when rd is non-zero and the
+ *                 search result is below var_max, the result is recomputed
+ *                 with vp10_get_mvpred_var() on x->best_mv.
+ *
+ * For NSTEP a follow-on exhaustive search may run (see
+ * is_exhaustive_allowed) when the diamond result exceeds a block-size-scaled
+ * threshold; its vector replaces x->best_mv only if it scores lower.
+ *
+ * Returns the final score; 0 if the method is not recognised and asserts are
+ * compiled out.
+ */
+int vp10_full_pixel_search(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+                           MV *mvp_full, int step_param, int error_per_bit,
+                           int *cost_list, const MV *ref_mv, int var_max,
+                           int rd) {
+  const SPEED_FEATURES *const sf = &cpi->sf;
+  const SEARCH_METHODS method = sf->mv.search_method;
+  vpx_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
+  int var = 0;
+
+  if (cost_list) {
+    int k;
+    for (k = 0; k < 5; ++k) cost_list[k] = INT_MAX;
+  }
+
+  // Keep track of number of searches (this frame in this thread).
+  ++(*x->m_search_count_ptr);
+
+  switch (method) {
+    case FAST_DIAMOND:
+      var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
+                            cost_list, fn_ptr, 1, ref_mv);
+      break;
+    case FAST_HEX:
+      var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
+                            cost_list, fn_ptr, 1, ref_mv);
+      break;
+    case HEX:
+      var = vp10_hex_search(x, mvp_full, step_param, error_per_bit, 1,
+                            cost_list, fn_ptr, 1, ref_mv);
+      break;
+    case SQUARE:
+      var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
+                          fn_ptr, 1, ref_mv);
+      break;
+    case BIGDIA:
+      var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
+                          fn_ptr, 1, ref_mv);
+      break;
+    case NSTEP:
+      var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
+                               MAX_MVSEARCH_STEPS - 1 - step_param, 1,
+                               cost_list, fn_ptr, ref_mv);
+
+      // Should we allow a follow on exhaustive search?
+      if (is_exhaustive_allowed(cpi, x)) {
+        int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
+        const int thr_shift =
+            8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
+
+        // Scale the threshold with the block area. The shift count becomes
+        // negative once the two log2 values sum to more than 8 (a block
+        // larger than 64x64, if the build has one); shifting by a negative
+        // count is undefined, so scale up by the same power of two instead.
+        if (thr_shift >= 0) {
+          exhuastive_thr >>= thr_shift;
+        } else {
+          exhuastive_thr *= (int64_t)1 << -thr_shift;
+        }
+
+        // Threshold variance for an exhaustive full search.
+        if (var > exhuastive_thr) {
+          int var_ex;
+          MV tmp_mv_ex;
+          var_ex =
+              full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit,
+                                    cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
+
+          if (var_ex < var) {
+            var = var_ex;
+            x->best_mv.as_mv = tmp_mv_ex;
+          }
+        }
+      }
+      break;
+    default: assert(0 && "Invalid search method.");
+  }
+
+  if (method != NSTEP && rd && var < var_max)
+    var = vp10_get_mvpred_var(x, &x->best_mv.as_mv, ref_mv, fn_ptr, 1);
+
+  return var;
+}
+
+#if CONFIG_EXT_INTER
+/* DIST(r, c): masked sub-pixel error at position (r, c), obtained from
+ * vfp->msvf() on the prediction at pre(y, y_stride, r, c) with the phases
+ * sp(c) / sp(r), the source z and the mask. The SSE is written to the local
+ * variable sse. All names other than r and c are taken from the scope in
+ * which the macro is expanded. */
+#define DIST(r, c) \
+ vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, src_stride, \
+ mask, mask_stride, &sse)
+
+/* MVC(r, c): rate cost of (r, c) relative to (rr, rc) - joint cost plus the
+ * row and column component costs, multiplied by error_per_bit and rounded
+ * with (+ 4096) >> 13. Evaluates to 0 when mvcost is NULL. */
+
+#define MVC(r, c) \
+ (mvcost \
+ ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + mvcost[0][((r)-rr)] + \
+ mvcost[1][((c)-rc)]) * \
+ error_per_bit + \
+ 4096) >> \
+ 13 \
+ : 0)
+/* CHECK_BETTER(v, r, c): checks if (r, c) has a better score than the previous
+ * best. If (r, c) is inside [minr, maxr] x [minc, maxc], v is set to
+ * MVC + DIST and, when it beats besterr, besterr, br, bc, *distortion and
+ * *sse1 are updated; otherwise v is set to INT_MAX.
+ * NOTE(review): r and c are expanded several times and without parentheses,
+ * so arguments with side effects or low-precedence operators are unsafe. */
+#define CHECK_BETTER(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ thismse = (DIST(r, c)); \
+ if ((v = MVC(r, c) + thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+/* CHECK_BETTER0 is redefined here as a plain alias of CHECK_BETTER. */
+#undef CHECK_BETTER0
+#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
+/* CHECK_BETTER1: same update rule as CHECK_BETTER, but the error comes from
+ * upsampled_masked_pref_error() on upre(y, y_stride, r, c). */
+#undef CHECK_BETTER1
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ thismse = upsampled_masked_pref_error(xd, mask, mask_stride, vfp, z, \
+ src_stride, upre(y, y_stride, r, c), \
+ y_stride, w, h, &sse); \
+ if ((v = MVC(r, c) + thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+/*
+ * Masked sub-pixel motion refinement around a full-pixel vector.
+ *
+ * On entry bestmv holds a full-pixel vector; it is multiplied by 8 and the
+ * error at that position (vfp->mvf with the mask, plus mv_err_cost against
+ * ref_mv) becomes the initial best. The FIRST_LEVEL_CHECKS and
+ * SECOND_LEVEL_CHECKS macros (defined outside this function; presumably they
+ * update br/bc/besterr through the CHECK_BETTER macros) are then applied with
+ * hstep = 4, and with hstep halved for each further stage:
+ *   - second stage unless forced_stop == 2,
+ *   - third stage only if allow_hp && vp10_use_mv_hp(ref_mv) &&
+ *     forced_stop == 0.
+ * SECOND_LEVEL_CHECKS runs in a stage only when iters_per_step > 1.
+ *
+ * mask, mask_stride - mask passed to the masked variance functions.
+ * mvjcost, mvcost   - motion-vector cost tables for mv_err_cost().
+ * distortion, sse1  - outputs: error without the mv cost, and SSE, of the
+ *                     best position.
+ * is_second         - index into xd->plane[0].pre[] of the reference used.
+ *
+ * bestmv receives (br, bc). Returns the best error, or INT_MAX if the result
+ * is more than MAX_FULL_PEL_VAL << 3 away from ref_mv in either component.
+ */
+int vp10_find_best_masked_sub_pixel_tree(
+ const MACROBLOCK *x, const uint8_t *mask, int mask_stride, MV *bestmv,
+ const MV *ref_mv, int allow_hp, int error_per_bit,
+ const vpx_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
+ int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
+ int is_second) {
+ const uint8_t *const z = x->plane[0].src.buf;
+ const int src_stride = x->plane[0].src.stride;
+ const MACROBLOCKD *xd = &x->e_mbd;
+ unsigned int besterr = INT_MAX;
+ unsigned int sse;
+ int thismse;
+ unsigned int whichdir;  // not used directly here; presumably by the macros
+ unsigned int halfiters = iters_per_step;
+ unsigned int quarteriters = iters_per_step;
+ unsigned int eighthiters = iters_per_step;
+
+ const int y_stride = xd->plane[0].pre[is_second].stride;
+ const int offset = bestmv->row * y_stride + bestmv->col;
+ const uint8_t *const y = xd->plane[0].pre[is_second].buf;
+
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
+ int hstep = 4;
+ const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
+
+ int tr = br;
+ int tc = bc;
+
+ // central mv: bring bestmv to the same x8 scale as br / bc
+ bestmv->row *= 8;
+ bestmv->col *= 8;
+
+ // calculate central point error
+ besterr =
+ vfp->mvf(y + offset, y_stride, z, src_stride, mask, mask_stride, sse1);
+ *distortion = besterr;
+ besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+
+ // 1/2 pel
+ FIRST_LEVEL_CHECKS;
+ if (halfiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
+
+ // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+ if (forced_stop != 2) {
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (quarteriters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
+ }
+
+ if (allow_hp && vp10_use_mv_hp(ref_mv) && forced_stop == 0) {
+ hstep >>= 1;
+ FIRST_LEVEL_CHECKS;
+ if (eighthiters > 1) {
+ SECOND_LEVEL_CHECKS;
+ }
+ tr = br;
+ tc = bc;
+ }
+ // These lines ensure static analysis doesn't warn that
+ // tr and tc aren't used after the above point.
+ (void)tr;
+ (void)tc;
+
+ bestmv->row = br;
+ bestmv->col = bc;
+
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+
+/*
+ * Error of the centre position for a masked sub-pixel search.
+ *
+ * Calls vfp->mvf() on the prediction at y + offset against src with the
+ * given mask; the SSE goes to *sse1 and the raw result to *distortion.
+ * Returns that result plus mv_err_cost(bestmv, ref_mv, ...).
+ */
+static unsigned int setup_masked_center_error(
+    const uint8_t *mask, int mask_stride, const MV *bestmv, const MV *ref_mv,
+    int error_per_bit, const vpx_variance_fn_ptr_t *vfp,
+    const uint8_t *const src, const int src_stride, const uint8_t *const y,
+    int y_stride, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
+    int *distortion) {
+  unsigned int center_err =
+      vfp->mvf(y + offset, y_stride, src, src_stride, mask, mask_stride, sse1);
+
+  *distortion = center_err;
+  center_err += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+  return center_err;
+}
+
+/*
+ * Masked error of a w x h block predicted from an upsampled reference.
+ *
+ * The prediction is first gathered from y into a local buffer with
+ * vpx_upsampled_pred() and then compared to src with vfp->mvf(), using w as
+ * the prediction stride. In CONFIG_VP9_HIGHBITDEPTH builds a frame flagged
+ * YV12_FLAG_HIGHBITDEPTH goes through vpx_highbd_upsampled_pred() and a
+ * 16-bit buffer instead. The SSE is written to *sse; the mvf() result is
+ * returned.
+ */
+static int upsampled_masked_pref_error(const MACROBLOCKD *xd,
+                                       const uint8_t *mask, int mask_stride,
+                                       const vpx_variance_fn_ptr_t *vfp,
+                                       const uint8_t *const src,
+                                       const int src_stride,
+                                       const uint8_t *const y, int y_stride,
+                                       int w, int h, unsigned int *sse) {
+  unsigned int err;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
+    vpx_highbd_upsampled_pred(pred16, w, h, y, y_stride);
+
+    err = vfp->mvf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, mask,
+                   mask_stride, sse);
+    return err;
+  }
+#else
+  (void)xd;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  {
+    DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
+    vpx_upsampled_pred(pred, w, h, y, y_stride);
+
+    err = vfp->mvf(pred, w, src, src_stride, mask, mask_stride, sse);
+  }
+  return err;
+}
+
+/*
+ * Error of the centre position for a masked sub-pixel search on an upsampled
+ * reference.
+ *
+ * Evaluates upsampled_masked_pref_error() at y + offset; the SSE goes to
+ * *sse1 and the raw result to *distortion. Returns that result plus
+ * mv_err_cost(bestmv, ref_mv, ...).
+ */
+static unsigned int upsampled_setup_masked_center_error(
+    const MACROBLOCKD *xd, const uint8_t *mask, int mask_stride,
+    const MV *bestmv, const MV *ref_mv, int error_per_bit,
+    const vpx_variance_fn_ptr_t *vfp, const uint8_t *const src,
+    const int src_stride, const uint8_t *const y, int y_stride, int w, int h,
+    int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
+    int *distortion) {
+  const uint8_t *const center_pred = y + offset;
+  unsigned int center_err = upsampled_masked_pref_error(
+      xd, mask, mask_stride, vfp, src, src_stride, center_pred, y_stride, w, h,
+      sse1);
+
+  *distortion = center_err;
+  center_err += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+  return center_err;
+}
+
+int vp10_find_best_masked_sub_pixel_tree_up(
+ VP10_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride,
+ int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp,
+ int error_per_bit, const vpx_variance_fn_ptr_t *vfp, int forced_stop,
+ int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1, int is_second, int use_upsampled_ref) {
+ const uint8_t *const z = x->plane[0].src.buf;
+ const uint8_t *const src_address = z;
+ const int src_stride = x->plane[0].src.stride;
+ MACROBLOCKD *xd = &x->e_mbd;
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ unsigned int besterr = INT_MAX;
+ unsigned int sse;
+ unsigned int thismse;
+
+ int rr = ref_mv->row;
+ int rc = ref_mv->col;
+ int br = bestmv->row * 8;
+ int bc = bestmv->col * 8;
+ int hstep = 4;
+ int iter;
+ int round = 3 - forced_stop;
+ const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
+ int tr = br;
+ int tc = bc;
+ const MV *search_step = search_step_table;
+ int idx, best_idx = -1;
+ unsigned int cost_array[5];
+ int kr, kc;
+ const int w = 4 * num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ const int h = 4 * num_4x4_blocks_high_lookup[mbmi->sb_type];
+ int offset;
+ int y_stride;
+ const uint8_t *y;
+
+ const struct buf_2d backup_pred = pd->pre[is_second];
+ if (use_upsampled_ref) {
+ int ref = xd->mi[0]->mbmi.ref_frame[is_second];
+ const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
+ setup_pred_plane(&pd->pre[is_second], upsampled_ref->y_buffer,
+ upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
+ upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
+ NULL, pd->subsampling_x, pd->subsampling_y);
+ }
+ y = pd->pre[is_second].buf;
+ y_stride = pd->pre[is_second].stride;
+ offset = bestmv->row * y_stride + bestmv->col;
+
+ if (!(allow_hp && vp10_use_mv_hp(ref_mv)))
+ if (round == 3) round = 2;
+
+ bestmv->row *= 8;
+ bestmv->col *= 8;
+
+ // use_upsampled_ref can be 0 or 1
+ if (use_upsampled_ref)
+ besterr = upsampled_setup_masked_center_error(
+ xd, mask, mask_stride, bestmv, ref_mv, error_per_bit, vfp, z,
+ src_stride, y, y_stride, w, h, (offset << 3), mvjcost, mvcost, sse1,
+ distortion);
+ else
+ besterr = setup_masked_center_error(
+ mask, mask_stride, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y,
+ y_stride, offset, mvjcost, mvcost, sse1, distortion);
+
+ for (iter = 0; iter < round; ++iter) {
+ // Check vertical and horizontal sub-pixel positions.
+ for (idx = 0; idx < 4; ++idx) {
+ tr = br + search_step[idx].row;
+ tc = bc + search_step[idx].col;
+ if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
+ MV this_mv = { tr, tc };
+
+ if (use_upsampled_ref) {
+ const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+ thismse = upsampled_masked_pref_error(
+ xd, mask, mask_stride, vfp, src_address, src_stride, pre_address,
+ y_stride, w, h, &sse);
+ } else {
+ const uint8_t *const pre_address =
+ y + (tr >> 3) * y_stride + (tc >> 3);
+ thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, mask, mask_stride, &sse);
+ }
+
+ cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
+ mvcost, error_per_bit);
+
+ if (cost_array[idx] < besterr) {
+ best_idx = idx;
+ besterr = cost_array[idx];
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+ } else {
+ cost_array[idx] = INT_MAX;
+ }
+ }
+
+ // Check diagonal sub-pixel position
+ kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
+ kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
+
+ tc = bc + kc;
+ tr = br + kr;
+ if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
+ MV this_mv = { tr, tc };
+
+ if (use_upsampled_ref) {
+ const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+ thismse = upsampled_masked_pref_error(
+ xd, mask, mask_stride, vfp, src_address, src_stride, pre_address,
+ y_stride, w, h, &sse);
+ } else {
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+
+ thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr), src_address,
+ src_stride, mask, mask_stride, &sse);
+ }
+
+ cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+ error_per_bit);
+
+ if (cost_array[4] < besterr) {
+ best_idx = 4;
+ besterr = cost_array[4];
+ *distortion = thismse;
+ *sse1 = sse;
+ }
+ } else {
+ cost_array[idx] = INT_MAX;
+ }
+
+ if (best_idx < 4 && best_idx >= 0) {
+ br += search_step[best_idx].row;
+ bc += search_step[best_idx].col;
+ } else if (best_idx == 4) {
+ br = tr;
+ bc = tc;
+ }
+
+ if (iters_per_step > 1 && best_idx != -1) {
+ if (use_upsampled_ref) {
+ SECOND_LEVEL_CHECKS_BEST(1);
+ } else {
+ SECOND_LEVEL_CHECKS_BEST(0);
+ }
+ }
+
+ tr = br;
+ tc = bc;
+
+ search_step += 4;
+ hstep >>= 1;
+ best_idx = -1;
+ }
+
+ // These lines ensure static analysis doesn't warn that
+ // tr and tc aren't used after the above point.
+ (void)tr;
+ (void)tc;
+
+ bestmv->row = br;
+ bestmv->col = bc;
+
+ if (use_upsampled_ref) {
+ pd->pre[is_second] = backup_pred;
+ }
+
+ if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+ (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+ return INT_MAX;
+
+ return besterr;
+}
+
+#undef DIST
+#undef MVC
+#undef CHECK_BETTER
+
+// Returns the masked variance (vfp->mvf) of the block referenced by best_mv
+// in pre[is_second], plus the MV rate cost vs. center_mv when use_mvcost != 0.
+static int get_masked_mvpred_var(
+    const MACROBLOCK *x, const uint8_t *mask, int mask_stride,
+    const MV *best_mv, const MV *center_mv, const vpx_variance_fn_ptr_t *vfp,
+    int use_mvcost, int is_second) {
+  const struct buf_2d *const src = &x->plane[0].src;
+  const struct buf_2d *const ref = &x->e_mbd.plane[0].pre[is_second];
+  const MV scaled_mv = { best_mv->row * 8, best_mv->col * 8 };
+  unsigned int sse_unused, var;
+  var = vfp->mvf(src->buf, src->stride, get_buf_from_mv(ref, best_mv),
+                 ref->stride, mask, mask_stride, &sse_unused);
+  if (use_mvcost)
+    var += mv_err_cost(&scaled_mv, center_mv, x->nmvjointcost, x->mvcost,
+                       x->errorperbit);
+  return var;
+}
+
+// Iterative 4-neighbor refinement around *ref_mv using the masked SAD
+// (fn_ptr->msdf) plus MV cost; updates *ref_mv and returns the best cost.
+int masked_refining_search_sad(const MACROBLOCK *x, const uint8_t *mask,
+                               int mask_stride, MV *ref_mv, int error_per_bit,
+                               int search_range,
+                               const vpx_variance_fn_ptr_t *fn_ptr,
+                               const MV *center_mv, int is_second) {
+  const MV offsets[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
+  const struct buf_2d *const src = &x->plane[0].src;
+  const struct buf_2d *const ref = &x->e_mbd.plane[0].pre[is_second];
+  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
+  unsigned int best_cost =
+      fn_ptr->msdf(src->buf, src->stride, get_buf_from_mv(ref, ref_mv),
+                   ref->stride, mask, mask_stride) +
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
+  int pass;
+
+  for (pass = 0; pass < search_range; ++pass) {
+    int winner = -1;
+    int k;
+
+    for (k = 0; k < 4; ++k) {
+      const MV cand = { ref_mv->row + offsets[k].row,
+                        ref_mv->col + offsets[k].col };
+      unsigned int cost;
+      if (!is_mv_in(x, &cand)) continue;
+
+      cost = fn_ptr->msdf(src->buf, src->stride, get_buf_from_mv(ref, &cand),
+                          ref->stride, mask, mask_stride);
+      // Add the MV rate only if the raw SAD alone already beats the best.
+      if (cost >= best_cost) continue;
+      cost += mvsad_err_cost(x, &cand, &fcenter_mv, error_per_bit);
+      if (cost < best_cost) {
+        best_cost = cost;
+        winner = k;
+      }
+    }
+
+    // No neighbor improved on the current position: stop early.
+    if (winner == -1) break;
+    ref_mv->row += offsets[winner].row;
+    ref_mv->col += offsets[winner].col;
+  }
+  return best_cost;
+}
+
+int masked_diamond_search_sad(const MACROBLOCK *x,
+ const search_site_config *cfg,
+ const uint8_t *mask, int mask_stride, MV *ref_mv,
+ MV *best_mv, int search_param, int sad_per_bit,
+ int *num00, const vpx_variance_fn_ptr_t *fn_ptr,
+ const MV *center_mv, int is_second) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
+ // Masked-SAD step search: clamps *ref_mv, searches from it and writes the
+ // winner to *best_mv. search_param selects the initial step size and hence
+ // the number of iterations: 0 = MAX_FIRST_STEP pel, 1 = MAX_FIRST_STEP/2 pel,
+ // 2 = MAX_FIRST_STEP/4 pel, etc.
+ const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
+ const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
+ const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
+ const uint8_t *best_address, *in_what_ref;
+ int best_sad = INT_MAX;
+ int best_site = 0;  // index into ss[] of the best site found so far
+ int last_site = 0;  // value of best_site when the position last moved
+ int i, j, step;
+
+ clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+ in_what_ref = get_buf_from_mv(in_what, ref_mv);
+ best_address = in_what_ref;
+ *num00 = 0;  // counts steps that end with the best position still at the start
+ *best_mv = *ref_mv;
+
+ // Check the starting position (masked SAD plus MV cost)
+ best_sad = fn_ptr->msdf(what->buf, what->stride, best_address,
+ in_what->stride, mask, mask_stride) +
+ mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
+
+ i = 1;  // NOTE(review): ss[0] is never probed - presumably the center site; confirm
+
+ for (step = 0; step < tot_steps; step++) {
+ for (j = 0; j < cfg->searches_per_step; j++) {
+ const MV mv = { best_mv->row + ss[i].mv.row,
+ best_mv->col + ss[i].mv.col };
+ if (is_mv_in(x, &mv)) {
+ int sad =
+ fn_ptr->msdf(what->buf, what->stride, best_address + ss[i].offset,
+ in_what->stride, mask, mask_stride);
+ if (sad < best_sad) {  // add the MV cost only if the SAD alone already wins
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ best_site = i;
+ }
+ }
+ }
+
+ i++;
+ }
+
+ if (best_site != last_site) {  // a better site was found in this step: move
+ best_mv->row += ss[best_site].mv.row;
+ best_mv->col += ss[best_site].mv.col;
+ best_address += ss[best_site].offset;
+ last_site = best_site;
+#if defined(NEW_DIAMOND_SEARCH)
+ while (1) {  // keep stepping in the same direction while the cost improves
+ const MV this_mv = { best_mv->row + ss[best_site].mv.row,
+ best_mv->col + ss[best_site].mv.col };
+ if (is_mv_in(x, &this_mv)) {
+ int sad = fn_ptr->msdf(what->buf, what->stride,
+ best_address + ss[best_site].offset,
+ in_what->stride, mask, mask_stride);
+ if (sad < best_sad) {
+ sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
+ if (sad < best_sad) {
+ best_sad = sad;
+ best_mv->row += ss[best_site].mv.row;
+ best_mv->col += ss[best_site].mv.col;
+ best_address += ss[best_site].offset;
+ continue;
+ }
+ }
+ }
+ break;
+ }
+#endif
+ } else if (best_address == in_what_ref) {  // no move and still at the start
+ (*num00)++;
+ }
+ }
+ return best_sad;  // best masked SAD plus MV cost
+}
+
+int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
+ const uint8_t *mask, int mask_stride,
+ MV *mvp_full, int step_param, int sadpb,
+ int further_steps, int do_refine,
+ const vpx_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv,
+ int is_second) {  // runs masked step searches; best MV goes to *dst_mv
+ MV temp_mv;  // result of the most recent masked_diamond_search_sad() call
+ int thissme, n, num00 = 0;  // n: set via the num00 out-param of the first search, then counts steps
+ int bestsme = masked_diamond_search_sad(x, &cpi->ss_cfg, mask, mask_stride,
+ mvp_full, &temp_mv, step_param, sadpb,
+ &n, fn_ptr, ref_mv, is_second);
+ if (bestsme < INT_MAX)  // re-score the winner as masked variance + MV cost
+ bestsme = get_masked_mvpred_var(x, mask, mask_stride, &temp_mv, ref_mv,
+ fn_ptr, 1, is_second);
+ *dst_mv = temp_mv;
+
+ // If there won't be more n-step search, check to see if refining search is
+ // needed.
+ if (n > further_steps) do_refine = 0;
+
+ while (n < further_steps) {
+ ++n;
+
+ if (num00) {
+ num00--;  // skip this step: the last search counted it as leaving the start unchanged
+ } else {
+ thissme = masked_diamond_search_sad(
+ x, &cpi->ss_cfg, mask, mask_stride, mvp_full, &temp_mv,
+ step_param + n, sadpb, &num00, fn_ptr, ref_mv, is_second);
+ if (thissme < INT_MAX)
+ thissme = get_masked_mvpred_var(x, mask, mask_stride, &temp_mv, ref_mv,
+ fn_ptr, 1, is_second);
+
+ // check to see if refining search is needed.
+ if (num00 > further_steps - n) do_refine = 0;
+
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ *dst_mv = temp_mv;
+ }
+ }
+ }
+
+ // final 1-away diamond refining search
+ if (do_refine) {
+ const int search_range = 8;  // maximum number of refinement iterations
+ MV best_mv = *dst_mv;
+ thissme =
+ masked_refining_search_sad(x, mask, mask_stride, &best_mv, sadpb,
+ search_range, fn_ptr, ref_mv, is_second);
+ if (thissme < INT_MAX)
+ thissme = get_masked_mvpred_var(x, mask, mask_stride, &best_mv, ref_mv,
+ fn_ptr, 1, is_second);
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ *dst_mv = best_mv;
+ }
+ }
+ return bestsme;  // cost associated with *dst_mv
+}
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC
+/* returns the OBMC sub-pixel variance at (r, c); also writes sse */
+#define DIST(r, c) \
+  vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)
+
+/* estimated cost of motion vector (r, c) relative to (rr, rc) */
+#define MVC(r, c) \
+  (mvcost \
+       ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + mvcost[0][((r)-rr)] + \
+           mvcost[1][((c)-rc)]) * \
+              error_per_bit + \
+          4096) >> \
+             13 \
+       : 0)
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER(v, r, c) \
+  if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) { \
+    thismse = (DIST(r, c)); \
+    if ((v = MVC(r, c) + thismse) < besterr) { \
+      besterr = v; \
+      br = (r); \
+      bc = (c); \
+      *distortion = thismse; \
+      *sse1 = sse; \
+    } \
+  } else { \
+    v = INT_MAX; \
+  }
+
+#undef CHECK_BETTER0
+#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
+
+#undef CHECK_BETTER1
+#define CHECK_BETTER1(v, r, c) \
+  if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) { \
+    thismse = upsampled_obmc_pref_error( \
+        xd, mask, vfp, z, upre(y, y_stride, r, c), y_stride, w, h, &sse); \
+    if ((v = MVC(r, c) + thismse) < besterr) { \
+      besterr = v; \
+      br = (r); \
+      bc = (c); \
+      *distortion = thismse; \
+      *sse1 = sse; \
+    } \
+  } else { \
+    v = INT_MAX; \
+  }
+
+// OBMC variance at y + offset (also stored in *distortion) plus the rate cost
+// of bestmv relative to ref_mv; *sse1 is filled in by vfp->ovf.
+static unsigned int setup_obmc_center_error(
+    const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
+    const vpx_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
+    const uint8_t *const y, int y_stride, int offset, int *mvjcost,
+    int *mvcost[2], unsigned int *sse1, int *distortion) {
+  const unsigned int var = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
+  *distortion = var;
+  return var + mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+}
+
+static int upsampled_obmc_pref_error(const MACROBLOCKD *xd, const int32_t *mask,
+ const vpx_variance_fn_ptr_t *vfp,
+ const int32_t *const wsrc,
+ const uint8_t *const y, int y_stride,
+ int w, int h, unsigned int *sse) {  // OBMC error of a w x h prediction built from y
+ unsigned int besterr;  // vfp->ovf result; returned as int
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
+ vpx_highbd_upsampled_pred(pred16, w, h, y, y_stride);  // fill pred16 from y
+
+ besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, mask, sse);  // stride is w
+ } else {
+ DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
+ (void)xd;  // xd is only read in the high-bitdepth build
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vpx_upsampled_pred(pred, w, h, y, y_stride);  // fill pred from y
+
+ besterr = vfp->ovf(pred, w, wsrc, mask, sse);  // stride is w
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ return besterr;
+}
+
+// Same as setup_obmc_center_error() but uses upsampled_obmc_pref_error().
+static unsigned int upsampled_setup_obmc_center_error(
+    const MACROBLOCKD *xd, const int32_t *mask, const MV *bestmv,
+    const MV *ref_mv, int error_per_bit, const vpx_variance_fn_ptr_t *vfp,
+    const int32_t *const wsrc, const uint8_t *const y, int y_stride, int w,
+    int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
+    int *distortion) {
+  const unsigned int err = upsampled_obmc_pref_error(
+      xd, mask, vfp, wsrc, y + offset, y_stride, w, h, sse1);
+  *distortion = err;
+  return err + mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+}
+
+// Sub-pixel refinement of *bestmv for OBMC: minimizes the wsrc/mask-weighted
+// error plus MV rate cost. Stores the refined MV in *bestmv and returns its
+// cost, or INT_MAX if it lies beyond MAX_FULL_PEL_VAL << 3 from ref_mv.
+int vp10_find_best_obmc_sub_pixel_tree_up(
+    VP10_COMP *cpi, MACROBLOCK *x, const int32_t *wsrc, const int32_t *mask,
+    int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp,
+    int error_per_bit, const vpx_variance_fn_ptr_t *vfp, int forced_stop,
+    int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
+    unsigned int *sse1, int is_second, int use_upsampled_ref) {
+  const int32_t *const z = wsrc;  // referenced by the DIST/CHECK_BETTER macros
+  const int32_t *const src_address = z;
+  MACROBLOCKD *xd = &x->e_mbd;
+  struct macroblockd_plane *const pd = &xd->plane[0];
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  unsigned int besterr = INT_MAX;
+  unsigned int sse, thismse;
+
+  int rr = ref_mv->row;
+  int rc = ref_mv->col;
+  int br = bestmv->row * 8;
+  int bc = bestmv->col * 8;
+  int hstep = 4;  // halved after every round
+  int iter;
+  int round = 3 - forced_stop;  // number of refinement rounds
+  const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+  const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+  const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+  const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
+  int tr = br, tc = bc;
+  const MV *search_step = search_step_table;
+  int idx, best_idx = -1;
+  unsigned int cost_array[5];  // [0..3]: axis candidates, [4]: diagonal
+  int kr, kc;
+  const int w = 4 * num_4x4_blocks_wide_lookup[mbmi->sb_type];
+  const int h = 4 * num_4x4_blocks_high_lookup[mbmi->sb_type];
+  int offset, y_stride;
+  const uint8_t *y;
+
+  const struct buf_2d backup_pred = pd->pre[is_second];
+  if (use_upsampled_ref) {
+    int ref = xd->mi[0]->mbmi.ref_frame[is_second];
+    const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
+    setup_pred_plane(&pd->pre[is_second], upsampled_ref->y_buffer,
+                     upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
+                     upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3),
+                     NULL, pd->subsampling_x, pd->subsampling_y);
+  }
+  y = pd->pre[is_second].buf;
+  y_stride = pd->pre[is_second].stride;
+  offset = bestmv->row * y_stride + bestmv->col;
+
+  if (!(allow_hp && vp10_use_mv_hp(ref_mv)))
+    if (round == 3) round = 2;
+
+  bestmv->row *= 8;
+  bestmv->col *= 8;
+  // use_upsampled_ref can be 0 or 1
+  if (use_upsampled_ref)
+    besterr = upsampled_setup_obmc_center_error(
+        xd, mask, bestmv, ref_mv, error_per_bit, vfp, z, y, y_stride, w, h,
+        (offset << 3), mvjcost, mvcost, sse1, distortion);
+  else
+    besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp,
+                                      z, y, y_stride, offset, mvjcost, mvcost,
+                                      sse1, distortion);
+
+  for (iter = 0; iter < round; ++iter) {
+    // Check vertical and horizontal sub-pixel positions.
+    for (idx = 0; idx < 4; ++idx) {
+      tr = br + search_step[idx].row;
+      tc = bc + search_step[idx].col;
+      if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
+        MV this_mv = { tr, tc };
+
+        if (use_upsampled_ref) {
+          const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+          thismse = upsampled_obmc_pref_error(
+              xd, mask, vfp, src_address, pre_address, y_stride, w, h, &sse);
+        } else {
+          const uint8_t *const pre_address =
+              y + (tr >> 3) * y_stride + (tc >> 3);
+          thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
+                              src_address, mask, &sse);
+        }
+
+        cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
+                                                mvcost, error_per_bit);
+        if (cost_array[idx] < besterr) {
+          best_idx = idx;
+          besterr = cost_array[idx];
+          *distortion = thismse;
+          *sse1 = sse;
+        }
+      } else {
+        cost_array[idx] = INT_MAX;
+      }
+    }
+
+    // Check diagonal sub-pixel position
+    kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
+    kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
+
+    tc = bc + kc;
+    tr = br + kr;
+    if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
+      MV this_mv = { tr, tc };
+
+      if (use_upsampled_ref) {
+        const uint8_t *const pre_address = y + tr * y_stride + tc;
+
+        thismse = upsampled_obmc_pref_error(xd, mask, vfp, src_address,
+                                            pre_address, y_stride, w, h, &sse);
+      } else {
+        const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+
+        thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr), src_address,
+                            mask, &sse);
+      }
+
+      cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
+                                            error_per_bit);
+
+      if (cost_array[4] < besterr) {
+        best_idx = 4;
+        besterr = cost_array[4];
+        *distortion = thismse;
+        *sse1 = sse;
+      }
+    } else {
+      cost_array[4] = INT_MAX;  // diagonal candidate is out of range
+    }
+
+    if (best_idx < 4 && best_idx >= 0) {
+      br += search_step[best_idx].row;
+      bc += search_step[best_idx].col;
+    } else if (best_idx == 4) {
+      br = tr;
+      bc = tc;
+    }
+
+    if (iters_per_step > 1 && best_idx != -1) {
+      if (use_upsampled_ref) {
+        SECOND_LEVEL_CHECKS_BEST(1);
+      } else {
+        SECOND_LEVEL_CHECKS_BEST(0);
+      }
+    }
+
+    tr = br;
+    tc = bc;
+
+    search_step += 4;
+    hstep >>= 1;
+    best_idx = -1;
+  }
+
+  // These lines ensure static analysis doesn't warn that
+  // tr and tc aren't used after the above point.
+  (void)tr;
+  (void)tc;
+
+  bestmv->row = br;
+  bestmv->col = bc;
+
+  if (use_upsampled_ref) {
+    pd->pre[is_second] = backup_pred;
+  }
+
+  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+    return INT_MAX;
+
+  return besterr;
+}
+
+#undef DIST
+#undef MVC
+#undef CHECK_BETTER
+
+// Returns the OBMC variance (vfp->ovf) of the block referenced by best_mv in
+// pre[is_second], plus the MV rate cost vs. center_mv when use_mvcost != 0.
+static int get_obmc_mvpred_var(
+    const MACROBLOCK *x, const int32_t *wsrc, const int32_t *mask,
+    const MV *best_mv, const MV *center_mv, const vpx_variance_fn_ptr_t *vfp,
+    int use_mvcost, int is_second) {
+  const struct buf_2d *const ref = &x->e_mbd.plane[0].pre[is_second];
+  const MV scaled_mv = { best_mv->row * 8, best_mv->col * 8 };
+  unsigned int sse_unused, var;
+  var = vfp->ovf(get_buf_from_mv(ref, best_mv), ref->stride, wsrc, mask,
+                 &sse_unused);
+  if (use_mvcost)
+    var += mv_err_cost(&scaled_mv, center_mv, x->nmvjointcost, x->mvcost,
+                       x->errorperbit);
+  return var;
+}
+
+// Iterative 4-neighbor refinement around *ref_mv using the OBMC SAD
+// (fn_ptr->osdf) plus MV cost; updates *ref_mv and returns the best cost.
+int obmc_refining_search_sad(const MACROBLOCK *x, const int32_t *wsrc,
+                             const int32_t *mask, MV *ref_mv, int error_per_bit,
+                             int search_range,
+                             const vpx_variance_fn_ptr_t *fn_ptr,
+                             const MV *center_mv, int is_second) {
+  const MV offsets[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
+  const struct buf_2d *const ref = &x->e_mbd.plane[0].pre[is_second];
+  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
+  unsigned int best_cost =
+      fn_ptr->osdf(get_buf_from_mv(ref, ref_mv), ref->stride, wsrc, mask) +
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
+  int pass;
+
+  for (pass = 0; pass < search_range; ++pass) {
+    int winner = -1;
+    int k;
+
+    for (k = 0; k < 4; ++k) {
+      const MV cand = { ref_mv->row + offsets[k].row,
+                        ref_mv->col + offsets[k].col };
+      unsigned int cost;
+      if (!is_mv_in(x, &cand)) continue;
+      cost = fn_ptr->osdf(get_buf_from_mv(ref, &cand), ref->stride, wsrc,
+                          mask);
+      // Add the MV rate only if the raw SAD alone already beats the best.
+      if (cost >= best_cost) continue;
+      cost += mvsad_err_cost(x, &cand, &fcenter_mv, error_per_bit);
+      if (cost < best_cost) {
+        best_cost = cost;
+        winner = k;
+      }
+    }
+
+    // No neighbor improved on the current position: stop early.
+    if (winner == -1) break;
+    ref_mv->row += offsets[winner].row;
+    ref_mv->col += offsets[winner].col;
+  }
+  return best_cost;
+}
+
+// Step search over the sites in cfg using the OBMC SAD (osdf) plus MV cost.
+int obmc_diamond_search_sad(const MACROBLOCK *x, const search_site_config *cfg,
+                            const int32_t *wsrc, const int32_t *mask,
+                            MV *ref_mv, MV *best_mv, int search_param,
+                            int sad_per_bit, int *num00,
+                            const vpx_variance_fn_ptr_t *fn_ptr,
+                            const MV *center_mv, int is_second) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
+  // Clamps *ref_mv, searches from it and writes the winner to *best_mv.
+  // search_param determines the length of the initial step and hence the
+  // number of iterations: 0 = MAX_FIRST_STEP pel, 1 = MAX_FIRST_STEP/2 pel,
+  // 2 = MAX_FIRST_STEP/4 pel, etc.
+  const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
+  const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
+  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
+  const uint8_t *best_address, *in_what_ref;
+  int best_sad = INT_MAX;
+  int best_site = 0, last_site = 0;  // ss[] indices: current best / last move
+  int i, j, step;
+
+  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
+  in_what_ref = get_buf_from_mv(in_what, ref_mv);
+  best_address = in_what_ref;
+  *num00 = 0;  // counts steps that end with the best position at the start
+  *best_mv = *ref_mv;
+
+  // Check the starting position
+  best_sad = fn_ptr->osdf(best_address, in_what->stride, wsrc, mask) +
+             mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
+
+  i = 1;
+
+  for (step = 0; step < tot_steps; step++) {
+    for (j = 0; j < cfg->searches_per_step; j++) {
+      const MV mv = { best_mv->row + ss[i].mv.row,
+                      best_mv->col + ss[i].mv.col };
+      if (is_mv_in(x, &mv)) {
+        int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride,
+                               wsrc, mask);
+        if (sad < best_sad) {  // add the MV cost only if the SAD already wins
+          sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+          if (sad < best_sad) {
+            best_sad = sad;
+            best_site = i;
+          }
+        }
+      }
+
+      i++;
+    }
+
+    if (best_site != last_site) {
+      best_mv->row += ss[best_site].mv.row;
+      best_mv->col += ss[best_site].mv.col;
+      best_address += ss[best_site].offset;
+      last_site = best_site;
+#if defined(NEW_DIAMOND_SEARCH)
+      while (1) {
+        const MV this_mv = { best_mv->row + ss[best_site].mv.row,
+                             best_mv->col + ss[best_site].mv.col };
+        if (is_mv_in(x, &this_mv)) {
+          int sad = fn_ptr->osdf(best_address + ss[best_site].offset,
+                                 in_what->stride, wsrc, mask);
+          if (sad < best_sad) {
+            sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
+            if (sad < best_sad) {
+              best_sad = sad;
+              best_mv->row += ss[best_site].mv.row;
+              best_mv->col += ss[best_site].mv.col;
+              best_address += ss[best_site].offset;
+              continue;
+            }
+          }
+        }
+        break;
+      }
+#endif
+    } else if (best_address == in_what_ref) {
+      (*num00)++;
+    }
+  }
+  return best_sad;
+}
+
+int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
+ const int32_t *wsrc, const int32_t *mask,
+ MV *mvp_full, int step_param, int sadpb,
+ int further_steps, int do_refine,
+ const vpx_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv, int is_second) {  // best MV goes to *dst_mv
+ MV temp_mv;  // result of the most recent obmc_diamond_search_sad() call
+ int thissme, n, num00 = 0;  // n: set via the num00 out-param of the first search, then counts steps
+ int bestsme =
+ obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full, &temp_mv,
+ step_param, sadpb, &n, fn_ptr, ref_mv, is_second);
+ if (bestsme < INT_MAX)  // re-score the winner as OBMC variance + MV cost
+ bestsme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr, 1,
+ is_second);
+ *dst_mv = temp_mv;
+
+ // If there won't be more n-step search, check to see if refining search is
+ // needed.
+ if (n > further_steps) do_refine = 0;
+
+ while (n < further_steps) {
+ ++n;
+
+ if (num00) {
+ num00--;  // skip this step: the last search counted it as leaving the start unchanged
+ } else {
+ thissme = obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full,
+ &temp_mv, step_param + n, sadpb, &num00,
+ fn_ptr, ref_mv, is_second);
+ if (thissme < INT_MAX)
+ thissme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr,
+ 1, is_second);
+
+ // check to see if refining search is needed.
+ if (num00 > further_steps - n) do_refine = 0;
+
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ *dst_mv = temp_mv;
+ }
+ }
+ }
+
+ // final 1-away diamond refining search
+ if (do_refine) {
+ const int search_range = 8;  // maximum number of refinement iterations
+ MV best_mv = *dst_mv;
+ thissme = obmc_refining_search_sad(x, wsrc, mask, &best_mv, sadpb,
+ search_range, fn_ptr, ref_mv, is_second);
+ if (thissme < INT_MAX)
+ thissme = get_obmc_mvpred_var(x, wsrc, mask, &best_mv, ref_mv, fn_ptr, 1,
+ is_second);
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ *dst_mv = best_mv;
+ }
+ }
+ return bestsme;  // cost associated with *dst_mv
+}
+#endif // CONFIG_OBMC
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
new file mode 100644
index 0000000..d26b9bd
--- /dev/null
+++ b/av1/encoder/mcomp.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_MCOMP_H_
+#define VP10_ENCODER_MCOMP_H_
+
+#include "av1/encoder/block.h"
+#include "aom_dsp/variance.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// The maximum number of steps in a step search given the largest
+// allowed initial step
+#define MAX_MVSEARCH_STEPS 11
+// Max full pel mv specified in the unit of full pixel
+// Enable the use of motion vector in range [-1023, 1023].
+#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS - 1)) - 1)
+// Maximum size of the first step in full pel units
+#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS - 1))
+// Allowed motion vector pixel distance outside image border
+// for Block_16x16
+#define BORDER_MV_PIXELS_B16 (16 + VPX_INTERP_EXTEND)
+
+// motion search site: one candidate position of a step-search pattern
+typedef struct search_site {
+ MV mv;  // displacement added to the current best MV by the step searches
+ int offset;  // matching displacement added to the reference buffer address
+} search_site;
+
+typedef struct search_site_config {  // table of search sites for step searches
+ search_site ss[8 * MAX_MVSEARCH_STEPS + 1];
+ int ss_count;  // ss_count / searches_per_step gives the number of steps
+ int searches_per_step;  // number of sites probed in each step
+} search_site_config;
+
+void vp10_init_dsmotion_compensation(search_site_config *cfg, int stride);
+void vp10_init3smotion_compensation(search_site_config *cfg, int stride);
+
+void vp10_set_mv_search_range(MACROBLOCK *x, const MV *mv);
+int vp10_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
+ int *mvcost[2], int weight);
+
+// Utility to compute variance + MV rate cost for a given MV
+int vp10_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
+ const MV *center_mv, const vpx_variance_fn_ptr_t *vfp,
+ int use_mvcost);
+int vp10_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
+ const MV *center_mv, const uint8_t *second_pred,
+ const vpx_variance_fn_ptr_t *vfp, int use_mvcost);
+
+struct VP10_COMP;
+struct SPEED_FEATURES;
+
+int vp10_init_search_range(int size);
+
+int vp10_refining_search_sad(struct macroblock *x, struct mv *ref_mv,
+ int sad_per_bit, int distance,
+ const vpx_variance_fn_ptr_t *fn_ptr,
+ const struct mv *center_mv);
+
+// Runs sequence of diamond searches in smaller steps for RD.
+int vp10_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
+ MV *mvp_full, int step_param, int sadpb,
+ int further_steps, int do_refine, int *cost_list,
+ const vpx_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv);
+
+// Perform integral projection based motion estimation.
+unsigned int vp10_int_pro_motion_estimation(const struct VP10_COMP *cpi,
+ MACROBLOCK *x, BLOCK_SIZE bsize,
+ int mi_row, int mi_col);
+
+int vp10_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
+ int sad_per_bit, int do_init_search, int *cost_list,
+ const vpx_variance_fn_ptr_t *vfp, int use_mvcost,
+ const MV *center_mv);
+
+typedef int(fractional_mv_step_fp)(
+ MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
+ const vpx_variance_fn_ptr_t *vfp,
+ int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
+ int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
+ int *distortion, unsigned int *sse1, const uint8_t *second_pred, int w,
+ int h, int use_upsampled_ref);
+
+extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree;
+extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned;
+extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned_more;
+extern fractional_mv_step_fp vp10_find_best_sub_pixel_tree_pruned_evenmore;
+
+typedef int (*vp10_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv,
+ int sad_per_bit, int distance,
+ const vpx_variance_fn_ptr_t *fn_ptr,
+ const MV *center_mv, MV *best_mv);
+
+typedef int (*vp10_diamond_search_fn_t)(
+ MACROBLOCK *x, const search_site_config *cfg, MV *ref_mv, MV *best_mv,
+ int search_param, int sad_per_bit, int *num00,
+ const vpx_variance_fn_ptr_t *fn_ptr, const MV *center_mv);
+
+int vp10_refining_search_8p_c(MACROBLOCK *x, int error_per_bit,
+ int search_range,
+ const vpx_variance_fn_ptr_t *fn_ptr,
+ const MV *center_mv, const uint8_t *second_pred);
+
+struct VP10_COMP;
+
+int vp10_full_pixel_search(struct VP10_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, MV *mvp_full, int step_param,
+ int error_per_bit, int *cost_list, const MV *ref_mv,
+ int var_max, int rd);
+
+#if CONFIG_EXT_INTER
+int vp10_find_best_masked_sub_pixel_tree(
+ const MACROBLOCK *x, const uint8_t *mask, int mask_stride, MV *bestmv,
+ const MV *ref_mv, int allow_hp, int error_per_bit,
+ const vpx_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
+ int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
+ int is_second);
+int vp10_find_best_masked_sub_pixel_tree_up(
+ struct VP10_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride,
+ int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp,
+ int error_per_bit, const vpx_variance_fn_ptr_t *vfp, int forced_stop,
+ int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1, int is_second, int use_upsampled_ref);
+int vp10_masked_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
+ const uint8_t *mask, int mask_stride,
+ MV *mvp_full, int step_param, int sadpb,
+ int further_steps, int do_refine,
+ const vpx_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv, int is_second);
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_OBMC
+int vp10_obmc_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
+ const int32_t *wsrc, const int32_t *mask,
+ MV *mvp_full, int step_param, int sadpb,
+ int further_steps, int do_refine,
+ const vpx_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv, int is_second);
+int vp10_find_best_obmc_sub_pixel_tree_up(
+ struct VP10_COMP *cpi, MACROBLOCK *x, const int32_t *wsrc,
+ const int32_t *mask, int mi_row, int mi_col, MV *bestmv, const MV *ref_mv,
+ int allow_hp, int error_per_bit, const vpx_variance_fn_ptr_t *vfp,
+ int forced_stop, int iters_per_step, int *mvjcost, int *mvcost[2],
+ int *distortion, unsigned int *sse1, int is_second, int use_upsampled_ref);
+#endif // CONFIG_OBMC
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_MCOMP_H_
diff --git a/av1/encoder/mips/msa/error_msa.c b/av1/encoder/mips/msa/error_msa.c
new file mode 100644
index 0000000..71c5ad3
--- /dev/null
+++ b/av1/encoder/mips/msa/error_msa.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "aom_dsp/mips/macros_msa.h"
+/* Defines block_error_<BSize>size_msa() for a fixed count of int16 values; BSize must be a multiple of 16 (>= 16). */
+#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
+ static int64_t block_error_##BSize##size_msa( \
+ const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
+ int64_t err = 0; \
+ uint32_t loop_cnt; \
+ v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
+ v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
+ v2i64 sq_coeff_r, sq_coeff_l; \
+ v2i64 err0, err_dup0, err1, err_dup1; \
+ /* NOTE(review): going by the HSUB and DOTP macro names, err0/err1 sum squared coeff-vs-dq_coeff differences and sq_coeff_r/l sum squared coeff; confirm in macros_msa.h. */ \
+ coeff = LD_SH(coeff_ptr); \
+ dq_coeff = LD_SH(dq_coeff_ptr); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \
+ sq_coeff_l); \
+ DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
+ /* Values 8..15: the accumulators were seeded by DOTP above, so DPADD is used from here on. */ \
+ coeff = LD_SH(coeff_ptr + 8); \
+ dq_coeff = LD_SH(dq_coeff_ptr + 8); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
+ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
+ \
+ coeff_ptr += 16; \
+ dq_coeff_ptr += 16; \
+ /* 16 values are already consumed: the count starts at (BSize / 16) - 1, so BSize == 16 never enters the loop. */ \
+ for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
+ coeff = LD_SH(coeff_ptr); \
+ dq_coeff = LD_SH(dq_coeff_ptr); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
+ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
+ \
+ coeff = LD_SH(coeff_ptr + 8); \
+ dq_coeff = LD_SH(dq_coeff_ptr + 8); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
+ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
+ \
+ coeff_ptr += 16; \
+ dq_coeff_ptr += 16; \
+ } \
+ /* *ssz = lane 0 + lane 1 of sq_coeff_r, plus lane 0 + lane 1 of sq_coeff_l. */ \
+ err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
+ err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
+ sq_coeff_r += err_dup0; \
+ sq_coeff_l += err_dup1; \
+ *ssz = __msa_copy_s_d(sq_coeff_r, 0); \
+ *ssz += __msa_copy_s_d(sq_coeff_l, 0); \
+ /* Same two-lane reduction of err0 and err1 gives the return value. */ \
+ err_dup0 = __msa_splati_d(err0, 1); \
+ err_dup1 = __msa_splati_d(err1, 1); \
+ err0 += err_dup0; \
+ err1 += err_dup1; \
+ err = __msa_copy_s_d(err0, 0); \
+ err += __msa_copy_s_d(err1, 0); \
+ \
+ return err; \
+ }
+/* One generated kernel per block size that vp10_block_error_msa() switches on. */
+/* clang-format off */
+BLOCK_ERROR_BLOCKSIZE_MSA(16)
+BLOCK_ERROR_BLOCKSIZE_MSA(64)
+BLOCK_ERROR_BLOCKSIZE_MSA(256)
+BLOCK_ERROR_BLOCKSIZE_MSA(1024)
+/* clang-format on */
+/* Block error over blk_size values: MSA kernel for 16/64/256/1024, C fallback for any other size. */
+int64_t vp10_block_error_msa(const tran_low_t *coeff_ptr,
+                             const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
+                             int64_t *ssz) {
+  /* The generated kernels take the same two buffers viewed as int16_t. */
+  const int16_t *const src16 = (const int16_t *)coeff_ptr;
+  const int16_t *const dq16 = (const int16_t *)dq_coeff_ptr;
+
+  /* Sizes that have a generated kernel return straight from the switch. */
+  switch (blk_size) {
+    case 16: return block_error_16size_msa(src16, dq16, ssz);
+    case 64: return block_error_64size_msa(src16, dq16, ssz);
+    case 256: return block_error_256size_msa(src16, dq16, ssz);
+    case 1024: return block_error_1024size_msa(src16, dq16, ssz);
+    default: break;
+  }
+
+  /* Every other size goes to the C implementation with the original pointers. */
+  return vp10_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
+}
diff --git a/av1/encoder/mips/msa/fdct16x16_msa.c b/av1/encoder/mips/msa/fdct16x16_msa.c
new file mode 100644
index 0000000..cda2138
--- /dev/null
+++ b/av1/encoder/mips/msa/fdct16x16_msa.c
@@ -0,0 +1,500 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/enums.h"
+#include "av1/encoder/mips/msa/fdct_msa.h"
+#include "aom_dsp/mips/fwd_txfm_msa.h"
+/* ADST column pass, step 1: reads 16 rows (`stride` apart) of one 8-column strip and writes 16 intermediate vectors to int_buf. */
+static void fadst16_cols_step1_msa(const int16_t *input, int32_t stride,
+ const int32_t *const0, int16_t *int_buf) {
+ v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
+ v8i16 tp0, tp1, tp2, tp3, g0, g1, g2, g3, g8, g9, g10, g11, h0, h1, h2, h3;
+ v4i32 k0, k1, k2, k3;
+ /* const0 is consumed one v4i32 (four int32) at a time, so const0 + 4 * n addresses constant group n. */
+ /* load input data */
+ r0 = LD_SH(input);
+ r15 = LD_SH(input + 15 * stride);
+ r7 = LD_SH(input + 7 * stride);
+ r8 = LD_SH(input + 8 * stride);
+ SLLI_4V(r0, r15, r7, r8, 2); /* NOTE(review): presumably a left shift by 2 (x4 scaling); the rows variant has no such step */
+
+ /* stage 1 */
+ LD_SW2(const0, 4, k0, k1);
+ LD_SW2(const0 + 8, 4, k2, k3);
+ MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3);
+
+ r3 = LD_SH(input + 3 * stride);
+ r4 = LD_SH(input + 4 * stride);
+ r11 = LD_SH(input + 11 * stride);
+ r12 = LD_SH(input + 12 * stride);
+ SLLI_4V(r3, r4, r11, r12, 2);
+
+ LD_SW2(const0 + 4 * 4, 4, k0, k1);
+ LD_SW2(const0 + 4 * 6, 4, k2, k3);
+ MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11);
+
+ /* stage 2 */
+ BUTTERFLY_4(g0, g2, g10, g8, tp0, tp2, tp3, tp1);
+ ST_SH2(tp0, tp2, int_buf, 8);
+ ST_SH2(tp1, tp3, int_buf + 4 * 8, 8);
+ /* Only three constant groups (8, 9, 10) are loaded here: k0 is handed to MADD_BF twice. */
+ LD_SW2(const0 + 4 * 8, 4, k0, k1);
+ k2 = LD_SW(const0 + 4 * 10);
+ MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3);
+
+ ST_SH2(h0, h1, int_buf + 8 * 8, 8);
+ ST_SH2(h3, h2, int_buf + 12 * 8, 8);
+
+ r9 = LD_SH(input + 9 * stride);
+ r6 = LD_SH(input + 6 * stride);
+ r1 = LD_SH(input + stride);
+ r14 = LD_SH(input + 14 * stride);
+ SLLI_4V(r9, r6, r1, r14, 2);
+
+ LD_SW2(const0 + 4 * 11, 4, k0, k1);
+ LD_SW2(const0 + 4 * 13, 4, k2, k3);
+ MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g0, g1, g2, g3);
+
+ ST_SH2(g1, g3, int_buf + 3 * 8, 4 * 8);
+
+ r13 = LD_SH(input + 13 * stride);
+ r2 = LD_SH(input + 2 * stride);
+ r5 = LD_SH(input + 5 * stride);
+ r10 = LD_SH(input + 10 * stride);
+ SLLI_4V(r13, r2, r5, r10, 2);
+
+ LD_SW2(const0 + 4 * 15, 4, k0, k1);
+ LD_SW2(const0 + 4 * 17, 4, k2, k3);
+ MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, h0, h1, h2, h3);
+
+ ST_SH2(h1, h3, int_buf + 11 * 8, 4 * 8);
+ /* NOTE(review): assuming ST_SH2/ST_SH4 place consecutive vectors `stride` int16 apart, slots 0..15 of int_buf (8 int16 each) are each written once. */
+ BUTTERFLY_4(h0, h2, g2, g0, tp0, tp1, tp2, tp3);
+ ST_SH4(tp0, tp1, tp2, tp3, int_buf + 2 * 8, 4 * 8);
+}
+/* ADST column pass, step 2: turns the 16 int_buf vectors into 16 output rows of 8 values, stored 16 int16 apart from `out`. */
+static void fadst16_cols_step2_msa(int16_t *int_buf, const int32_t *const0,
+ int16_t *out) {
+ int16_t *out_ptr = out + 128; /* row 8 when rows are 16 int16 apart */
+ v8i16 tp0, tp1, tp2, tp3, g5, g7, g13, g15;
+ v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h10, h11;
+ v8i16 out0, out1, out2, out3, out4, out5, out6, out7;
+ v8i16 out8, out9, out10, out11, out12, out13, out14, out15;
+ v4i32 k0, k1, k2, k3;
+ /* Each MADD_BF in this function receives k0 as both its first and its fourth constant. */
+ LD_SH2(int_buf + 3 * 8, 4 * 8, g13, g15);
+ LD_SH2(int_buf + 11 * 8, 4 * 8, g5, g7);
+ LD_SW2(const0 + 4 * 19, 4, k0, k1);
+ k2 = LD_SW(const0 + 4 * 21);
+ MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7);
+
+ tp0 = LD_SH(int_buf + 4 * 8);
+ tp1 = LD_SH(int_buf + 5 * 8);
+ tp3 = LD_SH(int_buf + 10 * 8);
+ tp2 = LD_SH(int_buf + 14 * 8);
+ LD_SW2(const0 + 4 * 22, 4, k0, k1);
+ k2 = LD_SW(const0 + 4 * 24);
+ MADD_BF(tp0, tp1, tp2, tp3, k0, k1, k2, k0, out4, out6, out5, out7);
+ out4 = -out4;
+ ST_SH(out4, (out + 3 * 16));
+ ST_SH(out5, (out_ptr + 4 * 16));
+ /* Reuses k0..k2 loaded for the previous MADD_BF (constant groups 22..24). */
+ h1 = LD_SH(int_buf + 9 * 8);
+ h3 = LD_SH(int_buf + 12 * 8);
+ MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15);
+ out13 = -out13;
+ ST_SH(out12, (out + 2 * 16));
+ ST_SH(out13, (out_ptr + 5 * 16));
+
+ tp0 = LD_SH(int_buf);
+ tp1 = LD_SH(int_buf + 8);
+ tp2 = LD_SH(int_buf + 2 * 8);
+ tp3 = LD_SH(int_buf + 6 * 8);
+
+ BUTTERFLY_4(tp0, tp1, tp3, tp2, out0, out1, h11, h10);
+ out1 = -out1;
+ ST_SH(out0, (out));
+ ST_SH(out1, (out_ptr + 7 * 16));
+
+ h0 = LD_SH(int_buf + 8 * 8);
+ h2 = LD_SH(int_buf + 13 * 8);
+
+ BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10);
+ out8 = -out8;
+ ST_SH(out8, (out + 16));
+ ST_SH(out9, (out_ptr + 6 * 16));
+
+ /* stage 4 */
+ LD_SW2(const0 + 4 * 25, 4, k0, k1);
+ LD_SW2(const0 + 4 * 27, 4, k2, k3);
+ MADD_SHORT(h10, h11, k1, k2, out2, out3);
+ ST_SH(out2, (out + 7 * 16));
+ ST_SH(out3, (out_ptr));
+
+ MADD_SHORT(out6, out7, k0, k3, out6, out7);
+ ST_SH(out6, (out + 4 * 16));
+ ST_SH(out7, (out_ptr + 3 * 16));
+
+ MADD_SHORT(out10, out11, k0, k3, out10, out11);
+ ST_SH(out10, (out + 6 * 16));
+ ST_SH(out11, (out_ptr + 16));
+ /* After this last pair, each of the 16 row offsets (out + 0..7 * 16, out_ptr + 0..7 * 16) has been stored once. */
+ MADD_SHORT(out14, out15, k1, k2, out14, out15);
+ ST_SH(out14, (out + 5 * 16));
+ ST_SH(out15, (out_ptr + 2 * 16));
+}
+/* Cuts a 16x16 block (rows 16 int16 apart) into four 8x8 tiles; each is transposed, post-processed and written as 64 consecutive values. */
+static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
+ v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
+ v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15;
+ /* Tile order: rows 0..7 at columns 0..7, rows 0..7 at columns 8..15, then the same two for rows 8..15. */
+ /* load input data */
+ LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
+ r7);
+ FDCT_POSTPROC_2V_NEG_H(r0, r1);
+ FDCT_POSTPROC_2V_NEG_H(r2, r3);
+ FDCT_POSTPROC_2V_NEG_H(r4, r5);
+ FDCT_POSTPROC_2V_NEG_H(r6, r7);
+ ST_SH8(r0, r1, r2, r3, r4, r5, r6, r7, out, 8);
+ out += 64;
+ /* NOTE(review): FDCT_POSTPROC_2V_NEG_H is not defined in this file; its exact rounding cannot be seen from here. */
+ LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
+ r12, r13, r14, r15);
+ FDCT_POSTPROC_2V_NEG_H(r8, r9);
+ FDCT_POSTPROC_2V_NEG_H(r10, r11);
+ FDCT_POSTPROC_2V_NEG_H(r12, r13);
+ FDCT_POSTPROC_2V_NEG_H(r14, r15);
+ ST_SH8(r8, r9, r10, r11, r12, r13, r14, r15, out, 8);
+ out += 64;
+
+ /* load input data */
+ input += 128;
+ LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
+ r7);
+ FDCT_POSTPROC_2V_NEG_H(r0, r1);
+ FDCT_POSTPROC_2V_NEG_H(r2, r3);
+ FDCT_POSTPROC_2V_NEG_H(r4, r5);
+ FDCT_POSTPROC_2V_NEG_H(r6, r7);
+ ST_SH8(r0, r1, r2, r3, r4, r5, r6, r7, out, 8);
+ out += 64;
+
+ LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
+ r12, r13, r14, r15);
+ FDCT_POSTPROC_2V_NEG_H(r8, r9);
+ FDCT_POSTPROC_2V_NEG_H(r10, r11);
+ FDCT_POSTPROC_2V_NEG_H(r12, r13);
+ FDCT_POSTPROC_2V_NEG_H(r14, r15);
+ ST_SH8(r8, r9, r10, r11, r12, r13, r14, r15, out, 8);
+}
+/* ADST row pass, step 1: same flow as fadst16_cols_step1_msa, but reads 16 vectors 8 int16 apart and applies no input shift. */
+static void fadst16_rows_step1_msa(int16_t *input, const int32_t *const0,
+ int16_t *int_buf) {
+ v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
+ v8i16 tp0, tp1, tp2, tp3, g0, g1, g2, g3, g8, g9, g10, g11, h0, h1, h2, h3;
+ v4i32 k0, k1, k2, k3;
+
+ /* load input data */
+ r0 = LD_SH(input);
+ r7 = LD_SH(input + 7 * 8);
+ r8 = LD_SH(input + 8 * 8);
+ r15 = LD_SH(input + 15 * 8);
+
+ /* stage 1 */
+ LD_SW2(const0, 4, k0, k1);
+ LD_SW2(const0 + 4 * 2, 4, k2, k3);
+ MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3);
+
+ r3 = LD_SH(input + 3 * 8);
+ r4 = LD_SH(input + 4 * 8);
+ r11 = LD_SH(input + 11 * 8);
+ r12 = LD_SH(input + 12 * 8);
+
+ LD_SW2(const0 + 4 * 4, 4, k0, k1);
+ LD_SW2(const0 + 4 * 6, 4, k2, k3);
+ MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11);
+
+ /* stage 2 */
+ BUTTERFLY_4(g0, g2, g10, g8, tp0, tp2, tp3, tp1);
+ ST_SH2(tp0, tp1, int_buf, 4 * 8);
+ ST_SH2(tp2, tp3, int_buf + 8, 4 * 8);
+ /* Only three constant groups (8, 9, 10) are loaded here: k0 is handed to MADD_BF twice. */
+ LD_SW2(const0 + 4 * 8, 4, k0, k1);
+ k2 = LD_SW(const0 + 4 * 10);
+ MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3);
+ ST_SH2(h0, h3, int_buf + 8 * 8, 4 * 8);
+ ST_SH2(h1, h2, int_buf + 9 * 8, 4 * 8);
+
+ r1 = LD_SH(input + 8);
+ r6 = LD_SH(input + 6 * 8);
+ r9 = LD_SH(input + 9 * 8);
+ r14 = LD_SH(input + 14 * 8);
+
+ LD_SW2(const0 + 4 * 11, 4, k0, k1);
+ LD_SW2(const0 + 4 * 13, 4, k2, k3);
+ MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g0, g1, g2, g3);
+ ST_SH2(g1, g3, int_buf + 3 * 8, 4 * 8);
+
+ r2 = LD_SH(input + 2 * 8);
+ r5 = LD_SH(input + 5 * 8);
+ r10 = LD_SH(input + 10 * 8);
+ r13 = LD_SH(input + 13 * 8);
+
+ LD_SW2(const0 + 4 * 15, 4, k0, k1);
+ LD_SW2(const0 + 4 * 17, 4, k2, k3);
+ MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, h0, h1, h2, h3);
+ ST_SH2(h1, h3, int_buf + 11 * 8, 4 * 8);
+ BUTTERFLY_4(h0, h2, g2, g0, tp0, tp1, tp2, tp3);
+ ST_SH4(tp0, tp1, tp2, tp3, int_buf + 2 * 8, 4 * 8);
+}
+/* ADST row pass, step 2: as fadst16_cols_step2_msa, except that the out_ptr stores land at out + 8 + k * 16 instead of out + 128 + k * 16. */
+static void fadst16_rows_step2_msa(int16_t *int_buf, const int32_t *const0,
+ int16_t *out) {
+ int16_t *out_ptr = out + 8; /* second group of 8 int16 inside each 16-wide row */
+ v8i16 tp0, tp1, tp2, tp3, g5, g7, g13, g15;
+ v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h10, h11;
+ v8i16 out0, out1, out2, out3, out4, out5, out6, out7;
+ v8i16 out8, out9, out10, out11, out12, out13, out14, out15;
+ v4i32 k0, k1, k2, k3;
+
+ g13 = LD_SH(int_buf + 3 * 8);
+ g15 = LD_SH(int_buf + 7 * 8);
+ g5 = LD_SH(int_buf + 11 * 8);
+ g7 = LD_SH(int_buf + 15 * 8);
+ /* Each MADD_BF in this function receives k0 as both its first and its fourth constant. */
+ LD_SW2(const0 + 4 * 19, 4, k0, k1);
+ k2 = LD_SW(const0 + 4 * 21);
+ MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7);
+
+ tp0 = LD_SH(int_buf + 4 * 8);
+ tp1 = LD_SH(int_buf + 5 * 8);
+ tp3 = LD_SH(int_buf + 10 * 8);
+ tp2 = LD_SH(int_buf + 14 * 8);
+
+ LD_SW2(const0 + 4 * 22, 4, k0, k1);
+ k2 = LD_SW(const0 + 4 * 24);
+ MADD_BF(tp0, tp1, tp2, tp3, k0, k1, k2, k0, out4, out6, out5, out7);
+ out4 = -out4;
+ ST_SH(out4, (out + 3 * 16));
+ ST_SH(out5, (out_ptr + 4 * 16));
+ /* Reuses k0..k2 loaded for the previous MADD_BF (constant groups 22..24). */
+ h1 = LD_SH(int_buf + 9 * 8);
+ h3 = LD_SH(int_buf + 12 * 8);
+ MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15);
+ out13 = -out13;
+ ST_SH(out12, (out + 2 * 16));
+ ST_SH(out13, (out_ptr + 5 * 16));
+
+ tp0 = LD_SH(int_buf);
+ tp1 = LD_SH(int_buf + 8);
+ tp2 = LD_SH(int_buf + 2 * 8);
+ tp3 = LD_SH(int_buf + 6 * 8);
+
+ BUTTERFLY_4(tp0, tp1, tp3, tp2, out0, out1, h11, h10);
+ out1 = -out1;
+ ST_SH(out0, (out));
+ ST_SH(out1, (out_ptr + 7 * 16));
+
+ h0 = LD_SH(int_buf + 8 * 8);
+ h2 = LD_SH(int_buf + 13 * 8);
+ BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10);
+ out8 = -out8;
+ ST_SH(out8, (out + 16));
+ ST_SH(out9, (out_ptr + 6 * 16));
+
+ /* stage 4 */
+ LD_SW2(const0 + 4 * 25, 4, k0, k1);
+ LD_SW2(const0 + 4 * 27, 4, k2, k3);
+ MADD_SHORT(h10, h11, k1, k2, out2, out3);
+ ST_SH(out2, (out + 7 * 16));
+ ST_SH(out3, (out_ptr));
+
+ MADD_SHORT(out6, out7, k0, k3, out6, out7);
+ ST_SH(out6, (out + 4 * 16));
+ ST_SH(out7, (out_ptr + 3 * 16));
+
+ MADD_SHORT(out10, out11, k0, k3, out10, out11);
+ ST_SH(out10, (out + 6 * 16));
+ ST_SH(out11, (out_ptr + 16));
+ /* After this last pair, out + k * 16 and out_ptr + k * 16 have each been stored once for k = 0..7. */
+ MADD_SHORT(out14, out15, k1, k2, out14, out15);
+ ST_SH(out14, (out + 5 * 16));
+ ST_SH(out15, (out_ptr + 2 * 16));
+}
+/* For each 128-value half: transposes the left and right 8x8 tiles separately and stores them back side by side, 16 int16 per row. */
+static void fadst16_transpose_msa(int16_t *input, int16_t *out) {
+ v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
+ v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15;
+ /* NOTE(review): LD_SH16 presumably fills its targets in order, so even vectors go to l0..l7 and odd ones to l8..l15. */
+ /* load input data */
+ LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14,
+ l7, l15);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
+ r7);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
+ r12, r13, r14, r15);
+ ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8);
+ ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8);
+ out += 16 * 8;
+
+ /* load input data */
+ input += 128;
+ LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14,
+ l7, l15);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6,
+ r7);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11,
+ r12, r13, r14, r15);
+ ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8);
+ ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8);
+}
+/* Row pass for 8 rows (16 int16 apart): transpose, post-process, butterfly, then the FDCT8x16 even and odd halves; `intermediate` doubles as scratch. */
+static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
+ int16_t *temp = intermediate;
+ int16_t *out = output;
+ v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11;
+ v8i16 in12, in13, in14, in15;
+ /* Two 8x8 tiles are read: columns 0..7 from intermediate and columns 8..15 from intermediate + 8. */
+ LD_SH8(temp, 16, in0, in1, in2, in3, in4, in5, in6, in7);
+ temp = intermediate + 8;
+ LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9,
+ in10, in11, in12, in13, in14, in15);
+ FDCT_POSTPROC_2V_NEG_H(in0, in1);
+ FDCT_POSTPROC_2V_NEG_H(in2, in3);
+ FDCT_POSTPROC_2V_NEG_H(in4, in5);
+ FDCT_POSTPROC_2V_NEG_H(in6, in7);
+ FDCT_POSTPROC_2V_NEG_H(in8, in9);
+ FDCT_POSTPROC_2V_NEG_H(in10, in11);
+ FDCT_POSTPROC_2V_NEG_H(in12, in13);
+ FDCT_POSTPROC_2V_NEG_H(in14, in15);
+ BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11,
+ in12, in13, in14, in15, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6,
+ tmp7, in8, in9, in10, in11, in12, in13, in14, in15);
+ temp = intermediate;
+ ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, temp, 16); /* park in8..in15; this overwrites the caller's buffer */
+ FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp0, tmp1,
+ tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
+ temp = intermediate;
+ LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15); /* reload the parked vectors for FDCT8x16_ODD */
+ FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, tmp0, in0,
+ tmp1, in1, tmp2, in2, tmp3, in3);
+ ST_SH8(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, out, 16);
+ TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, tmp4, in4,
+ tmp5, in5, tmp6, in6, tmp7, in7);
+ out = output + 8;
+ ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, out, 16);
+}
+/* 16x16 forward transform: tx_type picks the DCT or ADST helpers for the column pass and the row pass; each pass runs in two halves. */
+void vp10_fht16x16_msa(const int16_t *input, int16_t *output, int32_t stride,
+ int32_t tx_type) {
+ DECLARE_ALIGNED(32, int16_t, tmp[256]);
+ DECLARE_ALIGNED(32, int16_t, trans_buf[256]);
+ DECLARE_ALIGNED(32, int16_t, tmp_buf[128]);
+ int32_t i;
+ int16_t *ptmpbuf = &tmp_buf[0];
+ int16_t *trans = &trans_buf[0];
+ const int32_t const_arr[29 * 4] = { /* 29 constants, each repeated four times; the fadst16 helpers index it as const0 + 4 * n */
+ 52707308, 52707308, 52707308, 52707308, -1072430300,
+ -1072430300, -1072430300, -1072430300, 795618043, 795618043,
+ 795618043, 795618043, -721080468, -721080468, -721080468,
+ -721080468, 459094491, 459094491, 459094491, 459094491,
+ -970646691, -970646691, -970646691, -970646691, 1010963856,
+ 1010963856, 1010963856, 1010963856, -361743294, -361743294,
+ -361743294, -361743294, 209469125, 209469125, 209469125,
+ 209469125, -1053094788, -1053094788, -1053094788, -1053094788,
+ 1053160324, 1053160324, 1053160324, 1053160324, 639644520,
+ 639644520, 639644520, 639644520, -862444000, -862444000,
+ -862444000, -862444000, 1062144356, 1062144356, 1062144356,
+ 1062144356, -157532337, -157532337, -157532337, -157532337,
+ 260914709, 260914709, 260914709, 260914709, -1041559667,
+ -1041559667, -1041559667, -1041559667, 920985831, 920985831,
+ 920985831, 920985831, -551995675, -551995675, -551995675,
+ -551995675, 596522295, 596522295, 596522295, 596522295,
+ 892853362, 892853362, 892853362, 892853362, -892787826,
+ -892787826, -892787826, -892787826, 410925857, 410925857,
+ 410925857, 410925857, -992012162, -992012162, -992012162,
+ -992012162, 992077698, 992077698, 992077698, 992077698,
+ 759246145, 759246145, 759246145, 759246145, -759180609,
+ -759180609, -759180609, -759180609, -759222975, -759222975,
+ -759222975, -759222975, 759288511, 759288511, 759288511,
+ 759288511
+ }; /* NOTE(review): each value looks like two packed 16-bit factors, e.g. 52707308 == (804 << 16) + 16364; confirm */
+ /* DCT_ADST and ADST_ADST differ only in their column pass; the row-pass code of the two cases is identical. */
+ switch (tx_type) {
+ case DCT_DCT:
+ /* column transform */
+ for (i = 0; i < 2; ++i) {
+ fdct8x16_1d_column(input + 8 * i, tmp + 8 * i, stride);
+ }
+
+ /* row transform */
+ for (i = 0; i < 2; ++i) {
+ fdct16x8_1d_row(tmp + (128 * i), output + (128 * i));
+ }
+ break;
+ case ADST_DCT:
+ /* column transform */
+ for (i = 0; i < 2; ++i) {
+ fadst16_cols_step1_msa(input + (i << 3), stride, const_arr, ptmpbuf);
+ fadst16_cols_step2_msa(ptmpbuf, const_arr, tmp + (i << 3));
+ }
+
+ /* row transform */
+ for (i = 0; i < 2; ++i) {
+ postproc_fdct16x8_1d_row(tmp + (128 * i), output + (128 * i));
+ }
+ break;
+ case DCT_ADST:
+ /* column transform */
+ for (i = 0; i < 2; ++i) {
+ fdct8x16_1d_column(input + 8 * i, tmp + 8 * i, stride);
+ }
+
+ fadst16_transpose_postproc_msa(tmp, trans);
+
+ /* row transform */
+ for (i = 0; i < 2; ++i) {
+ fadst16_rows_step1_msa(trans + (i << 7), const_arr, ptmpbuf);
+ fadst16_rows_step2_msa(ptmpbuf, const_arr, tmp + (i << 7));
+ }
+ /* tmp is reused as the row-pass output before the final tile transpose into `output`. */
+ fadst16_transpose_msa(tmp, output);
+ break;
+ case ADST_ADST:
+ /* column transform */
+ for (i = 0; i < 2; ++i) {
+ fadst16_cols_step1_msa(input + (i << 3), stride, const_arr, ptmpbuf);
+ fadst16_cols_step2_msa(ptmpbuf, const_arr, tmp + (i << 3));
+ }
+
+ fadst16_transpose_postproc_msa(tmp, trans);
+
+ /* row transform */
+ for (i = 0; i < 2; ++i) {
+ fadst16_rows_step1_msa(trans + (i << 7), const_arr, ptmpbuf);
+ fadst16_rows_step2_msa(ptmpbuf, const_arr, tmp + (i << 7));
+ }
+
+ fadst16_transpose_msa(tmp, output);
+ break;
+ default: assert(0); break; /* unknown tx_type: when NDEBUG disables assert, output is left unwritten */
+ }
+}
diff --git a/av1/encoder/mips/msa/fdct4x4_msa.c b/av1/encoder/mips/msa/fdct4x4_msa.c
new file mode 100644
index 0000000..a3731c3
--- /dev/null
+++ b/av1/encoder/mips/msa/fdct4x4_msa.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/enums.h"
+#include "av1/encoder/mips/msa/fdct_msa.h"
+/* 4x4 forward transform (WHT, going by the name) using only vector add, subtract and >> 1: two passes with a transpose between them. */
+void vp10_fwht4x4_msa(const int16_t *input, int16_t *output,
+ int32_t src_stride) {
+ v8i16 in0, in1, in2, in3, in4;
+
+ LD_SH4(input, src_stride, in0, in1, in2, in3);
+ /* First pass; in4 is a temporary holding (in0 - in3) >> 1. */
+ in0 += in1;
+ in3 -= in2;
+ in4 = (in0 - in3) >> 1;
+ SUB2(in4, in1, in4, in2, in1, in2);
+ in0 -= in2;
+ in3 += in1;
+
+ TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1);
+ /* Second pass: the same arithmetic with the register roles permuted (in1 -> in2, in2 -> in3, in3 -> in1). */
+ in0 += in2;
+ in1 -= in3;
+ in4 = (in0 - in1) >> 1;
+ SUB2(in4, in2, in4, in3, in2, in3);
+ in0 -= in3;
+ in1 += in2;
+
+ SLLI_4V(in0, in1, in2, in3, 2);
+
+ TRANSPOSE4x4_SH_SH(in0, in3, in1, in2, in0, in3, in1, in2);
+ /* NOTE(review): each ST4x2_UB presumably writes one group of four int16 outputs; confirm in macros_msa.h. */
+ ST4x2_UB(in0, output, 4);
+ ST4x2_UB(in3, output + 4, 4);
+ ST4x2_UB(in1, output + 8, 4);
+ ST4x2_UB(in2, output + 12, 4);
+}
+/* 4x4 forward transform: tx_type selects VPX_FDCT4 or VPX_FADST4 for each of the two passes; the result is stored as two vectors. */
+void vp10_fht4x4_msa(const int16_t *input, int16_t *output, int32_t stride,
+ int32_t tx_type) {
+ v8i16 in0, in1, in2, in3;
+
+ LD_SH4(input, stride, in0, in1, in2, in3);
+
+ /* fdct4 pre-process */
+ {
+ v8i16 temp, mask;
+ v16i8 zero = { 0 };
+ v16i8 one = __msa_ldi_b(1);
+ /* After the shift by 4, `mask` is added to in0 only in lanes where in0 is non-zero (ceqi against 0, then inverted). */
+ mask = (v8i16)__msa_sldi_b(zero, one, 15); /* NOTE(review): presumably leaves a 1 in a single halfword lane; confirm */
+ SLLI_4V(in0, in1, in2, in3, 4);
+ temp = __msa_ceqi_h(in0, 0);
+ temp = (v8i16)__msa_xori_b((v16u8)temp, 255);
+ temp = mask & temp;
+ in0 += temp;
+ }
+ /* In each case the first-named transform runs before the transpose and the second-named one after it. */
+ switch (tx_type) {
+ case DCT_DCT:
+ VPX_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VPX_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ case ADST_DCT:
+ VPX_FADST4(in0, in1, in2, in3, in0, in1, in2, in3);
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VPX_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ case DCT_ADST:
+ VPX_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VPX_FADST4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ case ADST_ADST:
+ VPX_FADST4(in0, in1, in2, in3, in0, in1, in2, in3);
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VPX_FADST4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ default: assert(0); break;
+ }
+ /* NOTE(review): final step looks like (x + 1) >> 2 on every value, then packing four half-used vectors into two; confirm SRA_4V / PCKEV_D2_SH. */
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ ADD4(in0, 1, in1, 1, in2, 1, in3, 1, in0, in1, in2, in3);
+ SRA_4V(in0, in1, in2, in3, 2);
+ PCKEV_D2_SH(in1, in0, in3, in2, in0, in2);
+ ST_SH2(in0, in2, output, 8);
+}
diff --git a/av1/encoder/mips/msa/fdct8x8_msa.c b/av1/encoder/mips/msa/fdct8x8_msa.c
new file mode 100644
index 0000000..3b6532a
--- /dev/null
+++ b/av1/encoder/mips/msa/fdct8x8_msa.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "av1/common/enums.h"
+#include "av1/encoder/mips/msa/fdct_msa.h"
+/* 8x8 forward transform: loads 8 rows, pre-shifts them by 2, runs the pass pair selected by tx_type, then post-processes and stores 8 rows. */
+void vp10_fht8x8_msa(const int16_t *input, int16_t *output, int32_t stride,
+ int32_t tx_type) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+
+ LD_SH8(input, stride, in0, in1, in2, in3, in4, in5, in6, in7);
+ SLLI_4V(in0, in1, in2, in3, 2);
+ SLLI_4V(in4, in5, in6, in7, 2);
+ /* In each case the first-named transform runs before the transpose and the second-named one after it. */
+ switch (tx_type) {
+ case DCT_DCT:
+ VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+ in3, in4, in5, in6, in7);
+ VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ break;
+ case ADST_DCT:
+ VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+ in3, in4, in5, in6, in7);
+ VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ break;
+ case DCT_ADST:
+ VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+ in3, in4, in5, in6, in7);
+ VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ break;
+ case ADST_ADST:
+ VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
+ in3, in4, in5, in6, in7);
+ VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4,
+ in5, in6, in7);
+ break;
+ default: assert(0); break;
+ }
+ /* NOTE(review): SRLI_AVE_S_4V_H is defined outside this file; presumably the final down-scaling step — confirm. */
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
+ in4, in5, in6, in7);
+ SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7);
+ ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8);
+}
diff --git a/av1/encoder/mips/msa/fdct_msa.h b/av1/encoder/mips/msa/fdct_msa.h
new file mode 100644
index 0000000..07471d0
--- /dev/null
+++ b/av1/encoder/mips/msa/fdct_msa.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_MIPS_MSA_VP10_FDCT_MSA_H_
+#define VP10_ENCODER_MIPS_MSA_VP10_FDCT_MSA_H_
+
+#include "aom_dsp/mips/fwd_txfm_msa.h"
+#include "aom_dsp/mips/txfm_macros_msa.h"
+#include "aom_ports/mem.h"
+/* 8-point ADST over eight v8i16 vectors. Arguments expand unparenthesised, so pass plain variables. NOTE(review): in0..in7 appear to be reused as scratch. */
+#define VPX_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
+ out3, out4, out5, out6, out7) \
+ { \
+ v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \
+ v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \
+ v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \
+ cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \
+ v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \
+ cospi_24_64, -cospi_24_64, 0, 0 }; \
+ /* Constants for the in0/in7 and in4/in3 pairs come from coeff0_m lanes 0, 7 and 4, 3. */ \
+ SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \
+ cnst2_m = -cnst0_m; \
+ ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
+ SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \
+ cnst4_m = -cnst2_m; \
+ ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
+ \
+ ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \
+ ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \
+ DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \
+ cnst2_m, cnst3_m, in7, in0, in4, in3); \
+ /* Same construction for the in2/in5 and in6/in1 pairs, using coeff0_m lanes 2, 5 and 6, 1. */ \
+ SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \
+ cnst2_m = -cnst0_m; \
+ ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
+ SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \
+ cnst4_m = -cnst2_m; \
+ ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
+ \
+ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
+ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
+ \
+ DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \
+ cnst2_m, cnst3_m, in5, in2, in6, in1); \
+ BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \
+ out7 = -s0_m; \
+ out0 = s1_m; \
+ /* The remaining stages take their constants from coeff1_m. */ \
+ SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \
+ \
+ ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \
+ cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ cnst1_m = cnst0_m; \
+ \
+ ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \
+ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
+ DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst2_m, \
+ cnst3_m, cnst1_m, out1, out6, s0_m, s1_m); \
+ \
+ SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
+ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ \
+ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
+ ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \
+ out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
+ out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
+ out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
+ out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
+ /* Outputs 1, 3 and 5 are negated last; out7 was already negated when it was assigned. */ \
+ out1 = -out1; \
+ out3 = -out3; \
+ out5 = -out5; \
+ }
+/* 4-point ADST on four lanes of in0..in3, computed in 32-bit and rounded by DCT_CONST_BITS; arguments expand unparenthesised. */
+#define VPX_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) \
+ { \
+ v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \
+ v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \
+ /* NOTE(review): UNPCK_R_SH_SW presumably widens only the right (low) four int16 lanes to 32 bit; confirm. */ \
+ UNPCK_R_SH_SW(in0, in0_r_m); \
+ UNPCK_R_SH_SW(in1, in1_r_m); \
+ UNPCK_R_SH_SW(in2, in2_r_m); \
+ UNPCK_R_SH_SW(in3, in3_r_m); \
+ /* s0_m and s1_m are built up term by term, one sinpi constant at a time. */ \
+ constant_m = __msa_fill_w(sinpi_4_9); \
+ MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \
+ \
+ constant_m = __msa_fill_w(sinpi_1_9); \
+ s0_m += in0_r_m * constant_m; \
+ s1_m -= in1_r_m * constant_m; \
+ \
+ constant_m = __msa_fill_w(sinpi_2_9); \
+ s0_m += in1_r_m * constant_m; \
+ s1_m += in3_r_m * constant_m; \
+ \
+ s2_m = in0_r_m + in1_r_m - in3_r_m; \
+ \
+ constant_m = __msa_fill_w(sinpi_3_9); \
+ MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \
+ /* NOTE(review): assuming MUL2(a, b, c, d, o0, o1) sets o0 = a * b and o1 = c * d, in1_r_m is now (in0 + in1 - in3) * sinpi_3_9 and becomes out1. */ \
+ in0_r_m = s0_m + s3_m; \
+ s2_m = s1_m - s3_m; \
+ s3_m = s1_m - s0_m + s3_m; \
+ \
+ SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \
+ PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, s3_m, s3_m, \
+ out0, out1, out2, out3); \
+ }
+#endif // VP10_ENCODER_MIPS_MSA_VP10_FDCT_MSA_H_
diff --git a/av1/encoder/mips/msa/temporal_filter_msa.c b/av1/encoder/mips/msa/temporal_filter_msa.c
new file mode 100644
index 0000000..4d60d37
--- /dev/null
+++ b/av1/encoder/mips/msa/temporal_filter_msa.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp10_rtcd.h"
+#include "aom_dsp/mips/macros_msa.h"
+
+static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, uint32_t stride,
+ uint8_t *frm2_ptr, int32_t filt_sth,
+ int32_t filt_wgt, uint32_t *acc,
+ uint16_t *cnt) { // MSA kernel: per pixel, cnt += modifier and acc += modifier * frm2 pixel.
+ uint32_t row;
+ uint64_t f0, f1, f2, f3;
+ v16i8 frm2, frm1 = { 0 };
+ v16i8 frm4, frm3 = { 0 };
+ v16u8 frm_r, frm_l;
+ v8i16 frm2_r, frm2_l;
+ v8i16 diff0, diff1, mod0_h, mod1_h;
+ v4i32 cnst3, cnst16, filt_wt, strength;
+ v4i32 mod0_w, mod1_w, mod2_w, mod3_w;
+ v4i32 diff0_r, diff0_l, diff1_r, diff1_l;
+ v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll;
+ v4i32 acc0, acc1, acc2, acc3;
+ v8i16 cnt0, cnt1;
+ // Put filt_wgt, filt_sth and the constants 3 and 16 into every vector lane.
+ filt_wt = __msa_fill_w(filt_wgt);
+ strength = __msa_fill_w(filt_sth);
+ cnst3 = __msa_ldi_w(3);
+ cnst16 = __msa_ldi_w(16);
+ // Two iterations; each advances frm1_ptr by 4 * stride and frm2_ptr by 32 bytes.
+ for (row = 2; row--;) {
+ LD4(frm1_ptr, stride, f0, f1, f2, f3);
+ frm1_ptr += (4 * stride);
+
+ LD_SB2(frm2_ptr, 16, frm2, frm4);
+ frm2_ptr += 32;
+
+ LD_SW2(acc, 4, acc0, acc1);
+ LD_SW2(acc + 8, 4, acc2, acc3);
+ LD_SH2(cnt, 8, cnt0, cnt1);
+ // First half: frm1 (assembled from f0 and f1) against frm2.
+ INSERT_D2_SB(f0, f1, frm1);
+ INSERT_D2_SB(f2, f3, frm3);
+ ILVRL_B2_UB(frm1, frm2, frm_r, frm_l);
+ HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
+ UNPCK_SH_SW(diff0, diff0_r, diff0_l);
+ UNPCK_SH_SW(diff1, diff1_r, diff1_l);
+ MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
+ mod1_w, mod2_w, mod3_w);
+ SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
+ // Masks of the lanes whose shifted value is below 16 (diff*_* are reused as masks).
+ diff0_r = (mod0_w < cnst16);
+ diff0_l = (mod1_w < cnst16);
+ diff1_r = (mod2_w < cnst16);
+ diff1_l = (mod3_w < cnst16);
+
+ SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
+ mod1_w, mod2_w, mod3_w);
+ // Keep (16 - value) only in the masked lanes; all other lanes become 0.
+ mod0_w = diff0_r & mod0_w;
+ mod1_w = diff0_l & mod1_w;
+ mod2_w = diff1_r & mod2_w;
+ mod3_w = diff1_l & mod3_w;
+ // Scale by filt_wgt, then add the packed modifiers to the counts.
+ MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
+ ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
+ ST_SH2(mod0_h, mod1_h, cnt, 8);
+ cnt += 16;
+ // Multiply the modifiers by the widened frm2 pixels and add them to acc.
+ UNPCK_UB_SH(frm2, frm2_r, frm2_l);
+ UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
+ UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
+ MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
+ mod2_w, mod3_w);
+
+ ST_SW2(mod0_w, mod1_w, acc, 4);
+ acc += 8;
+ ST_SW2(mod2_w, mod3_w, acc, 4);
+ acc += 8;
+ // Second half: frm3 (assembled from f2 and f3) against frm4, same steps as above.
+ LD_SW2(acc, 4, acc0, acc1);
+ LD_SW2(acc + 8, 4, acc2, acc3);
+ LD_SH2(cnt, 8, cnt0, cnt1);
+
+ ILVRL_B2_UB(frm3, frm4, frm_r, frm_l);
+ HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
+ UNPCK_SH_SW(diff0, diff0_r, diff0_l);
+ UNPCK_SH_SW(diff1, diff1_r, diff1_l);
+ MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
+ mod1_w, mod2_w, mod3_w);
+ SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
+
+ diff0_r = (mod0_w < cnst16);
+ diff0_l = (mod1_w < cnst16);
+ diff1_r = (mod2_w < cnst16);
+ diff1_l = (mod3_w < cnst16);
+
+ SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
+ mod1_w, mod2_w, mod3_w);
+
+ mod0_w = diff0_r & mod0_w;
+ mod1_w = diff0_l & mod1_w;
+ mod2_w = diff1_r & mod2_w;
+ mod3_w = diff1_l & mod3_w;
+
+ MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
+ ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
+ ST_SH2(mod0_h, mod1_h, cnt, 8);
+ cnt += 16;
+ UNPCK_UB_SH(frm4, frm2_r, frm2_l);
+ UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
+ UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
+ MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
+ mod2_w, mod3_w);
+
+ ST_SW2(mod0_w, mod1_w, acc, 4);
+ acc += 8;
+ ST_SW2(mod2_w, mod3_w, acc, 4);
+ acc += 8;
+ }
+}
+
+// MSA temporal filter kernel for rows of 16 pixels: 8 iterations, two rows per
+// iteration. For every pixel, cnt += modifier and acc += modifier * frm2 pixel.
+static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, uint32_t stride,
+                                             uint8_t *frm2_ptr,
+                                             int32_t filt_sth, int32_t filt_wgt,
+                                             uint32_t *acc, uint16_t *cnt) {
+  uint32_t row;
+  v16i8 frm1, frm2, frm3, frm4, zero = { 0 };
+  v16u8 frm_r, frm_l;
+  v8u16 frm2_r, frm2_l;
+  v8i16 diff0, diff1, mod0_h, mod1_h, cnt0, cnt1;
+  v4i32 cnst3, cnst16, filt_wt, strength;
+  v4i32 mod0_w, mod1_w, mod2_w, mod3_w;
+  v4i32 diff0_r, diff0_l, diff1_r, diff1_l;
+  v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll;
+  v4i32 acc0, acc1, acc2, acc3;
+
+  filt_wt = __msa_fill_w(filt_wgt);
+  strength = __msa_fill_w(filt_sth);
+  cnst3 = __msa_ldi_w(3);
+  cnst16 = __msa_ldi_w(16);
+
+  for (row = 8; row--;) {
+    LD_SB2(frm1_ptr, stride, frm1, frm3);
+    frm1_ptr += stride;
+
+    LD_SB2(frm2_ptr, 16, frm2, frm4);
+    frm2_ptr += 16;
+    // acc0/acc1 come from acc[0..7]; acc2/acc3 must come from acc[8..15].
+    LD_SW2(acc, 4, acc0, acc1);
+    LD_SW2(acc + 8, 4, acc2, acc3);
+    LD_SH2(cnt, 8, cnt0, cnt1);
+    // First row (frm1 against frm2): 3 * diff^2, shifted right by strength.
+    ILVRL_B2_UB(frm1, frm2, frm_r, frm_l);
+    HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
+    UNPCK_SH_SW(diff0, diff0_r, diff0_l);
+    UNPCK_SH_SW(diff1, diff1_r, diff1_l);
+    MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
+         mod0_w, mod1_w, mod2_w, mod3_w);
+    MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
+         mod1_w, mod2_w, mod3_w);
+    SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
+    // Keep (16 - value) only in lanes where value < 16; other lanes become 0.
+    diff0_r = (mod0_w < cnst16);
+    diff0_l = (mod1_w < cnst16);
+    diff1_r = (mod2_w < cnst16);
+    diff1_l = (mod3_w < cnst16);
+
+    SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
+         mod1_w, mod2_w, mod3_w);
+
+    mod0_w = diff0_r & mod0_w;
+    mod1_w = diff0_l & mod1_w;
+    mod2_w = diff1_r & mod2_w;
+    mod3_w = diff1_l & mod3_w;
+    // Scale by filt_wgt, then add the packed modifiers to the counts.
+    MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
+         mod0_w, mod1_w, mod2_w, mod3_w);
+    PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
+    ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
+    ST_SH2(mod0_h, mod1_h, cnt, 8);
+    cnt += 16;
+    // Multiply the modifiers by the widened frm2 pixels and add them to acc.
+    ILVRL_B2_UH(zero, frm2, frm2_r, frm2_l);
+    UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
+    UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
+    MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
+         mod0_w, mod1_w, mod2_w, mod3_w);
+    ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
+         mod2_w, mod3_w);
+
+    ST_SW2(mod0_w, mod1_w, acc, 4);
+    acc += 8;
+    ST_SW2(mod2_w, mod3_w, acc, 4);
+    acc += 8;
+    // Second row of the pair (frm3 against frm4): identical steps.
+    LD_SW2(acc, 4, acc0, acc1);
+    LD_SW2(acc + 8, 4, acc2, acc3);
+    LD_SH2(cnt, 8, cnt0, cnt1);
+
+    ILVRL_B2_UB(frm3, frm4, frm_r, frm_l);
+    HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
+    UNPCK_SH_SW(diff0, diff0_r, diff0_l);
+    UNPCK_SH_SW(diff1, diff1_r, diff1_l);
+    MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
+         mod0_w, mod1_w, mod2_w, mod3_w);
+    MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
+         mod1_w, mod2_w, mod3_w);
+    SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
+
+    diff0_r = (mod0_w < cnst16);
+    diff0_l = (mod1_w < cnst16);
+    diff1_r = (mod2_w < cnst16);
+    diff1_l = (mod3_w < cnst16);
+
+    SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
+         mod1_w, mod2_w, mod3_w);
+
+    mod0_w = diff0_r & mod0_w;
+    mod1_w = diff0_l & mod1_w;
+    mod2_w = diff1_r & mod2_w;
+    mod3_w = diff1_l & mod3_w;
+
+    MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
+         mod0_w, mod1_w, mod2_w, mod3_w);
+    PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
+    ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
+    ST_SH2(mod0_h, mod1_h, cnt, 8);
+    cnt += 16;
+
+    ILVRL_B2_UH(zero, frm4, frm2_r, frm2_l);
+    UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
+    UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
+    MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
+         mod0_w, mod1_w, mod2_w, mod3_w);
+    ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
+         mod2_w, mod3_w);
+    ST_SW2(mod0_w, mod1_w, acc, 4);
+    acc += 8;
+    ST_SW2(mod2_w, mod3_w, acc, 4);
+    acc += 8;
+
+    frm1_ptr += stride;
+    frm2_ptr += 16;
+  }
+}
+
+void vp10_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride,
+ uint8_t *frame2_ptr, uint32_t blk_w,
+ uint32_t blk_h, int32_t strength,
+ int32_t filt_wgt, uint32_t *accu,
+ uint16_t *cnt) { // Dispatches to one of the MSA kernels or to the C implementation.
+ if (8 == (blk_w * blk_h)) { // NOTE(review): tests the product blk_w * blk_h, not each dimension -- confirm intent.
+ temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr, strength,
+ filt_wgt, accu, cnt);
+ } else if (16 == (blk_w * blk_h)) { // NOTE(review): same product test as above.
+ temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr, strength,
+ filt_wgt, accu, cnt);
+ } else { // every other size goes to vp10_temporal_filter_apply_c()
+ vp10_temporal_filter_apply_c(frame1_ptr, stride, frame2_ptr, blk_w, blk_h,
+ strength, filt_wgt, accu, cnt);
+ }
+}
diff --git a/av1/encoder/palette.c b/av1/encoder/palette.c
new file mode 100644
index 0000000..74f91b7
--- /dev/null
+++ b/av1/encoder/palette.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include "av1/encoder/palette.h"
+
+static float calc_dist(const float *p1, const float *p2, int dim) {
+ float dist = 0; // running sum of squared per-component differences
+ int i;
+ for (i = 0; i < dim; ++i) {
+ const float diff = p1[i] - p2[i];
+ dist += diff * diff;
+ }
+ return dist; // squared Euclidean distance (no square root is taken)
+}
+
+void vp10_calc_indices(const float *data, const float *centroids,
+ uint8_t *indices, int n, int k, int dim) { // indices[i] = closest of the k centroids to point i
+ int i, j;
+ for (i = 0; i < n; ++i) {
+ float min_dist = calc_dist(data + i * dim, centroids, dim); // start from centroid 0
+ indices[i] = 0;
+ for (j = 1; j < k; ++j) {
+ const float this_dist =
+ calc_dist(data + i * dim, centroids + j * dim, dim);
+ if (this_dist < min_dist) { // strict '<': on a tie the lower centroid index is kept
+ min_dist = this_dist;
+ indices[i] = j;
+ }
+ }
+ }
+}
+
+// Steps the LCG held in '*state'; returns a value in the range [0, 32768).
+static unsigned int lcg_rand16(unsigned int *state) {
+  *state = *state * 1103515245 + 12345;
+  return (*state >> 16) & 0x7fff;
+}
+
+static void calc_centroids(const float *data, float *centroids,
+ const uint8_t *indices, int n, int k, int dim) { // centroid = rounded mean of its assigned points
+ int i, j, index;
+ int count[PALETTE_MAX_SIZE];
+ unsigned int rand_state = (unsigned int)data[0]; // LCG seed taken from the first data value
+
+ assert(n <= 32768);
+
+ memset(count, 0, sizeof(count[0]) * k);
+ memset(centroids, 0, sizeof(centroids[0]) * k * dim);
+ // Count the members of each cluster and sum their coordinates.
+ for (i = 0; i < n; ++i) {
+ index = indices[i];
+ assert(index < k);
+ ++count[index];
+ for (j = 0; j < dim; ++j) {
+ centroids[index * dim + j] += data[i * dim + j];
+ }
+ }
+ // An empty cluster is re-seeded from a pseudo-randomly picked data point; others are averaged.
+ for (i = 0; i < k; ++i) {
+ if (count[i] == 0) {
+ memcpy(centroids + i * dim, data + (lcg_rand16(&rand_state) % n) * dim,
+ sizeof(centroids[0]) * dim);
+ } else {
+ const float norm = 1.0f / count[i];
+ for (j = 0; j < dim; ++j) centroids[i * dim + j] *= norm;
+ }
+ }
+
+ // Round to nearest integers.
+ for (i = 0; i < k * dim; ++i) {
+ centroids[i] = roundf(centroids[i]);
+ }
+}
+
+static float calc_total_dist(const float *data, const float *centroids,
+ const uint8_t *indices, int n, int k, int dim) {
+ float dist = 0; // sum of calc_dist() between every point and its assigned centroid
+ int i;
+ (void)k; // k is not used by this function
+
+ for (i = 0; i < n; ++i)
+ dist += calc_dist(data + i * dim, centroids + indices[i] * dim, dim);
+
+ return dist;
+}
+
+void vp10_k_means(const float *data, float *centroids, uint8_t *indices, int n,
+ int k, int dim, int max_itr) {
+ int i;
+ float this_dist;
+ float pre_centroids[2 * PALETTE_MAX_SIZE]; // NOTE(review): receives k * dim floats below, so k * dim must not exceed this size
+ uint8_t pre_indices[MAX_SB_SQUARE]; // NOTE(review): receives n entries below, so n must not exceed MAX_SB_SQUARE
+ // Initial assignment and total distance for the caller-supplied centroids.
+ vp10_calc_indices(data, centroids, indices, n, k, dim);
+ this_dist = calc_total_dist(data, centroids, indices, n, k, dim);
+
+ for (i = 0; i < max_itr; ++i) {
+ const float pre_dist = this_dist;
+ memcpy(pre_centroids, centroids, sizeof(pre_centroids[0]) * k * dim);
+ memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n);
+ // One step: recompute the centroids, reassign the points, re-measure.
+ calc_centroids(data, centroids, indices, n, k, dim);
+ vp10_calc_indices(data, centroids, indices, n, k, dim);
+ this_dist = calc_total_dist(data, centroids, indices, n, k, dim);
+ // The total distance grew: restore the saved state and stop.
+ if (this_dist > pre_dist) {
+ memcpy(centroids, pre_centroids, sizeof(pre_centroids[0]) * k * dim);
+ memcpy(indices, pre_indices, sizeof(pre_indices[0]) * n);
+ break;
+ }
+ if (!memcmp(centroids, pre_centroids, sizeof(pre_centroids[0]) * k * dim)) // centroids unchanged: stop
+ break;
+ }
+}
+
+// qsort() comparator for floats, ascending: returns -1, 0 or 1.
+static int float_comparer(const void *a, const void *b) {
+  const float fa = *(const float *)a, fb = *(const float *)b;
+  return (fa > fb) - (fa < fb);
+}
+
+// Sorts 'centroids' with float_comparer, then keeps one copy of each run of
+// equal neighbours at the front; returns how many values were kept.
+int vp10_remove_duplicates(float *centroids, int num_centroids) {
+  int kept = 1;  // slot 0 is always retained
+  int src;
+  qsort(centroids, num_centroids, sizeof(*centroids), float_comparer);
+  for (src = 1; src < num_centroids; ++src) {
+    if (centroids[src] == centroids[src - 1]) continue;  // repeated value
+    centroids[kept] = centroids[src];
+    ++kept;
+  }
+  return kept;
+}
+
+// Returns how many distinct 8-bit sample values occur in the 'rows' x 'cols'
+// region of 'src', whose rows start 'stride' bytes apart.
+int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols) {
+  int histogram[256];
+  int num_colors = 0;
+  int row, col, value;
+  memset(histogram, 0, sizeof(histogram));
+
+  for (row = 0; row < rows; ++row) {
+    const int row_offset = row * stride;
+    for (col = 0; col < cols; ++col) ++histogram[src[row_offset + col]];
+  }
+
+  // Every populated bin is one distinct color.
+  for (value = 0; value < 256; ++value) {
+    if (histogram[value] != 0) ++num_colors;
+  }
+
+  return num_colors;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+int vp10_count_colors_highbd(const uint8_t *src8, int stride, int rows,
+ int cols, int bit_depth) { // Counts the distinct sample values in a rows x cols region.
+ int n = 0, r, c, i;
+ uint16_t val;
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8); // samples are read as uint16_t through this pointer
+ int val_count[1 << 12];
+ // The table has 1 << 12 bins; only the first 1 << bit_depth are counted below.
+ assert(bit_depth <= 12);
+ memset(val_count, 0, (1 << 12) * sizeof(val_count[0]));
+ for (r = 0; r < rows; ++r) {
+ for (c = 0; c < cols; ++c) {
+ val = src[r * stride + c];
+ ++val_count[val]; // NOTE(review): val is not range-checked against the 4096-entry table
+ }
+ }
+ // Count the bins that were hit at least once.
+ for (i = 0; i < (1 << bit_depth); ++i) {
+ if (val_count[i]) {
+ ++n;
+ }
+ }
+
+ return n;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/encoder/palette.h b/av1/encoder/palette.h
new file mode 100644
index 0000000..fbbb39c
--- /dev/null
+++ b/av1/encoder/palette.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_PALETTE_H_
+#define VP10_ENCODER_PALETTE_H_
+
+#include "av1/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_calc_indices(const float *data, const float *centroids,
+ uint8_t *indices, int n, int k, int dim);
+
+// Given 'data' of size 'n' and initial guess of 'centroids' of size 'k x dim',
+// runs up to 'max_itr' iterations of k-means algorithm to get updated
+// 'centroids' and the centroid 'indices' for elements in 'data'.
+// Note: the output centroids are rounded off to nearest integers.
+void vp10_k_means(const float *data, float *centroids, uint8_t *indices, int n,
+ int k, int dim, int max_itr);
+
+// Given a list of centroids, returns the number of unique centroids 'k', and
+// puts these unique centroids in first 'k' indices of 'centroids' array.
+// Ideally, the centroids should be rounded to integers before calling this
+// method.
+int vp10_remove_duplicates(float *centroids, int num_centroids);
+
+int vp10_count_colors(const uint8_t *src, int stride, int rows, int cols);
+#if CONFIG_VP9_HIGHBITDEPTH
+int vp10_count_colors_highbd(const uint8_t *src8, int stride, int rows,
+ int cols, int bit_depth);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* VP10_ENCODER_PALETTE_H_ */
diff --git a/av1/encoder/pickdering.c b/av1/encoder/pickdering.c
new file mode 100644
index 0000000..91e9b54
--- /dev/null
+++ b/av1/encoder/pickdering.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string.h>
+
+#include "./vpx_scale_rtcd.h"
+#include "av1/common/dering.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/reconinter.h"
+#include "av1/encoder/encoder.h"
+#include "aom/vpx_integer.h"
+
+// Sum of squared differences between x and y over (nvb << 3) rows and
+// (nhb << 3) columns, divided by 2^(2 * coeff_shift).
+static double compute_dist(int16_t *x, int xstride, int16_t *y, int ystride,
+                           int nhb, int nvb, int coeff_shift) {
+  double sse = 0;
+  int row, col;
+  for (row = 0; row < nvb << 3; row++) {
+    for (col = 0; col < nhb << 3; col++) {
+      const double diff = x[row * xstride + col] - y[row * ystride + col];
+      sse += diff * diff;
+    }
+  }
+  return sse / (double)(1 << 2 * coeff_shift);
+}
+
+int vp10_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
+ VP10_COMMON *cm, MACROBLOCKD *xd) { // Tries dering levels per superblock against 'ref'; returns the chosen level.
+ int r, c;
+ int sbr, sbc;
+ int nhsb, nvsb;
+ od_dering_in *src;
+ int16_t *ref_coeff;
+ unsigned char *bskip;
+ int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
+ int stride;
+ int bsize[3];
+ int dec[3];
+ int pli;
+ int(*mse)[MAX_DERING_LEVEL]; // one row of per-level errors for each superblock
+ int best_count[MAX_DERING_LEVEL] = { 0 };
+ double tot_mse[MAX_DERING_LEVEL] = { 0 };
+ int level;
+ int best_level;
+ int global_level;
+ double best_tot_mse = 1e15;
+ int coeff_shift = VPXMAX(cm->bit_depth - 8, 0);
+ src = vpx_malloc(sizeof(*src) * cm->mi_rows * cm->mi_cols * 64); // NOTE(review): allocation results are used without a NULL check
+ ref_coeff = vpx_malloc(sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * 64);
+ bskip = vpx_malloc(sizeof(*bskip) * cm->mi_rows * cm->mi_cols);
+ vp10_setup_dst_planes(xd->plane, frame, 0, 0);
+ for (pli = 0; pli < 3; pli++) {
+ dec[pli] = xd->plane[pli].subsampling_x;
+ bsize[pli] = 8 >> dec[pli];
+ }
+ stride = bsize[0] * cm->mi_cols; // row pitch of the src and ref_coeff copies
+ for (r = 0; r < bsize[0] * cm->mi_rows; ++r) { // copy plane 0 of 'frame' and the y_buffer of 'ref'
+ for (c = 0; c < bsize[0] * cm->mi_cols; ++c) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ src[r * stride + c] = CONVERT_TO_SHORTPTR(
+ xd->plane[0].dst.buf)[r * xd->plane[0].dst.stride + c];
+ ref_coeff[r * stride + c] =
+ CONVERT_TO_SHORTPTR(ref->y_buffer)[r * ref->y_stride + c];
+ } else {
+#endif
+ src[r * stride + c] =
+ xd->plane[0].dst.buf[r * xd->plane[0].dst.stride + c];
+ ref_coeff[r * stride + c] = ref->y_buffer[r * ref->y_stride + c];
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif
+ }
+ }
+ for (r = 0; r < cm->mi_rows; ++r) { // record the skip flag of every mode-info unit
+ for (c = 0; c < cm->mi_cols; ++c) {
+ const MB_MODE_INFO *mbmi =
+ &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
+ bskip[r * cm->mi_cols + c] = mbmi->skip;
+ }
+ }
+ nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; // superblock rows, rounded up
+ nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE; // superblock columns, rounded up
+ mse = vpx_malloc(nvsb * nhsb * sizeof(*mse));
+ for (sbr = 0; sbr < nvsb; sbr++) {
+ for (sbc = 0; sbc < nhsb; sbc++) {
+ int best_mse = 1000000000;
+ int nvb, nhb;
+ int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
+ best_level = 0;
+ nhb = VPXMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc); // clipped at the right frame edge
+ nvb = VPXMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr); // clipped at the bottom frame edge
+ for (level = 0; level < 64; level++) { // NOTE(review): literal 64 indexes mse[][MAX_DERING_LEVEL] -- confirm they match
+ int threshold;
+ threshold = level << coeff_shift;
+ od_dering(
+ &OD_DERING_VTBL_C, dst, MAX_MIB_SIZE * bsize[0],
+ &src[sbr * stride * bsize[0] * MAX_MIB_SIZE +
+ sbc * bsize[0] * MAX_MIB_SIZE],
+ cm->mi_cols * bsize[0], nhb, nvb, sbc, sbr, nhsb, nvsb, 0, dir, 0,
+ &bskip[MAX_MIB_SIZE * sbr * cm->mi_cols + MAX_MIB_SIZE * sbc],
+ cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP, coeff_shift);
+ mse[nhsb * sbr + sbc][level] = (int)compute_dist(
+ dst, MAX_MIB_SIZE * bsize[0],
+ &ref_coeff[sbr * stride * bsize[0] * MAX_MIB_SIZE +
+ sbc * bsize[0] * MAX_MIB_SIZE],
+ stride, nhb, nvb, coeff_shift);
+ tot_mse[level] += mse[nhsb * sbr + sbc][level];
+ if (mse[nhsb * sbr + sbc][level] < best_mse) { // strict '<': the lowest level wins ties
+ best_mse = mse[nhsb * sbr + sbc][level];
+ best_level = level;
+ }
+ }
+ best_count[best_level]++;
+ }
+ }
+#if DERING_REFINEMENT
+ best_level = 0;
+ /* Search for the best global level one value at a time. */
+ for (global_level = 2; global_level < MAX_DERING_LEVEL; global_level++) {
+ double tot_mse = 0; // shadows the function-scope tot_mse[] array inside this loop
+ for (sbr = 0; sbr < nvsb; sbr++) {
+ for (sbc = 0; sbc < nhsb; sbc++) {
+ int gi;
+ int best_mse = mse[nhsb * sbr + sbc][0];
+ for (gi = 1; gi < 4; gi++) { // NOTE(review): literal 4 here, DERING_REFINEMENT_LEVELS in the loop further down -- confirm
+ level = compute_level_from_index(global_level, gi);
+ if (mse[nhsb * sbr + sbc][level] < best_mse) {
+ best_mse = mse[nhsb * sbr + sbc][level];
+ }
+ }
+ tot_mse += best_mse;
+ }
+ }
+ if (tot_mse < best_tot_mse) {
+ best_level = global_level;
+ best_tot_mse = tot_mse;
+ }
+ }
+ for (sbr = 0; sbr < nvsb; sbr++) { // pick the per-superblock gain index for the chosen global level
+ for (sbc = 0; sbc < nhsb; sbc++) {
+ int gi;
+ int best_gi;
+ int best_mse = mse[nhsb * sbr + sbc][0];
+ best_gi = 0;
+ for (gi = 1; gi < DERING_REFINEMENT_LEVELS; gi++) {
+ level = compute_level_from_index(best_level, gi);
+ if (mse[nhsb * sbr + sbc][level] < best_mse) {
+ best_gi = gi;
+ best_mse = mse[nhsb * sbr + sbc][level];
+ }
+ }
+ cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
+ MAX_MIB_SIZE * sbc]
+ ->mbmi.dering_gain = best_gi; // written to the first mode-info entry of the superblock
+ }
+ }
+#else
+ best_level = 0;
+ for (level = 0; level < MAX_DERING_LEVEL; level++) { // level with the smallest summed error
+ if (tot_mse[level] < tot_mse[best_level]) best_level = level;
+ }
+#endif
+ vpx_free(src);
+ vpx_free(ref_coeff);
+ vpx_free(bskip);
+ vpx_free(mse);
+ return best_level;
+}
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
new file mode 100644
index 0000000..c4e9b7d
--- /dev/null
+++ b/av1/encoder/picklpf.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <limits.h>
+
+#include "./vpx_scale_rtcd.h"
+
+#include "aom_dsp/psnr.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+
+#include "av1/common/loopfilter.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/quant_common.h"
+
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/picklpf.h"
+#include "av1/encoder/quantize.h"
+
+// Ceiling for the loop filter level: MAX_LOOP_FILTER, lowered to 3/4 of it in
+// pass 2 when twopass.section_intra_rating exceeds 8.
+int vp10_get_max_filter_level(const VP10_COMP *cpi) {
+  const int is_pass2 = (cpi->oxcf.pass == 2);
+  if (is_pass2 && cpi->twopass.section_intra_rating > 8)
+    return MAX_LOOP_FILTER * 3 / 4;
+  return MAX_LOOP_FILTER;
+}
+
+static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
+ VP10_COMP *const cpi, int filt_level,
+ int partial_frame) { // Filters frame_to_show at filt_level, returns its error against 'sd', then restores it.
+ VP10_COMMON *const cm = &cpi->common;
+ int64_t filt_err;
+ // Filter cm->frame_to_show; the multi-threaded variant is used only when it is compiled in and num_workers > 1.
+#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION
+ vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
+ 1, partial_frame);
+#else
+ if (cpi->num_workers > 1)
+ vp10_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane,
+ filt_level, 1, partial_frame, cpi->workers,
+ cpi->num_workers, &cpi->lf_row_sync);
+ else
+ vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
+ 1, partial_frame);
+#endif
+ // Error between 'sd' and the filtered frame (presumably the Y-plane SSE, per the helper names).
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth) {
+ filt_err = vpx_highbd_get_y_sse(sd, cm->frame_to_show);
+ } else {
+ filt_err = vpx_get_y_sse(sd, cm->frame_to_show);
+ }
+#else
+ filt_err = vpx_get_y_sse(sd, cm->frame_to_show);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // Re-instate the unfiltered frame
+ vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
+
+ return filt_err;
+}
+
+int vp10_search_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
+ int partial_frame, double *best_cost_ret) { // Returns the chosen level; its cost goes to *best_cost_ret when non-NULL.
+ const VP10_COMMON *const cm = &cpi->common;
+ const struct loopfilter *const lf = &cm->lf;
+ const int min_filter_level = 0;
+ const int max_filter_level = vp10_get_max_filter_level(cpi);
+ int filt_direction = 0; // -1: last move was downwards, 1: upwards, 0: probe both sides
+ int64_t best_err;
+ int filt_best;
+ MACROBLOCK *x = &cpi->td.mb;
+
+ // Start the search at the previous frame filter level unless it is now out of
+ // range.
+ int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
+ int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
+ // Sum squared error at each filter level
+ int64_t ss_err[MAX_LOOP_FILTER + 1];
+
+ // Set each entry to -1
+ memset(ss_err, 0xFF, sizeof(ss_err));
+
+ // Make a copy of the unfiltered / processed recon buffer
+ vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
+
+ best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame);
+ filt_best = filt_mid;
+ ss_err[filt_mid] = best_err;
+ // Probe filt_mid +/- filter_step; the step is halved whenever the best level does not move.
+ while (filter_step > 0) {
+ const int filt_high = VPXMIN(filt_mid + filter_step, max_filter_level);
+ const int filt_low = VPXMAX(filt_mid - filter_step, min_filter_level);
+
+ // Bias against raising loop filter in favor of lowering it.
+ int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
+
+ if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20))
+ bias = (bias * cpi->twopass.section_intra_rating) / 20;
+
+ // yx, bias less for large block size
+ if (cm->tx_mode != ONLY_4X4) bias >>= 1;
+
+ if (filt_direction <= 0 && filt_low != filt_mid) {
+ // Get Low filter error score
+ if (ss_err[filt_low] < 0) { // negative means this level has not been evaluated yet
+ ss_err[filt_low] = try_filter_frame(sd, cpi, filt_low, partial_frame);
+ }
+ // If value is close to the best so far then bias towards a lower loop
+ // filter value.
+ if (ss_err[filt_low] < (best_err + bias)) {
+ // Was it actually better than the previous best?
+ if (ss_err[filt_low] < best_err) {
+ best_err = ss_err[filt_low];
+ }
+ filt_best = filt_low;
+ }
+ }
+
+ // Now look at filt_high
+ if (filt_direction >= 0 && filt_high != filt_mid) {
+ if (ss_err[filt_high] < 0) {
+ ss_err[filt_high] = try_filter_frame(sd, cpi, filt_high, partial_frame);
+ }
+ // If value is significantly better than previous best, bias added against
+ // raising filter value
+ if (ss_err[filt_high] < (best_err - bias)) {
+ best_err = ss_err[filt_high];
+ filt_best = filt_high;
+ }
+ }
+
+ // Halve the step distance if the best filter value was the same as last time
+ if (filt_best == filt_mid) {
+ filter_step /= 2;
+ filt_direction = 0;
+ } else {
+ filt_direction = (filt_best < filt_mid) ? -1 : 1;
+ filt_mid = filt_best;
+ }
+ }
+
+ // Update best error
+ best_err = ss_err[filt_best];
+
+ if (best_cost_ret)
+ *best_cost_ret = RDCOST_DBL(x->rdmult, x->rddiv, 0, best_err);
+ return filt_best;
+}
+
+#if !CONFIG_LOOP_RESTORATION
+void vp10_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
+ LPF_PICK_METHOD method) { // Sets cm->lf.sharpness_level and cm->lf.filter_level according to 'method'.
+ VP10_COMMON *const cm = &cpi->common;
+ struct loopfilter *const lf = &cm->lf;
+
+ lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0 : cpi->oxcf.sharpness;
+ // The level is either set to 0, estimated from the quantizer, or found by search.
+ if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) {
+ lf->filter_level = 0;
+ } else if (method >= LPF_PICK_FROM_Q) {
+ const int min_filter_level = 0;
+ const int max_filter_level = vp10_get_max_filter_level(cpi);
+ const int q = vp10_ac_quant(cm->base_qindex, 0, cm->bit_depth);
+// These values were determined by linear fitting the result of the
+// searched level, filt_guess = q * 0.316206 + 3.87252
+#if CONFIG_VP9_HIGHBITDEPTH
+ int filt_guess;
+ switch (cm->bit_depth) {
+ case VPX_BITS_8:
+ filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
+ break;
+ case VPX_BITS_10:
+ filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20);
+ break;
+ case VPX_BITS_12:
+ filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
+ break;
+ default:
+ assert(0 &&
+ "bit_depth should be VPX_BITS_8, VPX_BITS_10 "
+ "or VPX_BITS_12");
+ return; // leaves lf->filter_level unchanged when asserts are compiled out
+ }
+#else
+ int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (cm->frame_type == KEY_FRAME) filt_guess -= 4;
+ lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
+ } else {
+ lf->filter_level = vp10_search_filter_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL);
+ }
+
+#if CONFIG_EXT_TILE
+ // TODO(any): 0 loopfilter level is only necessary if individual tile
+ // decoding is required. We need to communicate this requirement to this
+ // code and force loop filter level 0 only if required.
+ lf->filter_level = 0;
+#endif // CONFIG_EXT_TILE
+}
+#endif // !CONFIG_LOOP_RESTORATION
diff --git a/av1/encoder/picklpf.h b/av1/encoder/picklpf.h
new file mode 100644
index 0000000..cd8afc6
--- /dev/null
+++ b/av1/encoder/picklpf.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_PICKLPF_H_
+#define VP10_ENCODER_PICKLPF_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "av1/encoder/encoder.h"
+
+struct yv12_buffer_config;
+struct VP10_COMP;
+int vp10_get_max_filter_level(const VP10_COMP *cpi);
+int vp10_search_filter_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
+ int partial_frame, double *err);
+void vp10_pick_filter_level(const struct yv12_buffer_config *sd,
+ struct VP10_COMP *cpi, LPF_PICK_METHOD method);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_PICKLPF_H_
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
new file mode 100644
index 0000000..b6ee6f0
--- /dev/null
+++ b/av1/encoder/pickrst.c
@@ -0,0 +1,808 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <float.h>
+#include <limits.h>
+#include <math.h>
+
+#include "./vpx_scale_rtcd.h"
+
+#include "aom_dsp/psnr.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+
+#include "av1/common/onyxc_int.h"
+#include "av1/common/quant_common.h"
+
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/picklpf.h"
+#include "av1/encoder/pickrst.h"
+#include "av1/encoder/quantize.h"
+
+// Applies the restoration described by rsi to cm->frame_to_show, measures the
+// Y SSE against sd, then copies cpi->last_frame_db back over the frame.
+static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *sd,
+                                     VP10_COMP *const cpi, RestorationInfo *rsi,
+                                     int partial_frame) {
+  VP10_COMMON *const cm = &cpi->common;
+  YV12_BUFFER_CONFIG *const recon = cm->frame_to_show;
+  int64_t sse;
+
+  vp10_loop_restoration_frame(recon, cm, rsi, 1, partial_frame);
+#if CONFIG_VP9_HIGHBITDEPTH
+  sse = cm->use_highbitdepth ? vpx_highbd_get_y_sse(sd, recon)
+                             : vpx_get_y_sse(sd, recon);
+#else
+  sse = vpx_get_y_sse(sd, recon);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  vpx_yv12_copy_y(&cpi->last_frame_db, recon);  // undo the trial restoration
+  return sse;
+}
+
+static int search_bilateral_level(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
+ int filter_level, int partial_frame,
+ int *bilateral_level, double *best_cost_ret) {
+ VP10_COMMON *const cm = &cpi->common;
+ int i, j, tile_idx;
+ int64_t err;
+ int bits;
+ double cost, best_cost, cost_norestore, cost_bilateral;
+ const int bilateral_level_bits = vp10_bilateral_level_bits(&cpi->common);
+ const int bilateral_levels = 1 << bilateral_level_bits;
+ MACROBLOCK *x = &cpi->td.mb;
+ RestorationInfo rsi;
+ const int ntiles =
+ vp10_get_restoration_ntiles(BILATERAL_TILESIZE, cm->width, cm->height);
+ // For the given filter_level: picks a bilateral level per tile (-1 = none) and
+ // returns 1 if the combined choice costs less than no restoration, else 0.
+ vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);  // saved on entry
+ vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filter_level,
+ 1, partial_frame);
+ vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db);  // loop-filtered copy
+
+ // RD cost associated with no restoration
+ rsi.restoration_type = RESTORE_NONE;
+ err = try_restoration_frame(sd, cpi, &rsi, partial_frame);
+ bits = 0;
+ cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv,
+ (bits << (VP10_PROB_COST_SHIFT - 4)), err);
+ best_cost = cost_norestore;
+
+ // RD cost associated with bilateral filtering
+ rsi.restoration_type = RESTORE_BILATERAL;
+ rsi.bilateral_level =
+ (int *)vpx_malloc(sizeof(*rsi.bilateral_level) * ntiles);
+ assert(rsi.bilateral_level != NULL);  // NOTE(review): unchecked under NDEBUG
+
+ for (j = 0; j < ntiles; ++j) bilateral_level[j] = -1;
+
+ // Find best level for each tile, with every other tile left at -1
+ for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
+ for (j = 0; j < ntiles; ++j) rsi.bilateral_level[j] = -1;
+ best_cost = cost_norestore;  // a level is kept only if it beats this cost
+ for (i = 0; i < bilateral_levels; ++i) {
+ rsi.bilateral_level[tile_idx] = i;
+ err = try_restoration_frame(sd, cpi, &rsi, partial_frame);
+ bits = bilateral_level_bits + 1;
+ // Normally the rate is rate in bits * 256 and dist is sum sq err * 64
+ // when RDCOST is used. However below we just scale both in the correct
+ // ratios appropriately but not exactly by these values.
+ cost = RDCOST_DBL(x->rdmult, x->rddiv,
+ (bits << (VP10_PROB_COST_SHIFT - 4)), err);
+ if (cost < best_cost) {
+ bilateral_level[tile_idx] = i;
+ best_cost = cost;
+ }
+ }
+ }
+ // Find cost for combined configuration: 1 bit per tile, plus the level bits
+ bits = 0;
+ for (j = 0; j < ntiles; ++j) {
+ rsi.bilateral_level[j] = bilateral_level[j];
+ if (rsi.bilateral_level[j] >= 0) {
+ bits += (bilateral_level_bits + 1);
+ } else {
+ bits += 1;
+ }
+ }
+ err = try_restoration_frame(sd, cpi, &rsi, partial_frame);
+ cost_bilateral = RDCOST_DBL(x->rdmult, x->rddiv,
+ (bits << (VP10_PROB_COST_SHIFT - 4)), err);
+
+ vpx_free(rsi.bilateral_level);
+
+ vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);  // undo loop filter
+ if (cost_bilateral < cost_norestore) {
+ if (best_cost_ret) *best_cost_ret = cost_bilateral;
+ return 1;
+ } else {
+ if (best_cost_ret) *best_cost_ret = cost_norestore;
+ return 0;
+ }
+}
+
+// Searches the loop filter level, starting from the previous frame's level,
+// together with the per-tile bilateral levels found for each candidate level.
+// Returns whether bilateral restoration beat no restoration at the best level.
+static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
+                                         VP10_COMP *cpi, int partial_frame,
+                                         int *filter_best, int *bilateral_level,
+                                         double *best_cost_ret) {
+  const VP10_COMMON *const cm = &cpi->common;
+  const struct loopfilter *const lf = &cm->lf;
+  const int min_filter_level = 0;
+  const int max_filter_level = vp10_get_max_filter_level(cpi);
+  int filt_direction = 0;
+  int filt_best;
+  double best_err;
+  int i;
+  int *tmp_level;
+  int tmp_filt;  // filter level whose tile levels tmp_level currently holds
+  int bilateral_success[MAX_LOOP_FILTER + 1];
+  const int ntiles =
+      vp10_get_restoration_ntiles(BILATERAL_TILESIZE, cm->width, cm->height);
+
+  // Start the search at the previous frame filter level unless it is now out of
+  // range.
+  int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
+  int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
+  double ss_err[MAX_LOOP_FILTER + 1];  // negative: level not evaluated yet
+  for (i = 0; i <= MAX_LOOP_FILTER; ++i) ss_err[i] = -1.0;
+
+  tmp_level = (int *)vpx_malloc(sizeof(*tmp_level) * ntiles);
+  assert(tmp_level != NULL);
+
+  bilateral_success[filt_mid] = search_bilateral_level(
+      sd, cpi, filt_mid, partial_frame, tmp_level, &best_err);
+  tmp_filt = filt_best = filt_mid;
+  ss_err[filt_mid] = best_err;
+  memcpy(bilateral_level, tmp_level, sizeof(*tmp_level) * ntiles);
+
+  while (filter_step > 0) {
+    const int filt_high = VPXMIN(filt_mid + filter_step, max_filter_level);
+    const int filt_low = VPXMAX(filt_mid - filter_step, min_filter_level);
+
+    // Bias against raising loop filter in favor of lowering it.
+    double bias = (best_err / (1 << (15 - (filt_mid / 8)))) * filter_step;
+
+    if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20))
+      bias = (bias * cpi->twopass.section_intra_rating) / 20;
+
+    // yx, bias less for large block size
+    if (cm->tx_mode != ONLY_4X4) bias /= 2;
+
+    if (filt_direction <= 0 && filt_low != filt_mid) {
+      // Get Low filter error score. Re-run a cached level that is about to be
+      // accepted: tmp_level would otherwise describe some other filter level.
+      if (ss_err[filt_low] < 0 ||
+          (tmp_filt != filt_low && ss_err[filt_low] < (best_err + bias))) {
+        bilateral_success[filt_low] = search_bilateral_level(
+            sd, cpi, filt_low, partial_frame, tmp_level, &ss_err[filt_low]);
+        tmp_filt = filt_low;
+      }
+      // If value is close to the best so far then bias towards a lower loop
+      // filter value.
+      if (ss_err[filt_low] < (best_err + bias)) {
+        // Was it actually better than the previous best?
+        if (ss_err[filt_low] < best_err) best_err = ss_err[filt_low];
+        filt_best = filt_low;
+        memcpy(bilateral_level, tmp_level, sizeof(*tmp_level) * ntiles);
+      }
+    }
+
+    // Now look at filt_high
+    if (filt_direction >= 0 && filt_high != filt_mid) {
+      if (ss_err[filt_high] < 0 ||
+          (tmp_filt != filt_high && ss_err[filt_high] < (best_err - bias))) {
+        bilateral_success[filt_high] = search_bilateral_level(
+            sd, cpi, filt_high, partial_frame, tmp_level, &ss_err[filt_high]);
+        tmp_filt = filt_high;
+      }
+      // If value is significantly better than previous best, bias added against
+      // raising filter value
+      if (ss_err[filt_high] < (best_err - bias)) {
+        best_err = ss_err[filt_high];
+        filt_best = filt_high;
+        memcpy(bilateral_level, tmp_level, sizeof(*tmp_level) * ntiles);
+      }
+    }
+
+    // Half the step distance if the best filter value was the same as last time
+    if (filt_best == filt_mid) {
+      filter_step /= 2;
+      filt_direction = 0;
+    } else {
+      filt_direction = (filt_best < filt_mid) ? -1 : 1;
+      filt_mid = filt_best;
+    }
+  }
+
+  vpx_free(tmp_level);
+
+  // Update best error
+  best_err = ss_err[filt_best];
+
+  if (best_cost_ret) *best_cost_ret = best_err;
+  if (filter_best) *filter_best = filt_best;
+
+  return bilateral_success[filt_best];
+}
+
+// Mean of the samples in rows [v_start, v_end) x columns [h_start, h_end).
+static double find_average(uint8_t *src, int h_start, int h_end, int v_start,
+                           int v_end, int stride) {
+  const int count = (v_end - v_start) * (h_end - h_start);
+  uint64_t total = 0;
+  int row, col;
+  for (row = v_start; row < v_end; ++row)
+    for (col = h_start; col < h_end; ++col) total += src[row * stride + col];
+  return (double)total / count;
+}
+
+// For every pixel in rows [v_start, v_end) x columns [h_start, h_end), gathers
+// the dgd window around it (offset by the mean of dgd over the region) and
+// accumulates M[k] += Y[k] * X and H[k][l] += Y[k] * Y[l], X being the src pixel.
+static void compute_stats(uint8_t *dgd, uint8_t *src, int h_start, int h_end,
+                          int v_start, int v_end, int dgd_stride,
+                          int src_stride, double *M, double *H) {
+  const double avg =
+      find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride);
+  double Y[RESTORATION_WIN2];
+  int i, j, k, l;
+  memset(M, 0, sizeof(*M) * RESTORATION_WIN2);
+  memset(H, 0, sizeof(*H) * RESTORATION_WIN2 * RESTORATION_WIN2);
+  for (i = v_start; i < v_end; i++) {
+    for (j = h_start; j < h_end; j++) {
+      const double X = (double)src[i * src_stride + j] - avg;
+      int idx = 0;
+      for (k = -RESTORATION_HALFWIN; k <= RESTORATION_HALFWIN; k++)
+        for (l = -RESTORATION_HALFWIN; l <= RESTORATION_HALFWIN; l++)
+          Y[idx++] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
+      for (k = 0; k < RESTORATION_WIN2; ++k) {
+        double *const Hk = H + k * RESTORATION_WIN2;  // row k of H
+        M[k] += Y[k] * X;
+        Hk[k] += Y[k] * Y[k];
+        for (l = k + 1; l < RESTORATION_WIN2; ++l) {
+          const double value = Y[k] * Y[l];  // H is filled symmetrically
+          Hk[l] += value;
+          H[l * RESTORATION_WIN2 + k] += value;
+        }
+      }
+    }
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High bit-depth counterpart of find_average(): mean of uint16_t samples.
+static double find_average_highbd(uint16_t *src, int h_start, int h_end,
+                                  int v_start, int v_end, int stride) {
+  const int count = (v_end - v_start) * (h_end - h_start);
+  uint64_t total = 0;
+  int row, col;
+  for (row = v_start; row < v_end; ++row)
+    for (col = h_start; col < h_end; ++col) total += src[row * stride + col];
+  return (double)total / count;
+}
+
+// High bit-depth counterpart of compute_stats(): dgd8/src8 are converted with
+// CONVERT_TO_SHORTPTR and the same M / H sums are accumulated.
+static void compute_stats_highbd(uint8_t *dgd8, uint8_t *src8, int h_start,
+                                 int h_end, int v_start, int v_end,
+                                 int dgd_stride, int src_stride, double *M,
+                                 double *H) {
+  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+  uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
+  const double avg =
+      find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride);
+  double Y[RESTORATION_WIN2];
+  int i, j, k, l;
+
+  memset(M, 0, sizeof(*M) * RESTORATION_WIN2);
+  memset(H, 0, sizeof(*H) * RESTORATION_WIN2 * RESTORATION_WIN2);
+  for (i = v_start; i < v_end; i++) {
+    for (j = h_start; j < h_end; j++) {
+      const double X = (double)src[i * src_stride + j] - avg;
+      int idx = 0;
+      for (k = -RESTORATION_HALFWIN; k <= RESTORATION_HALFWIN; k++)
+        for (l = -RESTORATION_HALFWIN; l <= RESTORATION_HALFWIN; l++)
+          Y[idx++] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg;
+      for (k = 0; k < RESTORATION_WIN2; ++k) {
+        double *const Hk = H + k * RESTORATION_WIN2;  // row k of H
+        M[k] += Y[k] * X;
+        Hk[k] += Y[k] * Y[k];
+        for (l = k + 1; l < RESTORATION_WIN2; ++l) {
+          const double value = Y[k] * Y[l];  // H is filled symmetrically
+          Hk[l] += value;
+          H[l * RESTORATION_WIN2 + k] += value;
+        }
+      }
+    }
+  }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// Solves Ax = b in place (A and b are clobbered); returns 0 if a pivot is ~0.
+static int linsolve(int n, double *A, int stride, double *b, double *x) {
+  int i, j, k;
+  double c;
+  // One bubbling pass that moves the largest first-column entry up to row 0
+  for (i = n - 1; i > 0; i--) {
+    if (A[(i - 1) * stride] < A[i * stride]) {
+      for (j = 0; j < n; j++) {
+        c = A[i * stride + j];
+        A[i * stride + j] = A[(i - 1) * stride + j];
+        A[(i - 1) * stride + j] = c;
+      }
+      c = b[i];
+      b[i] = b[i - 1];
+      b[i - 1] = c;
+    }
+  }
+  // Forward elimination; each pivot is validated before it is used as a divisor
+  for (k = 0; k < n; k++) {
+    if (fabs(A[k * stride + k]) < 1e-10) return 0;
+    for (i = k; i < n - 1; i++) {
+      c = A[(i + 1) * stride + k] / A[k * stride + k];
+      for (j = 0; j < n; j++) A[(i + 1) * stride + j] -= c * A[k * stride + j];
+      b[i + 1] -= c * b[k];
+    }
+  }
+  // Backward substitution (every diagonal entry was checked above)
+  for (i = n - 1; i >= 0; i--) {
+    c = 0;
+    for (j = i + 1; j <= n - 1; j++) c += A[i * stride + j] * x[j];
+    x[i] = (b[i] - c) / A[i * stride + i];
+  }
+  return 1;
+}
+
+static INLINE int wrap_index(int i) {  // i >= HALFWIN1 maps to WIN - 1 - i
+  return (i < RESTORATION_HALFWIN1) ? i : RESTORATION_WIN - 1 - i;
+}
+
+// Fix vector b, update vector a. The solution is symmetric with taps summing to
+// 1; a is left unchanged when the linear system cannot be solved.
+static void update_a_sep_sym(double **Mc, double **Hc, double *a, double *b) {
+  int i, j;
+  double S[RESTORATION_WIN];
+  double A[RESTORATION_WIN], B[RESTORATION_WIN2];
+  int w, w2;
+  memset(A, 0, sizeof(A));
+  memset(B, 0, sizeof(B));
+  for (i = 0; i < RESTORATION_WIN; i++) {
+    for (j = 0; j < RESTORATION_WIN; ++j) {
+      const int jj = wrap_index(j);
+      A[jj] += Mc[i][j] * b[i];
+    }
+  }
+  for (i = 0; i < RESTORATION_WIN; i++) {
+    for (j = 0; j < RESTORATION_WIN; j++) {
+      int k, l;
+      for (k = 0; k < RESTORATION_WIN; ++k)
+        for (l = 0; l < RESTORATION_WIN; ++l) {
+          const int kk = wrap_index(k);
+          const int ll = wrap_index(l);
+          B[ll * RESTORATION_HALFWIN1 + kk] +=
+              Hc[j * RESTORATION_WIN + i][k * RESTORATION_WIN2 + l] * b[i] *
+              b[j];
+        }
+    }
+  }
+  // Normalization enforcement in the system of equations itself
+  w = RESTORATION_WIN;
+  w2 = (w >> 1) + 1;
+  for (i = 0; i < w2 - 1; ++i)
+    A[i] -=
+        A[w2 - 1] * 2 + B[i * w2 + w2 - 1] - 2 * B[(w2 - 1) * w2 + (w2 - 1)];
+  for (i = 0; i < w2 - 1; ++i)
+    for (j = 0; j < w2 - 1; ++j)
+      B[i * w2 + j] -= 2 * (B[i * w2 + (w2 - 1)] + B[(w2 - 1) * w2 + j] -
+                            2 * B[(w2 - 1) * w2 + (w2 - 1)]);
+  if (linsolve(w2 - 1, B, w2, A, S)) {
+    S[w2 - 1] = 1.0;
+    for (i = w2; i < w; ++i) {
+      S[i] = S[w - 1 - i];
+      S[w2 - 1] -= 2 * S[i];
+    }
+    memcpy(a, S, w * sizeof(*a));
+  }
+}
+
+// Fix vector a, update vector b. The solution is symmetric with taps summing to
+// 1; b is left unchanged when the linear system cannot be solved.
+static void update_b_sep_sym(double **Mc, double **Hc, double *a, double *b) {
+  int i, j;
+  double S[RESTORATION_WIN];
+  double A[RESTORATION_WIN], B[RESTORATION_WIN2];
+  int w, w2;
+  memset(A, 0, sizeof(A));
+  memset(B, 0, sizeof(B));
+  for (i = 0; i < RESTORATION_WIN; i++) {
+    const int ii = wrap_index(i);
+    for (j = 0; j < RESTORATION_WIN; j++) A[ii] += Mc[i][j] * a[j];
+  }
+
+  for (i = 0; i < RESTORATION_WIN; i++) {
+    for (j = 0; j < RESTORATION_WIN; j++) {
+      const int ii = wrap_index(i);
+      const int jj = wrap_index(j);
+      int k, l;
+      for (k = 0; k < RESTORATION_WIN; ++k)
+        for (l = 0; l < RESTORATION_WIN; ++l)
+          B[jj * RESTORATION_HALFWIN1 + ii] +=
+              Hc[i * RESTORATION_WIN + j][k * RESTORATION_WIN2 + l] * a[k] *
+              a[l];
+    }
+  }
+  // Normalization enforcement in the system of equations itself
+  w = RESTORATION_WIN;
+  w2 = RESTORATION_HALFWIN1;
+  for (i = 0; i < w2 - 1; ++i)
+    A[i] -=
+        A[w2 - 1] * 2 + B[i * w2 + w2 - 1] - 2 * B[(w2 - 1) * w2 + (w2 - 1)];
+  for (i = 0; i < w2 - 1; ++i)
+    for (j = 0; j < w2 - 1; ++j)
+      B[i * w2 + j] -= 2 * (B[i * w2 + (w2 - 1)] + B[(w2 - 1) * w2 + j] -
+                            2 * B[(w2 - 1) * w2 + (w2 - 1)]);
+  if (linsolve(w2 - 1, B, w2, A, S)) {
+    S[w2 - 1] = 1.0;
+    for (i = w2; i < w; ++i) {
+      S[i] = S[w - 1 - i];
+      S[w2 - 1] -= 2 * S[i];
+    }
+    memcpy(b, S, w * sizeof(*b));
+  }
+}
+
+// Alternately refines the two 1-D filters a and b from the statistics M and H,
+// starting both from the fixed kernel init_filt. Always returns 1.
+static int wiener_decompose_sep_sym(double *M, double *H, double *a,
+                                    double *b) {
+  static const double init_filt[RESTORATION_WIN] = {
+    0.035623, -0.127154, 0.211436, 0.760190, 0.211436, -0.127154, 0.035623,
+  };
+  double *Mc[RESTORATION_WIN];
+  double *Hc[RESTORATION_WIN2];
+  int row, col, pass;
+
+  // Mc[row] and Hc[row * WIN + col] point at sub-blocks inside M and H.
+  for (row = 0; row < RESTORATION_WIN; ++row) {
+    Mc[row] = M + row * RESTORATION_WIN;
+    for (col = 0; col < RESTORATION_WIN; ++col)
+      Hc[row * RESTORATION_WIN + col] =
+          H + row * RESTORATION_WIN * RESTORATION_WIN2 + col * RESTORATION_WIN;
+  }
+  memcpy(a, init_filt, sizeof(*a) * RESTORATION_WIN);
+  memcpy(b, init_filt, sizeof(*b) * RESTORATION_WIN);
+  for (pass = 1; pass < 10; ++pass) {  // nine alternating refinement passes
+    update_a_sep_sym(Mc, Hc, a, b);
+    update_b_sep_sym(Mc, Hc, a, b);
+  }
+  return 1;
+}
+
+// Evaluates Q - 2 * P (Q = x'*H*x, P = x'*M) for the separable filter rebuilt
+// from the quantized half-filters vfilt / hfilt, and returns it minus the same
+// quantity for the identity filter; a negative result means a lower value.
+static double compute_score(double *M, double *H, int *vfilt, int *hfilt) {
+  const int w = RESTORATION_WIN;
+  const int w2 = w * w;
+  const int mid = w2 >> 1;  // middle entry of the flattened w x w kernel
+  double a[RESTORATION_WIN], b[RESTORATION_WIN];
+  double ab[RESTORATION_WIN * RESTORATION_WIN];
+  double P = 0, Q = 0;
+  double Score, iScore;
+  int i, k, l;
+
+  // Expand the half-filters; the middle tap makes each filter sum to 1.
+  a[RESTORATION_HALFWIN] = b[RESTORATION_HALFWIN] = 1.0;
+  for (i = 0; i < RESTORATION_HALFWIN; ++i) {
+    a[i] = a[RESTORATION_WIN - i - 1] =
+        (double)vfilt[i] / RESTORATION_FILT_STEP;
+    b[i] = b[RESTORATION_WIN - i - 1] =
+        (double)hfilt[i] / RESTORATION_FILT_STEP;
+    a[RESTORATION_HALFWIN] -= 2 * a[i];
+    b[RESTORATION_HALFWIN] -= 2 * b[i];
+  }
+  // Outer product: ab[k][l] = b[k] * a[l]
+  for (k = 0; k < w; ++k)
+    for (l = 0; l < w; ++l) ab[k * w + l] = a[l] * b[k];
+  // P = ab' * M and Q = ab' * H * ab
+  for (k = 0; k < w2; ++k) {
+    P += ab[k] * M[k];
+    for (l = 0; l < w2; ++l) Q += ab[k] * H[k * w2 + l] * ab[l];
+  }
+  Score = Q - 2 * P;
+
+  // Identity filter: only the middle entry is non-zero (and equals 1).
+  iScore = H[mid * w2 + mid] - 2 * M[mid];
+
+  return Score - iScore;
+}
+
+#define CLIP(x, lo, hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x))
+#define RINT(x) ((x) < 0 ? (int)((x)-0.5) : (int)((x) + 0.5))
+
+// Rounds f[i] * RESTORATION_FILT_STEP to an integer for the first HALFWIN taps,
+// then clamps taps 0..2 (7-tap case) to their WIENER_FILT_TAPn_{MIN,MAX}V.
+static void quantize_sym_filter(double *f, int *fi) {
+  int tap;
+  for (tap = 0; tap < RESTORATION_HALFWIN; ++tap)
+    fi[tap] = RINT(f[tap] * RESTORATION_FILT_STEP);
+  fi[0] = CLIP(fi[0], WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_MAXV);
+  fi[1] = CLIP(fi[1], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV);
+  fi[2] = CLIP(fi[2], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV);
+}
+
+static int search_wiener_filter(const YV12_BUFFER_CONFIG *src, VP10_COMP *cpi,
+ int filter_level, int partial_frame,
+ int (*vfilter)[RESTORATION_HALFWIN],
+ int (*hfilter)[RESTORATION_HALFWIN],
+ int *process_tile, double *best_cost_ret) {
+ VP10_COMMON *const cm = &cpi->common;
+ RestorationInfo rsi;
+ int64_t err;
+ int bits;
+ double cost_wiener, cost_norestore;
+ MACROBLOCK *x = &cpi->td.mb;
+ double M[RESTORATION_WIN2];
+ double H[RESTORATION_WIN2 * RESTORATION_WIN2];
+ double vfilterd[RESTORATION_WIN], hfilterd[RESTORATION_WIN];
+ const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show;
+ const int width = cm->width;
+ const int height = cm->height;
+ const int src_stride = src->y_stride;
+ const int dgd_stride = dgd->y_stride;
+ double score;
+ int tile_idx, htile_idx, vtile_idx, tile_width, tile_height, nhtiles, nvtiles;
+ int h_start, h_end, v_start, v_end;
+ int i, j;
+ // Per-tile Wiener search at filter_level; returns 1 if it beats RESTORE_NONE.
+ const int tilesize = WIENER_TILESIZE;
+ const int ntiles = vp10_get_restoration_ntiles(tilesize, width, height);
+
+ assert(width == dgd->y_crop_width);
+ assert(height == dgd->y_crop_height);
+ assert(width == src->y_crop_width);
+ assert(height == src->y_crop_height);
+
+ vp10_get_restoration_tile_size(tilesize, width, height, &tile_width,
+ &tile_height, &nhtiles, &nvtiles);
+
+ // Make a copy of the unfiltered / processed recon buffer
+ vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
+ vp10_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filter_level,
+ 1, partial_frame);
+ vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_db);
+
+ rsi.restoration_type = RESTORE_NONE;
+ err = try_restoration_frame(src, cpi, &rsi, partial_frame);
+ bits = 0;
+ cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv,
+ (bits << (VP10_PROB_COST_SHIFT - 4)), err);
+
+ rsi.restoration_type = RESTORE_WIENER;
+ rsi.vfilter =
+ (int(*)[RESTORATION_HALFWIN])vpx_malloc(sizeof(*rsi.vfilter) * ntiles);
+ assert(rsi.vfilter != NULL);  // NOTE(review): unchecked under NDEBUG
+ rsi.hfilter =
+ (int(*)[RESTORATION_HALFWIN])vpx_malloc(sizeof(*rsi.hfilter) * ntiles);
+ assert(rsi.hfilter != NULL);
+ rsi.wiener_level = (int *)vpx_malloc(sizeof(*rsi.wiener_level) * ntiles);
+ assert(rsi.wiener_level != NULL);
+
+ // Compute best Wiener filters for each tile; process_tile[] records acceptance
+ for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
+ htile_idx = tile_idx % nhtiles;
+ vtile_idx = tile_idx / nhtiles;
+ h_start =
+ htile_idx * tile_width + ((htile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+ h_end = (htile_idx < nhtiles - 1) ? ((htile_idx + 1) * tile_width)
+ : (width - RESTORATION_HALFWIN);
+ v_start =
+ vtile_idx * tile_height + ((vtile_idx > 0) ? 0 : RESTORATION_HALFWIN);
+ v_end = (vtile_idx < nvtiles - 1) ? ((vtile_idx + 1) * tile_height)
+ : (height - RESTORATION_HALFWIN);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cm->use_highbitdepth)
+ compute_stats_highbd(dgd->y_buffer, src->y_buffer, h_start, h_end,
+ v_start, v_end, dgd_stride, src_stride, M, H);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ compute_stats(dgd->y_buffer, src->y_buffer, h_start, h_end, v_start,
+ v_end, dgd_stride, src_stride, M, H);
+
+ if (!wiener_decompose_sep_sym(M, H, vfilterd, hfilterd)) {  // always 1 today
+ for (i = 0; i < RESTORATION_HALFWIN; ++i)
+ rsi.vfilter[tile_idx][i] = rsi.hfilter[tile_idx][i] = 0;
+ process_tile[tile_idx] = 0;
+ continue;
+ }
+ quantize_sym_filter(vfilterd, rsi.vfilter[tile_idx]);
+ quantize_sym_filter(hfilterd, rsi.hfilter[tile_idx]);
+ process_tile[tile_idx] = 1;
+
+ // Filter score computes the value of the function x'*A*x - x'*b for the
+ // learned filter and compares it against identity filter. If there is no
+ // reduction in the function, the filter is reverted back to identity
+ score = compute_score(M, H, rsi.vfilter[tile_idx], rsi.hfilter[tile_idx]);
+ if (score > 0.0) {
+ for (i = 0; i < RESTORATION_HALFWIN; ++i)
+ rsi.vfilter[tile_idx][i] = rsi.hfilter[tile_idx][i] = 0;
+ process_tile[tile_idx] = 0;
+ continue;
+ }
+
+ for (j = 0; j < ntiles; ++j) rsi.wiener_level[j] = 0;
+ rsi.wiener_level[tile_idx] = 1;
+
+ err = try_restoration_frame(src, cpi, &rsi, partial_frame);
+ bits = 1 + WIENER_FILT_BITS;
+ cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv,
+ (bits << (VP10_PROB_COST_SHIFT - 4)), err);
+ if (cost_wiener >= cost_norestore) process_tile[tile_idx] = 0;
+ }
+ // Cost for Wiener filtering with all accepted tiles enabled together
+ bits = 0;
+ for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
+ bits += (process_tile[tile_idx] ? (WIENER_FILT_BITS + 1) : 1);
+ rsi.wiener_level[tile_idx] = process_tile[tile_idx];
+ }
+ err = try_restoration_frame(src, cpi, &rsi, partial_frame);
+ cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv,
+ (bits << (VP10_PROB_COST_SHIFT - 4)), err);
+
+ for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
+ if (process_tile[tile_idx] == 0) continue;  // outputs untouched for this tile
+ for (i = 0; i < RESTORATION_HALFWIN; ++i) {
+ vfilter[tile_idx][i] = rsi.vfilter[tile_idx][i];
+ hfilter[tile_idx][i] = rsi.hfilter[tile_idx][i];
+ }
+ }
+
+ vpx_free(rsi.vfilter);
+ vpx_free(rsi.hfilter);
+ vpx_free(rsi.wiener_level);
+
+ vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);  // undo loop filter
+ if (cost_wiener < cost_norestore) {
+ if (best_cost_ret) *best_cost_ret = cost_wiener;
+ return 1;
+ } else {
+ if (best_cost_ret) *best_cost_ret = cost_norestore;
+ return 0;
+ }
+}
+
+void vp10_pick_filter_restoration(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
+ LPF_PICK_METHOD method) {
+ VP10_COMMON *const cm = &cpi->common;
+ struct loopfilter *const lf = &cm->lf;
+ int wiener_success = 0;
+ int bilateral_success = 0;
+ double cost_bilateral = DBL_MAX;
+ double cost_wiener = DBL_MAX;
+ double cost_norestore = DBL_MAX;
+ int ntiles;
+ // Chooses lf->filter_level and fills cm->rst_info (type plus its parameters).
+ ntiles =
+ vp10_get_restoration_ntiles(BILATERAL_TILESIZE, cm->width, cm->height);
+ cm->rst_info.bilateral_level =
+ (int *)vpx_realloc(cm->rst_info.bilateral_level,
+ sizeof(*cm->rst_info.bilateral_level) * ntiles);
+ assert(cm->rst_info.bilateral_level != NULL);  // NOTE(review): assert-only
+
+ ntiles = vp10_get_restoration_ntiles(WIENER_TILESIZE, cm->width, cm->height);
+ cm->rst_info.wiener_level = (int *)vpx_realloc(
+ cm->rst_info.wiener_level, sizeof(*cm->rst_info.wiener_level) * ntiles);
+ assert(cm->rst_info.wiener_level != NULL);
+ cm->rst_info.vfilter = (int(*)[RESTORATION_HALFWIN])vpx_realloc(
+ cm->rst_info.vfilter, sizeof(*cm->rst_info.vfilter) * ntiles);
+ assert(cm->rst_info.vfilter != NULL);
+ cm->rst_info.hfilter = (int(*)[RESTORATION_HALFWIN])vpx_realloc(
+ cm->rst_info.hfilter, sizeof(*cm->rst_info.hfilter) * ntiles);
+ assert(cm->rst_info.hfilter != NULL);
+
+ lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0 : cpi->oxcf.sharpness;
+
+ if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) {
+ lf->filter_level = 0;
+ cm->rst_info.restoration_type = RESTORE_NONE;
+ } else if (method >= LPF_PICK_FROM_Q) {
+ const int min_filter_level = 0;
+ const int max_filter_level = vp10_get_max_filter_level(cpi);
+ const int q = vp10_ac_quant(cm->base_qindex, 0, cm->bit_depth);
+// These values were determined by linear fitting the result of the
+// searched level, filt_guess = q * 0.316206 + 3.87252
+#if CONFIG_VP9_HIGHBITDEPTH
+ int filt_guess;
+ switch (cm->bit_depth) {
+ case VPX_BITS_8:
+ filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
+ break;
+ case VPX_BITS_10:
+ filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20);
+ break;
+ case VPX_BITS_12:
+ filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
+ break;
+ default:
+ assert(0 &&
+ "bit_depth should be VPX_BITS_8, VPX_BITS_10 "
+ "or VPX_BITS_12");
+ return;
+ }
+#else
+ int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (cm->frame_type == KEY_FRAME) filt_guess -= 4;
+ lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
+ bilateral_success = search_bilateral_level(
+ sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,
+ cm->rst_info.bilateral_level, &cost_bilateral);
+ wiener_success = search_wiener_filter(
+ sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,
+ cm->rst_info.vfilter, cm->rst_info.hfilter, cm->rst_info.wiener_level,
+ &cost_wiener);
+ if (cost_bilateral < cost_wiener) {
+ if (bilateral_success)
+ cm->rst_info.restoration_type = RESTORE_BILATERAL;
+ else
+ cm->rst_info.restoration_type = RESTORE_NONE;
+ } else {
+ if (wiener_success)
+ cm->rst_info.restoration_type = RESTORE_WIENER;
+ else
+ cm->rst_info.restoration_type = RESTORE_NONE;
+ }
+ } else {
+ int blf_filter_level = -1;
+ bilateral_success = search_filter_bilateral_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, &blf_filter_level,
+ cm->rst_info.bilateral_level, &cost_bilateral);
+ lf->filter_level = vp10_search_filter_level(
+ sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, &cost_norestore);
+ wiener_success = search_wiener_filter(
+ sd, cpi, lf->filter_level, method == LPF_PICK_FROM_SUBIMAGE,
+ cm->rst_info.vfilter, cm->rst_info.hfilter, cm->rst_info.wiener_level,
+ &cost_wiener);
+ if (cost_bilateral < cost_wiener) {
+ lf->filter_level = blf_filter_level;
+ if (bilateral_success)
+ cm->rst_info.restoration_type = RESTORE_BILATERAL;
+ else
+ cm->rst_info.restoration_type = RESTORE_NONE;
+ } else {
+ if (wiener_success)
+ cm->rst_info.restoration_type = RESTORE_WIENER;
+ else
+ cm->rst_info.restoration_type = RESTORE_NONE;
+ }
+ // Note: cost_norestore is written by vp10_search_filter_level() above but is
+ // not used in the decision; only the bilateral and Wiener costs are compared,
+ // and a tie goes to the Wiener branch.
+ }
+ if (cm->rst_info.restoration_type != RESTORE_BILATERAL) {  // levels unused
+ vpx_free(cm->rst_info.bilateral_level);
+ cm->rst_info.bilateral_level = NULL;
+ }
+ if (cm->rst_info.restoration_type != RESTORE_WIENER) {  // filters unused
+ vpx_free(cm->rst_info.vfilter);
+ cm->rst_info.vfilter = NULL;
+ vpx_free(cm->rst_info.hfilter);
+ cm->rst_info.hfilter = NULL;
+ vpx_free(cm->rst_info.wiener_level);
+ cm->rst_info.wiener_level = NULL;
+ }
+}
diff --git a/av1/encoder/pickrst.h b/av1/encoder/pickrst.h
new file mode 100644
index 0000000..6d94cef
--- /dev/null
+++ b/av1/encoder/pickrst.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_PICKRST_H_
+#define VP10_ENCODER_PICKRST_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "av1/encoder/encoder.h"
+
+struct yv12_buffer_config;
+struct VP10_COMP;
+
+void vp10_pick_filter_restoration(const YV12_BUFFER_CONFIG *sd, VP10_COMP *cpi,
+ LPF_PICK_METHOD method);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_PICKRST_H_
diff --git a/av1/encoder/quantize.c b/av1/encoder/quantize.c
new file mode 100644
index 0000000..ed8a04b
--- /dev/null
+++ b/av1/encoder/quantize.c
@@ -0,0 +1,1260 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include "./vpx_dsp_rtcd.h"
+#include "aom_dsp/quantize.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+
+#include "av1/common/quant_common.h"
+#include "av1/common/scan.h"
+#include "av1/common/seg_common.h"
+
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/quantize.h"
+#include "av1/encoder/rd.h"
+
+#if CONFIG_NEW_QUANT
+// Non-uniform quantization of a single coefficient.
+//
+// The magnitude of coeffv (clamped to the int16 range) is compared against the
+// NUQ_KNOTS bin edges in cuml_bins_ptr; the index of the first edge it falls
+// below is the level. Magnitudes at or above the last edge are quantized with
+// the quant / quant_shift multipliers and offset by NUQ_KNOTS.
+//
+// On return *qcoeff_ptr holds the signed level and *dqcoeff_ptr the signed
+// reconstruction obtained from vp10_dequant_abscoeff_nuq(). Returns 1 when the
+// level is non-zero and 0 otherwise.
+static INLINE int quantize_coeff_nuq(
+    const tran_low_t coeffv, const int16_t quant, const int16_t quant_shift,
+    const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr) {
+  const int value = coeffv;
+  // Sign mask used for the branch-free abs / sign-restore below.
+  const int sign_mask = (value >> 31);
+  const int magnitude = (value ^ sign_mask) - sign_mask;
+  int rem = clamp(magnitude, INT16_MIN, INT16_MAX);
+  int knot = 0;
+  int level;
+
+  // Walk the knots until one lies strictly above the magnitude.
+  while (knot < NUQ_KNOTS && !(rem < cuml_bins_ptr[knot])) ++knot;
+
+  if (knot < NUQ_KNOTS) {
+    level = knot;
+  } else {
+    // Beyond the last knot: uniform quantization of the remainder.
+    rem -= cuml_bins_ptr[NUQ_KNOTS - 1];
+    level = NUQ_KNOTS + (((((rem * quant) >> 16) + rem) * quant_shift) >> 16);
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr = vp10_dequant_abscoeff_nuq(level, dequant, dequant_val);
+  *qcoeff_ptr = (level ^ sign_mask) - sign_mask;
+  if (*qcoeff_ptr < 0) *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+// Non-uniform quantization of a single coefficient, large-transform variant.
+//
+// Each bin edge from cuml_bins_ptr is scaled with
+// ROUND_POWER_OF_TWO(edge, 1 + logsizeby32) before the clamped magnitude is
+// compared against it. Magnitudes past the last edge use the quant /
+// quant_shift multipliers with a final shift of (15 - logsizeby32). The
+// reconstruction from vp10_dequant_abscoeff_nuq() is scaled the same way as
+// the edges.
+//
+// Writes the signed level to *qcoeff_ptr and the signed reconstruction to
+// *dqcoeff_ptr. Returns 1 when the level is non-zero and 0 otherwise.
+static INLINE int quantize_coeff_bigtx_nuq(
+    const tran_low_t coeffv, const int16_t quant, const int16_t quant_shift,
+    const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, int logsizeby32) {
+  const int value = coeffv;
+  const int sign_mask = (value >> 31);
+  const int magnitude = (value ^ sign_mask) - sign_mask;
+  int rem = clamp(magnitude, INT16_MIN, INT16_MAX);
+  int knot = 0;
+  int level;
+
+  // Walk the scaled knots until one lies strictly above the magnitude.
+  while (knot < NUQ_KNOTS &&
+         !(rem < ROUND_POWER_OF_TWO(cuml_bins_ptr[knot], 1 + logsizeby32)))
+    ++knot;
+
+  if (knot < NUQ_KNOTS) {
+    level = knot;
+  } else {
+    rem -= ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], 1 + logsizeby32);
+    level = NUQ_KNOTS + (((((rem * quant) >> 16) + rem) * quant_shift) >>
+                         (15 - logsizeby32));
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr = ROUND_POWER_OF_TWO(
+      vp10_dequant_abscoeff_nuq(level, dequant, dequant_val), 1 + logsizeby32);
+  *qcoeff_ptr = (level ^ sign_mask) - sign_mask;
+  if (*qcoeff_ptr < 0) *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+// "fp" flavour of the per-coefficient non-uniform quantizer.
+//
+// The clamped magnitude of coeffv is located among the NUQ_KNOTS bin edges in
+// cuml_bins_ptr. Past the last edge the remainder is multiplied by quant in
+// 64-bit arithmetic and shifted right by 16 (no quant_shift stage).
+//
+// Writes the signed level to *qcoeff_ptr and the signed reconstruction from
+// vp10_dequant_abscoeff_nuq() to *dqcoeff_ptr. Returns 1 when the level is
+// non-zero and 0 otherwise.
+static INLINE int quantize_coeff_fp_nuq(
+    const tran_low_t coeffv, const int16_t quant, const int16_t dequant,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr) {
+  const int value = coeffv;
+  const int sign_mask = (value >> 31);
+  const int magnitude = (value ^ sign_mask) - sign_mask;
+  const int clamped = clamp(magnitude, INT16_MIN, INT16_MAX);
+  int knot = 0;
+  int level;
+
+  while (knot < NUQ_KNOTS && !(clamped < cuml_bins_ptr[knot])) ++knot;
+
+  if (knot < NUQ_KNOTS) {
+    level = knot;
+  } else {
+    level = NUQ_KNOTS +
+            ((((int64_t)clamped - cuml_bins_ptr[NUQ_KNOTS - 1]) * quant) >> 16);
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr = vp10_dequant_abscoeff_nuq(level, dequant, dequant_val);
+  *qcoeff_ptr = (level ^ sign_mask) - sign_mask;
+  if (*qcoeff_ptr < 0) *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+// "fp" per-coefficient non-uniform quantizer, large-transform variant.
+//
+// Bin edges are scaled with ROUND_POWER_OF_TWO(edge, 1 + logsizeby32) before
+// the clamped magnitude is compared against them. Past the last edge the
+// remainder is multiplied by quant in 64-bit arithmetic and shifted right by
+// (15 - logsizeby32). The reconstruction from vp10_dequant_abscoeff_nuq() is
+// scaled the same way as the edges.
+//
+// Writes the signed level to *qcoeff_ptr and the signed reconstruction to
+// *dqcoeff_ptr. Returns 1 when the level is non-zero and 0 otherwise.
+static INLINE int quantize_coeff_bigtx_fp_nuq(
+    const tran_low_t coeffv, const int16_t quant, const int16_t dequant,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, int logsizeby32) {
+  const int value = coeffv;
+  const int sign_mask = (value >> 31);
+  const int magnitude = (value ^ sign_mask) - sign_mask;
+  const int clamped = clamp(magnitude, INT16_MIN, INT16_MAX);
+  int knot = 0;
+  int level;
+
+  while (knot < NUQ_KNOTS &&
+         !(clamped < ROUND_POWER_OF_TWO(cuml_bins_ptr[knot], 1 + logsizeby32)))
+    ++knot;
+
+  if (knot < NUQ_KNOTS) {
+    level = knot;
+  } else {
+    level = NUQ_KNOTS +
+            ((((int64_t)clamped -
+               ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1],
+                                  1 + logsizeby32)) *
+              quant) >>
+             (15 - logsizeby32));
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr = ROUND_POWER_OF_TWO(
+      vp10_dequant_abscoeff_nuq(level, dequant, dequant_val), 1 + logsizeby32);
+  *qcoeff_ptr = (level ^ sign_mask) - sign_mask;
+  if (*qcoeff_ptr < 0) *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+// Quantizes only the DC coefficient (coeff_ptr[0]) with quantize_coeff_nuq().
+//
+// Both output buffers are zeroed for n_coeffs entries first. When skip_block
+// is non-zero nothing else is written. *eob_ptr is set to 1 when the DC level
+// is non-zero and to 0 otherwise.
+void quantize_dc_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                     int skip_block, const int16_t quant,
+                     const int16_t quant_shift, const int16_t dequant,
+                     const tran_low_t *cuml_bins_ptr,
+                     const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+                     tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
+  int dc_is_nonzero = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (!skip_block) {
+    dc_is_nonzero = quantize_coeff_nuq(coeff_ptr[0], quant, quant_shift,
+                                       dequant, cuml_bins_ptr, dequant_val,
+                                       &qcoeff_ptr[0], &dqcoeff_ptr[0]);
+  }
+  *eob_ptr = dc_is_nonzero ? 1 : 0;
+}
+
+// Quantizes only the DC coefficient (coeff_ptr[0]) with
+// quantize_coeff_fp_nuq().
+//
+// Both output buffers are zeroed for n_coeffs entries first. When skip_block
+// is non-zero nothing else is written. *eob_ptr is set to 1 when the DC level
+// is non-zero and to 0 otherwise.
+void quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                        int skip_block, const int16_t quant,
+                        const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+                        const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+                        tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
+  int dc_is_nonzero = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (!skip_block) {
+    dc_is_nonzero =
+        quantize_coeff_fp_nuq(coeff_ptr[0], quant, dequant, cuml_bins_ptr,
+                              dequant_val, &qcoeff_ptr[0], &dqcoeff_ptr[0]);
+  }
+  *eob_ptr = dc_is_nonzero ? 1 : 0;
+}
+
+// Quantizes only the DC coefficient (coeff_ptr[0]) with
+// quantize_coeff_bigtx_nuq(), passing logsizeby32 = 0.
+//
+// Both output buffers are zeroed for n_coeffs entries first. When skip_block
+// is non-zero nothing else is written. *eob_ptr is set to 1 when the DC level
+// is non-zero and to 0 otherwise.
+void quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                           int skip_block, const int16_t quant,
+                           const int16_t quant_shift, const int16_t dequant,
+                           const tran_low_t *cuml_bins_ptr,
+                           const tran_low_t *dequant_val,
+                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                           uint16_t *eob_ptr) {
+  int dc_is_nonzero = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (!skip_block) {
+    dc_is_nonzero = quantize_coeff_bigtx_nuq(
+        coeff_ptr[0], quant, quant_shift, dequant, cuml_bins_ptr, dequant_val,
+        &qcoeff_ptr[0], &dqcoeff_ptr[0], 0);
+  }
+  *eob_ptr = dc_is_nonzero ? 1 : 0;
+}
+
+// Quantizes only the DC coefficient (coeff_ptr[0]) with
+// quantize_coeff_bigtx_fp_nuq(), passing logsizeby32 = 0.
+//
+// Both output buffers are zeroed for n_coeffs entries first. When skip_block
+// is non-zero nothing else is written. *eob_ptr is set to 1 when the DC level
+// is non-zero and to 0 otherwise.
+void quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                              int skip_block, const int16_t quant,
+                              const int16_t dequant,
+                              const tran_low_t *cuml_bins_ptr,
+                              const tran_low_t *dequant_val,
+                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                              uint16_t *eob_ptr) {
+  int dc_is_nonzero = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (!skip_block) {
+    dc_is_nonzero = quantize_coeff_bigtx_fp_nuq(
+        coeff_ptr[0], quant, dequant, cuml_bins_ptr, dequant_val,
+        &qcoeff_ptr[0], &dqcoeff_ptr[0], 0);
+  }
+  *eob_ptr = dc_is_nonzero ? 1 : 0;
+}
+
+// Non-uniform quantization of a whole block in scan order (C reference).
+//
+// Output buffers are zeroed for n_coeffs entries, then (unless skip_block is
+// set) every scan position is quantized with quantize_coeff_nuq(). Index 0 of
+// quant_ptr / quant_shift_ptr / dequant_ptr is used for raster position 0 and
+// index 1 for every other position; band[i] selects the bin and dequant tables
+// for scan position i. *eob_ptr receives one past the last scan position that
+// produced a non-zero level (0 if there is none).
+void quantize_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                    int skip_block, const int16_t *quant_ptr,
+                    const int16_t *quant_shift_ptr, const int16_t *dequant_ptr,
+                    const cuml_bins_type_nuq *cuml_bins_ptr,
+                    const dequant_val_type_nuq *dequant_val,
+                    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                    uint16_t *eob_ptr, const int16_t *scan,
+                    const uint8_t *band) {
+  int pos;
+  int end_of_block = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+
+  for (pos = 0; pos < n_coeffs; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = (rc != 0);
+    const int nonzero = quantize_coeff_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+        dequant_ptr[is_ac], cuml_bins_ptr[band[pos]], dequant_val[band[pos]],
+        &qcoeff_ptr[rc], &dqcoeff_ptr[rc]);
+    if (nonzero) end_of_block = pos + 1;
+  }
+  *eob_ptr = end_of_block;
+}
+
+// "fp" non-uniform quantization of a whole block in scan order (C reference).
+//
+// Output buffers are zeroed for n_coeffs entries, then (unless skip_block is
+// set) every scan position is quantized with quantize_coeff_fp_nuq(). Index 0
+// of quant_ptr / dequant_ptr is used for raster position 0 and index 1 for
+// every other position; band[i] selects the bin and dequant tables for scan
+// position i. *eob_ptr receives one past the last scan position that produced
+// a non-zero level (0 if there is none).
+void quantize_fp_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                       int skip_block, const int16_t *quant_ptr,
+                       const int16_t *dequant_ptr,
+                       const cuml_bins_type_nuq *cuml_bins_ptr,
+                       const dequant_val_type_nuq *dequant_val,
+                       tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                       uint16_t *eob_ptr, const int16_t *scan,
+                       const uint8_t *band) {
+  int pos;
+  int end_of_block = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+
+  for (pos = 0; pos < n_coeffs; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = (rc != 0);
+    const int nonzero = quantize_coeff_fp_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], dequant_ptr[is_ac],
+        cuml_bins_ptr[band[pos]], dequant_val[band[pos]], &qcoeff_ptr[rc],
+        &dqcoeff_ptr[rc]);
+    if (nonzero) end_of_block = pos + 1;
+  }
+  *eob_ptr = end_of_block;
+}
+
+// Non-uniform quantization of a whole block in scan order using the
+// large-transform helper quantize_coeff_bigtx_nuq() with logsizeby32 = 0.
+//
+// Output buffers are zeroed for n_coeffs entries first; nothing else is
+// written when skip_block is set. Index 0 of quant_ptr / quant_shift_ptr /
+// dequant_ptr is used for raster position 0 and index 1 otherwise; band[i]
+// selects the bin and dequant tables for scan position i. *eob_ptr receives
+// one past the last scan position with a non-zero level (0 if there is none).
+void quantize_32x32_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                          int skip_block, const int16_t *quant_ptr,
+                          const int16_t *quant_shift_ptr,
+                          const int16_t *dequant_ptr,
+                          const cuml_bins_type_nuq *cuml_bins_ptr,
+                          const dequant_val_type_nuq *dequant_val,
+                          tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                          uint16_t *eob_ptr, const int16_t *scan,
+                          const uint8_t *band) {
+  int pos;
+  int end_of_block = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+
+  for (pos = 0; pos < n_coeffs; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = (rc != 0);
+    const int nonzero = quantize_coeff_bigtx_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+        dequant_ptr[is_ac], cuml_bins_ptr[band[pos]], dequant_val[band[pos]],
+        &qcoeff_ptr[rc], &dqcoeff_ptr[rc], 0);
+    if (nonzero) end_of_block = pos + 1;
+  }
+  *eob_ptr = end_of_block;
+}
+
+// "fp" non-uniform quantization of a whole block in scan order using the
+// large-transform helper quantize_coeff_bigtx_fp_nuq() with logsizeby32 = 0.
+//
+// Output buffers are zeroed for n_coeffs entries first; nothing else is
+// written when skip_block is set. Index 0 of quant_ptr / dequant_ptr is used
+// for raster position 0 and index 1 otherwise; band[i] selects the bin and
+// dequant tables for scan position i. *eob_ptr receives one past the last
+// scan position with a non-zero level (0 if there is none).
+void quantize_32x32_fp_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                             int skip_block, const int16_t *quant_ptr,
+                             const int16_t *dequant_ptr,
+                             const cuml_bins_type_nuq *cuml_bins_ptr,
+                             const dequant_val_type_nuq *dequant_val,
+                             tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                             uint16_t *eob_ptr, const int16_t *scan,
+                             const uint8_t *band) {
+  int pos;
+  int end_of_block = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+
+  for (pos = 0; pos < n_coeffs; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = (rc != 0);
+    const int nonzero = quantize_coeff_bigtx_fp_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], dequant_ptr[is_ac],
+        cuml_bins_ptr[band[pos]], dequant_val[band[pos]], &qcoeff_ptr[rc],
+        &dqcoeff_ptr[rc], 0);
+    if (nonzero) end_of_block = pos + 1;
+  }
+  *eob_ptr = end_of_block;
+}
+#endif // CONFIG_NEW_QUANT
+
+// Produces the result of an all-zero block: reports an end-of-block of 0 and
+// clears n_coeffs entries of both the quantized and dequantized buffers.
+void vp10_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr,
+                        tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
+  const size_t q_bytes = n_coeffs * sizeof(qcoeff_ptr[0]);
+  const size_t dq_bytes = n_coeffs * sizeof(dqcoeff_ptr[0]);
+
+  memset(qcoeff_ptr, 0, q_bytes);
+  memset(dqcoeff_ptr, 0, dq_bytes);
+  *eob_ptr = 0;
+}
+
+// Uniform-signature wrapper around the "fp" quantizers.
+//
+// Unpacks the quantizer tables from `p`, the dequantizer from `pd` and the
+// scan tables from `sc`, then calls vp10_quantize_fp() when
+// qparam->log_scale is 0 and vp10_quantize_fp_32x32() for any other value.
+// skip_block is always passed as 0.
+void vp10_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                             const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
+                             const MACROBLOCKD_PLANE *pd,
+                             tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+                             const scan_order *sc, const QUANT_PARAM *qparam) {
+  // The skip_block argument is obsolete; the kernels are always run.
+  const int never_skip = 0;
+
+  if (qparam->log_scale != 0) {
+    vp10_quantize_fp_32x32(coeff_ptr, n_coeffs, never_skip, p->zbin,
+                           p->round_fp, p->quant_fp, p->quant_shift, qcoeff_ptr,
+                           dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+                           sc->iscan);
+    return;
+  }
+
+  vp10_quantize_fp(coeff_ptr, n_coeffs, never_skip, p->zbin, p->round_fp,
+                   p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+                   pd->dequant, eob_ptr, sc->scan, sc->iscan);
+}
+
+// Uniform-signature wrapper around the "b" quantizers.
+//
+// Unpacks the quantizer tables from `p`, the dequantizer from `pd` and the
+// scan tables from `sc`, then calls vpx_quantize_b() when qparam->log_scale
+// is 0 and vpx_quantize_b_32x32() for any other value. skip_block is always
+// passed as 0.
+void vp10_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                            const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
+                            const MACROBLOCKD_PLANE *pd,
+                            tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+                            const scan_order *sc, const QUANT_PARAM *qparam) {
+  // The skip_block argument is obsolete; the kernels are always run.
+  const int never_skip = 0;
+
+  if (qparam->log_scale != 0) {
+    vpx_quantize_b_32x32(coeff_ptr, n_coeffs, never_skip, p->zbin, p->round,
+                         p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+                         pd->dequant, eob_ptr, sc->scan, sc->iscan);
+    return;
+  }
+
+  vpx_quantize_b(coeff_ptr, n_coeffs, never_skip, p->zbin, p->round, p->quant,
+                 p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant,
+                 eob_ptr, sc->scan, sc->iscan);
+}
+
+// Uniform-signature wrapper around the DC-only quantizers.
+//
+// Calls vpx_quantize_dc() when qparam->log_scale is 0 and
+// vpx_quantize_dc_32x32() otherwise, using p->round, p->quant_fp[0] and
+// pd->dequant[0]. The scan order `sc` is unused, and n_coeffs is only
+// forwarded to the log_scale == 0 kernel. skip_block is always passed as 0.
+void vp10_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                             const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
+                             const MACROBLOCKD_PLANE *pd,
+                             tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+                             const scan_order *sc, const QUANT_PARAM *qparam) {
+  // The skip_block argument is obsolete; the kernels are always run.
+  const int never_skip = 0;
+  (void)sc;
+
+  if (qparam->log_scale != 0) {
+    vpx_quantize_dc_32x32(coeff_ptr, never_skip, p->round, p->quant_fp[0],
+                          qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr);
+    return;
+  }
+
+  vpx_quantize_dc(coeff_ptr, (int)n_coeffs, never_skip, p->round,
+                  p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, pd->dequant[0],
+                  eob_ptr);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Uniform-signature wrapper around vp10_highbd_quantize_fp().
+//
+// Unpacks the quantizer tables from `p`, the dequantizer from `pd` and the
+// scan tables from `sc`, and forwards qparam->log_scale as the last argument.
+// skip_block is always passed as 0.
+void vp10_highbd_quantize_fp_facade(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+    tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc,
+    const QUANT_PARAM *qparam) {
+  // The skip_block argument is obsolete; the kernel is always run.
+  const int never_skip = 0;
+  const int log_scale = qparam->log_scale;
+
+  vp10_highbd_quantize_fp(coeff_ptr, n_coeffs, never_skip, p->zbin,
+                          p->round_fp, p->quant_fp, p->quant_shift, qcoeff_ptr,
+                          dqcoeff_ptr, pd->dequant, eob_ptr, sc->scan,
+                          sc->iscan, log_scale);
+}
+
+// Uniform-signature wrapper around vp10_highbd_quantize_b().
+//
+// Unpacks the quantizer tables from `p`, the dequantizer from `pd` and the
+// scan tables from `sc`, and forwards qparam->log_scale as the last argument.
+// skip_block is always passed as 0.
+void vp10_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
+                                   intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+                                   tran_low_t *qcoeff_ptr,
+                                   const MACROBLOCKD_PLANE *pd,
+                                   tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+                                   const scan_order *sc,
+                                   const QUANT_PARAM *qparam) {
+  // The skip_block argument is obsolete; the kernel is always run.
+  const int never_skip = 0;
+  const int log_scale = qparam->log_scale;
+
+  vp10_highbd_quantize_b(coeff_ptr, n_coeffs, never_skip, p->zbin, p->round,
+                         p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr,
+                         pd->dequant, eob_ptr, sc->scan, sc->iscan, log_scale);
+}
+
+// Uniform-signature wrapper around vp10_highbd_quantize_dc().
+//
+// Forwards p->round, p->quant_fp[0], pd->dequant[0] and qparam->log_scale.
+// The scan order `sc` is unused and skip_block is always passed as 0.
+void vp10_highbd_quantize_dc_facade(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+    tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc,
+    const QUANT_PARAM *qparam) {
+  // The skip_block argument is obsolete; the kernel is always run.
+  const int never_skip = 0;
+  const int log_scale = qparam->log_scale;
+  (void)sc;
+
+  vp10_highbd_quantize_dc(coeff_ptr, (int)n_coeffs, never_skip, p->round,
+                          p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr,
+                          pd->dequant[0], eob_ptr, log_scale);
+}
+
+#if CONFIG_NEW_QUANT
+// High-bitdepth non-uniform quantization of a single coefficient.
+//
+// The magnitude of coeffv is held in 64 bits and compared against the
+// NUQ_KNOTS bin edges in cuml_bins_ptr; the index of the first edge it falls
+// below is the level. Magnitudes at or above the last edge are quantized with
+// the quant / quant_shift multipliers and offset by NUQ_KNOTS.
+//
+// Writes the signed level to *qcoeff_ptr and the signed reconstruction from
+// vp10_dequant_abscoeff_nuq() to *dqcoeff_ptr. Returns 1 when the level is
+// non-zero and 0 otherwise.
+static INLINE int highbd_quantize_coeff_nuq(
+    const tran_low_t coeffv, const int16_t quant, const int16_t quant_shift,
+    const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr) {
+  const int value = coeffv;
+  const int sign_mask = (value >> 31);
+  const int magnitude = (value ^ sign_mask) - sign_mask;
+  int64_t rem = clamp(magnitude, INT32_MIN, INT32_MAX);
+  int knot = 0;
+  int level;
+
+  // Walk the knots until one lies strictly above the magnitude.
+  while (knot < NUQ_KNOTS && !(rem < cuml_bins_ptr[knot])) ++knot;
+
+  if (knot < NUQ_KNOTS) {
+    level = knot;
+  } else {
+    // Beyond the last knot: uniform quantization of the remainder.
+    rem -= cuml_bins_ptr[NUQ_KNOTS - 1];
+    level = NUQ_KNOTS + (((((rem * quant) >> 16) + rem) * quant_shift) >> 16);
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr = vp10_dequant_abscoeff_nuq(level, dequant, dequant_val);
+  *qcoeff_ptr = (level ^ sign_mask) - sign_mask;
+  if (*qcoeff_ptr < 0) *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+// High-bitdepth "fp" non-uniform quantization of a single coefficient.
+//
+// The 64-bit magnitude of coeffv is located among the NUQ_KNOTS bin edges in
+// cuml_bins_ptr. Past the last edge the remainder is multiplied by quant and
+// shifted right by 16 (no quant_shift stage).
+//
+// Writes the signed level to *qcoeff_ptr and the signed reconstruction from
+// vp10_dequant_abscoeff_nuq() to *dqcoeff_ptr. Returns 1 when the level is
+// non-zero and 0 otherwise.
+static INLINE int highbd_quantize_coeff_fp_nuq(
+    const tran_low_t coeffv, const int16_t quant, const int16_t dequant,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr) {
+  const int value = coeffv;
+  const int sign_mask = (value >> 31);
+  const int magnitude = (value ^ sign_mask) - sign_mask;
+  const int64_t wide_mag = clamp(magnitude, INT32_MIN, INT32_MAX);
+  int knot = 0;
+  int level;
+
+  while (knot < NUQ_KNOTS && !(wide_mag < cuml_bins_ptr[knot])) ++knot;
+
+  if (knot < NUQ_KNOTS) {
+    level = knot;
+  } else {
+    level = NUQ_KNOTS +
+            (((wide_mag - cuml_bins_ptr[NUQ_KNOTS - 1]) * quant) >> 16);
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr = vp10_dequant_abscoeff_nuq(level, dequant, dequant_val);
+  *qcoeff_ptr = (level ^ sign_mask) - sign_mask;
+  if (*qcoeff_ptr < 0) *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+// High-bitdepth "fp" non-uniform quantizer for one coefficient,
+// large-transform variant.
+//
+// Bin edges are scaled with ROUND_POWER_OF_TWO(edge, 1 + logsizeby32) before
+// the 64-bit magnitude is compared against them. Past the last edge the
+// remainder is multiplied by quant and shifted right by (15 - logsizeby32).
+// The reconstruction from vp10_dequant_abscoeff_nuq() is scaled the same way
+// as the edges.
+//
+// Writes the signed level to *qcoeff_ptr and the signed reconstruction to
+// *dqcoeff_ptr. Returns 1 when the level is non-zero and 0 otherwise.
+static INLINE int highbd_quantize_coeff_bigtx_fp_nuq(
+    const tran_low_t coeffv, const int16_t quant, const int16_t dequant,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, int logsizeby32) {
+  const int value = coeffv;
+  const int sign_mask = (value >> 31);
+  const int magnitude = (value ^ sign_mask) - sign_mask;
+  const int64_t wide_mag = clamp(magnitude, INT32_MIN, INT32_MAX);
+  int knot = 0;
+  int level;
+
+  while (knot < NUQ_KNOTS &&
+         !(wide_mag <
+           ROUND_POWER_OF_TWO(cuml_bins_ptr[knot], 1 + logsizeby32)))
+    ++knot;
+
+  if (knot < NUQ_KNOTS) {
+    level = knot;
+  } else {
+    level = NUQ_KNOTS +
+            (((wide_mag - ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1],
+                                             1 + logsizeby32)) *
+              quant) >>
+             (15 - logsizeby32));
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr = ROUND_POWER_OF_TWO(
+      vp10_dequant_abscoeff_nuq(level, dequant, dequant_val), 1 + logsizeby32);
+  *qcoeff_ptr = (level ^ sign_mask) - sign_mask;
+  if (*qcoeff_ptr < 0) *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+// High-bitdepth non-uniform quantizer for one coefficient, large-transform
+// variant.
+//
+// Bin edges are scaled with ROUND_POWER_OF_TWO(edge, 1 + logsizeby32) before
+// the 64-bit magnitude is compared against them. Magnitudes past the last
+// edge use the quant / quant_shift multipliers with a final shift of
+// (15 - logsizeby32). The reconstruction from vp10_dequant_abscoeff_nuq() is
+// scaled the same way as the edges.
+//
+// Writes the signed level to *qcoeff_ptr and the signed reconstruction to
+// *dqcoeff_ptr. Returns 1 when the level is non-zero and 0 otherwise.
+static INLINE int highbd_quantize_coeff_bigtx_nuq(
+    const tran_low_t coeffv, const int16_t quant, const int16_t quant_shift,
+    const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, int logsizeby32) {
+  const int value = coeffv;
+  const int sign_mask = (value >> 31);
+  const int magnitude = (value ^ sign_mask) - sign_mask;
+  int64_t rem = clamp(magnitude, INT32_MIN, INT32_MAX);
+  int knot = 0;
+  int level;
+
+  while (knot < NUQ_KNOTS &&
+         !(rem < ROUND_POWER_OF_TWO(cuml_bins_ptr[knot], 1 + logsizeby32)))
+    ++knot;
+
+  if (knot < NUQ_KNOTS) {
+    level = knot;
+  } else {
+    rem -= ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], 1 + logsizeby32);
+    level = NUQ_KNOTS + (((((rem * quant) >> 16) + rem) * quant_shift) >>
+                         (15 - logsizeby32));
+  }
+
+  if (level == 0) {
+    *qcoeff_ptr = 0;
+    *dqcoeff_ptr = 0;
+    return 0;
+  }
+
+  *dqcoeff_ptr = ROUND_POWER_OF_TWO(
+      vp10_dequant_abscoeff_nuq(level, dequant, dequant_val), 1 + logsizeby32);
+  *qcoeff_ptr = (level ^ sign_mask) - sign_mask;
+  if (*qcoeff_ptr < 0) *dqcoeff_ptr = -*dqcoeff_ptr;
+  return 1;
+}
+
+// Quantizes only the DC coefficient (coeff_ptr[0]) with
+// highbd_quantize_coeff_nuq().
+//
+// Both output buffers are zeroed for n_coeffs entries first. When skip_block
+// is non-zero nothing else is written. *eob_ptr is set to 1 when the DC level
+// is non-zero and to 0 otherwise.
+void highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                            int skip_block, const int16_t quant,
+                            const int16_t quant_shift, const int16_t dequant,
+                            const tran_low_t *cuml_bins_ptr,
+                            const tran_low_t *dequant_val,
+                            tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                            uint16_t *eob_ptr) {
+  int dc_is_nonzero = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (!skip_block) {
+    dc_is_nonzero = highbd_quantize_coeff_nuq(
+        coeff_ptr[0], quant, quant_shift, dequant, cuml_bins_ptr, dequant_val,
+        &qcoeff_ptr[0], &dqcoeff_ptr[0]);
+  }
+  *eob_ptr = dc_is_nonzero ? 1 : 0;
+}
+
+// Quantizes only the DC coefficient (coeff_ptr[0]) with
+// highbd_quantize_coeff_fp_nuq().
+//
+// Both output buffers are zeroed for n_coeffs entries first. When skip_block
+// is non-zero nothing else is written. *eob_ptr is set to 1 when the DC level
+// is non-zero and to 0 otherwise.
+void highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                               int skip_block, const int16_t quant,
+                               const int16_t dequant,
+                               const tran_low_t *cuml_bins_ptr,
+                               const tran_low_t *dequant_val,
+                               tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                               uint16_t *eob_ptr) {
+  int dc_is_nonzero = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (!skip_block) {
+    dc_is_nonzero = highbd_quantize_coeff_fp_nuq(
+        coeff_ptr[0], quant, dequant, cuml_bins_ptr, dequant_val,
+        &qcoeff_ptr[0], &dqcoeff_ptr[0]);
+  }
+  *eob_ptr = dc_is_nonzero ? 1 : 0;
+}
+
+// High-bitdepth non-uniform quantization of a whole block in scan order
+// (C reference), built on highbd_quantize_coeff_nuq().
+//
+// Output buffers are zeroed for n_coeffs entries first; nothing else is
+// written when skip_block is set. Index 0 of quant_ptr / quant_shift_ptr /
+// dequant_ptr is used for raster position 0 and index 1 otherwise; band[i]
+// selects the bin and dequant tables for scan position i. *eob_ptr receives
+// one past the last scan position with a non-zero level (0 if there is none).
+void highbd_quantize_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                           int skip_block, const int16_t *quant_ptr,
+                           const int16_t *quant_shift_ptr,
+                           const int16_t *dequant_ptr,
+                           const cuml_bins_type_nuq *cuml_bins_ptr,
+                           const dequant_val_type_nuq *dequant_val,
+                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                           uint16_t *eob_ptr, const int16_t *scan,
+                           const uint8_t *band) {
+  int pos;
+  int end_of_block = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+
+  for (pos = 0; pos < n_coeffs; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = (rc != 0);
+    const int nonzero = highbd_quantize_coeff_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+        dequant_ptr[is_ac], cuml_bins_ptr[band[pos]], dequant_val[band[pos]],
+        &qcoeff_ptr[rc], &dqcoeff_ptr[rc]);
+    if (nonzero) end_of_block = pos + 1;
+  }
+  *eob_ptr = end_of_block;
+}
+
+// High-bitdepth non-uniform quantization of a whole block in scan order using
+// highbd_quantize_coeff_bigtx_nuq() with logsizeby32 = 0.
+//
+// Output buffers are zeroed for n_coeffs entries first; nothing else is
+// written when skip_block is set. Index 0 of quant_ptr / quant_shift_ptr /
+// dequant_ptr is used for raster position 0 and index 1 otherwise; band[i]
+// selects the bin and dequant tables for scan position i. *eob_ptr receives
+// one past the last scan position with a non-zero level (0 if there is none).
+void highbd_quantize_32x32_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                                 int skip_block, const int16_t *quant_ptr,
+                                 const int16_t *quant_shift_ptr,
+                                 const int16_t *dequant_ptr,
+                                 const cuml_bins_type_nuq *cuml_bins_ptr,
+                                 const dequant_val_type_nuq *dequant_val,
+                                 tran_low_t *qcoeff_ptr,
+                                 tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+                                 const int16_t *scan, const uint8_t *band) {
+  int pos;
+  int end_of_block = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+
+  for (pos = 0; pos < n_coeffs; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = (rc != 0);
+    const int nonzero = highbd_quantize_coeff_bigtx_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], quant_shift_ptr[is_ac],
+        dequant_ptr[is_ac], cuml_bins_ptr[band[pos]], dequant_val[band[pos]],
+        &qcoeff_ptr[rc], &dqcoeff_ptr[rc], 0);
+    if (nonzero) end_of_block = pos + 1;
+  }
+  *eob_ptr = end_of_block;
+}
+
+// High-bitdepth "fp" non-uniform quantization of a whole block in scan order
+// using highbd_quantize_coeff_bigtx_fp_nuq() with logsizeby32 = 0.
+//
+// Output buffers are zeroed for n_coeffs entries first; nothing else is
+// written when skip_block is set. Index 0 of quant_ptr / dequant_ptr is used
+// for raster position 0 and index 1 otherwise; band[i] selects the bin and
+// dequant tables for scan position i. *eob_ptr receives one past the last
+// scan position with a non-zero level (0 if there is none).
+void highbd_quantize_32x32_fp_nuq_c(const tran_low_t *coeff_ptr,
+                                    intptr_t n_coeffs, int skip_block,
+                                    const int16_t *quant_ptr,
+                                    const int16_t *dequant_ptr,
+                                    const cuml_bins_type_nuq *cuml_bins_ptr,
+                                    const dequant_val_type_nuq *dequant_val,
+                                    tran_low_t *qcoeff_ptr,
+                                    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+                                    const int16_t *scan, const uint8_t *band) {
+  int pos;
+  int end_of_block = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+
+  for (pos = 0; pos < n_coeffs; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = (rc != 0);
+    const int nonzero = highbd_quantize_coeff_bigtx_fp_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], dequant_ptr[is_ac],
+        cuml_bins_ptr[band[pos]], dequant_val[band[pos]], &qcoeff_ptr[rc],
+        &dqcoeff_ptr[rc], 0);
+    if (nonzero) end_of_block = pos + 1;
+  }
+  *eob_ptr = end_of_block;
+}
+
+// High-bitdepth "fp" non-uniform quantization of a whole block in scan order
+// (C reference), built on highbd_quantize_coeff_fp_nuq().
+//
+// Output buffers are zeroed for n_coeffs entries first; nothing else is
+// written when skip_block is set. Index 0 of quant_ptr / dequant_ptr is used
+// for raster position 0 and index 1 otherwise; band[i] selects the bin and
+// dequant tables for scan position i. *eob_ptr receives one past the last
+// scan position with a non-zero level (0 if there is none).
+void highbd_quantize_fp_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                              int skip_block, const int16_t *quant_ptr,
+                              const int16_t *dequant_ptr,
+                              const cuml_bins_type_nuq *cuml_bins_ptr,
+                              const dequant_val_type_nuq *dequant_val,
+                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                              uint16_t *eob_ptr, const int16_t *scan,
+                              const uint8_t *band) {
+  int pos;
+  int end_of_block = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+
+  for (pos = 0; pos < n_coeffs; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = (rc != 0);
+    const int nonzero = highbd_quantize_coeff_fp_nuq(
+        coeff_ptr[rc], quant_ptr[is_ac], dequant_ptr[is_ac],
+        cuml_bins_ptr[band[pos]], dequant_val[band[pos]], &qcoeff_ptr[rc],
+        &dqcoeff_ptr[rc]);
+    if (nonzero) end_of_block = pos + 1;
+  }
+  *eob_ptr = end_of_block;
+}
+
+// Quantizes only the DC coefficient (coeff_ptr[0]) with
+// highbd_quantize_coeff_bigtx_nuq(), passing logsizeby32 = 0.
+//
+// Both output buffers are zeroed for n_coeffs entries first. When skip_block
+// is non-zero nothing else is written. *eob_ptr is set to 1 when the DC level
+// is non-zero and to 0 otherwise.
+void highbd_quantize_dc_32x32_nuq(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t quant, const int16_t quant_shift, const int16_t dequant,
+    const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+    tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
+  int dc_is_nonzero = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (!skip_block) {
+    dc_is_nonzero = highbd_quantize_coeff_bigtx_nuq(
+        coeff_ptr[0], quant, quant_shift, dequant, cuml_bins_ptr, dequant_val,
+        &qcoeff_ptr[0], &dqcoeff_ptr[0], 0);
+  }
+  *eob_ptr = dc_is_nonzero ? 1 : 0;
+}
+
+// Quantizes only the DC coefficient (coeff_ptr[0]) with
+// highbd_quantize_coeff_bigtx_fp_nuq(), passing logsizeby32 = 0.
+//
+// Both output buffers are zeroed for n_coeffs entries first. When skip_block
+// is non-zero nothing else is written. *eob_ptr is set to 1 when the DC level
+// is non-zero and to 0 otherwise.
+void highbd_quantize_dc_32x32_fp_nuq(
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+    const int16_t quant, const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+    const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) {
+  int dc_is_nonzero = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (!skip_block) {
+    dc_is_nonzero = highbd_quantize_coeff_bigtx_fp_nuq(
+        coeff_ptr[0], quant, dequant, cuml_bins_ptr, dequant_val,
+        &qcoeff_ptr[0], &dqcoeff_ptr[0], 0);
+  }
+  *eob_ptr = dc_is_nonzero ? 1 : 0;
+}
+#endif // CONFIG_NEW_QUANT
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// C reference for the "fp" quantizer.
+//
+// Output buffers are zeroed for n_coeffs entries, then (unless skip_block is
+// set) every scan position is quantized: the coefficient magnitude plus its
+// rounding term is clamped to the int16 range, multiplied by the quantizer
+// (and by the quantization-matrix weight when CONFIG_AOM_QM is enabled) and
+// shifted down. Index 0 of round_ptr / quant_ptr / dequant_ptr is used for
+// raster position 0 and index 1 otherwise. *eob_ptr receives one past the
+// last scan position with a non-zero level. zbin_ptr, quant_shift_ptr and
+// iscan are accepted but unused.
+void vp10_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                        int skip_block, const int16_t *zbin_ptr,
+                        const int16_t *round_ptr, const int16_t *quant_ptr,
+                        const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+                        tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+                        uint16_t *eob_ptr, const int16_t *scan,
+                        const int16_t *iscan
+#if CONFIG_AOM_QM
+                        ,
+                        const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
+#endif
+                        ) {
+  int pos;
+  int last_nonzero = -1;
+  // TODO(jingning) Decide the need of these arguments after the
+  // quantization process is completed.
+  (void)zbin_ptr;
+  (void)quant_shift_ptr;
+  (void)iscan;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(dqcoeff_ptr[0]));
+
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+
+  for (pos = 0; pos < n_coeffs; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = (rc != 0);
+    const int coeff = coeff_ptr[rc];
+    const int sign_mask = (coeff >> 31);
+    const int magnitude = (coeff ^ sign_mask) - sign_mask;
+    const int64_t rounded =
+        clamp(magnitude + round_ptr[is_ac], INT16_MIN, INT16_MAX);
+#if CONFIG_AOM_QM
+    const qm_val_t wt = qm_ptr[rc];
+    const qm_val_t iwt = iqm_ptr[rc];
+    // Dequantizer scaled by the inverse matrix weight, with rounding.
+    const int dequant =
+        (dequant_ptr[is_ac] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+    const int level =
+        (int)((rounded * wt * quant_ptr[is_ac]) >> (16 + AOM_QM_BITS));
+#else
+    const int dequant = dequant_ptr[is_ac];
+    const int level = (int)((rounded * quant_ptr[is_ac]) >> 16);
+#endif
+
+    qcoeff_ptr[rc] = (level ^ sign_mask) - sign_mask;
+    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
+
+    if (level) last_nonzero = pos;
+  }
+  *eob_ptr = last_nonzero + 1;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// C reference for the high-bitdepth "fp" quantizer.
+//
+// Output buffers are zeroed for `count` entries, then (unless skip_block is
+// set) every scan position is quantized: magnitude plus rounding term is
+// multiplied by the quantizer (and by the quantization-matrix weight when
+// CONFIG_AOM_QM is enabled) and shifted right by (16 - log_scale); the
+// dequantized value is divided by (1 << log_scale). Index 0 of round_ptr /
+// quant_ptr / dequant_ptr is used for raster position 0 and index 1
+// otherwise. *eob_ptr receives one past the last scan position with a
+// non-zero level. zbin_ptr, quant_shift_ptr and iscan are accepted but unused.
+void vp10_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
+                               int skip_block, const int16_t *zbin_ptr,
+                               const int16_t *round_ptr,
+                               const int16_t *quant_ptr,
+                               const int16_t *quant_shift_ptr,
+                               tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                               const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                               const int16_t *scan, const int16_t *iscan,
+#if CONFIG_AOM_QM
+                               const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
+#endif
+                               int log_scale) {
+  int pos;
+  int last_nonzero = -1;
+  const int scale = 1 << log_scale;
+  const int shift = 16 - log_scale;
+  // TODO(jingning) Decide the need of these arguments after the
+  // quantization process is completed.
+  (void)zbin_ptr;
+  (void)quant_shift_ptr;
+  (void)iscan;
+
+  memset(qcoeff_ptr, 0, count * sizeof(qcoeff_ptr[0]));
+  memset(dqcoeff_ptr, 0, count * sizeof(dqcoeff_ptr[0]));
+
+  if (skip_block) {
+    *eob_ptr = 0;
+    return;
+  }
+
+  for (pos = 0; pos < count; ++pos) {
+    const int rc = scan[pos];
+    const int is_ac = (rc != 0);
+    const int coeff = coeff_ptr[rc];
+    const int sign_mask = (coeff >> 31);
+    const int magnitude = (coeff ^ sign_mask) - sign_mask;
+    const int64_t rounded = magnitude + round_ptr[is_ac];
+#if CONFIG_AOM_QM
+    const qm_val_t wt = qm_ptr[rc];
+    const qm_val_t iwt = iqm_ptr[rc];
+    // Dequantizer scaled by the inverse matrix weight, with rounding.
+    const int dequant =
+        (dequant_ptr[is_ac] * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
+    const uint32_t level =
+        (uint32_t)((rounded * quant_ptr[is_ac] * wt) >> (shift + AOM_QM_BITS));
+#else
+    const int dequant = dequant_ptr[is_ac];
+    const uint32_t level = (uint32_t)((rounded * quant_ptr[is_ac]) >> shift);
+#endif
+
+    qcoeff_ptr[rc] = (tran_low_t)((level ^ sign_mask) - sign_mask);
+    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / scale;
+
+    if (level) last_nonzero = pos;
+  }
+  *eob_ptr = last_nonzero + 1;
+}
+
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// TODO(jingning) Refactor this file and combine functions with similar
+// operations.
+// C reference for the "fp" quantizer at 32x32 scale.
+//
+// Output buffers are zeroed for n_coeffs entries. Unless skip_block is set,
+// each scan position whose magnitude reaches a quarter of the dequantizer
+// (weighted by the quantization matrix when CONFIG_AOM_QM is enabled) gets
+// half the rounding term added, is clamped to the int16 range, multiplied by
+// the quantizer and shifted right by 15; the dequantized value is halved.
+// Coefficients below the threshold stay zero. Index 0 of round_ptr /
+// quant_ptr / dequant_ptr is used for raster position 0 and index 1
+// otherwise. *eob_ptr receives one past the last scan position with a
+// non-zero level. zbin_ptr, quant_shift_ptr and iscan are accepted but unused.
+void vp10_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                              int skip_block, const int16_t *zbin_ptr,
+                              const int16_t *round_ptr,
+                              const int16_t *quant_ptr,
+                              const int16_t *quant_shift_ptr,
+                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                              const int16_t *scan, const int16_t *iscan
+#if CONFIG_AOM_QM
+                              ,
+                              const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
+#endif
+                              ) {
+  int i, eob = -1;
+  (void)zbin_ptr;
+  (void)quant_shift_ptr;
+  (void)iscan;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  if (!skip_block) {
+    for (i = 0; i < n_coeffs; i++) {
+      const int rc = scan[i];
+      const int coeff = coeff_ptr[rc];
+#if CONFIG_AOM_QM
+      const qm_val_t wt = qm_ptr[rc];
+      const qm_val_t iwt = iqm_ptr[rc];
+      // Dequantizer scaled by the inverse matrix weight, with rounding.
+      const int dequant =
+          (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+          AOM_QM_BITS;
+      int64_t tmp = 0;
+#endif
+      const int coeff_sign = (coeff >> 31);
+      int tmp32 = 0;
+      int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+#if CONFIG_AOM_QM
+      if (abs_coeff * wt >= (dequant_ptr[rc != 0] << (AOM_QM_BITS - 2))) {
+#else
+      if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) {
+#endif
+        abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+        abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+#if CONFIG_AOM_QM
+        tmp = abs_coeff * wt;
+        // Shift the 64-bit product first and narrow afterwards. A cast binds
+        // tighter than >>, so casting the product to int before the shift
+        // would truncate it while it can still exceed the int range.
+        tmp32 = (int)((tmp * quant_ptr[rc != 0]) >> (AOM_QM_BITS + 15));
+        qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+        dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 2;
+#else
+        tmp32 = (abs_coeff * quant_ptr[rc != 0]) >> 15;
+        qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+        dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant_ptr[rc != 0]) / 2;
+#endif
+      }
+
+      if (tmp32) eob = i;
+    }
+  }
+  *eob_ptr = eob + 1;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// C reference for the high-bitdepth "b" quantizer.
+//
+// zbin and round values are scaled down by log_scale (rounded) when
+// log_scale > 0. Output buffers are zeroed for n_coeffs entries. Unless
+// skip_block is set, a pre-scan walks the scan order backwards and drops the
+// trailing run of coefficients that lie strictly inside (-zbin, zbin); the
+// remaining positions are quantized with the quant / quant_shift multipliers
+// when their magnitude reaches the zero-bin (weighted by the quantization
+// matrix when CONFIG_AOM_QM is enabled). Index 0 of the per-coefficient
+// tables is used for raster position 0 and index 1 otherwise. *eob_ptr
+// receives one past the last scan position with a non-zero level. iscan is
+// accepted but unused.
+void vp10_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+                              int skip_block, const int16_t *zbin_ptr,
+                              const int16_t *round_ptr,
+                              const int16_t *quant_ptr,
+                              const int16_t *quant_shift_ptr,
+                              tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
+                              const int16_t *scan, const int16_t *iscan,
+#if CONFIG_AOM_QM
+                              const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr,
+#endif
+                              int log_scale) {
+  int i, non_zero_count = (int)n_coeffs, eob = -1;
+  int zbins[2] = { zbin_ptr[0], zbin_ptr[1] };
+  int round[2] = { round_ptr[0], round_ptr[1] };
+  int nzbins[2];
+  int scale = 1;
+  int shift = 16;
+  (void)iscan;
+
+  if (log_scale > 0) {
+    zbins[0] = ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale);
+    zbins[1] = ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale);
+    round[0] = ROUND_POWER_OF_TWO(round_ptr[0], log_scale);
+    round[1] = ROUND_POWER_OF_TWO(round_ptr[1], log_scale);
+    scale = 1 << log_scale;
+    shift = 16 - log_scale;
+  }
+
+  nzbins[0] = zbins[0] * -1;
+  nzbins[1] = zbins[1] * -1;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  if (!skip_block) {
+    // Pre-scan pass: trim the trailing run of coefficients inside the
+    // zero-bin.
+    // NOTE(review): this test is not weighted by the quantization matrix,
+    // unlike the threshold in the quantization pass below -- confirm that is
+    // intended when CONFIG_AOM_QM is enabled.
+    for (i = (int)n_coeffs - 1; i >= 0; i--) {
+      const int rc = scan[i];
+      const int coeff = coeff_ptr[rc];
+
+      if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0])
+        non_zero_count--;
+      else
+        break;
+    }
+
+    // Quantization pass: only the first non_zero_count scan positions can
+    // produce a non-zero level.
+    for (i = 0; i < non_zero_count; i++) {
+      const int rc = scan[i];
+      const int coeff = coeff_ptr[rc];
+#if CONFIG_AOM_QM
+      // Declared in this loop because this is where the weights are used;
+      // declaring them in the pre-scan loop leaves them out of scope here.
+      const qm_val_t wt = qm_ptr[rc];
+      const qm_val_t iwt = iqm_ptr[rc];
+      const int dequant =
+          (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
+          AOM_QM_BITS;
+#endif
+      const int coeff_sign = (coeff >> 31);
+      const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+#if CONFIG_AOM_QM
+      if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) {
+#else
+
+      if (abs_coeff >= zbins[rc != 0]) {
+#endif
+        const int64_t tmp1 = abs_coeff + round[rc != 0];
+        const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
+#if CONFIG_AOM_QM
+        const uint32_t abs_qcoeff = (uint32_t)(
+            (tmp2 * wt * quant_shift_ptr[rc != 0]) >> (AOM_QM_BITS + shift));
+        qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+        dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / scale;
+#else
+        const uint32_t abs_qcoeff =
+            (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> shift);
+        qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+        dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / scale;
+#endif  // CONFIG_AOM_QM
+        if (abs_qcoeff) eob = i;
+      }
+    }
+  }
+  *eob_ptr = eob + 1;
+}
+#endif
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// Quantizes only the DC coefficient (coeff_ptr[0]) of a high-bit-depth block.
+// Both output arrays are cleared over n_coeffs entries; unless skip_block is
+// set, position 0 is then quantized with round_ptr[0]/quant and dequantized
+// with dequant_ptr, both adjusted by log_scale. *eob_ptr is set to 1 when the
+// quantized level is non-zero and to 0 otherwise.
+void vp10_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
+                             int skip_block, const int16_t *round_ptr,
+                             const int16_t quant, tran_low_t *qcoeff_ptr,
+                             tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
+                             uint16_t *eob_ptr, const int log_scale) {
+  int end_of_block = 0;
+
+  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
+  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
+
+  if (!skip_block) {
+    const int dc = coeff_ptr[0];
+    // All ones for a negative coefficient, zero otherwise.
+    const int sign_mask = (dc >> 31);
+    const int magnitude = (dc ^ sign_mask) - sign_mask;
+    const int64_t rounded = magnitude + round_ptr[0];
+    const uint32_t level =
+        (uint32_t)((rounded * quant) >> (16 - log_scale));
+
+    // Re-apply the sign to the quantized magnitude.
+    qcoeff_ptr[0] = (tran_low_t)((level ^ sign_mask) - sign_mask);
+    dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / (1 << log_scale);
+    if (level) end_of_block = 1;
+  }
+  *eob_ptr = end_of_block;
+}
+#endif
+
+// Converts the divisor d into a multiplier/shift pair. With
+// l = floor(log2(d)) for d >= 1, *quant receives
+// 1 + (1 << (16 + l)) / d - (1 << 16) and *shift receives 1 << (16 - l).
+static void invert_quant(int16_t *quant, int16_t *shift, int d) {
+  unsigned remaining = d;
+  int log2_d = 0;
+  int reciprocal;
+
+  // Count how many halvings bring d down to 1.
+  while (remaining > 1) {
+    remaining >>= 1;
+    ++log2_d;
+  }
+
+  reciprocal = 1 + (1 << (16 + log2_d)) / d;
+  *quant = (int16_t)(reciprocal - (1 << 16));
+  *shift = 1 << (16 - log2_d);
+}
+
+// Returns the zero-bin factor for qindex q: 64 when q == 0, otherwise 84 when
+// the DC quantizer step is below a bit-depth dependent threshold and 80 when
+// it is not. Returns -1 (after asserting) for an unknown bit depth.
+static int get_qzbin_factor(int q, vpx_bit_depth_t bit_depth) {
+  const int quant = vp10_dc_quant(q, 0, bit_depth);
+#if CONFIG_VP9_HIGHBITDEPTH
+  int threshold;
+
+  switch (bit_depth) {
+    case VPX_BITS_8: threshold = 148; break;
+    case VPX_BITS_10: threshold = 592; break;
+    case VPX_BITS_12: threshold = 2368; break;
+    default:
+      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+      return -1;
+  }
+#else
+  const int threshold = 148;
+
+  (void)bit_depth;
+#endif
+  if (q == 0) return 64;
+  return quant < threshold ? 84 : 80;
+}
+
+// Fills the per-qindex quantizer tables in cpi->quants and the matching
+// dequantizer tables cpi->y_dequant / cpi->uv_dequant for every qindex in
+// [0, QINDEX_RANGE). In each row, entry 0 holds the DC value, entry 1 the AC
+// value, and entries 2..7 repeat the AC value so a row spans 8 lanes.
+void vp10_init_quantizer(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  QUANTS *const quants = &cpi->quants;
+  const int round_factor_fp = 64;
+  int lane, qidx, step;
+#if CONFIG_NEW_QUANT
+  int band, profile;
+#endif
+
+  for (qidx = 0; qidx < QINDEX_RANGE; qidx++) {
+    const int zbin_factor = get_qzbin_factor(qidx, cm->bit_depth);
+    const int round_factor = (qidx == 0) ? 64 : 48;
+
+    // lane 0: DC, lane 1: AC.
+    for (lane = 0; lane < 2; ++lane) {
+      // y
+      if (lane == 0)
+        step = vp10_dc_quant(qidx, cm->y_dc_delta_q, cm->bit_depth);
+      else
+        step = vp10_ac_quant(qidx, 0, cm->bit_depth);
+      invert_quant(&quants->y_quant[qidx][lane],
+                   &quants->y_quant_shift[qidx][lane], step);
+      quants->y_quant_fp[qidx][lane] = (1 << 16) / step;
+      quants->y_round_fp[qidx][lane] = (round_factor_fp * step) >> 7;
+      quants->y_zbin[qidx][lane] = ROUND_POWER_OF_TWO(zbin_factor * step, 7);
+      quants->y_round[qidx][lane] = (round_factor * step) >> 7;
+      cpi->y_dequant[qidx][lane] = step;
+
+      // uv
+      if (lane == 0)
+        step = vp10_dc_quant(qidx, cm->uv_dc_delta_q, cm->bit_depth);
+      else
+        step = vp10_ac_quant(qidx, cm->uv_ac_delta_q, cm->bit_depth);
+      invert_quant(&quants->uv_quant[qidx][lane],
+                   &quants->uv_quant_shift[qidx][lane], step);
+      quants->uv_quant_fp[qidx][lane] = (1 << 16) / step;
+      quants->uv_round_fp[qidx][lane] = (round_factor_fp * step) >> 7;
+      quants->uv_zbin[qidx][lane] = ROUND_POWER_OF_TWO(zbin_factor * step, 7);
+      quants->uv_round[qidx][lane] = (round_factor * step) >> 7;
+      cpi->uv_dequant[qidx][lane] = step;
+    }
+
+#if CONFIG_NEW_QUANT
+    for (profile = 0; profile < QUANT_PROFILES; profile++) {
+      for (band = 0; band < COEF_BANDS; band++) {
+        // Band 0 uses the DC step, every other band the AC step.
+        const int y_step = cpi->y_dequant[qidx][band != 0];
+        const int uv_step = cpi->uv_dequant[qidx][band != 0];
+        vp10_get_dequant_val_nuq(y_step, qidx, band,
+                                 cpi->y_dequant_val_nuq[profile][qidx][band],
+                                 quants->y_cuml_bins_nuq[profile][qidx][band],
+                                 profile);
+        vp10_get_dequant_val_nuq(uv_step, qidx, band,
+                                 cpi->uv_dequant_val_nuq[profile][qidx][band],
+                                 quants->uv_cuml_bins_nuq[profile][qidx][band],
+                                 profile);
+      }
+    }
+#endif  // CONFIG_NEW_QUANT
+
+    // Replicate the AC entry across the remaining lanes (8: SIMD width).
+    for (lane = 2; lane < 8; lane++) {
+      quants->y_quant[qidx][lane] = quants->y_quant[qidx][1];
+      quants->y_quant_fp[qidx][lane] = quants->y_quant_fp[qidx][1];
+      quants->y_round_fp[qidx][lane] = quants->y_round_fp[qidx][1];
+      quants->y_quant_shift[qidx][lane] = quants->y_quant_shift[qidx][1];
+      quants->y_zbin[qidx][lane] = quants->y_zbin[qidx][1];
+      quants->y_round[qidx][lane] = quants->y_round[qidx][1];
+      cpi->y_dequant[qidx][lane] = cpi->y_dequant[qidx][1];
+
+      quants->uv_quant[qidx][lane] = quants->uv_quant[qidx][1];
+      quants->uv_quant_fp[qidx][lane] = quants->uv_quant_fp[qidx][1];
+      quants->uv_round_fp[qidx][lane] = quants->uv_round_fp[qidx][1];
+      quants->uv_quant_shift[qidx][lane] = quants->uv_quant_shift[qidx][1];
+      quants->uv_zbin[qidx][lane] = quants->uv_zbin[qidx][1];
+      quants->uv_round[qidx][lane] = quants->uv_round[qidx][1];
+      cpi->uv_dequant[qidx][lane] = cpi->uv_dequant[qidx][1];
+    }
+  }
+}
+
+// Points the per-plane quantizer state of macroblock x (and of its embedded
+// MACROBLOCKD) at the rows of the precomputed tables selected by the qindex
+// of segment_id. Plane 0 uses the y tables, planes 1 and 2 the uv tables.
+// Also updates x->skip_block, x->q_index, the error-per-bit value and the
+// motion-estimation constants.
+void vp10_init_plane_quantizers(const VP10_COMP *cpi, MACROBLOCK *x,
+ int segment_id) {
+ const VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const QUANTS *const quants = &cpi->quants;
+ const int qindex = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+ const int rdmult = vp10_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
+ int i;
+#if CONFIG_AOM_QM
+ int minqm = cm->min_qmlevel;
+ int maxqm = cm->max_qmlevel;
+ // Quant matrix only depends on the base QP so there is only one set per frame
+ // NOTE(review): `lossless` is not declared anywhere in this function, so this
+ // branch cannot compile as written; presumably a per-segment lossless flag
+ // was intended - confirm and declare it before enabling CONFIG_AOM_QM.
+ int qmlevel = (lossless || cm->using_qmatrix == 0)
+ ? NUM_QM_LEVELS - 1
+ : aom_get_qmlevel(cm->base_qindex, minqm, maxqm);
+#endif
+#if CONFIG_NEW_QUANT
+ int dq;
+#endif
+
+ // Y
+ x->plane[0].quant = quants->y_quant[qindex];
+ x->plane[0].quant_fp = quants->y_quant_fp[qindex];
+ x->plane[0].round_fp = quants->y_round_fp[qindex];
+ x->plane[0].quant_shift = quants->y_quant_shift[qindex];
+ x->plane[0].zbin = quants->y_zbin[qindex];
+ x->plane[0].round = quants->y_round[qindex];
+#if CONFIG_AOM_QM
+ // Copy the matrix set with index 0 of the chosen level into this segment's
+ // slot.
+ memcpy(&xd->plane[0].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][0],
+ sizeof(cm->gqmatrix[qmlevel][0]));
+ memcpy(&xd->plane[0].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][0],
+ sizeof(cm->giqmatrix[qmlevel][0]));
+#endif
+ xd->plane[0].dequant = cpi->y_dequant[qindex];
+#if CONFIG_NEW_QUANT
+ for (dq = 0; dq < QUANT_PROFILES; dq++) {
+ x->plane[0].cuml_bins_nuq[dq] = quants->y_cuml_bins_nuq[dq][qindex];
+ xd->plane[0].dequant_val_nuq[dq] = cpi->y_dequant_val_nuq[dq][qindex];
+ }
+#endif // CONFIG_NEW_QUANT
+
+ // Squared zbin values for the DC (index 0) and AC (index 1) entries.
+ x->plane[0].quant_thred[0] = x->plane[0].zbin[0] * x->plane[0].zbin[0];
+ x->plane[0].quant_thred[1] = x->plane[0].zbin[1] * x->plane[0].zbin[1];
+
+ // UV
+ for (i = 1; i < 3; i++) {
+ x->plane[i].quant = quants->uv_quant[qindex];
+ x->plane[i].quant_fp = quants->uv_quant_fp[qindex];
+ x->plane[i].round_fp = quants->uv_round_fp[qindex];
+ x->plane[i].quant_shift = quants->uv_quant_shift[qindex];
+ x->plane[i].zbin = quants->uv_zbin[qindex];
+ x->plane[i].round = quants->uv_round[qindex];
+#if CONFIG_AOM_QM
+ // Both chroma planes use the matrix set with index 1 of the chosen level.
+ memcpy(&xd->plane[i].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][1],
+ sizeof(cm->gqmatrix[qmlevel][1]));
+ memcpy(&xd->plane[i].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][1],
+ sizeof(cm->giqmatrix[qmlevel][1]));
+#endif
+ xd->plane[i].dequant = cpi->uv_dequant[qindex];
+#if CONFIG_NEW_QUANT
+ for (dq = 0; dq < QUANT_PROFILES; dq++) {
+ x->plane[i].cuml_bins_nuq[dq] = quants->uv_cuml_bins_nuq[dq][qindex];
+ xd->plane[i].dequant_val_nuq[dq] = cpi->uv_dequant_val_nuq[dq][qindex];
+ }
+#endif // CONFIG_NEW_QUANT
+
+ x->plane[i].quant_thred[0] = x->plane[i].zbin[0] * x->plane[i].zbin[0];
+ x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1];
+ }
+
+ // skip_block mirrors whether the SEG_LVL_SKIP feature is active for the
+ // segment.
+ x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
+ x->q_index = qindex;
+
+ set_error_per_bit(x, rdmult);
+
+ vp10_initialize_me_consts(cpi, x, x->q_index);
+}
+
+// Initializes the plane quantizers of the encoder's own macroblock
+// (cpi->td.mb) using the segment id stored in its first mode-info entry.
+void vp10_frame_init_quantizer(VP10_COMP *cpi) {
+  MACROBLOCK *const mb = &cpi->td.mb;
+  const int segment_id = mb->e_mbd.mi[0]->mbmi.segment_id;
+
+  vp10_init_plane_quantizers(cpi, mb, segment_id);
+}
+
+// Stores q as the frame's base qindex and resets the three delta_q values to
+// zero. The quantizer has to be reinitialized with vp10_init_quantizer() if
+// any delta_q changes.
+void vp10_set_quantizer(VP10_COMMON *cm, int q) {
+  cm->base_qindex = q;
+  cm->y_dc_delta_q = cm->uv_dc_delta_q = cm->uv_ac_delta_q = 0;
+}
+
+// Maps the external 0-63 quantizer scale onto the internal qindex range.
+// Entries step by 4 up to 244 and finish with 249 and 255.
+static const int quantizer_to_qindex[] = {
+  0,   4,   8,   12,  16,  20,  24,  28,   // 0-7
+  32,  36,  40,  44,  48,  52,  56,  60,   // 8-15
+  64,  68,  72,  76,  80,  84,  88,  92,   // 16-23
+  96,  100, 104, 108, 112, 116, 120, 124,  // 24-31
+  128, 132, 136, 140, 144, 148, 152, 156,  // 32-39
+  160, 164, 168, 172, 176, 180, 184, 188,  // 40-47
+  192, 196, 200, 204, 208, 212, 216, 220,  // 48-55
+  224, 228, 232, 236, 240, 244, 249, 255,  // 56-63
+};
+
+// Table lookup; the caller must pass a quantizer in [0, 63].
+int vp10_quantizer_to_qindex(int quantizer) {
+  return quantizer_to_qindex[quantizer];
+}
+
+// Returns the smallest quantizer whose table entry is >= qindex, or 63 when
+// qindex exceeds every entry.
+int vp10_qindex_to_quantizer(int qindex) {
+  const int last =
+      (int)(sizeof(quantizer_to_qindex) / sizeof(quantizer_to_qindex[0])) - 1;
+  int quantizer = 0;
+
+  while (quantizer < last && quantizer_to_qindex[quantizer] < qindex)
+    ++quantizer;
+
+  return quantizer;
+}
diff --git a/av1/encoder/quantize.h b/av1/encoder/quantize.h
new file mode 100644
index 0000000..6b1e739
--- /dev/null
+++ b/av1/encoder/quantize.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_QUANTIZE_H_
+#define VP10_ENCODER_QUANTIZE_H_
+
+#include "./vpx_config.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/scan.h"
+#include "av1/encoder/block.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Per-call options handed to the quantizer facade functions through their
+// `qparam` argument.
+// NOTE(review): log_scale is presumably the same log2 down-scaling that the
+// low-level quantizers take as `log_scale` - confirm against the facades.
+typedef struct QUANT_PARAM { int log_scale; } QUANT_PARAM;
+
+// Common signature shared by the vp10_*quantize_*_facade() functions declared
+// below, so that any of them can be called through one function pointer.
+// Takes n_coeffs input coefficients plus the encoder-side (p) and
+// decoder-side (pd) plane state, and writes qcoeff_ptr, dqcoeff_ptr and
+// *eob_ptr.
+typedef void (*VP10_QUANT_FACADE)(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc,
+ const QUANT_PARAM *qparam);
+
+// Precomputed quantizer tables, one row of 8 int16_t values per qindex, kept
+// separately for luma (y_*) and chroma (uv_*).
+typedef struct {
+#if CONFIG_NEW_QUANT
+ DECLARE_ALIGNED(
+ 16, tran_low_t,
+ y_cuml_bins_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS][NUQ_KNOTS]);
+ DECLARE_ALIGNED(
+ 16, tran_low_t,
+ uv_cuml_bins_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS][NUQ_KNOTS]);
+#endif // CONFIG_NEW_QUANT
+ // 0: dc 1: ac 2-7: ac repeated to SIMD width
+ DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]);
+
+ // TODO(jingning): in progress of re-working the quantization. will decide
+ // if we want to deprecate the current use of y_quant.
+ DECLARE_ALIGNED(16, int16_t, y_quant_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, uv_quant_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, y_round_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, uv_round_fp[QINDEX_RANGE][8]);
+
+ DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, uv_zbin[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]);
+} QUANTS;
+
+struct VP10_COMP;
+struct VP10Common;
+
+void vp10_frame_init_quantizer(struct VP10_COMP *cpi);
+
+void vp10_init_plane_quantizers(const struct VP10_COMP *cpi, MACROBLOCK *x,
+ int segment_id);
+
+void vp10_init_quantizer(struct VP10_COMP *cpi);
+
+void vp10_set_quantizer(struct VP10Common *cm, int q);
+
+int vp10_quantizer_to_qindex(int quantizer);
+
+int vp10_qindex_to_quantizer(int qindex);
+
+void vp10_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
+
+void vp10_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc, const QUANT_PARAM *qparam);
+
+void vp10_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc, const QUANT_PARAM *qparam);
+
+void vp10_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc, const QUANT_PARAM *qparam);
+
+#if CONFIG_NEW_QUANT
+void quantize_dc_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t quant,
+ const int16_t quant_shift, const int16_t dequant,
+ const tran_low_t *cuml_bins_ptr,
+ const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
+void quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t quant,
+ const int16_t quant_shift, const int16_t dequant,
+ const tran_low_t *cuml_bins_ptr,
+ const tran_low_t *dequant_val,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ uint16_t *eob_ptr);
+void quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t quant,
+ const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+ const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
+void quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t quant,
+ const int16_t dequant,
+ const tran_low_t *cuml_bins_ptr,
+ const tran_low_t *dequant_val,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ uint16_t *eob_ptr);
+#endif // CONFIG_NEW_QUANT
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_quantize_fp_facade(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc,
+ const QUANT_PARAM *qparam);
+
+void vp10_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
+ intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr,
+ const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
+ const scan_order *sc,
+ const QUANT_PARAM *qparam);
+
+void vp10_highbd_quantize_dc_facade(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
+ tran_low_t *qcoeff_ptr, const MACROBLOCKD_PLANE *pd,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const scan_order *sc,
+ const QUANT_PARAM *qparam);
+
+void vp10_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t quant, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr,
+ uint16_t *eob_ptr, const int log_scale);
+#if CONFIG_NEW_QUANT
+void highbd_quantize_dc_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t quant,
+ const int16_t quant_shift, const int16_t dequant,
+ const tran_low_t *cuml_bins_ptr,
+ const tran_low_t *dequant_val,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ uint16_t *eob_ptr);
+void highbd_quantize_dc_32x32_nuq(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+ const int16_t quant, const int16_t quant_shift, const int16_t dequant,
+ const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
+void highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t quant,
+ const int16_t dequant,
+ const tran_low_t *cuml_bins_ptr,
+ const tran_low_t *dequant_val,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ uint16_t *eob_ptr);
+void highbd_quantize_dc_32x32_fp_nuq(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+ const int16_t quant, const int16_t dequant, const tran_low_t *cuml_bins_ptr,
+ const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
+
+#endif // CONFIG_NEW_QUANT
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_QUANTIZE_H_
diff --git a/av1/encoder/ransac.c b/av1/encoder/ransac.c
new file mode 100644
index 0000000..e925068
--- /dev/null
+++ b/av1/encoder/ransac.c
@@ -0,0 +1,940 @@
+/*
+ * (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <memory.h>
+#include <math.h>
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "av1/encoder/ransac.h"
+
+#define MAX_PARAMDIM 9
+#define MAX_MINPTS 4
+
+#define MAX_DEGENERATE_ITER 10
+#define MINPTS_MULTIPLIER 5
+
+// svdcmp
+// Adopted from Numerical Recipes in C
+
+static const double TINY_NEAR_ZERO = 1.0E-12;
+
+// Returns |a| when b >= 0 and -|a| otherwise.
+static inline double SIGN(double a, double b) {
+  const double magnitude = fabs(a);
+
+  if (b >= 0) return magnitude;
+  return -magnitude;
+}
+
+// Computes sqrt(a * a + b * b) by factoring out the larger magnitude, so the
+// squares themselves are never formed. Returns 0 when both inputs are zero.
+static inline double PYTHAG(double a, double b) {
+  const double absa = fabs(a);
+  const double absb = fabs(b);
+  double ratio;
+
+  if (absa > absb) {
+    ratio = absb / absa;
+    return absa * sqrt(1.0 + ratio * ratio);
+  }
+
+  // Here absb >= absa, so absb == 0 means both inputs are zero.
+  ratio = absa / absb;
+  if (absb == 0) return 0;
+  return absb * sqrt(1.0 + ratio * ratio);
+}
+
+// Returns the smaller of a and b.
+int IMIN(int a, int b) {
+  if (a < b) return a;
+  return b;
+}
+
+// Returns the larger of a and b.
+int IMAX(int a, int b) {
+  if (a < b) return b;
+  return a;
+}
+
+// Dense row-major matrix product: res (M1 x N2) = m1 (M1 x N1) * m2 (N1 x N2).
+// Results are written to res in row-major order.
+void MultiplyMat(double *m1, double *m2, double *res, const int M1,
+                 const int N1, const int N2) {
+  int r, c, k;
+
+  for (r = 0; r < M1; ++r) {
+    for (c = 0; c < N2; ++c) {
+      double acc = 0;
+
+      // Dot product of row r of m1 with column c of m2.
+      for (k = 0; k < N1; ++k) acc += m1[r * N1 + k] * m2[k * N2 + c];
+      res[r * N2 + c] = acc;
+    }
+  }
+}
+
+// Singular value decomposition, adopted from Numerical Recipes in C and
+// converted to 0-based indexing.
+//
+// u: m row pointers of n doubles each holding the input matrix; overwritten
+//    in place by the decomposition.
+// w: output, n entries.
+// v: output, n row pointers of n doubles each.
+//
+// Returns 0 on success. Returns 1 if the scratch vector cannot be allocated
+// or if the iteration for some w[k] has not converged after max_its sweeps.
+// The scratch vector is released on every path.
+static int svdcmp_(double **u, int m, int n, double w[], double **v) {
+  const int max_its = 30;
+  int flag, i, its, j, jj, k, l, nm;
+  double anorm, c, f, g, h, s, scale, x, y, z;
+  double *rv1 = (double *)malloc(sizeof(*rv1) * (n + 1));
+  if (rv1 == NULL) return 1;
+  g = scale = anorm = 0.0;
+  // Householder reduction to bidiagonal form.
+  for (i = 0; i < n; i++) {
+    l = i + 1;
+    rv1[i] = scale * g;
+    g = s = scale = 0.0;
+    if (i < m) {
+      for (k = i; k < m; k++) scale += fabs(u[k][i]);
+      if (scale) {
+        for (k = i; k < m; k++) {
+          u[k][i] /= scale;
+          s += u[k][i] * u[k][i];
+        }
+        f = u[i][i];
+        g = -SIGN(sqrt(s), f);
+        h = f * g - s;
+        u[i][i] = f - g;
+        for (j = l; j < n; j++) {
+          for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
+          f = s / h;
+          for (k = i; k < m; k++) u[k][j] += f * u[k][i];
+        }
+        for (k = i; k < m; k++) u[k][i] *= scale;
+      }
+    }
+    w[i] = scale * g;
+    g = s = scale = 0.0;
+    if (i < m && i != n - 1) {
+      for (k = l; k < n; k++) scale += fabs(u[i][k]);
+      if (scale) {
+        for (k = l; k < n; k++) {
+          u[i][k] /= scale;
+          s += u[i][k] * u[i][k];
+        }
+        f = u[i][l];
+        g = -SIGN(sqrt(s), f);
+        h = f * g - s;
+        u[i][l] = f - g;
+        for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
+        for (j = l; j < m; j++) {
+          for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
+          for (k = l; k < n; k++) u[j][k] += s * rv1[k];
+        }
+        for (k = l; k < n; k++) u[i][k] *= scale;
+      }
+    }
+    anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
+  }
+
+  // Accumulation of right-hand transformations into v.
+  for (i = n - 1; i >= 0; i--) {
+    if (i < n - 1) {
+      if (g) {
+        for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
+        for (j = l; j < n; j++) {
+          for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
+          for (k = l; k < n; k++) v[k][j] += s * v[k][i];
+        }
+      }
+      for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
+    }
+    v[i][i] = 1.0;
+    g = rv1[i];
+    l = i;
+  }
+
+  // Accumulation of left-hand transformations into u.
+  for (i = IMIN(m, n) - 1; i >= 0; i--) {
+    l = i + 1;
+    g = w[i];
+    for (j = l; j < n; j++) u[i][j] = 0.0;
+    if (g) {
+      g = 1.0 / g;
+      for (j = l; j < n; j++) {
+        for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
+        f = (s / u[i][i]) * g;
+        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
+      }
+      for (j = i; j < m; j++) u[j][i] *= g;
+    } else {
+      for (j = i; j < m; j++) u[j][i] = 0.0;
+    }
+    ++u[i][i];
+  }
+
+  // Diagonalization of the bidiagonal form: loop over w[k], with at most
+  // max_its iterations for each.
+  for (k = n - 1; k >= 0; k--) {
+    for (its = 0; its < max_its; its++) {
+      flag = 1;
+      // Test for splitting.
+      for (l = k; l >= 0; l--) {
+        nm = l - 1;
+        if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
+          flag = 0;
+          break;
+        }
+        if ((double)(fabs(w[nm]) + anorm) == anorm) break;
+      }
+      if (flag) {
+        c = 0.0;
+        s = 1.0;
+        for (i = l; i <= k; i++) {
+          f = s * rv1[i];
+          rv1[i] = c * rv1[i];
+          if ((double)(fabs(f) + anorm) == anorm) break;
+          g = w[i];
+          h = PYTHAG(f, g);
+          w[i] = h;
+          h = 1.0 / h;
+          c = g * h;
+          s = -f * h;
+          for (j = 0; j < m; j++) {
+            y = u[j][nm];
+            z = u[j][i];
+            u[j][nm] = y * c + z * s;
+            u[j][i] = z * c - y * s;
+          }
+        }
+      }
+      z = w[k];
+      if (l == k) {
+        // Converged: make w[k] non-negative.
+        if (z < 0.0) {
+          w[k] = -z;
+          for (j = 0; j < n; j++) v[j][k] = -v[j][k];
+        }
+        break;
+      }
+      if (its == max_its - 1) {
+        // No convergence: release the scratch vector before bailing out.
+        free(rv1);
+        return 1;
+      }
+      assert(k > 0);
+      x = w[l];
+      nm = k - 1;
+      y = w[nm];
+      g = rv1[nm];
+      h = rv1[k];
+      f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
+      g = PYTHAG(f, 1.0);
+      f = ((x - z) * (x + z) + h * ((y / (f + SIGN(g, f))) - h)) / x;
+      c = s = 1.0;
+      for (j = l; j <= nm; j++) {
+        i = j + 1;
+        g = rv1[i];
+        y = w[i];
+        h = s * g;
+        g = c * g;
+        z = PYTHAG(f, h);
+        rv1[j] = z;
+        c = f / z;
+        s = h / z;
+        f = x * c + g * s;
+        g = g * c - x * s;
+        h = y * s;
+        y *= c;
+        for (jj = 0; jj < n; jj++) {
+          x = v[jj][j];
+          z = v[jj][i];
+          v[jj][j] = x * c + z * s;
+          v[jj][i] = z * c - x * s;
+        }
+        z = PYTHAG(f, h);
+        w[j] = z;
+        if (z) {
+          z = 1.0 / z;
+          c = f * z;
+          s = h * z;
+        }
+        f = c * g + s * y;
+        x = c * y - s * g;
+        for (jj = 0; jj < m; jj++) {
+          y = u[jj][j];
+          z = u[jj][i];
+          u[jj][j] = y * c + z * s;
+          u[jj][i] = z * c - y * s;
+        }
+      }
+      rv1[l] = 0.0;
+      rv1[k] = f;
+      w[k] = x;
+    }
+  }
+  free(rv1);
+  return 0;
+}
+
+// Flat-array front end for svdcmp_(). Copies the M x N row-major matrix matx
+// into U, builds the row-pointer views svdcmp_() expects over U (M rows of N)
+// and V (N rows of N), and runs the decomposition in place.
+//
+// Returns 0 on success and 1 if a row-pointer table cannot be allocated or
+// svdcmp_() reports failure. The row-pointer tables are freed on every path.
+static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
+  // Assumes allocation for U is MxN
+  double **nrU = (double **)malloc((M) * sizeof(*nrU));
+  double **nrV = (double **)malloc((N) * sizeof(*nrV));
+  int status = 1;
+  int i;
+
+  if (nrU && nrV) {
+    for (i = 0; i < M; i++) {
+      nrU[i] = &U[i * N];
+    }
+    for (i = 0; i < N; i++) {
+      nrV[i] = &V[i * N];
+    }
+
+    /* copy from given matx into nrU */
+    for (i = 0; i < M; i++) {
+      memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx));
+    }
+
+    /* HERE IT IS: do SVD */
+    status = svdcmp_(nrU, M, N, W, nrV) ? 1 : 0;
+  }
+
+  /* free Numerical Recipes arrays (free(NULL) is a no-op) */
+  free(nrU);
+  free(nrV);
+
+  return status;
+}
+
+// Computes the pseudo-inverse of the M x N row-major matrix matx through its
+// singular value decomposition and stores the N x M row-major result in inv.
+//
+// Returns 0 on success. Returns 1, leaving inv untouched, if a work buffer
+// cannot be allocated, the decomposition fails, or any value in W has a
+// magnitude below TINY_NEAR_ZERO. All work buffers are freed on every path.
+int PseudoInverse(double *inv, double *matx, const int M, const int N) {
+  double *U, *W, *V, ans;
+  int i, j, k;
+  int ret = 1;
+  U = (double *)malloc(M * N * sizeof(*matx));
+  W = (double *)malloc(N * sizeof(*matx));
+  V = (double *)malloc(N * N * sizeof(*matx));
+
+  if (!(U && W && V)) goto done;
+  if (SVD(U, W, V, matx, M, N)) goto done;
+
+  // Refuse to divide by a (near) zero value below.
+  for (i = 0; i < N; i++) {
+    if (fabs(W[i]) < TINY_NEAR_ZERO) goto done;
+  }
+
+  // inv[i][j] = sum over k of V[i][k] * U[j][k] / W[k].
+  for (i = 0; i < N; i++) {
+    for (j = 0; j < M; j++) {
+      ans = 0;
+      for (k = 0; k < N; k++) {
+        ans += V[k + N * i] * U[k + N * j] / W[k];
+      }
+      inv[j + M * i] = ans;
+    }
+  }
+  ret = 0;
+
+done:
+  free(U);
+  free(W);
+  free(V);
+  return ret;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// ransac
+// Callback signatures through which ransac_() is specialized for a motion
+// model.
+
+// Returns nonzero when the sampled points p cannot define a model; ransac_()
+// then draws a new sample.
+typedef int (*isDegenerateType)(double *p);
+// Normalizes the np points in p in place and writes the applied transform to
+// T; ransac_() passes a 9-element array for T.
+typedef void (*normalizeType)(double *p, int np, double *T);
+// Rewrites the model H in place using the two transforms produced by the
+// normalize callback.
+typedef void (*denormalizeType)(double *H, double *T1, double *T2);
+// Fits model H to `points` correspondences between points1 and points2;
+// ransac_() skips the trial when the result is nonzero.
+typedef int (*findTransformationType)(int points, double *points1,
+ double *points2, double *H);
+
+// Picks minpts point indices in [0, npoints) and stores them in indices.
+// Returns 1 on success and 0 when the request cannot be met (no points, or
+// more indices requested than there are points).
+//
+// NOTE(review): the rand_r() seed is re-initialized from npoints on every
+// call, so repeated calls with the same npoints return the same indices -
+// confirm that this is intended for the sampling loop in ransac_().
+static int get_rand_indices(int npoints, int minpts, int *indices) {
+  int i, j;
+  unsigned int seed = (unsigned int)npoints;
+  int ptr;
+
+  // Validate before the modulo below: npoints == 0 would divide by zero.
+  if (npoints <= 0 || minpts > npoints) return 0;
+
+  ptr = rand_r(&seed) % npoints;
+  indices[0] = ptr;
+  ptr = (ptr == npoints - 1 ? 0 : ptr + 1);
+  i = 1;
+  while (i < minpts) {
+    // Advance ptr cyclically past `index` positions that are not yet taken.
+    int index = rand_r(&seed) % npoints;
+    while (index) {
+      ptr = (ptr == npoints - 1 ? 0 : ptr + 1);
+      for (j = 0; j < i; ++j) {
+        if (indices[j] == ptr) break;
+      }
+      if (j == i) index--;
+    }
+    indices[i++] = ptr;
+  }
+  return 1;
+}
+
+// Generic RANSAC driver for estimating a motion model from point matches.
+//
+// matched_points holds 4 doubles per match: the first pair is copied into the
+// first point set and the second pair into the second. Each trial samples
+// minpts matches, rejects samples for which isDegenerate() is nonzero, fits a
+// model with findTransformation(), projects the first point set with
+// projectpoints() and counts the matches whose projection lies within the
+// inlier threshold of the second set. The model with the most inliers wins
+// (ties are broken by the smaller distance variance) and is refitted on its
+// inlier set before being returned.
+//
+// Outputs: bestH (paramdim doubles, denormalized when both normalize and
+// denormalize are given), best_inlier_mask (npoints flags) and
+// *number_of_inliers.
+//
+// Returns 0 on success. Returns 1 if npoints < minpts * MINPTS_MULTIPLIER, a
+// work buffer cannot be allocated, index sampling fails, or no non-degenerate
+// sample is found within MAX_DEGENERATE_ITER attempts. All work buffers are
+// released on every path.
+int ransac_(double *matched_points, int npoints, int *number_of_inliers,
+            int *best_inlier_mask, double *bestH, const int minpts,
+            const int paramdim, isDegenerateType isDegenerate,
+            normalizeType normalize, denormalizeType denormalize,
+            findTransformationType findTransformation,
+            ProjectPointsType projectpoints, TransformationType type) {
+  static const double INLIER_THRESHOLD_NORMALIZED = 0.1;
+  static const double INLIER_THRESHOLD_UNNORMALIZED = 1.0;
+  static const double PROBABILITY_REQUIRED = 0.9;
+  static const double EPS = 1e-12;
+  static const int MIN_TRIALS = 20;
+
+  const double inlier_threshold =
+      (normalize && denormalize ? INLIER_THRESHOLD_NORMALIZED
+                                : INLIER_THRESHOLD_UNNORMALIZED);
+  int N = 10000, trial_count = 0;
+  int i;
+  int ret_val = 0;
+
+  int max_inliers = 0;
+  double best_variance = 0.0;
+  double H[MAX_PARAMDIM];
+  WarpedMotionParams wm;
+  double points1[2 * MAX_MINPTS];
+  double points2[2 * MAX_MINPTS];
+  int indices[MAX_MINPTS];
+
+  double *best_inlier_set1;
+  double *best_inlier_set2;
+  double *inlier_set1;
+  double *inlier_set2;
+  double *corners1;
+  int *corners1_int;
+  double *corners2;
+  int *image1_coord;
+  int *inlier_mask;
+
+  double *cnp1, *cnp2;
+  double T1[9], T2[9];
+
+  // Seed from the match count rather than the clock: better to make this
+  // deterministic for a given sequence for ease of testing.
+  srand(npoints);
+
+  *number_of_inliers = 0;
+  if (npoints < minpts * MINPTS_MULTIPLIER) {
+    printf("Cannot find motion with %d matches\n", npoints);
+    return 1;
+  }
+
+  memset(&wm, 0, sizeof(wm));
+  best_inlier_set1 = (double *)malloc(sizeof(*best_inlier_set1) * npoints * 2);
+  best_inlier_set2 = (double *)malloc(sizeof(*best_inlier_set2) * npoints * 2);
+  inlier_set1 = (double *)malloc(sizeof(*inlier_set1) * npoints * 2);
+  inlier_set2 = (double *)malloc(sizeof(*inlier_set2) * npoints * 2);
+  corners1 = (double *)malloc(sizeof(*corners1) * npoints * 2);
+  corners1_int = (int *)malloc(sizeof(*corners1_int) * npoints * 2);
+  corners2 = (double *)malloc(sizeof(*corners2) * npoints * 2);
+  image1_coord = (int *)malloc(sizeof(*image1_coord) * npoints * 2);
+  inlier_mask = (int *)malloc(sizeof(*inlier_mask) * npoints);
+
+  // Bail out through the common cleanup if any buffer is missing; free() of
+  // the NULL ones is a no-op.
+  if (!(best_inlier_set1 && best_inlier_set2 && inlier_set1 && inlier_set2 &&
+        corners1 && corners1_int && corners2 && image1_coord && inlier_mask)) {
+    ret_val = 1;
+    goto finish_ransac;
+  }
+
+  // Split the interleaved matches into the two point sets.
+  for (cnp1 = corners1, cnp2 = corners2, i = 0; i < npoints; ++i) {
+    *(cnp1++) = *(matched_points++);
+    *(cnp1++) = *(matched_points++);
+    *(cnp2++) = *(matched_points++);
+    *(cnp2++) = *(matched_points++);
+  }
+  matched_points -= 4 * npoints;
+
+  if (normalize && denormalize) {
+    normalize(corners1, npoints, T1);
+    normalize(corners2, npoints, T2);
+  }
+
+  while (N > trial_count) {
+    int num_inliers = 0;
+    double sum_distance = 0.0;
+    double sum_distance_squared = 0.0;
+
+    int degenerate = 1;
+    int num_degenerate_iter = 0;
+    // Draw samples until one is usable or the attempt budget runs out.
+    while (degenerate) {
+      num_degenerate_iter++;
+      if (!get_rand_indices(npoints, minpts, indices)) {
+        ret_val = 1;
+        goto finish_ransac;
+      }
+      i = 0;
+      while (i < minpts) {
+        int index = indices[i];
+        // add to list
+        points1[i * 2] = corners1[index * 2];
+        points1[i * 2 + 1] = corners1[index * 2 + 1];
+        points2[i * 2] = corners2[index * 2];
+        points2[i * 2 + 1] = corners2[index * 2 + 1];
+        i++;
+      }
+      degenerate = isDegenerate(points1);
+      if (num_degenerate_iter > MAX_DEGENERATE_ITER) {
+        ret_val = 1;
+        goto finish_ransac;
+      }
+    }
+
+    if (findTransformation(minpts, points1, points2, H)) {
+      trial_count++;
+      continue;
+    }
+
+    for (i = 0; i < npoints; ++i) {
+      corners1_int[2 * i] = (int)corners1[i * 2];
+      corners1_int[2 * i + 1] = (int)corners1[i * 2 + 1];
+    }
+
+    vp10_integerize_model(H, type, &wm);
+    projectpoints(wm.wmmat, corners1_int, image1_coord, npoints, 2, 2, 0, 0);
+
+    // Classify every match by its distance from the projected position.
+    for (i = 0; i < npoints; ++i) {
+      double dx =
+          (image1_coord[i * 2] >> WARPEDPIXEL_PREC_BITS) - corners2[i * 2];
+      double dy = (image1_coord[i * 2 + 1] >> WARPEDPIXEL_PREC_BITS) -
+                  corners2[i * 2 + 1];
+      double distance = sqrt(dx * dx + dy * dy);
+
+      inlier_mask[i] = distance < inlier_threshold;
+      if (inlier_mask[i]) {
+        inlier_set1[num_inliers * 2] = corners1[i * 2];
+        inlier_set1[num_inliers * 2 + 1] = corners1[i * 2 + 1];
+        inlier_set2[num_inliers * 2] = corners2[i * 2];
+        inlier_set2[num_inliers * 2 + 1] = corners2[i * 2 + 1];
+        num_inliers++;
+        sum_distance += distance;
+        sum_distance_squared += distance * distance;
+      }
+    }
+
+    if (num_inliers >= max_inliers) {
+      double mean_distance = sum_distance / ((double)num_inliers);
+      double variance = sum_distance_squared / ((double)num_inliers - 1.0) -
+                        mean_distance * mean_distance * ((double)num_inliers) /
+                            ((double)num_inliers - 1.0);
+      if ((num_inliers > max_inliers) ||
+          (num_inliers == max_inliers && variance < best_variance)) {
+        best_variance = variance;
+        max_inliers = num_inliers;
+        memcpy(bestH, H, paramdim * sizeof(*bestH));
+        memcpy(best_inlier_set1, inlier_set1,
+               num_inliers * 2 * sizeof(*best_inlier_set1));
+        memcpy(best_inlier_set2, inlier_set2,
+               num_inliers * 2 * sizeof(*best_inlier_set2));
+        memcpy(best_inlier_mask, inlier_mask,
+               npoints * sizeof(*best_inlier_mask));
+
+        // Shrink the trial budget N based on the inlier fraction just seen,
+        // but never below MIN_TRIALS.
+        if (num_inliers > 0) {
+          double fracinliers = (double)num_inliers / (double)npoints;
+          double pNoOutliers = 1 - pow(fracinliers, minpts);
+          int temp;
+          pNoOutliers = fmax(EPS, pNoOutliers);
+          pNoOutliers = fmin(1 - EPS, pNoOutliers);
+          temp = (int)(log(1.0 - PROBABILITY_REQUIRED) / log(pNoOutliers));
+          if (temp > 0 && temp < N) {
+            N = IMAX(temp, MIN_TRIALS);
+          }
+        }
+      }
+    }
+    trial_count++;
+  }
+  // Refit the model on the full best inlier set.
+  findTransformation(max_inliers, best_inlier_set1, best_inlier_set2, bestH);
+  if (normalize && denormalize) {
+    denormalize(bestH, T1, T2);
+  }
+  *number_of_inliers = max_inliers;
+finish_ransac:
+  free(best_inlier_set1);
+  free(best_inlier_set2);
+  free(inlier_set1);
+  free(inlier_set2);
+  free(corners1);
+  free(corners1_int);
+  free(corners2);
+  free(image1_coord);
+  free(inlier_mask);
+  return ret_val;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Normalizes n 2-D points in place: they are translated so their mean is the
+// origin and then scaled so their mean distance from the origin is sqrt(2).
+// The 3x3 row-major matrix describing that mapping is written to T.
+static void normalizeHomography(double *pts, int n, double *T) {
+  // Assume the points are 2d coordinates with scale = 1
+  double mean_x = 0;
+  double mean_y = 0;
+  double mean_dist = 0;
+  double scale;
+  int i;
+
+  for (i = 0; i < n; ++i) {
+    mean_x += pts[2 * i];
+    mean_y += pts[2 * i + 1];
+  }
+  mean_x /= n;
+  mean_y /= n;
+
+  // Centre the points and accumulate their distances from the origin.
+  for (i = 0; i < n; ++i) {
+    double *const pt = pts + 2 * i;
+    pt[0] -= mean_x;
+    pt[1] -= mean_y;
+    mean_dist += sqrt(pt[0] * pt[0] + pt[1] * pt[1]);
+  }
+  mean_dist /= n;
+  scale = sqrt(2) / mean_dist;
+
+  // Row 0
+  T[0] = scale;
+  T[1] = 0;
+  T[2] = -scale * mean_x;
+  // Row 1
+  T[3] = 0;
+  T[4] = scale;
+  T[5] = -scale * mean_y;
+  // Row 2
+  T[6] = 0;
+  T[7] = 0;
+  T[8] = 1;
+
+  for (i = 0; i < 2 * n; i += 2) {
+    pts[i] *= scale;
+    pts[i + 1] *= scale;
+  }
+}
+
+// Writes to iT the inverse of a normalization matrix T of the form
+// [s 0 tx; 0 s ty; 0 0 1]; only T[0], T[2] and T[5] are read.
+static void invnormalize_mat(double *T, double *iT) {
+  const double inv_scale = 1.0 / T[0];
+  const double inv_tx = -T[2] * inv_scale;
+  const double inv_ty = -T[5] * inv_scale;
+  const double inverse[9] = {
+    inv_scale, 0,         inv_tx,  // row 0
+    0,         inv_scale, inv_ty,  // row 1
+    0,         0,         1,       // row 2
+  };
+
+  memcpy(iT, inverse, sizeof(inverse));
+}
+
+static void denormalizeHomography(double *H, double *T1, double *T2) {
+  // H <- inv(T2) * H * T1, assuming MultiplyMat(a, b, c, ...) sets c = a * b.
+  double T2_inv[9], H_T1[9];
+  MultiplyMat(H, T1, H_T1, 3, 3, 3);
+  invnormalize_mat(T2, T2_inv);
+  MultiplyMat(T2_inv, H_T1, H, 3, 3, 3);
+}
+
+static void denormalizeAffine(double *H, double *T1, double *T2) {
+  // Lifts the 6 affine parameters into a 3x3 matrix, denormalizes it, and
+  // writes the result back with the two translation terms moved to the front.
+  double M[MAX_PARAMDIM];
+  int r;
+  M[2] = H[4];
+  M[5] = H[5];
+  for (r = 0; r < 2; ++r) {
+    M[3 * r + 0] = H[2 * r + 0];
+    M[3 * r + 1] = H[2 * r + 1];
+  }
+  M[6] = M[7] = 0;
+  M[8] = 1;
+  denormalizeHomography(M, T1, T2);
+  H[0] = M[2];
+  H[1] = M[5];
+  for (r = 0; r < 4; ++r) H[2 + r] = M[3 * (r / 2) + r % 2];
+}
+
+static void denormalizeRotZoom(double *H, double *T1, double *T2) {
+  // H = { a, b, tx, ty } on entry; rewritten as { tx, ty, a, b } on return.
+  double M[MAX_PARAMDIM];
+  M[0] = M[4] = H[0];
+  M[1] = H[1];
+  M[3] = -H[1];
+  M[2] = H[2];
+  M[5] = H[3];
+  M[6] = M[7] = 0;
+  M[8] = 1;
+  denormalizeHomography(M, T1, T2);
+  H[2] = M[0];
+  H[3] = M[1];
+  H[0] = M[2];
+  H[1] = M[5];
+}
+
+static void denormalizeTranslation(double *H, double *T1, double *T2) {
+  // Denormalizes the shift (H[0], H[1]) by way of its 3x3 matrix form.
+  double M[MAX_PARAMDIM];
+  int k;
+  // Start from the identity: ones at indices 0, 4 and 8, zeros elsewhere.
+  for (k = 0; k < 9; ++k) {
+    M[k] = (k % 4 == 0) ? 1 : 0;
+  }
+  M[2] = H[0];
+  M[5] = H[1];
+  denormalizeHomography(M, T1, T2);
+  H[0] = M[2];
+  H[1] = M[5];
+}
+
+static int is_collinear3(double *p1, double *p2, double *p3) {
+  // 1 when the 2D cross product (p2 - p1) x (p3 - p1) is below 1e-3 in size.
+  const double ux = p2[0] - p1[0], uy = p2[1] - p1[1];
+  const double vx = p3[0] - p1[0], vy = p3[1] - p1[1];
+  return fabs(ux * vy - uy * vx) < 1e-3;
+}
+
+static int isDegenerateTranslation(double *p) { // 1 if squared distance between (p[0], p[1]) and (p[2], p[3]) is <= 2
+ return (p[0] - p[2]) * (p[0] - p[2]) + (p[1] - p[3]) * (p[1] - p[3]) <= 2;
+}
+
+static int isDegenerateAffine(double *p) { // 1 if the first three (x, y) pairs in p are collinear
+ return is_collinear3(p, p + 2, p + 4);
+}
+
+static int isDegenerateHomography(double *p) { // 1 if any three of the first four (x, y) pairs in p are collinear
+ return is_collinear3(p, p + 2, p + 4) || is_collinear3(p, p + 2, p + 6) ||
+ is_collinear3(p, p + 4, p + 6) || is_collinear3(p + 2, p + 4, p + 6);
+}
+
+int findTranslation(const int np, double *pts1, double *pts2, double *mat) {
+  // Estimates a translation as the mean of (pts2 - pts1) over the np
+  // correspondences, computed on normalized coordinates and then
+  // denormalized. Both point arrays are normalized in place.
+  // mat[0] and mat[1] receive the result; the return value is always 0.
+  double T1[9], T2[9];
+  double acc_x = 0;
+  double acc_y = 0;
+  int k;
+
+  normalizeHomography(pts1, np, T1);
+  normalizeHomography(pts2, np, T2);
+
+  for (k = 0; k < np; ++k) {
+    const double *const src = pts1 + 2 * k;
+    const double *const dst = pts2 + 2 * k;
+    acc_x += dst[0] - src[0];
+    acc_y += dst[1] - src[1];
+  }
+
+  mat[0] = acc_x / np;
+  mat[1] = acc_y / np;
+  denormalizeTranslation(mat, T1, T2);
+  return 0;
+}
+
+int findRotZoom(const int np, double *pts1, double *pts2, double *mat) {
+  // Fits { a, b, tx, ty } of x' = a*x + b*y + tx, y' = -b*x + a*y + ty to the
+  // np correspondences pts1 -> pts2 (both normalized in place).
+  // Returns 0 on success, 1 if allocation or PseudoInverse() fails.
+  const int np2 = np * 2;
+  // Single buffer: a (np2 * 4 doubles), then b (np2), then temp (np2 * 4).
+  double *a = (double *)malloc(sizeof(*a) * np2 * 9);
+  double *b, *temp;
+  double T1[9], T2[9];
+  int i;
+
+  if (a == NULL) return 1;  // nothing allocated, nothing to free
+  b = a + np2 * 4;
+  temp = b + np2;
+  normalizeHomography(pts1, np, T1);
+  normalizeHomography(pts2, np, T2);
+  for (i = 0; i < np; ++i) {
+    const double sx = pts1[2 * i], sy = pts1[2 * i + 1];
+    double *const row = a + i * 2 * 4;  // two consecutive 4-wide rows
+    row[0] = sx;
+    row[1] = sy;
+    row[2] = 1;
+    row[3] = 0;
+    row[4] = sy;
+    row[5] = -sx;
+    row[6] = 0;
+    row[7] = 1;
+    b[2 * i] = pts2[2 * i];
+    b[2 * i + 1] = pts2[2 * i + 1];
+  }
+  if (PseudoInverse(temp, a, np2, 4)) {
+    free(a);
+    return 1;
+  }
+  MultiplyMat(temp, b, mat, 4, np2, 1);
+  denormalizeRotZoom(mat, T1, T2);
+  free(a);
+  return 0;
+}
+
+int findAffine(const int np, double *pts1, double *pts2, double *mat) {
+  // Fits the six unknowns of x' = m0*x + m1*y + m4, y' = m2*x + m3*y + m5 to
+  // the np correspondences pts1 -> pts2 (both normalized in place).
+  // Returns 0 on success, 1 if allocation or PseudoInverse() fails.
+  const int np2 = np * 2;
+  // Single buffer: a (np2 * 6 doubles), then b (np2), then temp (np2 * 6).
+  double *a = (double *)malloc(sizeof(*a) * np2 * 13);
+  double *b, *temp;
+  double T1[9], T2[9];
+  int i;
+
+  if (a == NULL) return 1;  // nothing allocated, nothing to free
+  b = a + np2 * 6;
+  temp = b + np2;
+  normalizeHomography(pts1, np, T1);
+  normalizeHomography(pts2, np, T2);
+  for (i = 0; i < np; ++i) {
+    const double sx = pts1[2 * i], sy = pts1[2 * i + 1];
+    double *const row = a + i * 2 * 6;  // two consecutive 6-wide rows
+    row[0] = sx;
+    row[1] = sy;
+    row[2] = 0;
+    row[3] = 0;
+    row[4] = 1;
+    row[5] = 0;
+    row[6] = 0;
+    row[7] = 0;
+    row[8] = sx;
+    row[9] = sy;
+    row[10] = 0;
+    row[11] = 1;
+    b[2 * i] = pts2[2 * i];
+    b[2 * i + 1] = pts2[2 * i + 1];
+  }
+  if (PseudoInverse(temp, a, np2, 6)) {
+    free(a);
+    return 1;
+  }
+  MultiplyMat(temp, b, mat, 6, np2, 1);
+  denormalizeAffine(mat, T1, T2);
+  free(a);
+  return 0;
+}
+
+int findHomography(const int np, double *pts1, double *pts2, double *mat) {
+  // Implemented from Peter Kovesi's normalized implementation.
+  // Stacks three rows per correspondence pts1 -> pts2 (both normalized in
+  // place); mat gets the column of V selected by the smallest value in S.
+  // Returns 0 on success; 1 if allocation or SVD() fails or mat[8] ends up 0.
+  const int np3 = np * 3;
+  // Single buffer: a (np3 * 9 doubles) followed by U (np3 * 9 doubles).
+  double *a = (double *)malloc(sizeof(*a) * np3 * 18);
+  double *U;
+  double S[9], V[9 * 9];
+  int i, mini;
+  double sx, sy, dx, dy;
+  double T1[9], T2[9];
+
+  if (a == NULL) return 1;  // nothing allocated, nothing to free
+  U = a + np3 * 9;
+  normalizeHomography(pts1, np, T1);
+  normalizeHomography(pts2, np, T2);
+
+  for (i = 0; i < np; ++i) {
+    dx = *(pts2++);
+    dy = *(pts2++);
+    sx = *(pts1++);
+    sy = *(pts1++);
+
+    a[i * 3 * 9 + 0] = a[i * 3 * 9 + 1] = a[i * 3 * 9 + 2] = 0;
+    a[i * 3 * 9 + 3] = -sx;
+    a[i * 3 * 9 + 4] = -sy;
+    a[i * 3 * 9 + 5] = -1;
+    a[i * 3 * 9 + 6] = dy * sx;
+    a[i * 3 * 9 + 7] = dy * sy;
+    a[i * 3 * 9 + 8] = dy;
+
+    a[(i * 3 + 1) * 9 + 0] = sx;
+    a[(i * 3 + 1) * 9 + 1] = sy;
+    a[(i * 3 + 1) * 9 + 2] = 1;
+    a[(i * 3 + 1) * 9 + 3] = a[(i * 3 + 1) * 9 + 4] = a[(i * 3 + 1) * 9 + 5] =
+        0;
+    a[(i * 3 + 1) * 9 + 6] = -dx * sx;
+    a[(i * 3 + 1) * 9 + 7] = -dx * sy;
+    a[(i * 3 + 1) * 9 + 8] = -dx;
+
+    a[(i * 3 + 2) * 9 + 0] = -dy * sx;
+    a[(i * 3 + 2) * 9 + 1] = -dy * sy;
+    a[(i * 3 + 2) * 9 + 2] = -dy;
+    a[(i * 3 + 2) * 9 + 3] = dx * sx;
+    a[(i * 3 + 2) * 9 + 4] = dx * sy;
+    a[(i * 3 + 2) * 9 + 5] = dx;
+    a[(i * 3 + 2) * 9 + 6] = a[(i * 3 + 2) * 9 + 7] = a[(i * 3 + 2) * 9 + 8] =
+        0;
+  }
+
+  if (SVD(U, S, V, a, np3, 9)) {
+    free(a);
+    return 1;
+  }
+  // Seed with S[0] so mini always names a valid column of V, even when every
+  // entry of S is 1e12 or more.
+  mini = 0;
+  for (i = 1; i < 9; ++i) {
+    if (S[i] < S[mini]) mini = i;
+  }
+  for (i = 0; i < 9; i++) mat[i] = V[i * 9 + mini];
+  denormalizeHomography(mat, T1, T2);
+  free(a);
+  if (mat[8] == 0.0) return 1;
+  return 0;
+}
+
+int findHomographyScale1(const int np, double *pts1, double *pts2,
+                         double *mat) {
+  // This implementation assumes h33 = 1, but does not seem to give good results
+  // Returns 0 on success, 1 if allocation or PseudoInverse() fails.
+  const int np2 = np * 2;
+  double *a = (double *)malloc(sizeof(*a) * np2 * 17);
+  double *b, *temp;
+  int i, j;
+  double sx, sy, dx, dy;
+  double T1[9], T2[9];
+
+  if (a == NULL) return 1;  // nothing allocated, nothing to free
+  b = a + np2 * 8;
+  temp = b + np2;
+  normalizeHomography(pts1, np, T1);
+  normalizeHomography(pts2, np, T2);
+  for (i = 0, j = np; i < np; ++i, ++j) {
+    dx = *(pts2++);
+    dy = *(pts2++);
+    sx = *(pts1++);
+    sy = *(pts1++);
+    a[i * 8 + 0] = a[j * 8 + 3] = sx;
+    a[i * 8 + 1] = a[j * 8 + 4] = sy;
+    a[i * 8 + 2] = a[j * 8 + 5] = 1;
+    a[i * 8 + 3] = a[i * 8 + 4] = a[i * 8 + 5] = a[j * 8 + 0] = a[j * 8 + 1] =
+        a[j * 8 + 2] = 0;
+    a[i * 8 + 6] = -dx * sx;
+    a[i * 8 + 7] = -dx * sy;
+    a[j * 8 + 6] = -dy * sx;
+    a[j * 8 + 7] = -dy * sy;
+    b[i] = dx;
+    b[j] = dy;
+  }
+  if (PseudoInverse(temp, a, np2, 8)) {
+    free(a);
+    return 1;
+  }
+  MultiplyMat(temp, b, mat, 8, np2, 1);
+  mat[8] = 1;
+  denormalizeHomography(mat, T1, T2);
+  free(a);
+  return 0;
+}
+
+int ransacTranslation(double *matched_points, int npoints,
+ int *number_of_inliers, int *best_inlier_mask,
+ double *bestH) {
+ return ransac_(matched_points, npoints, number_of_inliers, best_inlier_mask,
+ bestH, 3, 2, isDegenerateTranslation,
+ NULL, // normalize hook unused: findTranslation() normalizes internally
+ NULL, // denormalize hook unused: findTranslation() denormalizes its result
+ findTranslation, projectPointsTranslation, TRANSLATION);
+}
+
+int ransacRotZoom(double *matched_points, int npoints, int *number_of_inliers,
+ int *best_inlier_mask, double *bestH) {
+ return ransac_(matched_points, npoints, number_of_inliers, best_inlier_mask,
+ bestH, 3, 4, isDegenerateAffine,
+ NULL, // normalize hook unused: findRotZoom() normalizes internally
+ NULL, // denormalize hook unused: findRotZoom() denormalizes its result
+ findRotZoom, projectPointsRotZoom, ROTZOOM);
+}
+
+int ransacAffine(double *matched_points, int npoints, int *number_of_inliers,
+ int *best_inlier_mask, double *bestH) {
+ return ransac_(matched_points, npoints, number_of_inliers, best_inlier_mask,
+ bestH, 3, 6, isDegenerateAffine,
+ NULL, // normalize hook unused: findAffine() normalizes internally
+ NULL, // denormalize hook unused: findAffine() denormalizes its result
+ findAffine, projectPointsAffine, AFFINE);
+}
+
+int ransacHomography(double *matched_points, int npoints,
+                     int *number_of_inliers, int *best_inlier_mask,
+                     double *bestH) {
+  // Normalization hooks are NULL: findHomography() normalizes internally.
+  const int status = ransac_(
+      matched_points, npoints, number_of_inliers, best_inlier_mask, bestH, 4, 8,
+      isDegenerateHomography, NULL, NULL, findHomography,
+      projectPointsHomography, HOMOGRAPHY);
+  if (status == 0) {
+    // Rescale so that bestH[8] (H33) is exactly 1.
+    const double inv_h33 = 1.0 / bestH[8];
+    int k;
+    for (k = 0; k < 8; ++k) bestH[k] *= inv_h33;
+    bestH[8] = 1.0;
+  }
+  return status;
+}
diff --git a/av1/encoder/ransac.h b/av1/encoder/ransac.h
new file mode 100644
index 0000000..0b14ecf
--- /dev/null
+++ b/av1/encoder/ransac.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_RANSAC_H_
+#define VP10_ENCODER_RANSAC_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <memory.h>
+
+#include "av1/common/warped_motion.h"
+
+typedef int (*RansacType)(double *matched_points, int npoints,
+                          int *number_of_inliers, int *best_inlier_mask,
+                          double *bestH);
+
+/* Each of these functions fits a motion model from a set of
+corresponding points in 2 frames using RANSAC. All match RansacType. */
+int ransacHomography(double *matched_points, int npoints,
+                     int *number_of_inliers, int *best_inlier_mask,
+                     double *bestH);
+int ransacAffine(double *matched_points, int npoints, int *number_of_inliers,
+                 int *best_inlier_mask, double *bestH);
+int ransacRotZoom(double *matched_points, int npoints, int *number_of_inliers,
+                  int *best_inlier_mask, double *bestH);
+int ransacTranslation(double *matched_points, int npoints,
+                      int *number_of_inliers, int *best_inlier_mask,
+                      double *bestH);
+#endif  // VP10_ENCODER_RANSAC_H_
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
new file mode 100644
index 0000000..ddd5762
--- /dev/null
+++ b/av1/encoder/ratectrl.c
@@ -0,0 +1,1757 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/system_state.h"
+
+#include "av1/common/alloccommon.h"
+#include "av1/encoder/aq_cyclicrefresh.h"
+#include "av1/common/common.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/seg_common.h"
+
+#include "av1/encoder/encodemv.h"
+#include "av1/encoder/ratectrl.h"
+
+// Max rate target for 1080P and below encodes under normal circumstances:
+// (1920 * 1080 / (16 * 16)) MBs * MAX_MB_RATE bits per MB = MAXRATE_1080P
+#define MAX_MB_RATE 250
+#define MAXRATE_1080P 2025000
+
+#define DEFAULT_KF_BOOST 2000
+#define DEFAULT_GF_BOOST 2000
+
+#define MIN_BPB_FACTOR 0.005 // lower clamp for rate correction factors
+#define MAX_BPB_FACTOR 50 // upper clamp for rate correction factors
+
+#define FRAME_OVERHEAD_BITS 200 // floor applied by vp10_estimate_bits_at_q()
+#if CONFIG_VP9_HIGHBITDEPTH // point `name` at name##_8, name##_10 or name##_12
+#define ASSIGN_MINQ_TABLE(bit_depth, name) \
+ do { \
+ switch (bit_depth) { \
+ case VPX_BITS_8: name = name##_8; break; \
+ case VPX_BITS_10: name = name##_10; break; \
+ case VPX_BITS_12: name = name##_12; break; \
+ default: \
+ assert(0 && \
+ "bit_depth should be VPX_BITS_8, VPX_BITS_10" \
+ " or VPX_BITS_12"); \
+ name = NULL; \
+ } \
+ } while (0)
+#else // only the _8 tables exist; bit_depth is evaluated and discarded
+#define ASSIGN_MINQ_TABLE(bit_depth, name) \
+ do { \
+ (void) bit_depth; \
+ name = name##_8; \
+ } while (0)
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+// Tables relating active max Q to active min Q; filled by init_minq_luts().
+static int kf_low_motion_minq_8[QINDEX_RANGE];
+static int kf_high_motion_minq_8[QINDEX_RANGE];
+static int arfgf_low_motion_minq_8[QINDEX_RANGE];
+static int arfgf_high_motion_minq_8[QINDEX_RANGE];
+static int inter_minq_8[QINDEX_RANGE];
+static int rtc_minq_8[QINDEX_RANGE];
+
+#if CONFIG_VP9_HIGHBITDEPTH // same six tables for 10-bit and 12-bit
+static int kf_low_motion_minq_10[QINDEX_RANGE];
+static int kf_high_motion_minq_10[QINDEX_RANGE];
+static int arfgf_low_motion_minq_10[QINDEX_RANGE];
+static int arfgf_high_motion_minq_10[QINDEX_RANGE];
+static int inter_minq_10[QINDEX_RANGE];
+static int rtc_minq_10[QINDEX_RANGE];
+static int kf_low_motion_minq_12[QINDEX_RANGE];
+static int kf_high_motion_minq_12[QINDEX_RANGE];
+static int arfgf_low_motion_minq_12[QINDEX_RANGE];
+static int arfgf_high_motion_minq_12[QINDEX_RANGE];
+static int inter_minq_12[QINDEX_RANGE];
+static int rtc_minq_12[QINDEX_RANGE];
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static int gf_high = 2000; // gfu_boost above this selects the low-motion arfgf table
+static int gf_low = 400; // gfu_boost below this selects the high-motion arfgf table
+static int kf_high = 5000; // kf_boost above this selects the low-motion kf table
+static int kf_low = 400; // kf_boost below this selects the high-motion kf table
+
+// Functions to compute the active minq lookup table entries based on a
+// formulaic approach to facilitate easier adjustment of the Q tables.
+// The formulae were derived from computing a 3rd order polynomial best
+// fit to the original data (after plotting real maxq vs minq (not q index))
+static int get_minq_index(double maxq, double x3, double x2, double x1,
+                          vpx_bit_depth_t bit_depth) {
+  // Cubic x3*maxq^3 + x2*maxq^2 + x1*maxq in Horner form, capped at maxq.
+  const double cubic = ((x3 * maxq + x2) * maxq + x1) * maxq;
+  const double target_q = VPXMIN(cubic, maxq);
+  int qindex;
+
+  // Targets of 2.0 or less go to index 0 (the step from q 2.0 to lossless).
+  if (target_q <= 2.0) return 0;
+  // Otherwise: first index whose real Q reaches the target, else the last.
+  for (qindex = 0; qindex < QINDEX_RANGE - 1; ++qindex) {
+    if (target_q <= vp10_convert_qindex_to_q(qindex, bit_depth)) break;
+  }
+  return qindex;
+}
+
+static void init_minq_luts(int *kf_low_m, int *kf_high_m, int *arfgf_low,
+                           int *arfgf_high, int *inter, int *rtc,
+                           vpx_bit_depth_t bit_depth) {
+  int qi = QINDEX_RANGE;  // every table is indexed by the max-Q index
+  while (qi-- > 0) {
+    const double q = vp10_convert_qindex_to_q(qi, bit_depth);
+    inter[qi] = get_minq_index(q, 0.00000271, -0.00113, 0.90, bit_depth);
+    rtc[qi] = get_minq_index(q, 0.00000271, -0.00113, 0.70, bit_depth);
+    kf_low_m[qi] = get_minq_index(q, 0.000001, -0.0004, 0.150, bit_depth);
+    kf_high_m[qi] = get_minq_index(q, 0.0000021, -0.00125, 0.55, bit_depth);
+    arfgf_low[qi] = get_minq_index(q, 0.0000015, -0.0009, 0.30, bit_depth);
+    arfgf_high[qi] = get_minq_index(q, 0.0000021, -0.00125, 0.55, bit_depth);
+  }
+}
+
+void vp10_rc_init_minq_luts(void) { // fills the file-scope min-Q tables, one set per bit depth
+ init_minq_luts(kf_low_motion_minq_8, kf_high_motion_minq_8,
+ arfgf_low_motion_minq_8, arfgf_high_motion_minq_8,
+ inter_minq_8, rtc_minq_8, VPX_BITS_8);
+#if CONFIG_VP9_HIGHBITDEPTH // 10-bit and 12-bit tables exist only in this configuration
+ init_minq_luts(kf_low_motion_minq_10, kf_high_motion_minq_10,
+ arfgf_low_motion_minq_10, arfgf_high_motion_minq_10,
+ inter_minq_10, rtc_minq_10, VPX_BITS_10);
+ init_minq_luts(kf_low_motion_minq_12, kf_high_motion_minq_12,
+ arfgf_low_motion_minq_12, arfgf_high_motion_minq_12,
+ inter_minq_12, rtc_minq_12, VPX_BITS_12);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+}
+
+// These functions use formulaic calculations to make playing with the
+// quantizer tables easier. If necessary they can be replaced by lookup
+// tables if and when things settle down in the experimental bitstream
+double vp10_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth) {
+// Convert the index to a real Q value (scaled down to match old Q values):
+#if CONFIG_VP9_HIGHBITDEPTH // divisor is 4, 16 or 64 for 8-, 10- or 12-bit
+ switch (bit_depth) {
+ case VPX_BITS_8: return vp10_ac_quant(qindex, 0, bit_depth) / 4.0;
+ case VPX_BITS_10: return vp10_ac_quant(qindex, 0, bit_depth) / 16.0;
+ case VPX_BITS_12: return vp10_ac_quant(qindex, 0, bit_depth) / 64.0;
+ default:
+ assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ return -1.0; // only reached when the assert above is compiled out
+ }
+#else // without high bit depth the divisor is always 4
+ return vp10_ac_quant(qindex, 0, bit_depth) / 4.0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+}
+
+int vp10_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
+                        double correction_factor, vpx_bit_depth_t bit_depth) {
+  const double q = vp10_convert_qindex_to_q(qindex, bit_depth);
+  const int base = (frame_type == KEY_FRAME) ? 2700000 : 1800000;
+  // q based adjustment to baseline enumerator: add (int)(base * q) >> 12.
+  const int enumerator = base + ((int)(base * q) >> 12);
+
+  assert(correction_factor <= MAX_BPB_FACTOR &&
+         correction_factor >= MIN_BPB_FACTOR);
+
+  return (int)(enumerator * correction_factor / q);
+}
+
+int vp10_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs,
+                            double correction_factor,
+                            vpx_bit_depth_t bit_depth) {
+  const int bpm =
+      (int)(vp10_rc_bits_per_mb(frame_type, q, correction_factor, bit_depth));
+  return VPXMAX(FRAME_OVERHEAD_BITS,  // shift the 64-bit product, then narrow
+                (int)(((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS));
+}
+
+int vp10_rc_clamp_pframe_target_size(const VP10_COMP *const cpi, int target) {
+ const RATE_CONTROL *rc = &cpi->rc;
+ const VP10EncoderConfig *oxcf = &cpi->oxcf;
+ const int min_frame_target = // larger of min_frame_bandwidth and avg_frame_bandwidth / 32
+ VPXMAX(rc->min_frame_bandwidth, rc->avg_frame_bandwidth >> 5);
+// Clip the frame target to the minimum setup value.
+#if CONFIG_EXT_REFS
+ if (cpi->rc.is_src_frame_alt_ref) {
+#else
+ if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) {
+#endif // CONFIG_EXT_REFS
+ // If there is an active ARF at this location use the minimum
+ // bits on this frame even if it is a constructed arf.
+ // The active maximum quantizer ensures that an appropriate
+ // number of bits will be spent if needed for constructed ARFs.
+ target = min_frame_target;
+ } else if (target < min_frame_target) {
+ target = min_frame_target;
+ }
+
+ // Clip the frame target to the maximum allowed value.
+ if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth;
+ if (oxcf->rc_max_inter_bitrate_pct) { // optional cap, skipped when the percentage is 0
+ const int max_rate =
+ rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100;
+ target = VPXMIN(target, max_rate);
+ }
+
+ return target;
+}
+
+int vp10_rc_clamp_iframe_target_size(const VP10_COMP *const cpi, int target) {
+  // Applies the optional intra percentage cap, then max_frame_bandwidth.
+  const VP10EncoderConfig *const cfg = &cpi->oxcf;
+  const RATE_CONTROL *const rate = &cpi->rc;
+  if (cfg->rc_max_intra_bitrate_pct) {
+    const int pct_cap =
+        rate->avg_frame_bandwidth * cfg->rc_max_intra_bitrate_pct / 100;
+    if (pct_cap < target) target = pct_cap;
+  }
+  return VPXMIN(target, rate->max_frame_bandwidth);
+}
+
+// Update the buffer level: leaky bucket model.
+static void update_buffer_level(VP10_COMP *cpi, int encoded_frame_size) {
+ const VP10_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+
+// Non-viewable frames are a special case and are treated as pure overhead.
+#if CONFIG_EXT_REFS
+ // TODO(zoeliu): To further explore whether we should treat BWDREF_FRAME
+ // differently, since it is a no-show frame.
+ if (!cm->show_frame && !rc->is_bwd_ref_frame)
+#else
+ if (!cm->show_frame)
+#endif // CONFIG_EXT_REFS
+ rc->bits_off_target -= encoded_frame_size; // spend only: no per-frame budget is credited
+ else
+ rc->bits_off_target += rc->avg_frame_bandwidth - encoded_frame_size; // credit one frame's budget, debit the actual size
+
+ // Clip the buffer level to the maximum specified buffer size.
+ rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
+ rc->buffer_level = rc->bits_off_target; // no lower clamp is applied here
+}
+
+int vp10_rc_get_default_min_gf_interval(int width, int height,
+                                        double framerate) {
+  // Assume we do not need any constraint lower than 4K 20 fps
+  static const double safe_rate = 3840 * 2160 * 20.0;
+  const double pixel_rate = width * height * framerate;
+  const int base =
+      clamp((int)(framerate * 0.125), MIN_GF_INTERVAL, MAX_GF_INTERVAL);
+  int scaled;
+
+  if (pixel_rate <= safe_rate) return base;
+  // Above that, scale MIN_GF_INTERVAL with the pixel rate (rounded).
+  // Note this logic makes:
+  //   4K24: 5
+  //   4K30: 6
+  //   4K60: 12
+  scaled = (int)(MIN_GF_INTERVAL * pixel_rate / safe_rate + 0.5);
+  return VPXMAX(base, scaled);
+}
+
+int vp10_rc_get_default_max_gf_interval(double framerate, int min_gf_interval) {
+  const int capped = VPXMIN(MAX_GF_INTERVAL, (int)(framerate * 0.75));
+  const int even = capped + (capped % 2 != 0);  // odd values round up to even
+  return VPXMAX(even, min_gf_interval);
+}
+
+void vp10_rc_init(const VP10EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { // resets *rc from the encoder config
+ int i;
+
+ if (pass == 0 && oxcf->rc_mode == VPX_CBR) { // pass 0 with CBR: start both averages at the worst allowed q
+ rc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q;
+ rc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
+ } else { // otherwise start midway between worst and best
+ rc->avg_frame_qindex[KEY_FRAME] =
+ (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2;
+ rc->avg_frame_qindex[INTER_FRAME] =
+ (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2;
+ }
+
+ rc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
+ rc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
+
+ rc->buffer_level = rc->starting_buffer_level; // read from *rc itself, so the caller must have set it already
+ rc->bits_off_target = rc->starting_buffer_level;
+
+ rc->rolling_target_bits = rc->avg_frame_bandwidth; // likewise read from *rc, not from oxcf
+ rc->rolling_actual_bits = rc->avg_frame_bandwidth;
+ rc->long_rolling_target_bits = rc->avg_frame_bandwidth;
+ rc->long_rolling_actual_bits = rc->avg_frame_bandwidth;
+
+ rc->total_actual_bits = 0;
+ rc->total_target_bits = 0;
+ rc->total_target_vs_actual = 0;
+
+ rc->frames_since_key = 8; // Sensible default for first frame.
+ rc->this_key_frame_forced = 0;
+ rc->next_key_frame_forced = 0;
+ rc->source_alt_ref_pending = 0;
+ rc->source_alt_ref_active = 0;
+
+ rc->frames_till_gf_update_due = 0;
+ rc->ni_av_qi = oxcf->worst_allowed_q;
+ rc->ni_tot_qi = 0;
+ rc->ni_frames = 0;
+
+ rc->tot_q = 0.0;
+ rc->avg_q = vp10_convert_qindex_to_q(oxcf->worst_allowed_q, oxcf->bit_depth);
+
+ for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
+ rc->rate_correction_factors[i] = 1.0;
+ }
+
+ rc->min_gf_interval = oxcf->min_gf_interval;
+ rc->max_gf_interval = oxcf->max_gf_interval;
+ if (rc->min_gf_interval == 0) // 0 means: derive a default from frame size and frame rate
+ rc->min_gf_interval = vp10_rc_get_default_min_gf_interval(
+ oxcf->width, oxcf->height, oxcf->init_framerate);
+ if (rc->max_gf_interval == 0) // 0 means: derive a default from the frame rate
+ rc->max_gf_interval = vp10_rc_get_default_max_gf_interval(
+ oxcf->init_framerate, rc->min_gf_interval);
+ rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
+}
+
+int vp10_rc_drop_frame(VP10_COMP *cpi) {
+  // Returns 1 when the current frame should be dropped and 0 otherwise,
+  // updating rc->decimation_factor and rc->decimation_count along the way.
+  const VP10EncoderConfig *oxcf = &cpi->oxcf;
+  RATE_CONTROL *const rc = &cpi->rc;
+  int drop_mark;
+
+  // A zero water mark disables frame dropping altogether.
+  if (!oxcf->drop_frames_water_mark) return 0;
+
+  // Always drop if buffer is below 0.
+  if (rc->buffer_level < 0) return 1;
+
+  // If buffer is below drop_mark, for now just drop every other frame
+  // (starting with the next frame) until it increases back over drop_mark.
+  drop_mark =
+      (int)(oxcf->drop_frames_water_mark * rc->optimal_buffer_level / 100);
+  if (rc->buffer_level > drop_mark) {
+    if (rc->decimation_factor > 0) --rc->decimation_factor;
+  } else if (rc->decimation_factor == 0) {
+    rc->decimation_factor = 1;
+  }
+
+  if (rc->decimation_factor <= 0) {
+    rc->decimation_count = 0;
+    return 0;
+  }
+  if (rc->decimation_count > 0) {
+    --rc->decimation_count;
+    return 1;
+  }
+  // Countdown finished: keep this frame and reload the counter.
+  rc->decimation_count = rc->decimation_factor;
+  return 0;
+}
+
+static double get_rate_correction_factor(const VP10_COMP *cpi) { // selects the stored factor for this frame's category
+ const RATE_CONTROL *const rc = &cpi->rc;
+ double rcf;
+
+ if (cpi->common.frame_type == KEY_FRAME) {
+ rcf = rc->rate_correction_factors[KF_STD];
+ } else if (cpi->oxcf.pass == 2) { // pass 2: the level comes from the current gf_group entry
+ RATE_FACTOR_LEVEL rf_lvl =
+ cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
+ rcf = rc->rate_correction_factors[rf_lvl];
+ } else {
+ if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
+ !rc->is_src_frame_alt_ref &&
+ (cpi->oxcf.rc_mode != VPX_CBR || cpi->oxcf.gf_cbr_boost_pct > 20))
+ rcf = rc->rate_correction_factors[GF_ARF_STD];
+ else
+ rcf = rc->rate_correction_factors[INTER_NORMAL];
+ }
+ rcf *= rcf_mult[rc->frame_size_selector]; // size-dependent scaling; set_rate_correction_factor() divides it back out
+ return fclamp(rcf, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
+}
+
+static void set_rate_correction_factor(VP10_COMP *cpi, double factor) { // stores factor in the slot get_rate_correction_factor() reads
+ RATE_CONTROL *const rc = &cpi->rc;
+
+ // Normalize RCF to account for the size-dependent scaling factor.
+ factor /= rcf_mult[cpi->rc.frame_size_selector];
+
+ factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
+
+ if (cpi->common.frame_type == KEY_FRAME) {
+ rc->rate_correction_factors[KF_STD] = factor;
+ } else if (cpi->oxcf.pass == 2) { // pass 2: the level comes from the current gf_group entry
+ RATE_FACTOR_LEVEL rf_lvl =
+ cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
+ rc->rate_correction_factors[rf_lvl] = factor;
+ } else {
+ if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
+ !rc->is_src_frame_alt_ref &&
+ (cpi->oxcf.rc_mode != VPX_CBR || cpi->oxcf.gf_cbr_boost_pct > 20))
+ rc->rate_correction_factors[GF_ARF_STD] = factor;
+ else
+ rc->rate_correction_factors[INTER_NORMAL] = factor;
+ }
+}
+
+void vp10_rc_update_rate_correction_factors(VP10_COMP *cpi) {
+ const VP10_COMMON *const cm = &cpi->common;
+ int correction_factor = 100; // percent: actual frame size relative to projected size
+ double rate_correction_factor = get_rate_correction_factor(cpi);
+ double adjustment_limit;
+
+ int projected_size_based_on_q = 0;
+
+ // Do not update the rate factors for arf overlay frames.
+ if (cpi->rc.is_src_frame_alt_ref) return;
+
+ // Clear down mmx registers to allow floating point in what follows
+ vpx_clear_system_state();
+
+ // Work out how big we would have expected the frame to be at this Q given
+ // the current correction factor.
+ // NOTE(review): the projection below is stored in an int, not a double.
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->common.seg.enabled) {
+ projected_size_based_on_q =
+ vp10_cyclic_refresh_estimate_bits_at_q(cpi, rate_correction_factor);
+ } else {
+ projected_size_based_on_q =
+ vp10_estimate_bits_at_q(cpi->common.frame_type, cm->base_qindex,
+ cm->MBs, rate_correction_factor, cm->bit_depth);
+ }
+ // Work out a size correction factor.
+ if (projected_size_based_on_q > FRAME_OVERHEAD_BITS) // otherwise correction_factor stays at 100
+ correction_factor = (int)((100 * (int64_t)cpi->rc.projected_frame_size) /
+ projected_size_based_on_q);
+
+ // More heavily damped adjustment used if we have been oscillating either side
+ // of target.
+ adjustment_limit = // always within [0.25, 0.75]
+ 0.25 + 0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor)));
+
+ cpi->rc.q_2_frame = cpi->rc.q_1_frame; // shift the one-frame history into the two-frame slot
+ cpi->rc.q_1_frame = cm->base_qindex;
+ cpi->rc.rc_2_frame = cpi->rc.rc_1_frame;
+ if (correction_factor > 110) // more than 10% over the projection
+ cpi->rc.rc_1_frame = -1;
+ else if (correction_factor < 90) // more than 10% under the projection
+ cpi->rc.rc_1_frame = 1;
+ else
+ cpi->rc.rc_1_frame = 0;
+
+ if (correction_factor > 102) {
+ // We are not already at the worst allowable quality
+ correction_factor =
+ (int)(100 + ((correction_factor - 100) * adjustment_limit));
+ rate_correction_factor = (rate_correction_factor * correction_factor) / 100;
+ // Keep rate_correction_factor within limits
+ if (rate_correction_factor > MAX_BPB_FACTOR)
+ rate_correction_factor = MAX_BPB_FACTOR;
+ } else if (correction_factor < 99) {
+ // We are not already at the best allowable quality
+ correction_factor =
+ (int)(100 - ((100 - correction_factor) * adjustment_limit));
+ rate_correction_factor = (rate_correction_factor * correction_factor) / 100;
+
+ // Keep rate_correction_factor within limits
+ if (rate_correction_factor < MIN_BPB_FACTOR)
+ rate_correction_factor = MIN_BPB_FACTOR;
+ }
+
+ set_rate_correction_factor(cpi, rate_correction_factor);
+}
+
+int vp10_rc_regulate_q(const VP10_COMP *cpi, int target_bits_per_frame,
+ int active_best_quality, int active_worst_quality) {
+ const VP10_COMMON *const cm = &cpi->common;
+ int q = active_worst_quality; // result if no q in range meets the target
+ int last_error = INT_MAX;
+ int i, target_bits_per_mb, bits_per_mb_at_this_q;
+ const double correction_factor = get_rate_correction_factor(cpi);
+
+ // Calculate required scaling factor based on target frame size and size of
+ // frame produced using previous Q.
+ target_bits_per_mb =
+ ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs;
+
+ i = active_best_quality;
+
+ do { // scan q upward from active_best_quality to active_worst_quality
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
+ bits_per_mb_at_this_q =
+ (int)vp10_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor);
+ } else {
+ bits_per_mb_at_this_q = (int)vp10_rc_bits_per_mb(
+ cm->frame_type, i, correction_factor, cm->bit_depth);
+ }
+
+ if (bits_per_mb_at_this_q <= target_bits_per_mb) { // first q at or under the target
+ if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error)
+ q = i;
+ else
+ q = i - 1; // the previous q overshot by less than this one undershoots
+
+ break;
+ } else {
+ last_error = bits_per_mb_at_this_q - target_bits_per_mb;
+ }
+ } while (++i <= active_worst_quality);
+
+ // In CBR mode, this makes sure q is between oscillating Qs to prevent
+ // resonance.
+ if (cpi->oxcf.rc_mode == VPX_CBR &&
+ (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) &&
+ cpi->rc.q_1_frame != cpi->rc.q_2_frame) {
+ q = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
+ VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
+ }
+ return q;
+}
+
+static int get_active_quality(int q, int gfu_boost, int low, int high,
+                              int *low_motion_minq, int *high_motion_minq) {
+  // Boosts above `high` take the low-motion entry, boosts below `low` the
+  // high-motion entry; anything in between is a rounded linear blend.
+  int span, lo_q, hi_q;
+
+  if (gfu_boost > high) return low_motion_minq[q];
+  if (gfu_boost < low) return high_motion_minq[q];
+
+  span = high - low;
+  lo_q = low_motion_minq[q];
+  hi_q = high_motion_minq[q];
+  return lo_q + ((high - gfu_boost) * (hi_q - lo_q) + (span >> 1)) / span;
+}
+
+static int get_kf_active_quality(const RATE_CONTROL *const rc, int q,
+ vpx_bit_depth_t bit_depth) {
+ int *kf_low_motion_minq; // locals named after the tables so the macro can append _8/_10/_12
+ int *kf_high_motion_minq;
+ ASSIGN_MINQ_TABLE(bit_depth, kf_low_motion_minq);
+ ASSIGN_MINQ_TABLE(bit_depth, kf_high_motion_minq);
+ return get_active_quality(q, rc->kf_boost, kf_low, kf_high,
+ kf_low_motion_minq, kf_high_motion_minq);
+}
+
+static int get_gf_active_quality(const RATE_CONTROL *const rc, int q,
+ vpx_bit_depth_t bit_depth) {
+ int *arfgf_low_motion_minq; // locals named after the tables so the macro can append _8/_10/_12
+ int *arfgf_high_motion_minq;
+ ASSIGN_MINQ_TABLE(bit_depth, arfgf_low_motion_minq);
+ ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq);
+ return get_active_quality(q, rc->gfu_boost, gf_low, gf_high,
+ arfgf_low_motion_minq, arfgf_high_motion_minq);
+}
+
+static int calc_active_worst_quality_one_pass_vbr(const VP10_COMP *cpi) {
+  // Derives a worst-quality ceiling from the last key/inter frame Q, then
+  // caps it at rc->worst_quality.
+  const RATE_CONTROL *const rc = &cpi->rc;
+  const unsigned int frame_no = cpi->common.current_video_frame;
+  int ceiling;
+
+  if (cpi->common.frame_type == KEY_FRAME) {
+    ceiling = (frame_no == 0) ? rc->worst_quality : rc->last_q[KEY_FRAME] * 2;
+  } else if (!rc->is_src_frame_alt_ref &&
+             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+    // Golden or alt-ref refresh whose source is not an alt-ref frame.
+    ceiling = (frame_no == 1) ? rc->last_q[KEY_FRAME] * 5 / 4
+                              : rc->last_q[INTER_FRAME];
+  } else {
+    ceiling = (frame_no == 1) ? rc->last_q[KEY_FRAME] * 2
+                              : rc->last_q[INTER_FRAME] * 2;
+  }
+  return VPXMIN(ceiling, rc->worst_quality);
+}
+
+// Adjust active_worst_quality level based on buffer level.
+static int calc_active_worst_quality_one_pass_cbr(const VP10_COMP *cpi) {
+ // Adjust active_worst_quality: If buffer is above the optimal/target level,
+ // bring active_worst_quality down depending on fullness of buffer.
+ // If buffer is below the optimal level, let the active_worst_quality go from
+ // ambient Q (at buffer = optimal level) to worst_quality level
+ // (at buffer = critical level).
+ const VP10_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *rc = &cpi->rc;
+ // Buffer level below which we push active_worst to worst_quality.
+ int64_t critical_level = rc->optimal_buffer_level >> 3; // one eighth of the optimal level
+ int64_t buff_lvl_step = 0;
+ int adjustment = 0;
+ int active_worst_quality;
+ int ambient_qp;
+ if (cm->frame_type == KEY_FRAME) return rc->worst_quality;
+ // For ambient_qp we use minimum of avg_frame_qindex[KEY_FRAME/INTER_FRAME]
+ // for the first few frames following key frame. These are both initialized
+ // to worst_quality and updated with (3/4, 1/4) average in postencode_update.
+ // So for first few frames following key, the qp of that key frame is weighted
+ // into the active_worst_quality setting.
+ ambient_qp = (cm->current_video_frame < 5)
+ ? VPXMIN(rc->avg_frame_qindex[INTER_FRAME],
+ rc->avg_frame_qindex[KEY_FRAME])
+ : rc->avg_frame_qindex[INTER_FRAME];
+ active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 / 4);
+ if (rc->buffer_level > rc->optimal_buffer_level) {
+ // Adjust down.
+ // Maximum limit for down adjustment, ~30%.
+ int max_adjustment_down = active_worst_quality / 3;
+ if (max_adjustment_down) { // also guards the division below
+ buff_lvl_step = ((rc->maximum_buffer_size - rc->optimal_buffer_level) /
+ max_adjustment_down);
+ if (buff_lvl_step) // a zero step leaves adjustment at 0
+ adjustment = (int)((rc->buffer_level - rc->optimal_buffer_level) /
+ buff_lvl_step);
+ active_worst_quality -= adjustment;
+ }
+ } else if (rc->buffer_level > critical_level) {
+ // Adjust up from ambient Q.
+ if (critical_level) {
+ buff_lvl_step = (rc->optimal_buffer_level - critical_level);
+ if (buff_lvl_step) {
+ adjustment = (int)((rc->worst_quality - ambient_qp) *
+ (rc->optimal_buffer_level - rc->buffer_level) /
+ buff_lvl_step);
+ }
+ active_worst_quality = ambient_qp + adjustment;
+ }
+ } else {
+ // Set to worst_quality if buffer is below critical level.
+ active_worst_quality = rc->worst_quality;
+ }
+ return active_worst_quality;
+}
+
+// Picks the frame qindex for one-pass CBR and reports the allowed range.
+//   cpi          - encoder instance (read only).
+//   bottom_index - out: lowest qindex allowed for this frame.
+//   top_index    - out: highest qindex allowed for this frame.
+// Returns the qindex chosen for the frame.
+static int rc_pick_q_and_bounds_one_pass_cbr(const VP10_COMP *cpi,
+                                             int *bottom_index,
+                                             int *top_index) {
+  const VP10_COMMON *const cm = &cpi->common;
+  const RATE_CONTROL *const rc = &cpi->rc;
+  const int intra_only = frame_is_intra_only(cm);
+  const int gf_or_arf_refresh =
+      !rc->is_src_frame_alt_ref &&
+      (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
+  const int forced_key_frame =
+      cm->frame_type == KEY_FRAME && rc->this_key_frame_forced;
+  int active_best_quality;
+  int active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi);
+  int q;
+  int *rtc_minq;
+  ASSIGN_MINQ_TABLE(cm->bit_depth, rtc_minq);
+
+  if (intra_only) {
+    active_best_quality = rc->best_quality;
+    if (rc->this_key_frame_forced) {
+      // Key frame forced by the maximum key frame interval: tie the lower
+      // bound to the last boosted Q to reduce the risk of popping.
+      const int boosted_qindex = rc->last_boosted_qindex;
+      const double boosted_q =
+          vp10_convert_qindex_to_q(boosted_qindex, cm->bit_depth);
+      const int delta_qindex = vp10_compute_qdelta(
+          rc, boosted_q, (boosted_q * 0.75), cm->bit_depth);
+      active_best_quality =
+          VPXMAX(boosted_qindex + delta_qindex, rc->best_quality);
+    } else if (cm->current_video_frame > 0) {
+      // Not the first frame: derive the bound from the key frame average Q.
+      // Small image formats (up to 352x288) get a somewhat lower kf minq.
+      const double q_adj_factor =
+          ((cm->width * cm->height) <= (352 * 288)) ? 0.75 : 1.0;
+      double q_val;
+
+      active_best_quality = get_kf_active_quality(
+          rc, rc->avg_frame_qindex[KEY_FRAME], cm->bit_depth);
+
+      // Convert the adjustment factor to a qindex delta
+      // on active_best_quality.
+      q_val = vp10_convert_qindex_to_q(active_best_quality, cm->bit_depth);
+      active_best_quality +=
+          vp10_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
+    }
+  } else if (gf_or_arf_refresh) {
+    // Use the lower of active_worst_quality and the recent average Q as the
+    // basis for the GF/ARF best Q limit, unless the last frame was a key
+    // frame.
+    if (rc->frames_since_key > 1 &&
+        rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
+      q = rc->avg_frame_qindex[INTER_FRAME];
+    } else {
+      q = active_worst_quality;
+    }
+    active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+  } else {
+    // Normal inter frame: look up the lower of active_worst_quality and the
+    // recent average Q (key frame average right after a key frame).
+    const int avg_q = (cm->current_video_frame > 1)
+                          ? rc->avg_frame_qindex[INTER_FRAME]
+                          : rc->avg_frame_qindex[KEY_FRAME];
+    if (avg_q < active_worst_quality)
+      active_best_quality = rtc_minq[avg_q];
+    else
+      active_best_quality = rtc_minq[active_worst_quality];
+  }
+
+  // Clip the active best and worst quality values to limits.
+  active_best_quality =
+      clamp(active_best_quality, rc->best_quality, rc->worst_quality);
+  active_worst_quality =
+      clamp(active_worst_quality, active_best_quality, rc->worst_quality);
+
+  *top_index = active_worst_quality;
+  *bottom_index = active_best_quality;
+
+  // Limit Q range for the adaptive loop on non-forced key frames other than
+  // the first frame.
+  if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced &&
+      cm->current_video_frame != 0) {
+    int qdelta;
+    int limited_top;
+    vpx_clear_system_state();
+    qdelta = vp10_compute_qdelta_by_rate(
+        &cpi->rc, cm->frame_type, active_worst_quality, 2.0, cm->bit_depth);
+    limited_top = active_worst_quality + qdelta;
+    *top_index = VPXMAX(limited_top, *bottom_index);
+  }
+
+  if (forced_key_frame) {
+    // Special case code to try and match quality with forced key frames.
+    q = rc->last_boosted_qindex;
+  } else {
+    q = vp10_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
+                           active_worst_quality);
+    if (q > *top_index) {
+      // When targeting the max allowed rate the bound follows q; otherwise
+      // q is pulled back to the bound.
+      if (rc->this_frame_target >= rc->max_frame_bandwidth)
+        *top_index = q;
+      else
+        q = *top_index;
+    }
+  }
+
+  assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality);
+  assert(*bottom_index <= rc->worst_quality &&
+         *bottom_index >= rc->best_quality);
+  assert(q <= rc->worst_quality && q >= rc->best_quality);
+  return q;
+}
+
+// Returns the constrained-quality level to use for the current frame.
+// In VPX_CQ mode, when fewer than 10% of the target bits accumulated so far
+// have actually been spent, the configured level is scaled down in
+// proportion to that ratio. In every other case oxcf->cq_level is returned
+// unchanged.
+static int get_active_cq_level(const RATE_CONTROL *rc,
+                               const VP10EncoderConfig *const oxcf) {
+  static const double cq_adjust_threshold = 0.1;
+  const int configured_level = oxcf->cq_level;
+  double spend_ratio;
+
+  if (oxcf->rc_mode != VPX_CQ || rc->total_target_bits <= 0)
+    return configured_level;
+
+  spend_ratio = (double)rc->total_actual_bits / rc->total_target_bits;
+  if (spend_ratio < cq_adjust_threshold)
+    return (int)(configured_level * spend_ratio / cq_adjust_threshold);
+
+  return configured_level;
+}
+
+// Picks the frame qindex for one-pass VBR / CQ / Q modes and reports the
+// allowed range.
+//   cpi          - encoder instance (read only).
+//   bottom_index - out: lowest qindex allowed for this frame.
+//   top_index    - out: highest qindex allowed for this frame.
+// Returns the qindex chosen for the frame.
+static int rc_pick_q_and_bounds_one_pass_vbr(const VP10_COMP *cpi,
+                                             int *bottom_index,
+                                             int *top_index) {
+  const VP10_COMMON *const cm = &cpi->common;
+  const RATE_CONTROL *const rc = &cpi->rc;
+  const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+  const int cq_level = get_active_cq_level(rc, oxcf);
+  int active_best_quality;
+  int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi);
+  int q;
+  int *inter_minq;
+  ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq);
+
+  if (frame_is_intra_only(cm)) {
+    if (oxcf->rc_mode == VPX_Q) {
+      // Fixed Q mode: key frames target a quarter of the cq-level q.
+      const int qindex = cq_level;
+      const double cq_q = vp10_convert_qindex_to_q(qindex, cm->bit_depth);
+      const int delta_qindex =
+          vp10_compute_qdelta(rc, cq_q, cq_q * 0.25, cm->bit_depth);
+      active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
+    } else if (rc->this_key_frame_forced) {
+      // Forced key frame: tie the lower bound to the last boosted Q.
+      const int qindex = rc->last_boosted_qindex;
+      const double last_boosted_q =
+          vp10_convert_qindex_to_q(qindex, cm->bit_depth);
+      const int delta_qindex = vp10_compute_qdelta(
+          rc, last_boosted_q, last_boosted_q * 0.75, cm->bit_depth);
+      active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
+    } else {
+      // Derive the bound from the key frame average Q.
+      double q_adj_factor = 1.0;
+      double q_val;
+
+      active_best_quality = get_kf_active_quality(
+          rc, rc->avg_frame_qindex[KEY_FRAME], cm->bit_depth);
+
+      // Allow somewhat lower kf minq with small image formats.
+      if ((cm->width * cm->height) <= (352 * 288)) {
+        q_adj_factor -= 0.25;
+      }
+
+      // Convert the adjustment factor to a qindex delta
+      // on active_best_quality.
+      q_val = vp10_convert_qindex_to_q(active_best_quality, cm->bit_depth);
+      active_best_quality +=
+          vp10_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
+    }
+  } else if (!rc->is_src_frame_alt_ref &&
+             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+    // Use the recent inter average Q as basis for the GF/ARF best Q limit
+    // when it is below active_worst_quality and the last frame was not a
+    // key frame; otherwise fall back to the key frame average Q.
+    if (rc->frames_since_key > 1 &&
+        rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
+      q = rc->avg_frame_qindex[INTER_FRAME];
+    } else {
+      q = rc->avg_frame_qindex[KEY_FRAME];
+    }
+    // For constrained quality dont allow Q less than the cq level
+    if (oxcf->rc_mode == VPX_CQ) {
+      if (q < cq_level) q = cq_level;
+
+      active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+
+      // Constrained quality use slightly lower active best.
+      active_best_quality = active_best_quality * 15 / 16;
+
+    } else if (oxcf->rc_mode == VPX_Q) {
+      // Fixed Q mode: ARFs target 40% and GFs 50% of the cq-level q.
+      const int qindex = cq_level;
+      const double cq_q = vp10_convert_qindex_to_q(qindex, cm->bit_depth);
+      int delta_qindex;
+      if (cpi->refresh_alt_ref_frame)
+        delta_qindex =
+            vp10_compute_qdelta(rc, cq_q, cq_q * 0.40, cm->bit_depth);
+      else
+        delta_qindex =
+            vp10_compute_qdelta(rc, cq_q, cq_q * 0.50, cm->bit_depth);
+      active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
+    } else {
+      active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+    }
+  } else {
+    if (oxcf->rc_mode == VPX_Q) {
+      // Fixed Q mode: the q scale applied to a normal inter frame depends
+      // on the frame's position modulo FIXED_GF_INTERVAL.
+      static const double delta_rate[FIXED_GF_INTERVAL] = {
+        0.50, 1.0, 0.85, 1.0, 0.70, 1.0, 0.85, 1.0
+      };
+      const int qindex = cq_level;
+      const double cq_q = vp10_convert_qindex_to_q(qindex, cm->bit_depth);
+      const int delta_qindex = vp10_compute_qdelta(
+          rc, cq_q,
+          cq_q * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL],
+          cm->bit_depth);
+      active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
+    } else {
+      // Look up the bound from the recent average Q (key frame average
+      // right after a key frame).
+      if (cm->current_video_frame > 1)
+        active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+      else
+        active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+      // For the constrained quality mode we don't want
+      // q to fall below the cq level.
+      if ((oxcf->rc_mode == VPX_CQ) && (active_best_quality < cq_level)) {
+        active_best_quality = cq_level;
+      }
+    }
+  }
+
+  // Clip the active best and worst quality values to limits
+  active_best_quality =
+      clamp(active_best_quality, rc->best_quality, rc->worst_quality);
+  active_worst_quality =
+      clamp(active_worst_quality, active_best_quality, rc->worst_quality);
+
+  *top_index = active_worst_quality;
+  *bottom_index = active_best_quality;
+
+  // Limit Q range for the adaptive loop.
+  {
+    int qdelta = 0;
+    vpx_clear_system_state();
+    if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced &&
+        !(cm->current_video_frame == 0)) {
+      qdelta = vp10_compute_qdelta_by_rate(
+          &cpi->rc, cm->frame_type, active_worst_quality, 2.0, cm->bit_depth);
+    } else if (!rc->is_src_frame_alt_ref &&
+               (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+      qdelta = vp10_compute_qdelta_by_rate(
+          &cpi->rc, cm->frame_type, active_worst_quality, 1.75, cm->bit_depth);
+    }
+    *top_index = active_worst_quality + qdelta;
+    *top_index = VPXMAX(*top_index, *bottom_index);
+  }
+
+  if (oxcf->rc_mode == VPX_Q) {
+    q = active_best_quality;
+    // Special case code to try and match quality with forced key frames
+  } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) {
+    q = rc->last_boosted_qindex;
+  } else {
+    q = vp10_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
+                           active_worst_quality);
+    if (q > *top_index) {
+      // Special case when we are targeting the max allowed rate
+      if (rc->this_frame_target >= rc->max_frame_bandwidth)
+        *top_index = q;
+      else
+        q = *top_index;
+    }
+  }
+
+  assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality);
+  assert(*bottom_index <= rc->worst_quality &&
+         *bottom_index >= rc->best_quality);
+  assert(q <= rc->worst_quality && q >= rc->best_quality);
+  return q;
+}
+
+// Returns the qindex delta, relative to q, that corresponds to the rate
+// factor associated with rate-factor level rf_level. rf_level is used
+// directly as an index into tables of RATE_FACTOR_LEVELS entries; no range
+// check is performed here.
+int vp10_frame_type_qdelta(const VP10_COMP *cpi, int rf_level, int q) {
+  // Rate multiplier for each rate-factor level.
+  static const double level_rate_factor[RATE_FACTOR_LEVELS] = {
+    1.00,  // INTER_NORMAL
+#if CONFIG_EXT_REFS
+    0.80,  // INTER_LOW
+    1.50,  // INTER_HIGH
+    1.25,  // GF_ARF_LOW
+#else
+    1.00,  // INTER_HIGH
+    1.50,  // GF_ARF_LOW
+#endif  // CONFIG_EXT_REFS
+    2.00,  // GF_ARF_STD
+    2.00,  // KF_STD
+  };
+  // Frame type used for the bits-per-mb lookup of each rate-factor level:
+  // every level is an inter frame except the last one.
+  static const FRAME_TYPE level_frame_type[RATE_FACTOR_LEVELS] = {
+    INTER_FRAME,
+#if CONFIG_EXT_REFS
+    INTER_FRAME, INTER_FRAME, INTER_FRAME,
+#else
+    INTER_FRAME, INTER_FRAME,
+#endif  // CONFIG_EXT_REFS
+    INTER_FRAME,
+    KEY_FRAME,
+  };
+
+  return vp10_compute_qdelta_by_rate(&cpi->rc, level_frame_type[rf_level], q,
+                                     level_rate_factor[rf_level],
+                                     cpi->common.bit_depth);
+}
+
+// Zero-motion percentage of the last key frame group at or above which a
+// forced key frame is treated as following static content.
+#define STATIC_MOTION_THRESH 95
+
+// Picks the frame qindex for two-pass encoding and reports the allowed range.
+//   cpi          - encoder instance (read only).
+//   bottom_index - out: lowest qindex allowed for this frame.
+//   top_index    - out: highest qindex allowed for this frame.
+// Returns the qindex chosen for the frame, which always lies inside
+// [*bottom_index, *top_index].
+static int rc_pick_q_and_bounds_two_pass(const VP10_COMP *cpi,
+                                         int *bottom_index, int *top_index) {
+  const VP10_COMMON *const cm = &cpi->common;
+  const RATE_CONTROL *const rc = &cpi->rc;
+  const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
+  const int cq_level = get_active_cq_level(rc, oxcf);
+  int active_best_quality;
+  int active_worst_quality = cpi->twopass.active_worst_quality;
+  int q;
+  int *inter_minq;
+  ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq);
+
+  if (frame_is_intra_only(cm)) {
+    // Handle the special case for key frames forced when we have reached
+    // the maximum key frame interval. Here force the Q to a range
+    // based on the ambient Q to reduce the risk of popping.
+    if (rc->this_key_frame_forced) {
+      double last_boosted_q;
+      int delta_qindex;
+      int qindex;
+
+      if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
+        // Static content: use the better of the last kf / last boosted Q
+        // as the lower bound and cap the upper bound at 1.25x that q.
+        qindex = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
+        active_best_quality = qindex;
+        last_boosted_q = vp10_convert_qindex_to_q(qindex, cm->bit_depth);
+        delta_qindex = vp10_compute_qdelta(
+            rc, last_boosted_q, last_boosted_q * 1.25, cm->bit_depth);
+        active_worst_quality =
+            VPXMIN(qindex + delta_qindex, active_worst_quality);
+      } else {
+        qindex = rc->last_boosted_qindex;
+        last_boosted_q = vp10_convert_qindex_to_q(qindex, cm->bit_depth);
+        delta_qindex = vp10_compute_qdelta(
+            rc, last_boosted_q, last_boosted_q * 0.75, cm->bit_depth);
+        active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
+      }
+    } else {
+      // Not forced keyframe.
+      double q_adj_factor = 1.0;
+      double q_val;
+
+      // Baseline value derived from cpi->active_worst_quality and kf boost.
+      active_best_quality =
+          get_kf_active_quality(rc, active_worst_quality, cm->bit_depth);
+
+      // Allow somewhat lower kf minq with small image formats.
+      if ((cm->width * cm->height) <= (352 * 288)) {
+        q_adj_factor -= 0.25;
+      }
+
+      // Make a further adjustment based on the kf zero motion measure.
+      q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct);
+
+      // Convert the adjustment factor to a qindex delta
+      // on active_best_quality.
+      q_val = vp10_convert_qindex_to_q(active_best_quality, cm->bit_depth);
+      active_best_quality +=
+          vp10_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
+    }
+  } else if (!rc->is_src_frame_alt_ref &&
+             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+    // Use the lower of active_worst_quality and recent
+    // average Q as basis for GF/ARF best Q limit unless last frame was
+    // a key frame.
+    if (rc->frames_since_key > 1 &&
+        rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
+      q = rc->avg_frame_qindex[INTER_FRAME];
+    } else {
+      q = active_worst_quality;
+    }
+    // For constrained quality dont allow Q less than the cq level
+    if (oxcf->rc_mode == VPX_CQ) {
+      if (q < cq_level) q = cq_level;
+
+      active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+
+      // Constrained quality use slightly lower active best.
+      active_best_quality = active_best_quality * 15 / 16;
+
+    } else if (oxcf->rc_mode == VPX_Q) {
+      if (!cpi->refresh_alt_ref_frame) {
+        active_best_quality = cq_level;
+      } else {
+        active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+
+        // Modify best quality for second level arfs. For mode VPX_Q this
+        // becomes the baseline frame q.
+        if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)
+          active_best_quality = (active_best_quality + cq_level + 1) / 2;
+      }
+    } else {
+      active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+    }
+  } else {
+    if (oxcf->rc_mode == VPX_Q) {
+      active_best_quality = cq_level;
+    } else {
+      active_best_quality = inter_minq[active_worst_quality];
+
+      // For the constrained quality mode we don't want
+      // q to fall below the cq level.
+      if ((oxcf->rc_mode == VPX_CQ) && (active_best_quality < cq_level)) {
+        active_best_quality = cq_level;
+      }
+    }
+  }
+
+  // Extension to max or min Q if undershoot or overshoot is outside
+  // the permitted range.
+  if ((cpi->oxcf.rc_mode != VPX_Q) &&
+      (cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD)) {
+    if (frame_is_intra_only(cm) ||
+        (!rc->is_src_frame_alt_ref &&
+         (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
+      // Boosted frames: full min-Q extension, half max-Q extension.
+      active_best_quality -=
+          (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast);
+      active_worst_quality += (cpi->twopass.extend_maxq / 2);
+    } else {
+      // Normal frames: half min-Q extension, full max-Q extension.
+      active_best_quality -=
+          (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2;
+      active_worst_quality += cpi->twopass.extend_maxq;
+    }
+  }
+
+  vpx_clear_system_state();
+  // Static forced key frames Q restrictions dealt with elsewhere.
+  if (!(frame_is_intra_only(cm)) || !rc->this_key_frame_forced ||
+      (cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) {
+    int qdelta = vp10_frame_type_qdelta(
+        cpi, gf_group->rf_level[gf_group->index], active_worst_quality);
+    active_worst_quality =
+        VPXMAX(active_worst_quality + qdelta, active_best_quality);
+  }
+
+  // Modify active_best_quality for downscaled normal frames.
+  if (rc->frame_size_selector != UNSCALED && !frame_is_kf_gf_arf(cpi)) {
+    int qdelta = vp10_compute_qdelta_by_rate(
+        rc, cm->frame_type, active_best_quality, 2.0, cm->bit_depth);
+    active_best_quality =
+        VPXMAX(active_best_quality + qdelta, rc->best_quality);
+  }
+
+  active_best_quality =
+      clamp(active_best_quality, rc->best_quality, rc->worst_quality);
+  active_worst_quality =
+      clamp(active_worst_quality, active_best_quality, rc->worst_quality);
+
+  if (oxcf->rc_mode == VPX_Q) {
+    q = active_best_quality;
+    // Special case code to try and match quality with forced key frames.
+  } else if (frame_is_intra_only(cm) && rc->this_key_frame_forced) {
+    // If static since last kf use better of last boosted and last kf q.
+    if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
+      q = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
+    } else {
+      q = rc->last_boosted_qindex;
+    }
+  } else {
+    q = vp10_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
+                           active_worst_quality);
+    if (q > active_worst_quality) {
+      // Special case when we are targeting the max allowed rate.
+      if (rc->this_frame_target >= rc->max_frame_bandwidth)
+        active_worst_quality = q;
+      else
+        q = active_worst_quality;
+    }
+  }
+  // clamp() returns the clamped value and does not modify its argument, so
+  // the result has to be stored for q to stay within the reported bounds
+  // (this matters for the forced key frame path above).
+  q = clamp(q, active_best_quality, active_worst_quality);
+
+  *top_index = active_worst_quality;
+  *bottom_index = active_best_quality;
+
+  assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality);
+  assert(*bottom_index <= rc->worst_quality &&
+         *bottom_index >= rc->best_quality);
+  assert(q <= rc->worst_quality && q >= rc->best_quality);
+  return q;
+}
+
+// Dispatches to the q picker matching the encoder configuration: two-pass
+// when oxcf.pass is non-zero, otherwise one-pass CBR or one-pass VBR
+// depending on oxcf.rc_mode. Returns the chosen qindex; the allowed range is
+// written to *bottom_index / *top_index by the selected picker.
+int vp10_rc_pick_q_and_bounds(const VP10_COMP *cpi, int *bottom_index,
+                              int *top_index) {
+  if (cpi->oxcf.pass != 0)
+    return rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index);
+
+  if (cpi->oxcf.rc_mode == VPX_CBR)
+    return rc_pick_q_and_bounds_one_pass_cbr(cpi, bottom_index, top_index);
+
+  return rc_pick_q_and_bounds_one_pass_vbr(cpi, bottom_index, top_index);
+}
+
+// Computes the frame size window, in the same units as frame_target, outside
+// of which the frame counts as under- or overshooting.
+//   cpi                     - encoder instance (read only).
+//   frame_target            - target size for the frame.
+//   frame_under_shoot_limit - out: lower limit (never negative).
+//   frame_over_shoot_limit  - out: upper limit (never above
+//                             rc.max_frame_bandwidth).
+// In VPX_Q mode the window is unbounded: [0, INT_MAX].
+void vp10_rc_compute_frame_size_bounds(const VP10_COMP *cpi, int frame_target,
+                                       int *frame_under_shoot_limit,
+                                       int *frame_over_shoot_limit) {
+  if (cpi->oxcf.rc_mode == VPX_Q) {
+    *frame_under_shoot_limit = 0;
+    *frame_over_shoot_limit = INT_MAX;
+  } else {
+    // The arithmetic is done in 64 bits: recode_tolerance * frame_target and
+    // frame_target + tolerance + 200 can both exceed INT_MAX for large
+    // targets, which would be signed overflow in plain int.
+    const int64_t tolerance =
+        ((int64_t)cpi->sf.recode_tolerance * frame_target) / 100;
+    // For very small rate targets where the fractional adjustment
+    // may be tiny make sure there is at least a minimum range (+/- 200).
+    const int64_t under_limit = (int64_t)frame_target - tolerance - 200;
+    const int64_t over_limit = (int64_t)frame_target + tolerance + 200;
+    int64_t clipped_under = VPXMAX(under_limit, 0);
+    if (clipped_under > INT_MAX) clipped_under = INT_MAX;
+
+    *frame_under_shoot_limit = (int)clipped_under;
+    *frame_over_shoot_limit =
+        (int)VPXMIN(over_limit, cpi->rc.max_frame_bandwidth);
+  }
+}
+
+// Stores the frame's target size in rc->this_frame_target (scaled by
+// rate_thresh_mult[] when dynamic resizing has selected a scaled frame size)
+// and derives rc->sb64_target_rate from it.
+void vp10_rc_set_frame_target(VP10_COMP *cpi, int target) {
+  const VP10_COMMON *const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
+  const int scaled_frame = cpi->oxcf.resize_mode == RESIZE_DYNAMIC &&
+                           rc->frame_size_selector != UNSCALED;
+  const int frame_pixels = cm->width * cm->height;
+
+  rc->this_frame_target = target;
+
+  // Modify frame size target when down-scaling.
+  if (scaled_frame) {
+    rc->this_frame_target = (int)(rc->this_frame_target *
+                                  rate_thresh_mult[rc->frame_size_selector]);
+  }
+
+  // Target rate per SB64 (including partial SB64s).
+  rc->sb64_target_rate =
+      ((int64_t)rc->this_frame_target * 64 * 64) / frame_pixels;
+}
+
+// Resets the rate-control state that tracks alt-ref usage: the golden frame
+// distance counter restarts, no alt ref is pending any more, and the alt ref
+// is flagged as active.
+static void update_alt_ref_frame_stats(VP10_COMP *cpi) {
+  RATE_CONTROL *const rc = &cpi->rc;
+
+  // Set the alternate reference frame active flag.
+  rc->source_alt_ref_active = 1;
+
+  // Mark the alt ref as done (0 means no further alt refs pending).
+  rc->source_alt_ref_pending = 0;
+
+  // This frame refreshes, so following frames don't unless the user says so.
+  rc->frames_since_golden = 0;
+}
+
+// Updates the golden-frame counters in cpi->rc: frames_since_golden,
+// frames_till_gf_update_due and the source_alt_ref_active flag.
+// NOTE: the opening "if" below is selected by the preprocessor, so both
+// CONFIG_EXT_REFS variants share the same body and closing brace.
+static void update_golden_frame_stats(VP10_COMP *cpi) {
+ RATE_CONTROL *const rc = &cpi->rc;
+
+#if CONFIG_EXT_REFS
+ // Update the Golden frame usage counts.
+ // NOTE(weitinglin): If we use show_existing_frame for an OVERLAY frame,
+ // only the virtual indices for the reference frame will be
+ // updated and cpi->refresh_golden_frame will still be zero.
+ if (cpi->refresh_golden_frame || rc->is_src_frame_alt_ref) {
+#else
+ // Update the Golden frame usage counts.
+ if (cpi->refresh_golden_frame) {
+#endif
+#if CONFIG_EXT_REFS
+ // We will not use internal overlay frames to replace the golden frame.
+ // This "if" guards only the single assignment that follows it.
+ if (!rc->is_src_frame_ext_arf)
+#endif
+ // this frame refreshes means next frames don't unless specified by user
+ rc->frames_since_golden = 0;
+
+ // If we are not using alt ref in the up and coming group clear the arf
+ // active flag. In multi arf group case, if the index is not 0 then
+ // we are overlaying a mid group arf so should not reset the flag.
+ if (cpi->oxcf.pass == 2) {
+ if (!rc->source_alt_ref_pending && (cpi->twopass.gf_group.index == 0))
+ rc->source_alt_ref_active = 0;
+ } else if (!rc->source_alt_ref_pending) {
+ rc->source_alt_ref_active = 0;
+ }
+
+ // Decrement count down till next gf
+ if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;
+
+ } else if (!cpi->refresh_alt_ref_frame) {
+ // Neither golden nor alt-ref refresh: count this frame as one more frame
+ // since the last golden frame.
+ // Decrement count down till next gf
+ if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;
+
+ rc->frames_since_golden++;
+ }
+}
+
+// Updates the rate-control state in cpi->rc after a frame has been encoded.
+//   cpi        - encoder instance; cpi->rc and cpi->resize_pending are
+//                modified.
+//   bytes_used - size of the encoded frame in bytes.
+void vp10_rc_postencode_update(VP10_COMP *cpi, uint64_t bytes_used) {
+ const VP10_COMMON *const cm = &cpi->common;
+ const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+ RATE_CONTROL *const rc = &cpi->rc;
+ const int qindex = cm->base_qindex;
+
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
+ vp10_cyclic_refresh_postencode(cpi);
+ }
+
+ // Update rate control heuristics
+ // (bytes are converted to bits by the shift).
+ rc->projected_frame_size = (int)(bytes_used << 3);
+
+ // Post encode loop adjustment of Q prediction.
+ vp10_rc_update_rate_correction_factors(cpi);
+
+ // Keep a record of last Q and ambient average Q.
+ // The averages blend the previous value and the new qindex with
+ // weights 3 and 1.
+ if (cm->frame_type == KEY_FRAME) {
+ rc->last_q[KEY_FRAME] = qindex;
+ rc->avg_frame_qindex[KEY_FRAME] =
+ ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
+ } else {
+ // Only normal inter frames (no golden / alt-ref refresh, not an alt-ref
+ // source frame) contribute to the inter-frame statistics.
+ if (!rc->is_src_frame_alt_ref &&
+ !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ rc->last_q[INTER_FRAME] = qindex;
+ rc->avg_frame_qindex[INTER_FRAME] =
+ ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
+ rc->ni_frames++;
+ rc->tot_q += vp10_convert_qindex_to_q(qindex, cm->bit_depth);
+ rc->avg_q = rc->tot_q / rc->ni_frames;
+ // Calculate the average Q for normal inter frames (not key or GFU
+ // frames).
+ rc->ni_tot_qi += qindex;
+ rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames;
+ }
+ }
+
+ // Keep record of last boosted (KF/GF/ARF) Q value.
+ // If the current frame is coded at a lower Q then we also update it.
+ // If all mbs in this group are skipped only update if the Q value is
+ // better than that already stored.
+ // This is used to help set quality in forced key frames to reduce popping
+ if ((qindex < rc->last_boosted_qindex) || (cm->frame_type == KEY_FRAME) ||
+ (!rc->constrained_gf_group &&
+ (cpi->refresh_alt_ref_frame ||
+ (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
+ rc->last_boosted_qindex = qindex;
+ }
+ if (cm->frame_type == KEY_FRAME) rc->last_kf_qindex = qindex;
+
+ update_buffer_level(cpi, rc->projected_frame_size);
+
+ // Rolling monitors of whether we are over or underspending used to help
+ // regulate min and Max Q in two pass.
+ // The short monitors weight history 3:1, the long ones 31:1.
+ if (cm->frame_type != KEY_FRAME) {
+ rc->rolling_target_bits = ROUND_POWER_OF_TWO(
+ rc->rolling_target_bits * 3 + rc->this_frame_target, 2);
+ rc->rolling_actual_bits = ROUND_POWER_OF_TWO(
+ rc->rolling_actual_bits * 3 + rc->projected_frame_size, 2);
+ rc->long_rolling_target_bits = ROUND_POWER_OF_TWO(
+ rc->long_rolling_target_bits * 31 + rc->this_frame_target, 5);
+ rc->long_rolling_actual_bits = ROUND_POWER_OF_TWO(
+ rc->long_rolling_actual_bits * 31 + rc->projected_frame_size, 5);
+ }
+
+ // Actual bits spent
+ rc->total_actual_bits += rc->projected_frame_size;
+ // Target bits only accumulate for shown frames (and, with CONFIG_EXT_REFS,
+ // for backward reference frames).
+#if CONFIG_EXT_REFS
+ rc->total_target_bits +=
+ (cm->show_frame || rc->is_bwd_ref_frame) ? rc->avg_frame_bandwidth : 0;
+#else
+ rc->total_target_bits += cm->show_frame ? rc->avg_frame_bandwidth : 0;
+#endif // CONFIG_EXT_REFS
+
+ rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;
+
+ if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame &&
+ (cm->frame_type != KEY_FRAME))
+ // Update the alternate reference frame stats as appropriate.
+ update_alt_ref_frame_stats(cpi);
+ else
+ // Update the Golden frame stats as appropriate.
+ update_golden_frame_stats(cpi);
+
+ // A key frame restarts the distance-from-key counter before the shown-frame
+ // increment below is applied.
+ if (cm->frame_type == KEY_FRAME) rc->frames_since_key = 0;
+
+ // NOTE: the "if" head below is selected by the preprocessor; both variants
+ // share the same body.
+#if CONFIG_EXT_REFS
+ if (cm->show_frame || rc->is_bwd_ref_frame) {
+#else
+ if (cm->show_frame) {
+#endif // CONFIG_EXT_REFS
+ rc->frames_since_key++;
+ rc->frames_to_key--;
+ }
+
+ // Trigger the resizing of the next frame if it is scaled.
+ // Only done when oxcf->pass is non-zero.
+ if (oxcf->pass != 0) {
+ cpi->resize_pending =
+ rc->next_frame_size_selector != rc->frame_size_selector;
+ rc->frame_size_selector = rc->next_frame_size_selector;
+ }
+}
+
+// Rate-control bookkeeping for a dropped frame: the buffer level is updated
+// with a zero-size frame, the key frame distance counters advance, and the
+// rc_1_frame / rc_2_frame fields are cleared.
+void vp10_rc_postencode_update_drop_frame(VP10_COMP *cpi) {
+  RATE_CONTROL *const rc = &cpi->rc;
+
+  update_buffer_level(cpi, 0);
+
+  ++rc->frames_since_key;
+  --rc->frames_to_key;
+
+  rc->rc_2_frame = 0;
+  rc->rc_1_frame = 0;
+}
+
+// Use this macro to turn on/off use of alt-refs in one-pass mode.
+#define USE_ALTREF_FOR_ONE_PASS 1
+
+// Returns the target size for a one-pass VBR inter frame.
+// With USE_ALTREF_FOR_ONE_PASS enabled, the bits of a golden frame group
+// (avg_frame_bandwidth * baseline_gf_interval) are divided so that a golden
+// or alt-ref refresh frame receives af_ratio times the share of a normal
+// frame. The result is passed through vp10_rc_clamp_pframe_target_size().
+static int calc_pframe_target_size_one_pass_vbr(const VP10_COMP *const cpi) {
+  static const int af_ratio = 10;
+  const RATE_CONTROL *const rc = &cpi->rc;
+  int64_t target;
+#if USE_ALTREF_FOR_ONE_PASS
+  {
+    // 64-bit arithmetic: bandwidth * interval * af_ratio exceeds INT_MAX at
+    // high bitrates, which would be signed overflow in plain int.
+    const int64_t group_bits =
+        (int64_t)rc->avg_frame_bandwidth * rc->baseline_gf_interval;
+    const int group_weight = rc->baseline_gf_interval + af_ratio - 1;
+    const int boosted_frame =
+        !rc->is_src_frame_alt_ref &&
+        (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
+
+    target = boosted_frame ? (group_bits * af_ratio) / group_weight
+                           : group_bits / group_weight;
+  }
+#else
+  target = rc->avg_frame_bandwidth;
+#endif
+  // Saturate before narrowing back to the int the clamp helper expects.
+  if (target > INT_MAX) target = INT_MAX;
+  return vp10_rc_clamp_pframe_target_size(cpi, (int)target);
+}
+
+// Returns the target size for a one-pass VBR key frame: kf_ratio times the
+// average per-frame bandwidth, passed through
+// vp10_rc_clamp_iframe_target_size().
+static int calc_iframe_target_size_one_pass_vbr(const VP10_COMP *const cpi) {
+  static const int kf_ratio = 25;
+  const RATE_CONTROL *rc = &cpi->rc;
+  // Multiply in 64 bits and saturate: avg_frame_bandwidth * 25 can exceed
+  // INT_MAX, which would be signed overflow in plain int.
+  int64_t target = (int64_t)rc->avg_frame_bandwidth * kf_ratio;
+  if (target > INT_MAX) target = INT_MAX;
+  return vp10_rc_clamp_iframe_target_size(cpi, (int)target);
+}
+
+// Sets up the per-frame rate-control parameters for one-pass VBR: decides
+// the frame type, starts a new golden frame group when the previous one has
+// run out, and sets the frame's target size.
+void vp10_rc_get_one_pass_vbr_params(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
+  // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
+  const int code_key_frame =
+      !cpi->refresh_alt_ref_frame &&
+      (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
+       rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0));
+
+  if (code_key_frame) {
+    cm->frame_type = KEY_FRAME;
+    // "Forced" means the key frame interval ran out on a frame other than
+    // the first one.
+    rc->this_key_frame_forced =
+        cm->current_video_frame != 0 && rc->frames_to_key == 0;
+    rc->frames_to_key = cpi->oxcf.key_freq;
+    rc->kf_boost = DEFAULT_KF_BOOST;
+    rc->source_alt_ref_active = 0;
+  } else {
+    cm->frame_type = INTER_FRAME;
+  }
+
+  if (rc->frames_till_gf_update_due == 0) {
+    rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
+    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+    // NOTE: frames_till_gf_update_due must be <= frames_to_key. The group is
+    // marked as constrained when it had to be shortened for that reason.
+    rc->constrained_gf_group =
+        rc->frames_till_gf_update_due > rc->frames_to_key;
+    if (rc->constrained_gf_group)
+      rc->frames_till_gf_update_due = rc->frames_to_key;
+    cpi->refresh_golden_frame = 1;
+    rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
+    rc->gfu_boost = DEFAULT_GF_BOOST;
+  }
+
+  vp10_rc_set_frame_target(cpi,
+                           cm->frame_type == KEY_FRAME
+                               ? calc_iframe_target_size_one_pass_vbr(cpi)
+                               : calc_pframe_target_size_one_pass_vbr(cpi));
+}
+
+// Returns the target size for a one-pass CBR inter frame.
+// The base target is the average frame bandwidth (redistributed within the
+// golden frame group when oxcf->gf_cbr_boost_pct is set). It is then lowered
+// or raised depending on how far the buffer is below or above its optimal
+// level, capped by oxcf->rc_max_inter_bitrate_pct when that is set, and
+// finally floored at max(avg_frame_bandwidth / 16, FRAME_OVERHEAD_BITS).
+static int calc_pframe_target_size_one_pass_cbr(const VP10_COMP *cpi) {
+  const VP10EncoderConfig *oxcf = &cpi->oxcf;
+  const RATE_CONTROL *rc = &cpi->rc;
+  const int64_t diff = rc->optimal_buffer_level - rc->buffer_level;
+  const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100;
+  const int min_frame_target =
+      VPXMAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
+  // All intermediate products are kept in 64 bits: at high bitrates
+  // bandwidth * interval * percentage and target * percentage exceed
+  // INT_MAX, which would be signed overflow in plain int.
+  int64_t target;
+
+  if (oxcf->gf_cbr_boost_pct) {
+    const int af_ratio_pct = oxcf->gf_cbr_boost_pct + 100;
+    const int64_t group_bits =
+        (int64_t)rc->avg_frame_bandwidth * rc->baseline_gf_interval;
+    const int64_t group_weight =
+        (int64_t)rc->baseline_gf_interval * 100 + af_ratio_pct - 100;
+    target = cpi->refresh_golden_frame
+                 ? (group_bits * af_ratio_pct) / group_weight
+                 : (group_bits * 100) / group_weight;
+  } else {
+    target = rc->avg_frame_bandwidth;
+  }
+
+  if (diff > 0) {
+    // Buffer below optimal: lower the target bandwidth for this frame.
+    const int pct_low = (int)VPXMIN(diff / one_pct_bits, oxcf->under_shoot_pct);
+    target -= (target * pct_low) / 200;
+  } else if (diff < 0) {
+    // Buffer above optimal: increase the target bandwidth for this frame.
+    const int pct_high =
+        (int)VPXMIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
+    target += (target * pct_high) / 200;
+  }
+  if (oxcf->rc_max_inter_bitrate_pct) {
+    const int64_t max_rate =
+        (int64_t)rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100;
+    target = VPXMIN(target, max_rate);
+  }
+
+  if (target < min_frame_target) target = min_frame_target;
+  // Saturate before narrowing back to the int return type.
+  if (target > INT_MAX) target = INT_MAX;
+  return (int)target;
+}
+
+// Returns the target size for a one-pass CBR key frame.
+// The first frame gets half of the starting buffer level. Later key frames
+// get (16 + kf_boost) / 16 times the average frame bandwidth, where kf_boost
+// is at least 32, grows with the frame rate, and is scaled down when the
+// previous key frame was less than framerate / 2 frames ago. The result is
+// passed through vp10_rc_clamp_iframe_target_size().
+static int calc_iframe_target_size_one_pass_cbr(const VP10_COMP *cpi) {
+  const RATE_CONTROL *rc = &cpi->rc;
+  int64_t target;
+  if (cpi->common.current_video_frame == 0) {
+    target = rc->starting_buffer_level / 2;
+  } else {
+    int kf_boost = 32;
+    const double framerate = cpi->framerate;
+
+    kf_boost = VPXMAX(kf_boost, (int)(2 * framerate - 16));
+    if (rc->frames_since_key < framerate / 2) {
+      kf_boost = (int)(kf_boost * rc->frames_since_key / (framerate / 2));
+    }
+    // Multiply in 64 bits: (16 + kf_boost) * avg_frame_bandwidth can exceed
+    // INT_MAX, which would be signed overflow in plain int.
+    target = ((int64_t)(16 + kf_boost) * rc->avg_frame_bandwidth) >> 4;
+  }
+  // Saturate before narrowing back to the int the clamp helper expects.
+  if (target > INT_MAX) target = INT_MAX;
+  return vp10_rc_clamp_iframe_target_size(cpi, (int)target);
+}
+
+// Sets up the per-frame rate-control parameters for one-pass CBR: decides
+// the frame type, starts a new golden frame group when the previous one has
+// run out, refreshes the cyclic-refresh parameters when that AQ mode is in
+// use, sets the frame's target size and decides whether a resize is pending.
+void vp10_rc_get_one_pass_cbr_params(VP10_COMP *cpi) {
+  VP10_COMMON *const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
+  const int cyclic_refresh = cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ;
+  // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
+  const int code_key_frame =
+      (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
+       rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0));
+  int target;
+
+  if (code_key_frame) {
+    cm->frame_type = KEY_FRAME;
+    // "Forced" means the key frame interval ran out on a frame other than
+    // the first one.
+    rc->this_key_frame_forced =
+        cm->current_video_frame != 0 && rc->frames_to_key == 0;
+    rc->frames_to_key = cpi->oxcf.key_freq;
+    rc->kf_boost = DEFAULT_KF_BOOST;
+    rc->source_alt_ref_active = 0;
+  } else {
+    cm->frame_type = INTER_FRAME;
+  }
+
+  if (rc->frames_till_gf_update_due == 0) {
+    if (cyclic_refresh) {
+      vp10_cyclic_refresh_set_golden_update(cpi);
+    } else {
+      rc->baseline_gf_interval =
+          (rc->min_gf_interval + rc->max_gf_interval) / 2;
+    }
+    // NOTE: frames_till_gf_update_due must be <= frames_to_key.
+    rc->frames_till_gf_update_due =
+        VPXMIN(rc->baseline_gf_interval, rc->frames_to_key);
+    cpi->refresh_golden_frame = 1;
+    rc->gfu_boost = DEFAULT_GF_BOOST;
+  }
+
+  // Any update/change of global cyclic refresh parameters (amount/delta-qp)
+  // should be done here, before the frame qp is selected.
+  if (cyclic_refresh) vp10_cyclic_refresh_update_parameters(cpi);
+
+  if (cm->frame_type == KEY_FRAME)
+    target = calc_iframe_target_size_one_pass_cbr(cpi);
+  else
+    target = calc_pframe_target_size_one_pass_cbr(cpi);
+  vp10_rc_set_frame_target(cpi, target);
+
+  cpi->resize_pending = (cpi->oxcf.resize_mode == RESIZE_DYNAMIC)
+                            ? vp10_resize_one_pass_cbr(cpi)
+                            : 0;
+}
+
+int vp10_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
+ vpx_bit_depth_t bit_depth) {
+ int start_index = rc->worst_quality;
+ int target_index = rc->worst_quality;
+ int i;
+
+ // Convert the average q value to an index.
+ for (i = rc->best_quality; i < rc->worst_quality; ++i) {
+ start_index = i;
+ if (vp10_convert_qindex_to_q(i, bit_depth) >= qstart) break;
+ }
+
+ // Convert the q target to an index
+ for (i = rc->best_quality; i < rc->worst_quality; ++i) {
+ target_index = i;
+ if (vp10_convert_qindex_to_q(i, bit_depth) >= qtarget) break;
+ }
+
+ return target_index - start_index;
+}
+
+int vp10_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
+ int qindex, double rate_target_ratio,
+ vpx_bit_depth_t bit_depth) {
+ int target_index = rc->worst_quality;
+ int i;
+
+ // Look up the current projected bits per block for the base index
+ const int base_bits_per_mb =
+ vp10_rc_bits_per_mb(frame_type, qindex, 1.0, bit_depth);
+
+ // Find the target bits per mb based on the base value and given ratio.
+ const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb);
+
+ // Convert the q target to an index
+ for (i = rc->best_quality; i < rc->worst_quality; ++i) {
+ if (vp10_rc_bits_per_mb(frame_type, i, 1.0, bit_depth) <=
+ target_bits_per_mb) {
+ target_index = i;
+ break;
+ }
+ }
+ return target_index - qindex;
+}
+
+void vp10_rc_set_gf_interval_range(const VP10_COMP *const cpi,
+ RATE_CONTROL *const rc) {
+ const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+
+ // Special case code for 1 pass fixed Q mode tests
+ if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
+ rc->max_gf_interval = FIXED_GF_INTERVAL;
+ rc->min_gf_interval = FIXED_GF_INTERVAL;
+ rc->static_scene_max_gf_interval = FIXED_GF_INTERVAL;
+ } else {
+ // Set Maximum gf/arf interval
+ rc->max_gf_interval = oxcf->max_gf_interval;
+ rc->min_gf_interval = oxcf->min_gf_interval;
+ if (rc->min_gf_interval == 0)
+ rc->min_gf_interval = vp10_rc_get_default_min_gf_interval(
+ oxcf->width, oxcf->height, cpi->framerate);
+ if (rc->max_gf_interval == 0)
+ rc->max_gf_interval = vp10_rc_get_default_max_gf_interval(
+ cpi->framerate, rc->min_gf_interval);
+
+ // Extended interval for genuinely static scenes
+ rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
+
+ if (is_altref_enabled(cpi)) {
+ if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
+ rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
+ }
+
+ if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
+ rc->max_gf_interval = rc->static_scene_max_gf_interval;
+
+ // Clamp min to max
+ rc->min_gf_interval = VPXMIN(rc->min_gf_interval, rc->max_gf_interval);
+ }
+}
+
+void vp10_rc_update_framerate(VP10_COMP *cpi) {
+ const VP10_COMMON *const cm = &cpi->common;
+ const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+ RATE_CONTROL *const rc = &cpi->rc;
+ int vbr_max_bits;
+
+ rc->avg_frame_bandwidth = (int)(oxcf->target_bandwidth / cpi->framerate);
+ rc->min_frame_bandwidth =
+ (int)(rc->avg_frame_bandwidth * oxcf->two_pass_vbrmin_section / 100);
+
+ rc->min_frame_bandwidth =
+ VPXMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
+
+ // A maximum bitrate for a frame is defined.
+ // The baseline for this aligns with HW implementations that
+ // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
+ // per 16x16 MB (averaged over a frame). However this limit is extended if
+ // a very high rate is given on the command line or the rate cannot
+ // be achieved because of a user specified max q (e.g. when the user
+ // specifies lossless encode).
+ vbr_max_bits =
+ (int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) /
+ 100);
+ rc->max_frame_bandwidth =
+ VPXMAX(VPXMAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits);
+
+ vp10_rc_set_gf_interval_range(cpi, rc);
+}
+
+#define VBR_PCT_ADJUSTMENT_LIMIT 50
+// For VBR...adjustment to the frame target based on error from previous frames
+static void vbr_rate_correction(VP10_COMP *cpi, int *this_frame_target) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ int64_t vbr_bits_off_target = rc->vbr_bits_off_target;
+ int max_delta;
+ double position_factor = 1.0;
+
+ // How far through the clip are we.
+ // This number is used to damp the per frame rate correction.
+ // Range 0 - 1.0
+ if (cpi->twopass.total_stats.count != 0.) {
+ position_factor = sqrt((double)cpi->common.current_video_frame /
+ cpi->twopass.total_stats.count);
+ }
+ max_delta = (int)(position_factor *
+ ((*this_frame_target * VBR_PCT_ADJUSTMENT_LIMIT) / 100));
+
+ // vbr_bits_off_target > 0 means we have extra bits to spend
+ if (vbr_bits_off_target > 0) {
+ *this_frame_target += (vbr_bits_off_target > max_delta)
+ ? max_delta
+ : (int)vbr_bits_off_target;
+ } else {
+ *this_frame_target -= (vbr_bits_off_target < -max_delta)
+ ? max_delta
+ : (int)-vbr_bits_off_target;
+ }
+
+ // Fast redistribution of bits arising from massive local undershoot.
+ // Don't do it for kf, arf, gf or overlay frames.
+ if (!frame_is_kf_gf_arf(cpi) && !rc->is_src_frame_alt_ref &&
+ rc->vbr_bits_off_target_fast) {
+ int one_frame_bits = VPXMAX(rc->avg_frame_bandwidth, *this_frame_target);
+ int fast_extra_bits;
+ fast_extra_bits = (int)VPXMIN(rc->vbr_bits_off_target_fast, one_frame_bits);
+ fast_extra_bits = (int)VPXMIN(
+ fast_extra_bits,
+ VPXMAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8));
+ *this_frame_target += (int)fast_extra_bits;
+ rc->vbr_bits_off_target_fast -= fast_extra_bits;
+ }
+}
+
+void vp10_set_target_rate(VP10_COMP *cpi) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ int target_rate = rc->base_frame_target;
+
+ // Correction to rate target based on prior over or under shoot.
+ if (cpi->oxcf.rc_mode == VPX_VBR || cpi->oxcf.rc_mode == VPX_CQ)
+ vbr_rate_correction(cpi, &target_rate);
+ vp10_rc_set_frame_target(cpi, target_rate);
+}
+
+// Check if we should resize, based on average QP from past x frames.
+// Only allow for resize at most one scale down for now, scaling factor is 2.
+int vp10_resize_one_pass_cbr(VP10_COMP *cpi) {
+ const VP10_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ int resize_now = 0;
+ cpi->resize_scale_num = 1;
+ cpi->resize_scale_den = 1;
+ // Don't resize on key frame; reset the counters on key frame.
+ if (cm->frame_type == KEY_FRAME) {
+ cpi->resize_avg_qp = 0;
+ cpi->resize_count = 0;
+ return 0;
+ }
+ // Resize based on average buffer underflow and QP over some window.
+ // Ignore samples close to key frame, since QP is usually high after key.
+ if (cpi->rc.frames_since_key > 2 * cpi->framerate) {
+ const int window = (int)(5 * cpi->framerate);
+ cpi->resize_avg_qp += cm->base_qindex;
+ if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100))
+ ++cpi->resize_buffer_underflow;
+ ++cpi->resize_count;
+ // Check for resize action every "window" frames.
+ if (cpi->resize_count >= window) {
+ int avg_qp = cpi->resize_avg_qp / cpi->resize_count;
+ // Resize down if buffer level has underflowed sufficient amount in past
+ // window, and we are at original resolution.
+ // Resize back up if average QP is low, and we are currently in a resized
+ // down state.
+ if (cpi->resize_state == 0 &&
+ cpi->resize_buffer_underflow > (cpi->resize_count >> 2)) {
+ resize_now = 1;
+ cpi->resize_state = 1;
+ } else if (cpi->resize_state == 1 &&
+ avg_qp < 40 * cpi->rc.worst_quality / 100) {
+ resize_now = -1;
+ cpi->resize_state = 0;
+ }
+ // Reset for next window measurement.
+ cpi->resize_avg_qp = 0;
+ cpi->resize_count = 0;
+ cpi->resize_buffer_underflow = 0;
+ }
+ }
+ // If decision is to resize, reset some quantities, and check if we should
+ // reduce rate correction factor.
+ if (resize_now != 0) {
+ int target_bits_per_frame;
+ int active_worst_quality;
+ int qindex;
+ int tot_scale_change;
+ // For now, resize is by 1/2 x 1/2.
+ cpi->resize_scale_num = 1;
+ cpi->resize_scale_den = 2;
+ tot_scale_change = (cpi->resize_scale_den * cpi->resize_scale_den) /
+ (cpi->resize_scale_num * cpi->resize_scale_num);
+ // Reset buffer level to optimal, update target size.
+ rc->buffer_level = rc->optimal_buffer_level;
+ rc->bits_off_target = rc->optimal_buffer_level;
+ rc->this_frame_target = calc_pframe_target_size_one_pass_cbr(cpi);
+ // Reset cyclic refresh parameters.
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
+ vp10_cyclic_refresh_reset_resize(cpi);
+ // Get the projected qindex, based on the scaled target frame size (scaled
+ // so target_bits_per_mb in vp10_rc_regulate_q will be correct target).
+ target_bits_per_frame = (resize_now == 1)
+ ? rc->this_frame_target * tot_scale_change
+ : rc->this_frame_target / tot_scale_change;
+ active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi);
+ qindex = vp10_rc_regulate_q(cpi, target_bits_per_frame, rc->best_quality,
+ active_worst_quality);
+ // If resize is down, check if projected q index is close to worst_quality,
+ // and if so, reduce the rate correction factor (since likely can afford
+ // lower q for resized frame).
+ if (resize_now == 1 && qindex > 90 * cpi->rc.worst_quality / 100) {
+ rc->rate_correction_factors[INTER_NORMAL] *= 0.85;
+ }
+ // If resize is back up, check if projected q index is too much above the
+ // current base_qindex, and if so, reduce the rate correction factor
+ // (since prefer to keep q for resized frame at least close to previous q).
+ if (resize_now == -1 && qindex > 130 * cm->base_qindex / 100) {
+ rc->rate_correction_factors[INTER_NORMAL] *= 0.9;
+ }
+ }
+ return resize_now;
+}
diff --git a/av1/encoder/ratectrl.h b/av1/encoder/ratectrl.h
new file mode 100644
index 0000000..88a14bc
--- /dev/null
+++ b/av1/encoder/ratectrl.h
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_RATECTRL_H_
+#define VP10_ENCODER_RATECTRL_H_
+
+#include "aom/vpx_codec.h"
+#include "aom/vpx_integer.h"
+
+#include "av1/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Bits Per MB at different Q (Multiplied by 512)
+#define BPER_MB_NORMBITS 9
+
+#define MIN_GF_INTERVAL 4
+#define MAX_GF_INTERVAL 16
+#define FIXED_GF_INTERVAL 8 // Used in some testing modes only
+
+#if CONFIG_EXT_REFS
+typedef enum {
+ INTER_NORMAL = 0,
+ INTER_LOW = 1,
+ INTER_HIGH = 2,
+ GF_ARF_LOW = 3,
+ GF_ARF_STD = 4,
+ KF_STD = 5,
+ RATE_FACTOR_LEVELS = 6
+} RATE_FACTOR_LEVEL;
+#else
+typedef enum {
+ INTER_NORMAL = 0,
+ INTER_HIGH = 1,
+ GF_ARF_LOW = 2,
+ GF_ARF_STD = 3,
+ KF_STD = 4,
+ RATE_FACTOR_LEVELS = 5
+} RATE_FACTOR_LEVEL;
+#endif // CONFIG_EXT_REFS
+
+// Internal frame scaling level.
+typedef enum {
+ UNSCALED = 0, // Frame is unscaled.
+ SCALE_STEP1 = 1, // First-level down-scaling.
+ FRAME_SCALE_STEPS
+} FRAME_SCALE_LEVEL;
+
+// Frame dimensions multiplier wrt the native frame size, in 1/16ths,
+// specified for the scale-up case.
+// e.g. 24 => 16/24 = 2/3 of native size. The restriction to 1/16th is
+// intended to match the capabilities of the normative scaling filters,
+// giving precedence to the up-scaling accuracy.
+static const int frame_scale_factor[FRAME_SCALE_STEPS] = { 16, 24 };
+
+// Multiplier of the target rate to be used as threshold for triggering scaling.
+static const double rate_thresh_mult[FRAME_SCALE_STEPS] = { 1.0, 2.0 };
+
+// Scale dependent Rate Correction Factor multipliers. Compensates for the
+// greater number of bits per pixel generated in down-scaled frames.
+static const double rcf_mult[FRAME_SCALE_STEPS] = { 1.0, 2.0 };
+
+typedef struct {
+ // Rate targeting variables
+ int base_frame_target; // A baseline frame target before adjustment
+ // for previous under or over shoot.
+ int this_frame_target; // Actual frame target after rc adjustment.
+ int projected_frame_size;
+ int sb64_target_rate;
+ int last_q[FRAME_TYPES]; // Separate values for Intra/Inter
+ int last_boosted_qindex; // Last boosted GF/KF/ARF q
+ int last_kf_qindex; // Q index of the last key frame coded.
+
+ int gfu_boost;
+ int last_boost;
+ int kf_boost;
+
+ double rate_correction_factors[RATE_FACTOR_LEVELS];
+
+ int frames_since_golden;
+ int frames_till_gf_update_due;
+ int min_gf_interval;
+ int max_gf_interval;
+ int static_scene_max_gf_interval;
+ int baseline_gf_interval;
+ int constrained_gf_group;
+ int frames_to_key;
+ int frames_since_key;
+ int this_key_frame_forced;
+ int next_key_frame_forced;
+ int source_alt_ref_pending;
+ int source_alt_ref_active;
+ int is_src_frame_alt_ref;
+
+#if CONFIG_EXT_REFS
+ // Length of the bi-predictive frame group interval
+ int bipred_group_interval;
+
+ // NOTE: Different types of frames may have different bits allocated
+ // accordingly, aiming to achieve the overall optimal RD performance.
+ int is_bwd_ref_frame;
+ int is_last_bipred_frame;
+ int is_bipred_frame;
+ int is_src_frame_ext_arf;
+#endif // CONFIG_EXT_REFS
+
+ int avg_frame_bandwidth; // Average frame size target for clip
+ int min_frame_bandwidth; // Minimum allocation used for any frame
+ int max_frame_bandwidth; // Maximum burst rate allowed for a frame.
+
+ int ni_av_qi;
+ int ni_tot_qi;
+ int ni_frames;
+ int avg_frame_qindex[FRAME_TYPES];
+ double tot_q;
+ double avg_q;
+
+ int64_t buffer_level;
+ int64_t bits_off_target;
+ int64_t vbr_bits_off_target;
+ int64_t vbr_bits_off_target_fast;
+
+ int decimation_factor;
+ int decimation_count;
+
+ int rolling_target_bits;
+ int rolling_actual_bits;
+
+ int long_rolling_target_bits;
+ int long_rolling_actual_bits;
+
+ int rate_error_estimate;
+
+ int64_t total_actual_bits;
+ int64_t total_target_bits;
+ int64_t total_target_vs_actual;
+
+ int worst_quality;
+ int best_quality;
+
+ int64_t starting_buffer_level;
+ int64_t optimal_buffer_level;
+ int64_t maximum_buffer_size;
+
+ // rate control history for last frame(1) and the frame before(2).
+ // -1: undershoot
+ // 1: overshoot
+ // 0: not initialized.
+ int rc_1_frame;
+ int rc_2_frame;
+ int q_1_frame;
+ int q_2_frame;
+
+ // Auto frame-scaling variables.
+ FRAME_SCALE_LEVEL frame_size_selector;
+ FRAME_SCALE_LEVEL next_frame_size_selector;
+ int frame_width[FRAME_SCALE_STEPS];
+ int frame_height[FRAME_SCALE_STEPS];
+ int rf_level_maxq[RATE_FACTOR_LEVELS];
+} RATE_CONTROL;
+
+struct VP10_COMP;
+struct VP10EncoderConfig;
+
+void vp10_rc_init(const struct VP10EncoderConfig *oxcf, int pass,
+ RATE_CONTROL *rc);
+
+int vp10_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs,
+ double correction_factor,
+ vpx_bit_depth_t bit_depth);
+
+double vp10_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth);
+
+void vp10_rc_init_minq_luts(void);
+
+int vp10_rc_get_default_min_gf_interval(int width, int height,
+ double framerate);
+// Note vp10_rc_get_default_max_gf_interval() requires the min_gf_interval to
+// be passed in to ensure that the max_gf_interval returned is at least as big
+// as that.
+int vp10_rc_get_default_max_gf_interval(double framerate, int min_frame_rate);
+
+// Generally at the high level, the following flow is expected
+// to be enforced for rate control:
+// First call per frame, one of:
+// vp10_rc_get_one_pass_vbr_params()
+// vp10_rc_get_one_pass_cbr_params()
+// vp10_rc_get_first_pass_params()
+// vp10_rc_get_second_pass_params()
+// depending on the usage to set the rate control encode parameters desired.
+//
+// Then, call encode_frame_to_data_rate() to perform the
+// actual encode. This function will in turn call encode_frame()
+// one or more times, followed by one of:
+// vp10_rc_postencode_update()
+// vp10_rc_postencode_update_drop_frame()
+//
+// The majority of rate control parameters are only expected
+// to be set in the vp10_rc_get_..._params() functions and
+// updated during the vp10_rc_postencode_update...() functions.
+// The only exceptions are vp10_rc_drop_frame() and
+// vp10_rc_update_rate_correction_factors() functions.
+
+// Functions to set parameters for encoding before the actual
+// encode_frame_to_data_rate() function.
+void vp10_rc_get_one_pass_vbr_params(struct VP10_COMP *cpi);
+void vp10_rc_get_one_pass_cbr_params(struct VP10_COMP *cpi);
+
+// Post encode update of the rate control parameters based
+// on bytes used
+void vp10_rc_postencode_update(struct VP10_COMP *cpi, uint64_t bytes_used);
+// Post encode update of the rate control parameters for dropped frames
+void vp10_rc_postencode_update_drop_frame(struct VP10_COMP *cpi);
+
+// Updates rate correction factors
+// Changes only the rate correction factors in the rate control structure.
+void vp10_rc_update_rate_correction_factors(struct VP10_COMP *cpi);
+
+// Decide if we should drop this frame: For 1-pass CBR.
+// Changes only the decimation count in the rate control structure
+int vp10_rc_drop_frame(struct VP10_COMP *cpi);
+
+// Computes frame size bounds.
+void vp10_rc_compute_frame_size_bounds(const struct VP10_COMP *cpi,
+ int this_frame_target,
+ int *frame_under_shoot_limit,
+ int *frame_over_shoot_limit);
+
+// Picks q and q bounds given the target for bits
+int vp10_rc_pick_q_and_bounds(const struct VP10_COMP *cpi, int *bottom_index,
+ int *top_index);
+
+// Estimates q to achieve a target bits per frame
+int vp10_rc_regulate_q(const struct VP10_COMP *cpi, int target_bits_per_frame,
+ int active_best_quality, int active_worst_quality);
+
+// Estimates bits per mb for a given qindex and correction factor.
+int vp10_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
+ double correction_factor, vpx_bit_depth_t bit_depth);
+
+// Clamping utilities for bitrate targets for iframes and pframes.
+int vp10_rc_clamp_iframe_target_size(const struct VP10_COMP *const cpi,
+ int target);
+int vp10_rc_clamp_pframe_target_size(const struct VP10_COMP *const cpi,
+ int target);
+// Utility to set frame_target into the RATE_CONTROL structure
+// This function is called only from the vp10_rc_get_..._params() functions.
+void vp10_rc_set_frame_target(struct VP10_COMP *cpi, int target);
+
+// Computes a q delta (in "q index" terms) to get from a starting q value
+// to a target q value
+int vp10_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
+ vpx_bit_depth_t bit_depth);
+
+// Computes a q delta (in "q index" terms) to get from a starting q value
+// to a value that should equate to the given rate ratio.
+int vp10_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
+ int qindex, double rate_target_ratio,
+ vpx_bit_depth_t bit_depth);
+
+int vp10_frame_type_qdelta(const struct VP10_COMP *cpi, int rf_level, int q);
+
+void vp10_rc_update_framerate(struct VP10_COMP *cpi);
+
+void vp10_rc_set_gf_interval_range(const struct VP10_COMP *const cpi,
+ RATE_CONTROL *const rc);
+
+void vp10_set_target_rate(struct VP10_COMP *cpi);
+
+int vp10_resize_one_pass_cbr(struct VP10_COMP *cpi);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_RATECTRL_H_
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
new file mode 100644
index 0000000..a8a8691
--- /dev/null
+++ b/av1/encoder/rd.c
@@ -0,0 +1,1103 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "./vp10_rtcd.h"
+
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/bitops.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/system_state.h"
+
+#include "av1/common/common.h"
+#include "av1/common/entropy.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/mvref_common.h"
+#include "av1/common/pred_common.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/reconintra.h"
+#include "av1/common/seg_common.h"
+
+#include "av1/encoder/cost.h"
+#include "av1/encoder/encodemb.h"
+#include "av1/encoder/encodemv.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/mcomp.h"
+#include "av1/encoder/quantize.h"
+#include "av1/encoder/ratectrl.h"
+#include "av1/encoder/rd.h"
+#include "av1/encoder/tokenize.h"
+
+#define RD_THRESH_POW 1.25
+
+// Factor to weigh the rate for switchable interp filters.
+#define SWITCHABLE_INTERP_RATE_FACTOR 1
+
+void vp10_rd_cost_reset(RD_COST *rd_cost) {
+ rd_cost->rate = INT_MAX;
+ rd_cost->dist = INT64_MAX;
+ rd_cost->rdcost = INT64_MAX;
+}
+
+void vp10_rd_cost_init(RD_COST *rd_cost) {
+ rd_cost->rate = 0;
+ rd_cost->dist = 0;
+ rd_cost->rdcost = 0;
+}
+
+// The baseline rd thresholds for breaking out of the rd loop for
+// certain modes are assumed to be based on 8x8 blocks.
+// This table is used to correct for block size.
+// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
+static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
+ 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32,
+#if CONFIG_EXT_PARTITION
+ 48, 48, 64
+#endif // CONFIG_EXT_PARTITION
+};
+
+static void fill_mode_costs(VP10_COMP *cpi) {
+ const FRAME_CONTEXT *const fc = cpi->common.fc;
+ int i, j;
+
+ for (i = 0; i < INTRA_MODES; ++i)
+ for (j = 0; j < INTRA_MODES; ++j)
+ vp10_cost_tokens(cpi->y_mode_costs[i][j], vp10_kf_y_mode_prob[i][j],
+ vp10_intra_mode_tree);
+
+ for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
+ vp10_cost_tokens(cpi->mbmode_cost[i], fc->y_mode_prob[i],
+ vp10_intra_mode_tree);
+
+ for (i = 0; i < INTRA_MODES; ++i)
+ vp10_cost_tokens(cpi->intra_uv_mode_cost[i], fc->uv_mode_prob[i],
+ vp10_intra_mode_tree);
+
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
+ vp10_cost_tokens(cpi->switchable_interp_costs[i],
+ fc->switchable_interp_prob[i],
+ vp10_switchable_interp_tree);
+
+ for (i = 0; i < PALETTE_BLOCK_SIZES; ++i) {
+ vp10_cost_tokens(cpi->palette_y_size_cost[i],
+ vp10_default_palette_y_size_prob[i],
+ vp10_palette_size_tree);
+ vp10_cost_tokens(cpi->palette_uv_size_cost[i],
+ vp10_default_palette_uv_size_prob[i],
+ vp10_palette_size_tree);
+ }
+
+ for (i = 0; i < PALETTE_MAX_SIZE - 1; ++i)
+ for (j = 0; j < PALETTE_COLOR_CONTEXTS; ++j) {
+ vp10_cost_tokens(cpi->palette_y_color_cost[i][j],
+ vp10_default_palette_y_color_prob[i][j],
+ vp10_palette_color_tree[i]);
+ vp10_cost_tokens(cpi->palette_uv_color_cost[i][j],
+ vp10_default_palette_uv_color_prob[i][j],
+ vp10_palette_color_tree[i]);
+ }
+
+ for (i = 0; i < TX_SIZES - 1; ++i)
+ for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
+ vp10_cost_tokens(cpi->tx_size_cost[i][j], fc->tx_size_probs[i][j],
+ vp10_tx_size_tree[i]);
+
+#if CONFIG_EXT_TX
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ int s;
+ for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+ if (use_inter_ext_tx_for_txsize[s][i]) {
+ vp10_cost_tokens(cpi->inter_tx_type_costs[s][i],
+ fc->inter_ext_tx_prob[s][i],
+ vp10_ext_tx_inter_tree[s]);
+ }
+ }
+ for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
+ if (use_intra_ext_tx_for_txsize[s][i]) {
+ for (j = 0; j < INTRA_MODES; ++j)
+ vp10_cost_tokens(cpi->intra_tx_type_costs[s][i][j],
+ fc->intra_ext_tx_prob[s][i][j],
+ vp10_ext_tx_intra_tree[s]);
+ }
+ }
+ }
+#else
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ for (j = 0; j < TX_TYPES; ++j)
+ vp10_cost_tokens(cpi->intra_tx_type_costs[i][j],
+ fc->intra_ext_tx_prob[i][j], vp10_ext_tx_tree);
+ }
+ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
+ vp10_cost_tokens(cpi->inter_tx_type_costs[i], fc->inter_ext_tx_prob[i],
+ vp10_ext_tx_tree);
+ }
+#endif // CONFIG_EXT_TX
+#if CONFIG_EXT_INTRA
+ for (i = 0; i < INTRA_FILTERS + 1; ++i)
+ vp10_cost_tokens(cpi->intra_filter_cost[i], fc->intra_filter_probs[i],
+ vp10_intra_filter_tree);
+#endif // CONFIG_EXT_INTRA
+}
+
+void vp10_fill_token_costs(vp10_coeff_cost *c,
+#if CONFIG_ANS
+ coeff_cdf_model (*cdf)[PLANE_TYPES],
+#endif // CONFIG_ANS
+ vp10_coeff_probs_model (*p)[PLANE_TYPES]) {
+ int i, j, k, l;
+ TX_SIZE t;
+ for (t = TX_4X4; t <= TX_32X32; ++t)
+ for (i = 0; i < PLANE_TYPES; ++i)
+ for (j = 0; j < REF_TYPES; ++j)
+ for (k = 0; k < COEF_BANDS; ++k)
+ for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
+#if CONFIG_ANS
+ const vpx_prob *const tree_probs = p[t][i][j][k][l];
+ vp10_cost_tokens_ans((int *)c[t][i][j][k][0][l], tree_probs,
+ cdf[t][i][j][k][l], 0);
+ vp10_cost_tokens_ans((int *)c[t][i][j][k][1][l], tree_probs,
+ cdf[t][i][j][k][l], 1);
+#else
+ vpx_prob probs[ENTROPY_NODES];
+ vp10_model_to_full_probs(p[t][i][j][k][l], probs);
+ vp10_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp10_coef_tree);
+ vp10_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
+ vp10_coef_tree);
+#endif // CONFIG_ANS
+ assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
+ c[t][i][j][k][1][l][EOB_TOKEN]);
+ }
+}
+
+// Values are now correlated to quantizer.
+static int sad_per_bit16lut_8[QINDEX_RANGE];
+static int sad_per_bit4lut_8[QINDEX_RANGE];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static int sad_per_bit16lut_10[QINDEX_RANGE];
+static int sad_per_bit4lut_10[QINDEX_RANGE];
+static int sad_per_bit16lut_12[QINDEX_RANGE];
+static int sad_per_bit4lut_12[QINDEX_RANGE];
+#endif
+
+static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
+ vpx_bit_depth_t bit_depth) {
+ int i;
+ // Initialize the sad lut tables using a formulaic calculation for now.
+ // This is to make it easier to resolve the impact of experimental changes
+ // to the quantizer tables.
+ for (i = 0; i < range; i++) {
+ const double q = vp10_convert_qindex_to_q(i, bit_depth);
+ bit16lut[i] = (int)(0.0418 * q + 2.4107);
+ bit4lut[i] = (int)(0.063 * q + 2.742);
+ }
+}
+
+void vp10_init_me_luts(void) {
+ init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
+ VPX_BITS_8);
+#if CONFIG_VP9_HIGHBITDEPTH
+ init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
+ VPX_BITS_10);
+ init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
+ VPX_BITS_12);
+#endif
+}
+
+static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
+ 8, 8, 4, 4, 2, 2, 1, 0 };
+static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
+ 128, 144, 128, 128, 144,
+#if CONFIG_EXT_REFS
+ // TODO(zoeliu): To adjust further following factor values.
+ 128, 128, 128
+ // TODO(weitinglin): We should investigate if the values should be the same
+ // as the value used by OVERLAY frame
+ ,
+ 144
+#endif // CONFIG_EXT_REFS
+};
+
+int vp10_compute_rd_mult(const VP10_COMP *cpi, int qindex) {
+ const int64_t q = vp10_dc_quant(qindex, 0, cpi->common.bit_depth);
+#if CONFIG_VP9_HIGHBITDEPTH
+ int64_t rdmult = 0;
+ switch (cpi->common.bit_depth) {
+ case VPX_BITS_8: rdmult = 88 * q * q / 24; break;
+ case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
+ case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break;
+ default:
+ assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ return -1;
+ }
+#else
+ int64_t rdmult = 88 * q * q / 24;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
+ const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
+
+ rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
+ rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
+ }
+ if (rdmult < 1) rdmult = 1;
+ return (int)rdmult;
+}
+
+static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
+ double q;
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (bit_depth) {
+ case VPX_BITS_8: q = vp10_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
+ case VPX_BITS_10: q = vp10_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
+ case VPX_BITS_12: q = vp10_dc_quant(qindex, 0, VPX_BITS_12) / 64.0; break;
+ default:
+ assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ return -1;
+ }
+#else
+ (void)bit_depth;
+ q = vp10_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // TODO(debargha): Adjust the function below.
+ return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
+}
+
+void vp10_initialize_me_consts(const VP10_COMP *cpi, MACROBLOCK *x,
+ int qindex) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (cpi->common.bit_depth) {
+ case VPX_BITS_8:
+ x->sadperbit16 = sad_per_bit16lut_8[qindex];
+ x->sadperbit4 = sad_per_bit4lut_8[qindex];
+ break;
+ case VPX_BITS_10:
+ x->sadperbit16 = sad_per_bit16lut_10[qindex];
+ x->sadperbit4 = sad_per_bit4lut_10[qindex];
+ break;
+ case VPX_BITS_12:
+ x->sadperbit16 = sad_per_bit16lut_12[qindex];
+ x->sadperbit4 = sad_per_bit4lut_12[qindex];
+ break;
+ default:
+ assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ }
+#else
+ (void)cpi;
+ x->sadperbit16 = sad_per_bit16lut_8[qindex];
+ x->sadperbit4 = sad_per_bit4lut_8[qindex];
+#endif // CONFIG_VP9_HIGHBITDEPTH
+}
+
+static void set_block_thresholds(const VP10_COMMON *cm, RD_OPT *rd) {
+ int i, bsize, segment_id;
+
+ for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
+ const int qindex =
+ clamp(vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
+ cm->y_dc_delta_q,
+ 0, MAXQ);
+ const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
+
+ for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
+ // Threshold here seems unnecessarily harsh but fine given actual
+ // range of values used for cpi->sf.thresh_mult[].
+ const int t = q * rd_thresh_block_size_factor[bsize];
+ const int thresh_max = INT_MAX / t;
+
+ if (bsize >= BLOCK_8X8) {
+ for (i = 0; i < MAX_MODES; ++i)
+ rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
+ ? rd->thresh_mult[i] * t / 4
+ : INT_MAX;
+ } else {
+ for (i = 0; i < MAX_REFS; ++i)
+ rd->threshes[segment_id][bsize][i] =
+ rd->thresh_mult_sub8x8[i] < thresh_max
+ ? rd->thresh_mult_sub8x8[i] * t / 4
+ : INT_MAX;
+ }
+ }
+ }
+}
+
+#if CONFIG_REF_MV
+void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame) {
+ MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
+ int nmv_ctx = vp10_nmv_ctx(mbmi_ext->ref_mv_count[ref_frame],
+ mbmi_ext->ref_mv_stack[ref_frame]);
+ x->mvcost = x->mv_cost_stack[nmv_ctx];
+ x->nmvjointcost = x->nmv_vec_cost[nmv_ctx];
+ x->mvsadcost = x->mvcost;
+ x->nmvjointsadcost = x->nmvjointcost;
+
+ x->nmv_vec_cost[nmv_ctx][MV_JOINT_ZERO] =
+ x->zero_rmv_cost[nmv_ctx][1] - x->zero_rmv_cost[nmv_ctx][0];
+}
+#endif
+
+void vp10_initialize_rd_consts(VP10_COMP *cpi) { /* Sets up the
+ * rate-distortion constants and cost tables reachable from `cpi` using the
+ * current frame state in cpi->common:
+ *   - rd->RDDIV / rd->RDMULT (from base_qindex + y_dc_delta_q) and the
+ *     error-per-bit value of cpi->td.mb;
+ *   - the block thresholds, via set_block_thresholds();
+ *   - the MV cost tables, only when the frame is not intra-only;
+ *   - when cpi->oxcf.pass != 1: token costs, partition costs, the costs
+ *     filled by fill_mode_costs(), and (non-intra-only frames) the inter
+ *     mode cost tables selected by the CONFIG_* build options.
+ */
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->td.mb;
+ RD_OPT *const rd = &cpi->rd;
+ int i;
+
+ vpx_clear_system_state();
+
+ rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128).
+ rd->RDMULT = vp10_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
+
+ set_error_per_bit(x, rd->RDMULT);
+
+ set_block_thresholds(cm, rd);
+
+ if (!frame_is_intra_only(cm)) {
+#if CONFIG_REF_MV
+ int nmv_ctx;
+
+ for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {
+ vpx_prob tmp_prob = cm->fc->nmvc[nmv_ctx].joints[MV_JOINT_ZERO]; // Saved so it can be restored below.
+ cm->fc->nmvc[nmv_ctx].joints[MV_JOINT_ZERO] = 1; // Temporary override while the cost table is built.
+
+ vp10_build_nmv_cost_table(
+ x->nmv_vec_cost[nmv_ctx],
+ cm->allow_high_precision_mv ? x->nmvcost_hp[nmv_ctx]
+ : x->nmvcost[nmv_ctx],
+ &cm->fc->nmvc[nmv_ctx], cm->allow_high_precision_mv);
+ cm->fc->nmvc[nmv_ctx].joints[MV_JOINT_ZERO] = tmp_prob; // Restore the frame context probability.
+
+ x->nmv_vec_cost[nmv_ctx][MV_JOINT_ZERO] = 0; // Overwritten per block by vp10_set_mvcost() from zero_rmv_cost.
+ x->zero_rmv_cost[nmv_ctx][0] =
+ vp10_cost_bit(cm->fc->nmvc[nmv_ctx].zero_rmv, 0);
+ x->zero_rmv_cost[nmv_ctx][1] =
+ vp10_cost_bit(cm->fc->nmvc[nmv_ctx].zero_rmv, 1);
+ }
+ x->mvcost = x->mv_cost_stack[0]; // Default to NMV context 0.
+ x->nmvjointcost = x->nmv_vec_cost[0];
+ x->mvsadcost = x->mvcost;
+ x->nmvjointsadcost = x->nmvjointcost;
+#else
+ vp10_build_nmv_cost_table(
+ x->nmvjointcost,
+ cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, &cm->fc->nmvc,
+ cm->allow_high_precision_mv);
+#endif
+ }
+ if (cpi->oxcf.pass != 1) { // NOTE(review): pass 1 is presumably the first pass of two-pass encoding - confirm.
+ vp10_fill_token_costs(x->token_costs,
+#if CONFIG_ANS
+ cm->fc->coef_cdfs,
+#endif // CONFIG_ANS
+ cm->fc->coef_probs);
+
+ if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
+ cm->frame_type == KEY_FRAME) {
+#if CONFIG_EXT_PARTITION_TYPES
+ vp10_cost_tokens(cpi->partition_cost[0], cm->fc->partition_prob[0],
+ vp10_partition_tree); // Context 0 uses the basic partition tree; the rest use the extended one.
+ for (i = 1; i < PARTITION_CONTEXTS; ++i)
+ vp10_cost_tokens(cpi->partition_cost[i], cm->fc->partition_prob[i],
+ vp10_ext_partition_tree);
+#else
+ for (i = 0; i < PARTITION_CONTEXTS; ++i)
+ vp10_cost_tokens(cpi->partition_cost[i], cm->fc->partition_prob[i],
+ vp10_partition_tree);
+#endif // CONFIG_EXT_PARTITION_TYPES
+ }
+
+ fill_mode_costs(cpi);
+
+ if (!frame_is_intra_only(cm)) {
+#if CONFIG_REF_MV
+ for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) { // With CONFIG_REF_MV each inter mode decision is costed as a single bit per context.
+ cpi->newmv_mode_cost[i][0] = vp10_cost_bit(cm->fc->newmv_prob[i], 0);
+ cpi->newmv_mode_cost[i][1] = vp10_cost_bit(cm->fc->newmv_prob[i], 1);
+ }
+
+ for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i) {
+ cpi->zeromv_mode_cost[i][0] = vp10_cost_bit(cm->fc->zeromv_prob[i], 0);
+ cpi->zeromv_mode_cost[i][1] = vp10_cost_bit(cm->fc->zeromv_prob[i], 1);
+ }
+
+ for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) {
+ cpi->refmv_mode_cost[i][0] = vp10_cost_bit(cm->fc->refmv_prob[i], 0);
+ cpi->refmv_mode_cost[i][1] = vp10_cost_bit(cm->fc->refmv_prob[i], 1);
+ }
+
+ for (i = 0; i < DRL_MODE_CONTEXTS; ++i) {
+ cpi->drl_mode_cost0[i][0] = vp10_cost_bit(cm->fc->drl_prob[i], 0);
+ cpi->drl_mode_cost0[i][1] = vp10_cost_bit(cm->fc->drl_prob[i], 1);
+ }
+#if CONFIG_EXT_INTER
+ cpi->new2mv_mode_cost[0] = vp10_cost_bit(cm->fc->new2mv_prob, 0);
+ cpi->new2mv_mode_cost[1] = vp10_cost_bit(cm->fc->new2mv_prob, 1);
+#endif // CONFIG_EXT_INTER
+#else
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i) // Without CONFIG_REF_MV the inter mode is costed from a token tree.
+ vp10_cost_tokens((int *)cpi->inter_mode_cost[i],
+ cm->fc->inter_mode_probs[i], vp10_inter_mode_tree);
+#endif // CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+ vp10_cost_tokens((int *)cpi->inter_compound_mode_cost[i],
+ cm->fc->inter_compound_mode_probs[i],
+ vp10_inter_compound_mode_tree);
+ for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
+ vp10_cost_tokens((int *)cpi->interintra_mode_cost[i],
+ cm->fc->interintra_mode_prob[i],
+ vp10_interintra_mode_tree);
+#endif // CONFIG_EXT_INTER
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) { // Entries below BLOCK_8X8 are not filled here.
+ vp10_cost_tokens((int *)cpi->motvar_cost[i], cm->fc->motvar_prob[i],
+ vp10_motvar_tree);
+ }
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ }
+ }
+}
+
+static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { /* Looks up
+ * the normalized rate (*r_q10) and normalized distortion (*d_q10) for the
+ * value xsq_q10 by linear interpolation between two adjacent entries of the
+ * tables below. The interpolation weights sum to 1 << 10.
+ *
+ * The lookup reads table index xq + 1, so xsq_q10 must be small enough for
+ * that index to stay inside the tables; the caller in this file,
+ * vp10_model_rd_from_var_lapndz(), clamps its argument to 245727.
+ */
+ // NOTE: The tables below must be of the same size.
+
+ // The functions described below are sampled at the four most significant
+ // bits of x^2 + 8 / 256.
+
+ // Normalized rate:
+ // This table models the rate for a Laplacian source with given variance
+ // when quantized with a uniform quantizer with given stepsize. The
+ // closed form expression is:
+ // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
+ // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
+ // and H(x) is the binary entropy function.
+ static const int rate_tab_q10[] = {
+ 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
+ 4044, 3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
+ 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
+ 2290, 2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
+ 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
+ 911, 864, 821, 781, 745, 680, 623, 574, 530, 490, 455, 424,
+ 395, 345, 304, 269, 239, 213, 190, 171, 154, 126, 104, 87,
+ 73, 61, 52, 44, 38, 28, 21, 16, 12, 10, 8, 6,
+ 5, 3, 2, 1, 1, 1, 0, 0,
+ };
+ // Normalized distortion:
+ // This table models the normalized distortion for a Laplacian source
+ // with given variance when quantized with a uniform quantizer
+ // with given stepsize. The closed form expression is:
+ // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
+ // where x = qpstep / sqrt(variance).
+ // Note the actual distortion is Dn * variance.
+ static const int dist_tab_q10[] = {
+ 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5,
+ 5, 6, 7, 7, 8, 9, 11, 12, 13, 15, 16, 17,
+ 18, 21, 24, 26, 29, 31, 34, 36, 39, 44, 49, 54,
+ 59, 64, 69, 73, 78, 88, 97, 106, 115, 124, 133, 142,
+ 151, 167, 184, 200, 215, 231, 245, 260, 274, 301, 327, 351,
+ 375, 397, 418, 439, 458, 495, 528, 559, 587, 613, 637, 659,
+ 680, 717, 749, 777, 801, 823, 842, 859, 874, 899, 919, 936,
+ 949, 960, 969, 977, 983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
+ 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
+ };
+ static const int xsq_iq_q10[] = { // xsq_q10 value at which each table entry above was sampled.
+ 0, 4, 8, 12, 16, 20, 24, 28, 32,
+ 40, 48, 56, 64, 72, 80, 88, 96, 112,
+ 128, 144, 160, 176, 192, 208, 224, 256, 288,
+ 320, 352, 384, 416, 448, 480, 544, 608, 672,
+ 736, 800, 864, 928, 992, 1120, 1248, 1376, 1504,
+ 1632, 1760, 1888, 2016, 2272, 2528, 2784, 3040, 3296,
+ 3552, 3808, 4064, 4576, 5088, 5600, 6112, 6624, 7136,
+ 7648, 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
+ 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, 32736,
+ 36832, 40928, 45024, 49120, 53216, 57312, 61408, 65504, 73696,
+ 81888, 90080, 98272, 106464, 114656, 122848, 131040, 147424, 163808,
+ 180192, 196576, 212960, 229344, 245728,
+ };
+ const int tmp = (xsq_q10 >> 2) + 8;
+ const int k = get_msb(tmp) - 3; // Assumes get_msb() returns the index of the highest set bit - TODO confirm.
+ const int xq = (k << 3) + ((tmp >> k) & 0x7); // Table index: 8 entries per value of k.
+ const int one_q10 = 1 << 10;
+ const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k); // Weight of entry xq + 1; sample spacing is 4 << k.
+ const int b_q10 = one_q10 - a_q10; // Weight of entry xq.
+ *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
+ *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
+}
+
+// Models the rate and distortion of a Laplacian source with variance `var`
+// quantized by a uniform quantizer of step `qstep`, for a block of
+// 2^n_log2 samples. The closed form expressions are in:
+//   Hang and Chen, "Source Model for transform video coder and its
+//   application - Part I: Fundamental Theory", IEEE Trans. Circ.
+//   Sys. for Video Tech., April 1997.
+// Writes the modelled rate to *rate and the modelled distortion to *dist;
+// both are 0 when var is 0.
+void vp10_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
+                                   unsigned int qstep, int *rate,
+                                   int64_t *dist) {
+  // Upper clamp applied to the value handed to model_rd_norm().
+  static const uint32_t MAX_XSQ_Q10 = 245727;
+  uint64_t qstep_sq_scaled;
+  uint64_t ratio_q10;
+  int xsq_q10;
+  int norm_rate_q10;
+  int norm_dist_q10;
+
+  if (var == 0) {
+    *rate = 0;
+    *dist = 0;
+    return;
+  }
+
+  // qstep^2 * 2^(n_log2 + 10) / var, rounded to nearest.
+  qstep_sq_scaled = (uint64_t)qstep * qstep << (n_log2 + 10);
+  ratio_q10 = (qstep_sq_scaled + (var >> 1)) / var;
+  xsq_q10 = (int)VPXMIN(ratio_q10, MAX_XSQ_Q10);
+
+  model_rd_norm(xsq_q10, &norm_rate_q10, &norm_dist_q10);
+
+  *rate = ROUND_POWER_OF_TWO(norm_rate_q10 << n_log2,
+                             10 - VP10_PROB_COST_SHIFT);
+  *dist = (var * (int64_t)norm_dist_q10 + 512) >> 10;
+}
+
+// Returns 1 when any of the `span` consecutive entropy contexts starting at
+// `ctx` is non-zero, otherwise 0. The bytes are gathered with memcpy()
+// instead of dereferencing a casted uint16_t/uint32_t/uint64_t pointer:
+// the cast form violates the strict-aliasing rule and may perform a
+// misaligned access, both of which are undefined behaviour in C.
+// Like the casts it replaces, this treats each context as one byte and
+// requires span <= 8.
+static int entropy_ctx_span_nonzero(const ENTROPY_CONTEXT *ctx, int span) {
+  uint64_t bits = 0;
+  memcpy(&bits, ctx, (size_t)span);
+  return bits != 0;
+}
+
+// Fills dst[0], dst[span], dst[2 * span], ... (indices below num_4x4) with a
+// 0/1 flag telling whether any of the `span` source contexts starting at the
+// same index is non-zero. With span == 1 the contexts are copied verbatim.
+static void reduce_entropy_ctx(ENTROPY_CONTEXT *dst,
+                               const ENTROPY_CONTEXT *src, int num_4x4,
+                               int span) {
+  int i;
+
+  if (span == 1) {
+    memcpy(dst, src, sizeof(ENTROPY_CONTEXT) * num_4x4);
+    return;
+  }
+  for (i = 0; i < num_4x4; i += span)
+    dst[i] = entropy_ctx_span_nonzero(&src[i], span);
+}
+
+// Builds the above/left entropy contexts of a plane block at transform
+// granularity. For each transform-sized run of 4x4 units along the top
+// (t_above) and left (t_left) edges, the first entry of the run is set to 1
+// if any of the plane's per-4x4 contexts in that run is non-zero, else 0;
+// for 4-sample transform dimensions the contexts are copied unchanged.
+// An unknown tx_size asserts and leaves both outputs untouched.
+static void get_entropy_contexts_plane(
+    BLOCK_SIZE plane_bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd,
+    ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE],
+    ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]) {
+  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+  int above_span;  // Transform width in 4x4 units.
+  int left_span;   // Transform height in 4x4 units.
+
+  switch (tx_size) {
+    case TX_4X4:
+      above_span = 1;
+      left_span = 1;
+      break;
+    case TX_8X8:
+      above_span = 2;
+      left_span = 2;
+      break;
+    case TX_16X16:
+      above_span = 4;
+      left_span = 4;
+      break;
+    case TX_32X32:
+      above_span = 8;
+      left_span = 8;
+      break;
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+    case TX_4X8:
+      above_span = 1;
+      left_span = 2;
+      break;
+    case TX_8X4:
+      above_span = 2;
+      left_span = 1;
+      break;
+    case TX_8X16:
+      above_span = 2;
+      left_span = 4;
+      break;
+    case TX_16X8:
+      above_span = 4;
+      left_span = 2;
+      break;
+    case TX_16X32:
+      above_span = 4;
+      left_span = 8;
+      break;
+    case TX_32X16:
+      above_span = 8;
+      left_span = 4;
+      break;
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
+    default: assert(0 && "Invalid transform size."); return;
+  }
+
+  reduce_entropy_ctx(t_above, pd->above_context, num_4x4_w, above_span);
+  reduce_entropy_ctx(t_left, pd->left_context, num_4x4_h, left_span);
+}
+
+// Public entry point: converts `bsize` to the block size of plane `pd` and
+// fills t_above / t_left with that plane's entropy contexts for `tx_size`.
+void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
+                               const struct macroblockd_plane *pd,
+                               ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE],
+                               ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]) {
+  get_entropy_contexts_plane(get_plane_block_size(bsize, pd), tx_size, pd,
+                             t_above, t_left);
+}
+
+// Scores the candidate reference motion vectors of `ref_frame` by the SAD
+// between the source block and the reference block at each candidate's
+// full-pel position. Stores in `x` the index of the best candidate, its
+// SAD, and the largest full-pel MV component magnitude seen.
+// Candidates are the two ref_mvs entries plus, when adaptive motion search
+// is enabled and the block is below the maximum partition size,
+// x->pred_mv[ref_frame].
+void vp10_mv_pred(VP10_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
+                  int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
+  MV candidates[3];
+  uint8_t *const src_y = x->plane[0].src.buf;
+  const int num_candidates =
+      MAX_MV_REF_CANDIDATES +
+      (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size);
+  int nearest_equals_near;
+  int seen_zero = 0;
+  int winner = 0;
+  int lowest_sad = INT_MAX;
+  int largest_mv = 0;
+  int idx;
+
+  candidates[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
+  candidates[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
+  candidates[2] = x->pred_mv[ref_frame];
+  assert(num_candidates <= (int)(sizeof(candidates) / sizeof(candidates[0])));
+
+  nearest_equals_near = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
+                        x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
+
+  for (idx = 0; idx < num_candidates; ++idx) {
+    const MV *const mv = &candidates[idx];
+    int row_fp, col_fp, is_zero, sad;
+
+    // The second candidate is redundant when it duplicates the first.
+    if (idx == 1 && nearest_equals_near) continue;
+
+    // Round the 1/8-pel vector to full-pel units.
+    row_fp = (mv->row + 3 + (mv->row >= 0)) >> 3;
+    col_fp = (mv->col + 3 + (mv->col >= 0)) >> 3;
+    largest_mv =
+        VPXMAX(largest_mv, VPXMAX(abs(mv->row), abs(mv->col)) >> 3);
+
+    // Evaluate the zero full-pel position at most once.
+    is_zero = (row_fp == 0 && col_fp == 0);
+    if (is_zero) {
+      if (seen_zero) continue;
+      seen_zero = 1;
+    }
+
+    sad = cpi->fn_ptr[block_size].sdf(
+        src_y, x->plane[0].src.stride,
+        &ref_y_buffer[ref_y_stride * row_fp + col_fp], ref_y_stride);
+    if (sad < lowest_sad) {
+      lowest_sad = sad;
+      winner = idx;
+    }
+  }
+
+  // Record the outcome for this reference frame.
+  x->mv_best_ref_index[ref_frame] = winner;
+  x->max_mv_context[ref_frame] = largest_mv;
+  x->pred_mv_sad[ref_frame] = lowest_sad;
+}
+
+// Initialises the per-plane prediction buffers in `dst` from frame buffer
+// `src`, then positions each one at (mi_row, mi_col) via setup_pred_plane().
+// Plane 0 uses the luma geometry and `scale`; the other planes use the
+// chroma geometry and `scale_uv`.
+void vp10_setup_pred_block(const MACROBLOCKD *xd,
+                           struct buf_2d dst[MAX_MB_PLANE],
+                           const YV12_BUFFER_CONFIG *src, int mi_row,
+                           int mi_col, const struct scale_factors *scale,
+                           const struct scale_factors *scale_uv) {
+  int plane;
+
+  dst[0].buf = src->y_buffer;
+  dst[0].stride = src->y_stride;
+  dst[1].buf = src->u_buffer;
+  dst[1].stride = src->uv_stride;
+  dst[2].buf = src->v_buffer;
+  dst[2].stride = src->uv_stride;
+
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const int is_chroma = (plane != 0);
+    const struct scale_factors *const plane_scale =
+        is_chroma ? scale_uv : scale;
+
+    setup_pred_plane(&dst[plane], dst[plane].buf,
+                     is_chroma ? src->uv_crop_width : src->y_crop_width,
+                     is_chroma ? src->uv_crop_height : src->y_crop_height,
+                     dst[plane].stride, mi_row, mi_col, plane_scale,
+                     xd->plane[plane].subsampling_x,
+                     xd->plane[plane].subsampling_y);
+  }
+}
+
+// Converts a raster-order 4x4 block index inside a plane block of size
+// `plane_bsize` into a sample offset for a buffer with row pitch `stride`.
+int vp10_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
+                             int stride) {
+  const int width_log2 = b_width_log2_lookup[plane_bsize];
+  const int col_mask = (1 << width_log2) - 1;
+  const int row_px = 4 * (raster_block >> width_log2);
+  const int col_px = 4 * (raster_block & col_mask);
+
+  return row_px * stride + col_px;
+}
+
+// Returns the address inside `base` of 4x4 block `raster_block`, where
+// `base` is laid out with a row pitch equal to the plane block's width in
+// samples (4 * number of 4x4 columns).
+int16_t *vp10_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
+                                        int raster_block, int16_t *base) {
+  const int row_pitch = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+  const int offset =
+      vp10_raster_block_offset(plane_bsize, raster_block, row_pitch);
+
+  return &base[offset];
+}
+
+// Returns the scaled copy of reference `ref_frame`, or NULL when the scaled
+// index is INVALID_IDX or is the same buffer as the unscaled reference.
+YV12_BUFFER_CONFIG *vp10_get_scaled_ref_frame(const VP10_COMP *cpi,
+                                              int ref_frame) {
+  const VP10_COMMON *const cm = &cpi->common;
+  const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
+  const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+
+  if (scaled_idx == ref_idx || scaled_idx == INVALID_IDX) return NULL;
+  return &cm->buffer_pool->frame_bufs[scaled_idx].buf;
+}
+
+#if CONFIG_DUAL_FILTER
+// Returns the rate of signalling the block's interpolation filters, scaled
+// by SWITCHABLE_INTERP_RATE_FACTOR. For each of the two directions the
+// filter cost is counted only if an MV of the block has a sub-pel component
+// in that direction (the second reference is checked with dir + 2).
+int vp10_get_switchable_rate(const VP10_COMP *cpi,
+                             const MACROBLOCKD *const xd) {
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  int total_cost = 0;
+  int dir;
+
+  for (dir = 0; dir < 2; ++dir) {
+    int ctx;
+
+    if (!has_subpel_mv_component(xd->mi[0], xd, dir) &&
+        !(mbmi->ref_frame[1] > INTRA_FRAME &&
+          has_subpel_mv_component(xd->mi[0], xd, dir + 2)))
+      continue;
+
+    ctx = vp10_get_pred_context_switchable_interp(xd, dir);
+    total_cost += cpi->switchable_interp_costs[ctx][mbmi->interp_filter[dir]];
+  }
+  return SWITCHABLE_INTERP_RATE_FACTOR * total_cost;
+}
+#else
+// Returns the rate of signalling the block's interpolation filter, scaled
+// by SWITCHABLE_INTERP_RATE_FACTOR. With CONFIG_EXT_INTERP the rate is 0
+// when vp10_is_interp_needed() reports that no filter is needed.
+int vp10_get_switchable_rate(const VP10_COMP *cpi,
+                             const MACROBLOCKD *const xd) {
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const int ctx = vp10_get_pred_context_switchable_interp(xd);
+#if CONFIG_EXT_INTERP
+  if (!vp10_is_interp_needed(xd)) {
+    return 0;
+  }
+#endif  // CONFIG_EXT_INTERP
+  return SWITCHABLE_INTERP_RATE_FACTOR *
+         cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
+}
+#endif
+
+void vp10_set_rd_speed_thresholds(VP10_COMP *cpi) { /* Fills
+ * cpi->rd.thresh_mult[], indexed by THR_MODES, with the per-mode threshold
+ * multipliers.
+ *
+ * Every entry starts from a baseline of -500 when cpi->oxcf.mode == BEST and
+ * 0 otherwise. The single-reference NEAREST entries are then assigned
+ * outright (300 with sf->adaptive_rd_thresh, else 0), discarding the
+ * baseline. All remaining entries have a fixed per-mode constant added to
+ * the baseline. Which entries exist depends on CONFIG_EXT_REFS and
+ * CONFIG_EXT_INTER.
+ */
+ int i;
+ RD_OPT *const rd = &cpi->rd;
+ SPEED_FEATURES *const sf = &cpi->sf;
+
+ // Set baseline threshold values.
+ for (i = 0; i < MAX_MODES; ++i)
+ rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;
+
+ if (sf->adaptive_rd_thresh) { // Plain assignments below: the baseline is overwritten for NEAREST modes.
+ rd->thresh_mult[THR_NEARESTMV] = 300;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEARESTL2] = 300;
+ rd->thresh_mult[THR_NEARESTL3] = 300;
+ rd->thresh_mult[THR_NEARESTB] = 300;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEARESTA] = 300;
+ rd->thresh_mult[THR_NEARESTG] = 300;
+ } else {
+ rd->thresh_mult[THR_NEARESTMV] = 0;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEARESTL2] = 0;
+ rd->thresh_mult[THR_NEARESTL3] = 0;
+ rd->thresh_mult[THR_NEARESTB] = 0;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEARESTA] = 0;
+ rd->thresh_mult[THR_NEARESTG] = 0;
+ }
+
+ rd->thresh_mult[THR_DC] += 1000; // From here on every entry is baseline + constant.
+
+ rd->thresh_mult[THR_NEWMV] += 1000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEWL2] += 1000;
+ rd->thresh_mult[THR_NEWL3] += 1000;
+ rd->thresh_mult[THR_NEWB] += 1000;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEWA] += 1000;
+ rd->thresh_mult[THR_NEWG] += 1000;
+
+ rd->thresh_mult[THR_NEARMV] += 1000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEARL2] += 1000;
+ rd->thresh_mult[THR_NEARL3] += 1000;
+ rd->thresh_mult[THR_NEARB] += 1000;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEARA] += 1000;
+ rd->thresh_mult[THR_NEARG] += 1000;
+
+#if CONFIG_EXT_INTER
+ rd->thresh_mult[THR_NEWFROMNEARMV] += 1000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEWFROMNEARL2] += 1000;
+ rd->thresh_mult[THR_NEWFROMNEARL3] += 1000;
+ rd->thresh_mult[THR_NEWFROMNEARB] += 1000;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_NEWFROMNEARA] += 1000;
+ rd->thresh_mult[THR_NEWFROMNEARG] += 1000;
+#endif // CONFIG_EXT_INTER
+
+ rd->thresh_mult[THR_ZEROMV] += 2000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_ZEROL2] += 2000;
+ rd->thresh_mult[THR_ZEROL3] += 2000;
+ rd->thresh_mult[THR_ZEROB] += 2000;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_ZEROG] += 2000;
+ rd->thresh_mult[THR_ZEROA] += 2000;
+
+ rd->thresh_mult[THR_TM] += 1000;
+
+#if CONFIG_EXT_INTER
+
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] += 1000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] += 1000;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] += 1000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] += 1000;
+ rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] += 1000;
+#endif // CONFIG_EXT_REFS
+
+#else // CONFIG_EXT_INTER
+
+ rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEARESTL2A] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTL3A] += 1000;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEARESTLB] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTL2B] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTL3B] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTGB] += 1000;
+#endif // CONFIG_EXT_REFS
+
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_EXT_INTER
+
+ rd->thresh_mult[THR_COMP_NEAREST_NEARLA] += 1200; // Each reference pair below repeats the same nine constants.
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTLA] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARLA] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWLA] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTLA] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWLA] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARLA] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWLA] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROLA] += 2500;
+
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEAREST_NEARL2A] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTL2A] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARL2A] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWL2A] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARL2A] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWL2A] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROL2A] += 2500;
+
+ rd->thresh_mult[THR_COMP_NEAREST_NEARL3A] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTL3A] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARL3A] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWL3A] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARL3A] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWL3A] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROL3A] += 2500;
+#endif // CONFIG_EXT_REFS
+
+ rd->thresh_mult[THR_COMP_NEAREST_NEARGA] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTGA] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARGA] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWGA] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTGA] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWGA] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARGA] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWGA] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROGA] += 2500;
+
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEAREST_NEARLB] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTLB] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARLB] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWLB] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTLB] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWLB] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARLB] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWLB] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROLB] += 2500;
+
+ rd->thresh_mult[THR_COMP_NEAREST_NEARL2B] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTL2B] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARL2B] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWL2B] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARL2B] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWL2B] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROL2B] += 2500;
+
+ rd->thresh_mult[THR_COMP_NEAREST_NEARL3B] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTL3B] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARL3B] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWL3B] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARL3B] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWL3B] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROL3B] += 2500;
+
+ rd->thresh_mult[THR_COMP_NEAREST_NEARGB] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARESTGB] += 1200;
+ rd->thresh_mult[THR_COMP_NEAR_NEARGB] += 1200;
+ rd->thresh_mult[THR_COMP_NEAREST_NEWGB] += 1500;
+ rd->thresh_mult[THR_COMP_NEW_NEARESTGB] += 1500;
+ rd->thresh_mult[THR_COMP_NEAR_NEWGB] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEARGB] += 1700;
+ rd->thresh_mult[THR_COMP_NEW_NEWGB] += 2000;
+ rd->thresh_mult[THR_COMP_ZERO_ZEROGB] += 2500;
+#endif // CONFIG_EXT_REFS
+
+#else // CONFIG_EXT_INTER
+
+ rd->thresh_mult[THR_COMP_NEARLA] += 1500;
+ rd->thresh_mult[THR_COMP_NEWLA] += 2000;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEARL2A] += 1500;
+ rd->thresh_mult[THR_COMP_NEWL2A] += 2000;
+ rd->thresh_mult[THR_COMP_NEARL3A] += 1500;
+ rd->thresh_mult[THR_COMP_NEWL3A] += 2000;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEARGA] += 1500;
+ rd->thresh_mult[THR_COMP_NEWGA] += 2000;
+
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_NEARLB] += 1500;
+ rd->thresh_mult[THR_COMP_NEWLB] += 2000;
+ rd->thresh_mult[THR_COMP_NEARL2B] += 1500;
+ rd->thresh_mult[THR_COMP_NEWL2B] += 2000;
+ rd->thresh_mult[THR_COMP_NEARL3B] += 1500;
+ rd->thresh_mult[THR_COMP_NEWL3B] += 2000;
+ rd->thresh_mult[THR_COMP_NEARGB] += 1500;
+ rd->thresh_mult[THR_COMP_NEWGB] += 2000;
+#endif // CONFIG_EXT_REFS
+
+ rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_ZEROL2A] += 2500;
+ rd->thresh_mult[THR_COMP_ZEROL3A] += 2500;
+#endif // CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
+
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_ZEROLB] += 2500;
+ rd->thresh_mult[THR_COMP_ZEROL2B] += 2500;
+ rd->thresh_mult[THR_COMP_ZEROL3B] += 2500;
+ rd->thresh_mult[THR_COMP_ZEROGB] += 2500;
+#endif // CONFIG_EXT_REFS
+
+#endif // CONFIG_EXT_INTER
+
+ rd->thresh_mult[THR_H_PRED] += 2000;
+ rd->thresh_mult[THR_V_PRED] += 2000;
+ rd->thresh_mult[THR_D135_PRED] += 2500;
+ rd->thresh_mult[THR_D207_PRED] += 2500;
+ rd->thresh_mult[THR_D153_PRED] += 2500;
+ rd->thresh_mult[THR_D63_PRED] += 2500;
+ rd->thresh_mult[THR_D117_PRED] += 2500;
+ rd->thresh_mult[THR_D45_PRED] += 2500;
+
+#if CONFIG_EXT_INTER
+ rd->thresh_mult[THR_COMP_INTERINTRA_ZEROL] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTL] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARL] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEWL] += 2000;
+
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_INTERINTRA_ZEROL2] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTL2] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARL2] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEWL2] += 2000;
+
+ rd->thresh_mult[THR_COMP_INTERINTRA_ZEROL3] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTL3] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARL3] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEWL3] += 2000;
+#endif // CONFIG_EXT_REFS
+
+ rd->thresh_mult[THR_COMP_INTERINTRA_ZEROG] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTG] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARG] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEWG] += 2000;
+
+#if CONFIG_EXT_REFS
+ rd->thresh_mult[THR_COMP_INTERINTRA_ZEROB] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTB] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARB] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEWB] += 2000;
+#endif // CONFIG_EXT_REFS
+
+ rd->thresh_mult[THR_COMP_INTERINTRA_ZEROA] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARESTA] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEARA] += 1500;
+ rd->thresh_mult[THR_COMP_INTERINTRA_NEWA] += 2000;
+#endif // CONFIG_EXT_INTER
+}
+
+// Loads cpi->rd.thresh_mult_sub8x8[] from one of two fixed tables: row 1
+// when cpi->oxcf.mode == BEST, row 0 otherwise.
+void vp10_set_rd_speed_thresholds_sub8x8(VP10_COMP *cpi) {
+  static const int sub8x8_mult[2][MAX_REFS] = {
+#if CONFIG_EXT_REFS
+    { 2500, 2500, 2500, 2500, 2500, 2500, 4500, 4500, 4500, 4500, 4500, 4500,
+      4500, 4500, 2500 },
+    { 2000, 2000, 2000, 2000, 2000, 2000, 4000, 4000, 4000, 4000, 4000, 4000,
+      4000, 4000, 2000 }
+#else
+    { 2500, 2500, 2500, 4500, 4500, 2500 },
+    { 2000, 2000, 2000, 4000, 4000, 2000 }
+#endif  // CONFIG_EXT_REFS
+  };
+  const int row = (cpi->oxcf.mode == BEST);
+
+  memcpy(cpi->rd.thresh_mult_sub8x8, sub8x8_mult[row],
+         sizeof(sub8x8_mult[row]));
+}
+
+// Adapts the RD threshold factors in `factor_buf` after a mode decision.
+// For every block size from max(bsize - 1, BLOCK_4X4) to
+// min(bsize + 2, cm->sb_size) and every mode index below the limit for
+// `bsize` (MAX_REFS below BLOCK_8X8, MAX_MODES otherwise):
+//   - the winning mode's factor decays by 1/16 of its value;
+//   - every other factor grows by RD_THRESH_INC, capped at
+//     rd_thresh * RD_THRESH_MAX_FACT.
+// Does nothing when rd_thresh <= 0.
+void vp10_update_rd_thresh_fact(const VP10_COMMON *const cm,
+                                int (*factor_buf)[MAX_MODES], int rd_thresh,
+                                int bsize, int best_mode_index) {
+  BLOCK_SIZE first_bs, last_bs, bs;
+  int num_modes;
+  int mode;
+
+  if (rd_thresh <= 0) return;
+
+  num_modes = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
+  first_bs = VPXMAX(bsize - 1, BLOCK_4X4);
+  last_bs = VPXMIN(bsize + 2, cm->sb_size);
+
+  for (mode = 0; mode < num_modes; ++mode) {
+    for (bs = first_bs; bs <= last_bs; ++bs) {
+      int *const fact = &factor_buf[bs][mode];
+
+      if (mode != best_mode_index) {
+        *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
+      } else {
+        *fact -= (*fact >> 4);
+      }
+    }
+  }
+}
+
+// Returns the intra cost penalty as a multiple of the DC quantizer value
+// for (qindex, qdelta, bit_depth). Without high bit depth support the
+// multiplier is 20. With it, the multiplier is 20, 5 and 5/4 (rounded) for
+// 8, 10 and 12 bits; any other bit depth asserts and yields -1.
+int vp10_get_intra_cost_penalty(int qindex, int qdelta,
+                                vpx_bit_depth_t bit_depth) {
+  const int dc_q = vp10_dc_quant(qindex, qdelta, bit_depth);
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (bit_depth == VPX_BITS_8) return 20 * dc_q;
+  if (bit_depth == VPX_BITS_10) return 5 * dc_q;
+  if (bit_depth == VPX_BITS_12) return ROUND_POWER_OF_TWO(5 * dc_q, 2);
+
+  assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+  return -1;
+#else
+  return 20 * dc_q;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
new file mode 100644
index 0000000..9680215
--- /dev/null
+++ b/av1/encoder/rd.h
@@ -0,0 +1,464 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_RD_H_
+#define VP10_ENCODER_RD_H_
+
+#include <limits.h>
+
+#if CONFIG_ANS
+#include "av1/common/ans.h"
+#endif // CONFIG_ANS
+#include "av1/common/blockd.h"
+
+#include "av1/encoder/block.h"
+#include "av1/encoder/context_tree.h"
+#include "av1/encoder/cost.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Value assigned to rd->RDDIV by vp10_initialize_rd_consts(): the number of
+// bits by which distortion is shifted in RD costs (i.e. D is scaled by 128).
+#define RDDIV_BITS 7
+#define RD_EPB_SHIFT 6
+
+// Integer rate-distortion cost: rate R weighted by multiplier RM (rounded
+// down from VP10_PROB_COST_SHIFT fractional bits) plus distortion D shifted
+// left by DM bits.
+// Every macro argument is parenthesized so that compound expressions such
+// as `a + b` or `c ? x : y` expand with the intended precedence.
+#define RDCOST(RM, DM, R, D)                                          \
+  (ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), VP10_PROB_COST_SHIFT) + \
+   ((D) << (DM)))
+
+// Floating-point counterpart of RDCOST, without rounding.
+#define RDCOST_DBL(RM, DM, R, D)                                   \
+  (((((double)(R)) * (RM)) / (double)(1 << VP10_PROB_COST_SHIFT)) + \
+   ((double)(D) * (1 << (DM))))
+
+#define QIDX_SKIP_THRESH 115
+
+#define MV_COST_WEIGHT 108
+#define MV_COST_WEIGHT_SUB 120
+
+#define INVALID_MV 0x80008000
+
+#if CONFIG_EXT_REFS // MAX_MODES below equals the number of THR_MODES enumerators in each configuration.
+
+#if CONFIG_EXT_INTER
+#define MAX_MODES 144
+#else // CONFIG_EXT_INTER
+#define MAX_MODES 66
+#endif // CONFIG_EXT_INTER
+
+#else // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_INTER
+#define MAX_MODES 57
+#else // CONFIG_EXT_INTER
+#define MAX_MODES 30
+#endif // CONFIG_EXT_INTER
+
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS // MAX_REFS is the row length of the sub-8x8 threshold tables in rd.c.
+#define MAX_REFS 15
+#else
+#define MAX_REFS 6
+#endif // CONFIG_EXT_REFS
+
+#define RD_THRESH_MAX_FACT 64 // vp10_update_rd_thresh_fact() caps a factor at rd_thresh * RD_THRESH_MAX_FACT.
+#define RD_THRESH_INC 1 // Step by which vp10_update_rd_thresh_fact() raises the factor of a non-winning mode.
+
+// This enumerator type needs to be kept aligned with the mode order in
+// const MODE_DEFINITION vp10_mode_order[MAX_MODES] used in the rd code.
+typedef enum { /* Index into the per-mode RD threshold arrays, e.g.
+ * rd->thresh_mult[] as filled by vp10_set_rd_speed_thresholds().
+ * The set of enumerators depends on CONFIG_EXT_REFS and CONFIG_EXT_INTER;
+ * in every combination their count equals MAX_MODES (30, 57, 66 or 144),
+ * so adding or removing an entry requires updating MAX_MODES as well.
+ */
+ THR_NEARESTMV,
+#if CONFIG_EXT_REFS
+ THR_NEARESTL2,
+ THR_NEARESTL3,
+ THR_NEARESTB,
+#endif // CONFIG_EXT_REFS
+ THR_NEARESTA,
+ THR_NEARESTG,
+
+ THR_DC,
+
+ THR_NEWMV,
+#if CONFIG_EXT_REFS
+ THR_NEWL2,
+ THR_NEWL3,
+ THR_NEWB,
+#endif // CONFIG_EXT_REFS
+ THR_NEWA,
+ THR_NEWG,
+
+ THR_NEARMV,
+#if CONFIG_EXT_REFS
+ THR_NEARL2,
+ THR_NEARL3,
+ THR_NEARB,
+#endif // CONFIG_EXT_REFS
+ THR_NEARA,
+ THR_NEARG,
+
+#if CONFIG_EXT_INTER
+ THR_NEWFROMNEARMV,
+#if CONFIG_EXT_REFS
+ THR_NEWFROMNEARL2,
+ THR_NEWFROMNEARL3,
+ THR_NEWFROMNEARB,
+#endif // CONFIG_EXT_REFS
+ THR_NEWFROMNEARA,
+ THR_NEWFROMNEARG,
+#endif // CONFIG_EXT_INTER
+
+ THR_ZEROMV,
+#if CONFIG_EXT_REFS
+ THR_ZEROL2,
+ THR_ZEROL3,
+ THR_ZEROB,
+#endif // CONFIG_EXT_REFS
+ THR_ZEROG,
+ THR_ZEROA,
+
+#if CONFIG_EXT_INTER
+
+ THR_COMP_NEAREST_NEARESTLA,
+#if CONFIG_EXT_REFS
+ THR_COMP_NEAREST_NEARESTL2A,
+ THR_COMP_NEAREST_NEARESTL3A,
+#endif // CONFIG_EXT_REFS
+ THR_COMP_NEAREST_NEARESTGA,
+#if CONFIG_EXT_REFS
+ THR_COMP_NEAREST_NEARESTLB,
+ THR_COMP_NEAREST_NEARESTL2B,
+ THR_COMP_NEAREST_NEARESTL3B,
+ THR_COMP_NEAREST_NEARESTGB,
+#endif // CONFIG_EXT_REFS
+
+#else // CONFIG_EXT_INTER
+
+ THR_COMP_NEARESTLA,
+#if CONFIG_EXT_REFS
+ THR_COMP_NEARESTL2A,
+ THR_COMP_NEARESTL3A,
+#endif // CONFIG_EXT_REFS
+ THR_COMP_NEARESTGA,
+#if CONFIG_EXT_REFS
+ THR_COMP_NEARESTLB,
+ THR_COMP_NEARESTL2B,
+ THR_COMP_NEARESTL3B,
+ THR_COMP_NEARESTGB,
+#endif // CONFIG_EXT_REFS
+
+#endif // CONFIG_EXT_INTER
+
+ THR_TM,
+
+#if CONFIG_EXT_INTER
+
+ THR_COMP_NEAR_NEARESTLA, // Nine compound entries per reference pair in this configuration.
+ THR_COMP_NEAREST_NEARLA,
+ THR_COMP_NEAR_NEARLA,
+ THR_COMP_NEW_NEARESTLA,
+ THR_COMP_NEAREST_NEWLA,
+ THR_COMP_NEW_NEARLA,
+ THR_COMP_NEAR_NEWLA,
+ THR_COMP_NEW_NEWLA,
+ THR_COMP_ZERO_ZEROLA,
+
+#if CONFIG_EXT_REFS
+ THR_COMP_NEAR_NEARESTL2A,
+ THR_COMP_NEAREST_NEARL2A,
+ THR_COMP_NEAR_NEARL2A,
+ THR_COMP_NEW_NEARESTL2A,
+ THR_COMP_NEAREST_NEWL2A,
+ THR_COMP_NEW_NEARL2A,
+ THR_COMP_NEAR_NEWL2A,
+ THR_COMP_NEW_NEWL2A,
+ THR_COMP_ZERO_ZEROL2A,
+
+ THR_COMP_NEAR_NEARESTL3A,
+ THR_COMP_NEAREST_NEARL3A,
+ THR_COMP_NEAR_NEARL3A,
+ THR_COMP_NEW_NEARESTL3A,
+ THR_COMP_NEAREST_NEWL3A,
+ THR_COMP_NEW_NEARL3A,
+ THR_COMP_NEAR_NEWL3A,
+ THR_COMP_NEW_NEWL3A,
+ THR_COMP_ZERO_ZEROL3A,
+#endif // CONFIG_EXT_REFS
+
+ THR_COMP_NEAR_NEARESTGA,
+ THR_COMP_NEAREST_NEARGA,
+ THR_COMP_NEAR_NEARGA,
+ THR_COMP_NEW_NEARESTGA,
+ THR_COMP_NEAREST_NEWGA,
+ THR_COMP_NEW_NEARGA,
+ THR_COMP_NEAR_NEWGA,
+ THR_COMP_NEW_NEWGA,
+ THR_COMP_ZERO_ZEROGA,
+
+#if CONFIG_EXT_REFS
+ THR_COMP_NEAR_NEARESTLB,
+ THR_COMP_NEAREST_NEARLB,
+ THR_COMP_NEAR_NEARLB,
+ THR_COMP_NEW_NEARESTLB,
+ THR_COMP_NEAREST_NEWLB,
+ THR_COMP_NEW_NEARLB,
+ THR_COMP_NEAR_NEWLB,
+ THR_COMP_NEW_NEWLB,
+ THR_COMP_ZERO_ZEROLB,
+
+ THR_COMP_NEAR_NEARESTL2B,
+ THR_COMP_NEAREST_NEARL2B,
+ THR_COMP_NEAR_NEARL2B,
+ THR_COMP_NEW_NEARESTL2B,
+ THR_COMP_NEAREST_NEWL2B,
+ THR_COMP_NEW_NEARL2B,
+ THR_COMP_NEAR_NEWL2B,
+ THR_COMP_NEW_NEWL2B,
+ THR_COMP_ZERO_ZEROL2B,
+
+ THR_COMP_NEAR_NEARESTL3B,
+ THR_COMP_NEAREST_NEARL3B,
+ THR_COMP_NEAR_NEARL3B,
+ THR_COMP_NEW_NEARESTL3B,
+ THR_COMP_NEAREST_NEWL3B,
+ THR_COMP_NEW_NEARL3B,
+ THR_COMP_NEAR_NEWL3B,
+ THR_COMP_NEW_NEWL3B,
+ THR_COMP_ZERO_ZEROL3B,
+
+ THR_COMP_NEAR_NEARESTGB,
+ THR_COMP_NEAREST_NEARGB,
+ THR_COMP_NEAR_NEARGB,
+ THR_COMP_NEW_NEARESTGB,
+ THR_COMP_NEAREST_NEWGB,
+ THR_COMP_NEW_NEARGB,
+ THR_COMP_NEAR_NEWGB,
+ THR_COMP_NEW_NEWGB,
+ THR_COMP_ZERO_ZEROGB,
+#endif // CONFIG_EXT_REFS
+
+#else // CONFIG_EXT_INTER
+
+ THR_COMP_NEARLA,
+ THR_COMP_NEWLA,
+#if CONFIG_EXT_REFS
+ THR_COMP_NEARL2A,
+ THR_COMP_NEWL2A,
+ THR_COMP_NEARL3A,
+ THR_COMP_NEWL3A,
+#endif // CONFIG_EXT_REFS
+ THR_COMP_NEARGA,
+ THR_COMP_NEWGA,
+
+#if CONFIG_EXT_REFS
+ THR_COMP_NEARLB,
+ THR_COMP_NEWLB,
+ THR_COMP_NEARL2B,
+ THR_COMP_NEWL2B,
+ THR_COMP_NEARL3B,
+ THR_COMP_NEWL3B,
+ THR_COMP_NEARGB,
+ THR_COMP_NEWGB,
+#endif // CONFIG_EXT_REFS
+
+ THR_COMP_ZEROLA,
+#if CONFIG_EXT_REFS
+ THR_COMP_ZEROL2A,
+ THR_COMP_ZEROL3A,
+#endif // CONFIG_EXT_REFS
+ THR_COMP_ZEROGA,
+
+#if CONFIG_EXT_REFS
+ THR_COMP_ZEROLB,
+ THR_COMP_ZEROL2B,
+ THR_COMP_ZEROL3B,
+ THR_COMP_ZEROGB,
+#endif // CONFIG_EXT_REFS
+
+#endif // CONFIG_EXT_INTER
+
+ THR_H_PRED,
+ THR_V_PRED,
+ THR_D135_PRED,
+ THR_D207_PRED,
+ THR_D153_PRED,
+ THR_D63_PRED,
+ THR_D117_PRED,
+ THR_D45_PRED,
+
+#if CONFIG_EXT_INTER
+ THR_COMP_INTERINTRA_ZEROL, // Four inter-intra entries per single reference in this configuration.
+ THR_COMP_INTERINTRA_NEARESTL,
+ THR_COMP_INTERINTRA_NEARL,
+ THR_COMP_INTERINTRA_NEWL,
+
+#if CONFIG_EXT_REFS
+ THR_COMP_INTERINTRA_ZEROL2,
+ THR_COMP_INTERINTRA_NEARESTL2,
+ THR_COMP_INTERINTRA_NEARL2,
+ THR_COMP_INTERINTRA_NEWL2,
+
+ THR_COMP_INTERINTRA_ZEROL3,
+ THR_COMP_INTERINTRA_NEARESTL3,
+ THR_COMP_INTERINTRA_NEARL3,
+ THR_COMP_INTERINTRA_NEWL3,
+#endif // CONFIG_EXT_REFS
+
+ THR_COMP_INTERINTRA_ZEROG,
+ THR_COMP_INTERINTRA_NEARESTG,
+ THR_COMP_INTERINTRA_NEARG,
+ THR_COMP_INTERINTRA_NEWG,
+
+#if CONFIG_EXT_REFS
+ THR_COMP_INTERINTRA_ZEROB,
+ THR_COMP_INTERINTRA_NEARESTB,
+ THR_COMP_INTERINTRA_NEARB,
+ THR_COMP_INTERINTRA_NEWB,
+#endif // CONFIG_EXT_REFS
+
+ THR_COMP_INTERINTRA_ZEROA,
+ THR_COMP_INTERINTRA_NEARESTA,
+ THR_COMP_INTERINTRA_NEARA,
+ THR_COMP_INTERINTRA_NEWA,
+#endif // CONFIG_EXT_INTER
+} THR_MODES;
+
+typedef enum {  // One index per sub-8x8 reference-frame choice.
+ THR_LAST,  // NOTE(review): order appears to mirror vp10_ref_order[] - confirm
+#if CONFIG_EXT_REFS
+ THR_LAST2,
+ THR_LAST3,
+ THR_BWDR,
+#endif // CONFIG_EXT_REFS
+ THR_GOLD,
+ THR_ALTR,
+ // Compound entries; presumably <first ref> paired with ALTREF ("A") - confirm.
+ THR_COMP_LA,
+#if CONFIG_EXT_REFS
+ THR_COMP_L2A,
+ THR_COMP_L3A,
+#endif // CONFIG_EXT_REFS
+ THR_COMP_GA,
+ // Compound entries; presumably <first ref> paired with BWDREF ("B") - confirm.
+#if CONFIG_EXT_REFS
+ THR_COMP_LB,
+ THR_COMP_L2B,
+ THR_COMP_L3B,
+ THR_COMP_GB,
+#endif // CONFIG_EXT_REFS
+
+ THR_INTRA,
+} THR_MODES_SUB8X8;
+
+typedef struct RD_OPT {
+ // Thresh_mult is used to set a threshold for the rd score. A higher value
+ // means that we will accept the best mode so far more often. This number
+ // is used in combination with the current block size, and thresh_freq_fact
+ // to pick a threshold.
+ int thresh_mult[MAX_MODES];
+ int thresh_mult_sub8x8[MAX_REFS];  // presumably the sub-8x8 analogue - confirm
+
+ int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];  // [segment][bsize][mode]
+
+ int64_t prediction_type_threshes[TOTAL_REFS_PER_FRAME][REFERENCE_MODES];
+
+ int RDMULT;  // NOTE(review): presumably the RD cost multiplier - confirm
+ int RDDIV;  // NOTE(review): presumably the RD cost divisor/shift - confirm
+} RD_OPT;
+
+typedef struct RD_COST {  // Set up by vp10_rd_cost_reset()/vp10_rd_cost_init().
+ int rate;
+ int64_t dist;
+ int64_t rdcost;  // presumably the combined cost of rate and dist - confirm
+} RD_COST;
+
+// Reset the rate distortion cost values to maximum (invalid) value.
+void vp10_rd_cost_reset(RD_COST *rd_cost);
+// Initialize the rate distortion cost values to zero.
+void vp10_rd_cost_init(RD_COST *rd_cost);
+
+struct TileInfo;
+struct TileDataEnc;
+struct VP10_COMP;
+struct macroblock;
+
+int vp10_compute_rd_mult(const struct VP10_COMP *cpi, int qindex);
+
+void vp10_initialize_rd_consts(struct VP10_COMP *cpi);
+
+void vp10_initialize_me_consts(const struct VP10_COMP *cpi, MACROBLOCK *x,
+ int qindex);
+
+void vp10_model_rd_from_var_lapndz(int64_t var, unsigned int n,
+ unsigned int qstep, int *rate,
+ int64_t *dist);
+
+int vp10_get_switchable_rate(const struct VP10_COMP *cpi,
+ const MACROBLOCKD *const xd);
+
+int vp10_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
+ int stride);
+
+int16_t *vp10_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
+ int raster_block, int16_t *base);
+
+YV12_BUFFER_CONFIG *vp10_get_scaled_ref_frame(const struct VP10_COMP *cpi,
+ int ref_frame);
+
+void vp10_init_me_luts(void);
+
+#if CONFIG_REF_MV
+void vp10_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame);
+#endif
+
+void vp10_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
+ const struct macroblockd_plane *pd,
+ ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE],
+ ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]);
+
+void vp10_set_rd_speed_thresholds(struct VP10_COMP *cpi);
+
+void vp10_set_rd_speed_thresholds_sub8x8(struct VP10_COMP *cpi);
+
+void vp10_update_rd_thresh_fact(const VP10_COMMON *const cm,
+ int (*fact)[MAX_MODES], int rd_thresh,
+ int bsize, int best_mode_index);
+
+void vp10_fill_token_costs(vp10_coeff_cost *c,
+#if CONFIG_ANS
+ coeff_cdf_model (*cdf)[PLANE_TYPES],
+#endif // CONFIG_ANS
+ vp10_coeff_probs_model (*p)[PLANE_TYPES]);
+
+static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
+                                      int thresh_fact) {  // INT_MAX: always 1
+  return thresh == INT_MAX || best_rd < (((int64_t)thresh * thresh_fact) >> 5);
+}
+
+void vp10_mv_pred(struct VP10_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
+ int ref_y_stride, int ref_frame, BLOCK_SIZE block_size);
+
+static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
+  x->errorperbit = rdmult >> RD_EPB_SHIFT;
+  if (x->errorperbit == 0) x->errorperbit = 1;  // a zero result is bumped to 1
+}
+
+void vp10_setup_pred_block(const MACROBLOCKD *xd,
+ struct buf_2d dst[MAX_MB_PLANE],
+ const YV12_BUFFER_CONFIG *src, int mi_row,
+ int mi_col, const struct scale_factors *scale,
+ const struct scale_factors *scale_uv);
+
+int vp10_get_intra_cost_penalty(int qindex, int qdelta,
+ vpx_bit_depth_t bit_depth);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_RD_H_
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
new file mode 100644
index 0000000..62334a3
--- /dev/null
+++ b/av1/encoder/rdopt.c
@@ -0,0 +1,10808 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <math.h>
+
+#include "./vp10_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_dsp/blend.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/system_state.h"
+
+#include "av1/common/common.h"
+#include "av1/common/common_data.h"
+#include "av1/common/entropy.h"
+#include "av1/common/entropymode.h"
+#include "av1/common/idct.h"
+#include "av1/common/mvref_common.h"
+#include "av1/common/pred_common.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/reconintra.h"
+#include "av1/common/scan.h"
+#include "av1/common/seg_common.h"
+
+#include "av1/encoder/cost.h"
+#include "av1/encoder/encodemb.h"
+#include "av1/encoder/encodemv.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/hybrid_fwd_txfm.h"
+#include "av1/encoder/mcomp.h"
+#include "av1/encoder/palette.h"
+#include "av1/encoder/quantize.h"
+#include "av1/encoder/ratectrl.h"
+#include "av1/encoder/rd.h"
+#include "av1/encoder/rdopt.h"
+#include "av1/encoder/aq_variance.h"
+
+#if CONFIG_DUAL_FILTER
+#if CONFIG_EXT_INTERP
+static const int filter_sets[25][2] = {  // every (a, b) pair with a, b in 0..4
+ { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 0, 4 }, { 1, 0 }, { 1, 1 },
+ { 1, 2 }, { 1, 3 }, { 1, 4 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
+ { 2, 4 }, { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }, { 3, 4 }, { 4, 0 },
+ { 4, 1 }, { 4, 2 }, { 4, 3 }, { 4, 4 },
+};
+#else
+static const int filter_sets[9][2] = {  // every (a, b) pair with a, b in 0..2
+ { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
+ { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
+};
+#endif
+#endif
+
+#if CONFIG_EXT_REFS
+
+#define LAST_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
+ (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
+#define LAST2_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
+ (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
+#define LAST3_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
+ (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
+#define GOLDEN_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
+ (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
+#define BWDREF_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
+ (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
+#define ALTREF_FRAME_MODE_MASK \
+ ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
+ (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))
+
+#else
+
+#define LAST_FRAME_MODE_MASK \
+ ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
+#define GOLDEN_FRAME_MODE_MASK \
+ ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
+#define ALTREF_FRAME_MODE_MASK \
+ ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
+
+#endif // CONFIG_EXT_REFS
+
+#if CONFIG_EXT_REFS
+#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
+#else
+#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
+#endif // CONFIG_EXT_REFS
+
+#define MIN_EARLY_TERM_INDEX 3
+#define NEW_MV_DISCOUNT_FACTOR 8
+
+#if CONFIG_EXT_INTRA
+#define ANGLE_FAST_SEARCH 1
+#define ANGLE_SKIP_THRESH 10
+#define FILTER_FAST_SEARCH 1
+#endif // CONFIG_EXT_INTRA
+
+const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671, // vert
+ -7.7051, -3.2234, -3.6193, 3.4533 }; // horz
+
+typedef struct {  // Element type of vp10_mode_order[]: a mode and its refs.
+ PREDICTION_MODE mode;
+ MV_REFERENCE_FRAME ref_frame[2];  // [1]: NONE, a 2nd inter ref or INTRA_FRAME
+} MODE_DEFINITION;
+
+typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;  // ref pair
+
+struct rdcost_block_args {  // NOTE(review): users lie outside this excerpt
+ const VP10_COMP *cpi;
+ MACROBLOCK *x;
+ ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];  // sized as in
+ ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];  // vp10_get_entropy_contexts()
+ int this_rate;
+ int64_t this_dist;
+ int64_t this_sse;
+ int64_t this_rd;
+ int64_t best_rd;
+ int exit_early;  // presumably a stop flag for further costing - confirm
+ int use_fast_coef_costing;
+ const scan_order *so;
+ uint8_t skippable;
+};
+
+#define LAST_NEW_MV_INDEX 6
+static const MODE_DEFINITION vp10_mode_order[MAX_MODES] = {
+ { NEARESTMV, { LAST_FRAME, NONE } },
+#if CONFIG_EXT_REFS
+ { NEARESTMV, { LAST2_FRAME, NONE } },
+ { NEARESTMV, { LAST3_FRAME, NONE } },
+ { NEARESTMV, { BWDREF_FRAME, NONE } },
+#endif // CONFIG_EXT_REFS
+ { NEARESTMV, { ALTREF_FRAME, NONE } },
+ { NEARESTMV, { GOLDEN_FRAME, NONE } },
+
+ { DC_PRED, { INTRA_FRAME, NONE } },
+
+ { NEWMV, { LAST_FRAME, NONE } },
+#if CONFIG_EXT_REFS
+ { NEWMV, { LAST2_FRAME, NONE } },
+ { NEWMV, { LAST3_FRAME, NONE } },
+ { NEWMV, { BWDREF_FRAME, NONE } },
+#endif // CONFIG_EXT_REFS
+ { NEWMV, { ALTREF_FRAME, NONE } },
+ { NEWMV, { GOLDEN_FRAME, NONE } },
+
+ { NEARMV, { LAST_FRAME, NONE } },
+#if CONFIG_EXT_REFS
+ { NEARMV, { LAST2_FRAME, NONE } },
+ { NEARMV, { LAST3_FRAME, NONE } },
+ { NEARMV, { BWDREF_FRAME, NONE } },
+#endif // CONFIG_EXT_REFS
+ { NEARMV, { ALTREF_FRAME, NONE } },
+ { NEARMV, { GOLDEN_FRAME, NONE } },
+
+#if CONFIG_EXT_INTER
+ { NEWFROMNEARMV, { LAST_FRAME, NONE } },
+#if CONFIG_EXT_REFS
+ { NEWFROMNEARMV, { LAST2_FRAME, NONE } },
+ { NEWFROMNEARMV, { LAST3_FRAME, NONE } },
+ { NEWFROMNEARMV, { BWDREF_FRAME, NONE } },
+#endif // CONFIG_EXT_REFS
+ { NEWFROMNEARMV, { ALTREF_FRAME, NONE } },
+ { NEWFROMNEARMV, { GOLDEN_FRAME, NONE } },
+#endif // CONFIG_EXT_INTER
+
+ { ZEROMV, { LAST_FRAME, NONE } },
+#if CONFIG_EXT_REFS
+ { ZEROMV, { LAST2_FRAME, NONE } },
+ { ZEROMV, { LAST3_FRAME, NONE } },
+ { ZEROMV, { BWDREF_FRAME, NONE } },
+#endif // CONFIG_EXT_REFS
+ { ZEROMV, { GOLDEN_FRAME, NONE } },
+ { ZEROMV, { ALTREF_FRAME, NONE } },
+
+// TODO(zoeliu): May need to reconsider the order on the modes to check
+
+#if CONFIG_EXT_INTER
+ { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
+#if CONFIG_EXT_REFS
+ { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+#if CONFIG_EXT_REFS
+ { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
+ { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+#endif // CONFIG_EXT_REFS
+
+#else // CONFIG_EXT_INTER
+
+ { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
+#if CONFIG_EXT_REFS
+ { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+#if CONFIG_EXT_REFS
+ { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
+ { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+#endif // CONFIG_EXT_REFS
+#endif // CONFIG_EXT_INTER
+
+ { TM_PRED, { INTRA_FRAME, NONE } },
+
+#if CONFIG_EXT_INTER
+ { NEAR_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
+ { NEAREST_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
+ { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
+ { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
+ { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
+ { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
+ { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
+ { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
+
+#if CONFIG_EXT_REFS
+ { NEAR_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { NEAREST_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
+
+ { NEAR_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
+ { NEAREST_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
+ { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
+ { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
+ { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
+ { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
+ { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
+ { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
+ { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_REFS
+
+ { NEAR_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+ { NEAREST_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+ { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+ { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+ { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+ { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+ { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+ { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+ { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+
+#if CONFIG_EXT_REFS
+ { NEAR_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
+ { NEAREST_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
+ { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
+ { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
+ { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
+ { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
+ { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
+ { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
+ { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
+
+ { NEAR_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { NEAREST_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
+
+ { NEAR_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { NEAREST_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
+
+ { NEAR_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+ { NEAREST_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+ { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+ { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+ { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+ { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+ { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+ { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+ { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+#endif // CONFIG_EXT_REFS
+
+#else // CONFIG_EXT_INTER
+
+ { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
+ { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
+#if CONFIG_EXT_REFS
+ { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
+ { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+ { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+
+#if CONFIG_EXT_REFS
+ { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
+ { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
+ { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+ { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+#endif // CONFIG_EXT_REFS
+
+ { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
+#if CONFIG_EXT_REFS
+ { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
+ { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
+
+#if CONFIG_EXT_REFS
+ { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
+ { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
+ { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
+ { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
+#endif // CONFIG_EXT_REFS
+
+#endif // CONFIG_EXT_INTER
+
+ { H_PRED, { INTRA_FRAME, NONE } },
+ { V_PRED, { INTRA_FRAME, NONE } },
+ { D135_PRED, { INTRA_FRAME, NONE } },
+ { D207_PRED, { INTRA_FRAME, NONE } },
+ { D153_PRED, { INTRA_FRAME, NONE } },
+ { D63_PRED, { INTRA_FRAME, NONE } },
+ { D117_PRED, { INTRA_FRAME, NONE } },
+ { D45_PRED, { INTRA_FRAME, NONE } },
+
+#if CONFIG_EXT_INTER
+ { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
+ { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
+ { NEARMV, { LAST_FRAME, INTRA_FRAME } },
+ { NEWMV, { LAST_FRAME, INTRA_FRAME } },
+
+#if CONFIG_EXT_REFS
+ { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
+ { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
+ { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
+ { NEWMV, { LAST2_FRAME, INTRA_FRAME } },
+
+ { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
+ { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
+ { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
+ { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
+#endif // CONFIG_EXT_REFS
+
+ { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
+ { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
+ { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
+ { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
+
+#if CONFIG_EXT_REFS
+ { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
+ { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
+ { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
+ { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
+#endif // CONFIG_EXT_REFS
+
+ { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
+ { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
+ { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
+ { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
+#endif // CONFIG_EXT_INTER
+};
+
+static const REF_DEFINITION vp10_ref_order[MAX_REFS] = {  // single refs first
+ { { LAST_FRAME, NONE } },
+#if CONFIG_EXT_REFS
+ { { LAST2_FRAME, NONE } }, { { LAST3_FRAME, NONE } },
+ { { BWDREF_FRAME, NONE } },
+#endif // CONFIG_EXT_REFS
+ { { GOLDEN_FRAME, NONE } }, { { ALTREF_FRAME, NONE } },
+ // Compound pairs whose second reference is ALTREF_FRAME.
+ { { LAST_FRAME, ALTREF_FRAME } },
+#if CONFIG_EXT_REFS
+ { { LAST2_FRAME, ALTREF_FRAME } }, { { LAST3_FRAME, ALTREF_FRAME } },
+#endif // CONFIG_EXT_REFS
+ { { GOLDEN_FRAME, ALTREF_FRAME } },
+ // Compound pairs whose second reference is BWDREF_FRAME (EXT_REFS only).
+#if CONFIG_EXT_REFS
+ { { LAST_FRAME, BWDREF_FRAME } }, { { LAST2_FRAME, BWDREF_FRAME } },
+ { { LAST3_FRAME, BWDREF_FRAME } }, { { GOLDEN_FRAME, BWDREF_FRAME } },
+#endif // CONFIG_EXT_REFS
+
+ { { INTRA_FRAME, NONE } },
+};
+
+// Cost of coding value v out of n: with l = get_unsigned_bits(n), the first
+// (1 << l) - n values cost l - 1 bits and the rest l bits, each at prob 128.
+static INLINE int write_uniform_cost(int n, int v) {
+  const int l = get_unsigned_bits(n);
+  const int m = (1 << l) - n;
+  if (l == 0) return 0;
+  return (v < m ? l - 1 : l) * vp10_cost_bit(128, 0);
+}
+
+// constants for prune 1 and prune 2 decision boundaries
+#define FAST_EXT_TX_CORR_MID 0.0
+#define FAST_EXT_TX_EDST_MID 0.1
+#define FAST_EXT_TX_CORR_MARGIN 0.5
+#define FAST_EXT_TX_EDST_MARGIN 0.3
+
+static const TX_TYPE_1D vtx_tab[TX_TYPES] = {  // indexed by TX_TYPE
+ DCT_1D, ADST_1D, DCT_1D, ADST_1D,  // presumably the vertical 1D kernel - confirm
+#if CONFIG_EXT_TX
+ FLIPADST_1D, DCT_1D, FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
+ DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D, IDTX_1D,
+#endif // CONFIG_EXT_TX
+};
+
+static const TX_TYPE_1D htx_tab[TX_TYPES] = {  // indexed by TX_TYPE
+ DCT_1D, DCT_1D, ADST_1D, ADST_1D,  // presumably the horizontal kernel - confirm
+#if CONFIG_EXT_TX
+ DCT_1D, FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
+ IDTX_1D, DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D,
+#endif // CONFIG_EXT_TX
+};
+
+// Estimates how the squared prediction error (src - dst) of a block is spread
+// over a 4x4 grid of cells. hordist[0..2] receive the share of the total error
+// found in the first three grid columns and verdist[0..2] the share found in
+// the first three grid rows; the fourth share is left implicit. A block with
+// no error at all reports 0.25 for every share.
+//
+// Block sizes that precede BLOCK_16X16 in the enum sum per-pixel squared
+// differences directly. All others take each cell's SSE from
+// cpi->fn_ptr[bsize - BLOCK_16X16].vf, presumably the variance function of a
+// (bw / 4) x (bh / 4) cell - confirm against the BLOCK_SIZE enum ordering.
+static void get_energy_distribution_fine(const VP10_COMP *cpi, BLOCK_SIZE bsize,
+                                         uint8_t *src, int src_stride,
+                                         uint8_t *dst, int dst_stride,
+                                         double *hordist, double *verdist) {
+  int bw = 4 << (b_width_log2_lookup[bsize]);
+  int bh = 4 << (b_height_log2_lookup[bsize]);
+  unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  double total = 0;
+
+  const int f_index = bsize - BLOCK_16X16;
+  if (f_index < 0) {
+    int i, j, index;
+    // Map a pixel coordinate to its grid cell: >> 1 for an 8-pixel side, >> 2
+    // otherwise (so a 4-pixel side falls entirely into the first cell).
+    int w_shift = bw == 8 ? 1 : 2;
+    int h_shift = bh == 8 ? 1 : 2;
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (cpi->common.use_highbitdepth) {
+      uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
+      for (i = 0; i < bh; ++i)
+        for (j = 0; j < bw; ++j) {
+          index = (j >> w_shift) + ((i >> h_shift) << 2);
+          esq[index] +=
+              (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
+              (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
+        }
+    } else {
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+      for (i = 0; i < bh; ++i)
+        for (j = 0; j < bw; ++j) {
+          index = (j >> w_shift) + ((i >> h_shift) << 2);
+          esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
+                        (src[j + i * src_stride] - dst[j + i * dst_stride]);
+        }
+#if CONFIG_VP9_HIGHBITDEPTH
+    }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  } else {
+    // Only the SSE written through the last argument is used; the variance
+    // each call returns is discarded.
+    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
+    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
+                            &esq[1]);
+    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
+                            &esq[2]);
+    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
+                            dst_stride, &esq[3]);
+    src += bh / 4 * src_stride;
+    dst += bh / 4 * dst_stride;
+
+    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
+    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
+                            &esq[5]);
+    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
+                            &esq[6]);
+    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
+                            dst_stride, &esq[7]);
+    src += bh / 4 * src_stride;
+    dst += bh / 4 * dst_stride;
+
+    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
+    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
+                            &esq[9]);
+    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
+                            &esq[10]);
+    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
+                            dst_stride, &esq[11]);
+    src += bh / 4 * src_stride;
+    dst += bh / 4 * dst_stride;
+
+    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
+    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
+                            &esq[13]);
+    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
+                            &esq[14]);
+    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
+                            dst_stride, &esq[15]);
+  }
+
+  // Sum in double: the cells are unsigned int, and adding them in 32-bit
+  // unsigned arithmetic could wrap for large high-bitdepth blocks, which would
+  // make the shares below inconsistent with the total.
+  total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] + esq[6] +
+          esq[7] + esq[8] + esq[9] + esq[10] + esq[11] + esq[12] + esq[13] +
+          esq[14] + esq[15];
+  if (total > 0) {
+    const double e_recip = 1.0 / total;
+    // Column shares (hordist) and row shares (verdist) of the total.
+    hordist[0] =
+        ((double)esq[0] + (double)esq[4] + (double)esq[8] + (double)esq[12]) *
+        e_recip;
+    hordist[1] =
+        ((double)esq[1] + (double)esq[5] + (double)esq[9] + (double)esq[13]) *
+        e_recip;
+    hordist[2] =
+        ((double)esq[2] + (double)esq[6] + (double)esq[10] + (double)esq[14]) *
+        e_recip;
+    verdist[0] =
+        ((double)esq[0] + (double)esq[1] + (double)esq[2] + (double)esq[3]) *
+        e_recip;
+    verdist[1] =
+        ((double)esq[4] + (double)esq[5] + (double)esq[6] + (double)esq[7]) *
+        e_recip;
+    verdist[2] =
+        ((double)esq[8] + (double)esq[9] + (double)esq[10] + (double)esq[11]) *
+        e_recip;
+  } else {
+    hordist[0] = verdist[0] = 0.25;
+    hordist[1] = verdist[1] = 0.25;
+    hordist[2] = verdist[2] = 0.25;
+  }
+}
+
+// Decides, per direction, whether ADST or FLIPADST may be pruned from the 1D
+// transform search. Bits 0..7 of the result refer to the vertical transform,
+// bits 8..15 to the horizontal one; hdist/vdist are filled as a side effect.
+static int adst_vs_flipadst(const VP10_COMP *cpi, BLOCK_SIZE bsize,
+                            uint8_t *src, int src_stride, uint8_t *dst,
+                            int dst_stride, double *hdist, double *vdist) {
+  const double upper = FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN;
+  const double lower = FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN;
+  const double *const w = ADST_FLIP_SVM;
+  double proj_v, proj_h;
+  int mask = 0;
+
+  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
+                               hdist, vdist);
+  // Linear projections: w[0..3] act on vdist, w[4..7] act on hdist.
+  proj_v = vdist[0] * w[0] + vdist[1] * w[1] + vdist[2] * w[2] + w[3];
+  proj_h = hdist[0] * w[4] + hdist[1] * w[5] + hdist[2] * w[6] + w[7];
+
+  if (proj_v > upper) mask |= 1 << FLIPADST_1D;
+  if (proj_v < lower) mask |= 1 << ADST_1D;
+  if (proj_h > upper) mask |= 1 << (FLIPADST_1D + 8);
+  if (proj_h < lower) mask |= 1 << (ADST_1D + 8);
+  return mask;
+}
+
+#if CONFIG_EXT_TX
+// Correlation of each residual sample with its left neighbour (*hcorr) and its
+// upper neighbour (*vcorr), skipping the first row and column. Negative values
+// are clamped to 0; a result stays at 1 when either variance is not positive.
+static void get_horver_correlation(int16_t *diff, int stride, int w, int h,
+                                   double *hcorr, double *vcorr) {
+  const int num = (h - 1) * (w - 1);  // number of (sample, neighbour) pairs
+  double num_r, var_x, var_l, var_u, cov_xl, cov_xu;
+  int r, c;
+  int64_t sum_x = 0, sum_l = 0, sum_u = 0;     // sample, left, up
+  int64_t sum_xx = 0, sum_ll = 0, sum_uu = 0;  // squares
+  int64_t sum_xl = 0, sum_xu = 0;              // cross products
+  *hcorr = *vcorr = 1;
+  assert(num > 0);
+  num_r = 1.0 / num;
+  for (r = 1; r < h; ++r) {
+    const int16_t *const cur = diff + r * stride;
+    const int16_t *const above = cur - stride;
+    for (c = 1; c < w; ++c) {
+      const int x = cur[c], left = cur[c - 1], up = above[c];
+      sum_x += x;
+      sum_l += left;
+      sum_u += up;
+      sum_xx += x * x;
+      sum_ll += left * left;
+      sum_uu += up * up;
+      sum_xl += x * left;
+      sum_xu += x * up;
+    }
+  }
+  var_x = sum_xx - (sum_x * sum_x) * num_r;
+  var_l = sum_ll - (sum_l * sum_l) * num_r;
+  var_u = sum_uu - (sum_u * sum_u) * num_r;
+  cov_xl = sum_xl - (sum_x * sum_l) * num_r;
+  cov_xu = sum_xu - (sum_x * sum_u) * num_r;
+  if (var_x > 0 && var_l > 0) {
+    const double corr = cov_xl / sqrt(var_x * var_l);
+    *hcorr = corr < 0 ? 0 : corr;
+  }
+  if (var_x > 0 && var_u > 0) {
+    const double corr = cov_xu / sqrt(var_x * var_u);
+    *vcorr = corr < 0 ? 0 : corr;
+  }
+}
+
+// Decides, per direction, whether DCT or IDTX may be pruned based on the
+// residual's neighbour correlation; *hcorr and *vcorr are filled on return.
+// Bits 0..7 of the result refer to the vertical transform, 8..15 to horizontal.
+int dct_vs_idtx(int16_t *diff, int stride, int w, int h, double *hcorr,
+                double *vcorr) {
+  const double upper = FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN;
+  const double lower = FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN;
+  int mask = 0;
+
+  get_horver_correlation(diff, stride, w, h, hcorr, vcorr);
+  if (*vcorr > upper) mask |= 1 << IDTX_1D;
+  if (*vcorr < lower) mask |= 1 << DCT_1D;
+  if (*hcorr > upper) mask |= 1 << (IDTX_1D + 8);
+  if (*hcorr < lower) mask |= 1 << (DCT_1D + 8);
+  return mask;
+}
+
+// Prunes on plane 0 with the ADST/FLIPADST and/or DCT/IDTX classifiers.
+// Performance drop: 0.5%, Speed improvement: 24%
+static int prune_two_for_sby(const VP10_COMP *cpi, BLOCK_SIZE bsize,
+                             MACROBLOCK *x, MACROBLOCKD *xd, int adst_flipadst,
+                             int dct_idtx) {
+  struct macroblock_plane *const src_pl = &x->plane[0];
+  struct macroblockd_plane *const dst_pl = &xd->plane[0];
+  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, dst_pl);
+  const int width = 4 << b_width_log2_lookup[plane_bsize];
+  const int height = 4 << b_height_log2_lookup[plane_bsize];
+  double hdist[3] = { 0 }, vdist[3] = { 0 };
+  double hcorr, vcorr;
+  int mask = 0;
+  vp10_subtract_plane(x, bsize, 0);  // presumably refreshes src_pl->src_diff
+  if (adst_flipadst)
+    mask |= adst_vs_flipadst(cpi, bsize, src_pl->src.buf, src_pl->src.stride,
+                             dst_pl->dst.buf, dst_pl->dst.stride, hdist, vdist);
+  if (dct_idtx)
+    mask |= dct_vs_idtx(src_pl->src_diff, width, width, height, &hcorr, &vcorr);
+  return mask;
+}
+#endif // CONFIG_EXT_TX
+
+// ADST/FLIPADST-only pruning (performance drop: 0.3%, speed improvement: 5%).
+static int prune_one_for_sby(const VP10_COMP *cpi, BLOCK_SIZE bsize,
+                             MACROBLOCK *x, MACROBLOCKD *xd) {
+  struct macroblock_plane *const enc = &x->plane[0];
+  struct macroblockd_plane *const dec = &xd->plane[0];
+  double hdist[3] = { 0 }, vdist[3] = { 0 };  // outputs are discarded here
+  vp10_subtract_plane(x, bsize, 0);
+  return adst_vs_flipadst(cpi, bsize, enc->src.buf, enc->src.stride,
+                          dec->dst.buf, dec->dst.stride, hdist, vdist);
+}
+
+// Returns the prune bitmask for the configured prune mode. When tx_set is
+// negative the 1D usage table is never read and no classifier is skipped.
+static int prune_tx_types(const VP10_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
+                          MACROBLOCKD *xd, int tx_set) {
+#if CONFIG_EXT_TX
+  const int *tx_set_1D = tx_set >= 0 ? ext_tx_used_inter_1D[tx_set] : NULL;
+#else
+  const int tx_set_1D[TX_TYPES_1D] = { 0 };
+#endif
+  switch (cpi->sf.tx_type_search.prune_mode) {
+    case NO_PRUNE: return 0;
+    case PRUNE_ONE:
+      // && (not &) so the table is only indexed once tx_set is known valid.
+      if (tx_set >= 0 && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
+        return 0;
+      return prune_one_for_sby(cpi, bsize, x, xd);
+#if CONFIG_EXT_TX
+    case PRUNE_TWO:
+      if (tx_set >= 0 && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
+        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
+        return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
+      }
+      if (tx_set >= 0 && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
+        return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
+      return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
+#endif
+  }
+  assert(0);
+  return 0;
+}
+
+// Returns 1 when tx_type survives the prune bitmask, i.e. neither its vtx_tab
+// kind (bits 0..7) nor its htx_tab kind (bits 8..15) is flagged in prune.
+static int do_tx_type_search(TX_TYPE tx_type, int prune) {
+// TODO(sarahparker) implement for non ext tx
+#if CONFIG_EXT_TX
+  const int v_pruned = (prune >> vtx_tab[tx_type]) & 1;
+  const int h_pruned = (prune >> (htx_tab[tx_type] + 8)) & 1;
+  return !v_pruned && !h_pruned;
+#else
+  // temporary to avoid compiler warnings
+  (void)vtx_tab, (void)htx_tab, (void)tx_type, (void)prune;
+  return 1;
+#endif
+}
+
+// Models the rate and distortion of a plane block from its SSE and the plane's
+// dequant[1] step. *dist is returned shifted left by 4.
+static void model_rd_from_sse(const VP10_COMP *const cpi,
+                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
+                              int plane, int64_t sse, int *rate,
+                              int64_t *dist) {
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int dequant_shift =
+      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
+#else
+  const int dequant_shift = 3;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  if (!cpi->sf.simple_model_rd_from_var) {
+    vp10_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
+                                  pd->dequant[1] >> dequant_shift, rate, dist);
+  } else {
+    // Fast approximation of the modelling function.
+    const int quantizer = pd->dequant[1] >> dequant_shift;
+    if (quantizer >= 120) {
+      *rate = 0;
+    } else {
+      *rate = (int)((sse * (280 - quantizer)) >> (16 - VP10_PROB_COST_SHIFT));
+    }
+    *dist = (sse * quantizer) >> 8;
+  }
+  *dist <<= 4;
+}
+
+// Accumulates the modelled rate and distortion of planes plane_from through
+// plane_to into *out_rate_sum and *out_dist_sum. Also stores the plane-0 SSE
+// in x->pred_sse[] under the block's first reference frame, sets
+// *skip_txfm_sb when the summed SSE is zero and writes that summed SSE,
+// shifted left by 4, to *skip_sse_sb.
+static void model_rd_for_sb(const VP10_COMP *const cpi, BLOCK_SIZE bsize,
+                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
+                            int plane_to, int *out_rate_sum,
+                            int64_t *out_dist_sum, int *skip_txfm_sb,
+                            int64_t *skip_sse_sb) {
+  // Note our transform coeffs are 8 times an orthogonal transform.
+  // Hence quantizer step is also 8 times. To get effective quantizer
+  // we need to divide by 8 before sending to modeling function.
+  const int ref = xd->mi[0]->mbmi.ref_frame[0];
+  int64_t rate_acc = 0, dist_acc = 0, sse_acc = 0;
+  int plane;
+
+  x->pred_sse[ref] = 0;
+  for (plane = plane_from; plane <= plane_to; ++plane) {
+    struct macroblock_plane *const p = &x->plane[plane];
+    struct macroblockd_plane *const pd = &xd->plane[plane];
+    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
+    unsigned int sse;
+    int rate;
+    int64_t dist;
+
+    // TODO(geza): Write direct sse functions that do not compute
+    // variance as well.
+    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
+                       &sse);
+    // Only plane 0 overrides the zero stored before the loop.
+    if (plane == 0) x->pred_sse[ref] = sse;
+    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
+
+    sse_acc += sse;
+    rate_acc += rate;
+    dist_acc += dist;
+  }
+
+  // The skip flag is set exactly when the summed SSE is zero; the SSE itself
+  // is reported shifted left by 4.
+  *skip_txfm_sb = sse_acc == 0;
+  *skip_sse_sb = sse_acc << 4;
+  *out_rate_sum = (int)rate_acc;
+  *out_dist_sum = dist_acc;
+}
+
+/* Returns the sum of squared differences between coeff[] and dqcoeff[] over
+ * block_size entries and stores the sum of squared coeff[] values in *ssz.
+ *
+ * All arithmetic is carried out in int64_t: squaring in plain int (as the
+ * previous version did) overflows, which is undefined behaviour, as soon as
+ * a difference or coefficient exceeds 46340 in magnitude. Results are
+ * unchanged for every input the int version handled without overflow. */
+int64_t vp10_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
+                           intptr_t block_size, int64_t *ssz) {
+  intptr_t i;
+  int64_t error = 0, sqcoeff = 0;
+
+  for (i = 0; i < block_size; i++) {
+    const int64_t c = coeff[i];
+    const int64_t diff = c - dqcoeff[i];
+    error += diff * diff;
+    sqcoeff += c * c;
+  }
+
+  *ssz = sqcoeff;
+  return error;
+}
+
+/* Returns the sum of squared differences between the 16-bit arrays coeff[]
+ * and dqcoeff[] over block_size entries.
+ *
+ * The difference of two int16_t values can reach +/-65535, whose square does
+ * not fit in a 32-bit int, so the product is formed in int64_t to avoid
+ * signed overflow (undefined behaviour). */
+int64_t vp10_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
+                              int block_size) {
+  int i;
+  int64_t error = 0;
+
+  for (i = 0; i < block_size; i++) {
+    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
+    error += diff * diff;
+  }
+
+  return error;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+int64_t vp10_highbd_block_error_c(const tran_low_t *coeff,
+ const tran_low_t *dqcoeff,
+ intptr_t block_size, int64_t *ssz, int bd) {
+ int i;
+ int64_t error = 0, sqcoeff = 0;
+ int shift = 2 * (bd - 8);
+ int rounding = shift > 0 ? 1 << (shift - 1) : 0;
+
+ for (i = 0; i < block_size; i++) {
+ const int64_t diff = coeff[i] - dqcoeff[i];
+ error += diff * diff;
+ sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
+ }
+ assert(error >= 0 && sqcoeff >= 0);
+ error = (error + rounding) >> shift;
+ sqcoeff = (sqcoeff + rounding) >> shift;
+
+ *ssz = sqcoeff;
+ return error;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+/* Returns the estimated cost of the tokens needed to code the quantized
+ * coefficients of transform block `block` in `plane`, read from
+ * x->token_costs.
+ *
+ * The context of the first token is coeff_ctx (CONFIG_VAR_TX) or the
+ * combination of the above/left entropy contexts *A and *L. For the remaining
+ * tokens, use_fast_coef_costing selects between deriving the context only
+ * from whether the previous token was zero, and deriving it with
+ * get_coef_context() from the energy classes stored in token_cache.
+ * `scan` gives the coefficient visiting order and `nb` the neighbour table
+ * handed to get_coef_context().
+ *
+ * Side effect (only when !CONFIG_VAR_TX): *A and *L are overwritten with
+ * (c > 0), which is 0 only when eob == 0.
+ *
+ * The trailing '0' is a terminator which is used inside cost_coeffs() to
+ * decide whether to include cost of a trailing EOB node or not (i.e. we
+ * can skip this if the last coefficient in this transform block, e.g. the
+ * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
+ * were non-zero).
+ * NOTE(review): the "trailing '0'" presumably refers to the last entry of
+ * each band_count_table row, which is defined outside this function. */
+static int cost_coeffs(MACROBLOCK *x, int plane, int block,
+#if CONFIG_VAR_TX
+ int coeff_ctx,
+#else
+ ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
+#endif
+ TX_SIZE tx_size, const int16_t *scan, const int16_t *nb,
+ int use_fast_coef_costing) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ const struct macroblock_plane *p = &x->plane[plane];
+ const struct macroblockd_plane *pd = &xd->plane[plane];
+ const PLANE_TYPE type = pd->plane_type;
+ const uint16_t *band_count = &band_count_table[tx_size][1];
+ const int eob = p->eobs[block];
+ const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ const int tx_size_ctx = txsize_sqr_map[tx_size];
+ unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
+ x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
+ uint8_t token_cache[MAX_TX_SQUARE];
+#if CONFIG_VAR_TX
+ int pt = coeff_ctx;
+#else
+ int pt = combine_entropy_contexts(*A, *L);
+#endif
+ int c, cost;
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
+#else
+ const int *cat6_high_cost = vp10_get_high_cost_table(8);
+#endif
+
+#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
+ // Check for consistency of tx_size with mode info
+ assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
+ : get_uv_tx_size(mbmi, pd) == tx_size);
+#endif // !CONFIG_VAR_TX && !CONFIG_SUPERTX
+
+ if (eob == 0) {
+ // No coefficients: the only thing costed is a single EOB token.
+ cost = token_costs[0][0][pt][EOB_TOKEN];
+ c = 0;
+ } else {
+ if (use_fast_coef_costing) {
+ int band_left = *band_count++;
+
+ // dc token, costed with the incoming context pt
+ int v = qcoeff[0];
+ int16_t prev_t;
+ cost = vp10_get_token_cost(v, &prev_t, cat6_high_cost);
+ cost += (*token_costs)[0][pt][prev_t];
+
+ token_cache[0] = vp10_pt_energy_class[prev_t];
+ ++token_costs;
+
+ // ac tokens: the context is just !prev_t (was the previous token zero)
+ for (c = 1; c < eob; c++) {
+ const int rc = scan[c];
+ int16_t t;
+
+ v = qcoeff[rc];
+ cost += vp10_get_token_cost(v, &t, cat6_high_cost);
+ cost += (*token_costs)[!prev_t][!prev_t][t];
+ prev_t = t;
+ if (!--band_left) {
+ band_left = *band_count++;
+ ++token_costs;
+ }
+ }
+
+ // eob token; band_left is 0 here only if the band count just loaded was 0
+ if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
+
+ } else { // !use_fast_coef_costing
+ int band_left = *band_count++;
+
+ // dc token, costed with the incoming context pt
+ int v = qcoeff[0];
+ int16_t tok;
+ unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
+ cost = vp10_get_token_cost(v, &tok, cat6_high_cost);
+ cost += (*token_costs)[0][pt][tok];
+
+ token_cache[0] = vp10_pt_energy_class[tok];
+ ++token_costs;
+
+ tok_cost_ptr = &((*token_costs)[!tok]);
+
+ // ac tokens: the context comes from get_coef_context() over token_cache
+ for (c = 1; c < eob; c++) {
+ const int rc = scan[c];
+
+ v = qcoeff[rc];
+ cost += vp10_get_token_cost(v, &tok, cat6_high_cost);
+ pt = get_coef_context(nb, token_cache, c);
+ cost += (*tok_cost_ptr)[pt][tok];
+ token_cache[rc] = vp10_pt_energy_class[tok];
+ if (!--band_left) {
+ band_left = *band_count++;
+ ++token_costs;
+ }
+ tok_cost_ptr = &((*token_costs)[!tok]);
+ }
+
+ // eob token; band_left is 0 here only if the band count just loaded was 0
+ if (band_left) {
+ pt = get_coef_context(nb, token_cache, c);
+ cost += (*token_costs)[0][pt][EOB_TOKEN];
+ }
+ }
+ }
+
+#if !CONFIG_VAR_TX
+ // Record whether any coefficient was costed (c is 0 only when eob == 0).
+ *A = *L = (c > 0);
+#endif
+
+ return cost;
+}
+/* Computes, for one transform block of `plane`, the distortion after coding
+ * (*out_dist) and the error before any residual is added (*out_sse).
+ *
+ * cpi->sf.use_transform_domain_distortion set:
+ *   both values come from vp10_block_error() / vp10_highbd_block_error()
+ *   applied to p->coeff and pd->dqcoeff over
+ *   16 << num_4x4_blocks_txsize_log2_lookup[tx_size] coefficients, each
+ *   shifted right by (MAX_TX_SCALE - get_tx_scale(...)) * 2.
+ * otherwise:
+ *   *out_sse is 16 times the value the variance function stores for the
+ *   source block versus the current dst pixels. If eob is non-zero the dst
+ *   pixels are copied into a local buffer, the inverse transform of dqcoeff
+ *   is added to it, and *out_dist is 16 times the value stored for source
+ *   versus that buffer; if eob is zero *out_dist equals *out_sse.
+ *
+ * blk_row and blk_col are multiplied by 4 to form pixel offsets, i.e. they
+ * are expressed in units of 4 samples. */
+static void dist_block(const VP10_COMP *cpi, MACROBLOCK *x, int plane,
+ int block, int blk_row, int blk_col, TX_SIZE tx_size,
+ int64_t *out_dist, int64_t *out_sse) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ if (cpi->sf.use_transform_domain_distortion) {
+ // Transform domain distortion computation is cheaper as it does
+ // not involve an inverse transform, but it is less accurate.
+ const int ss_txfrm_size = num_4x4_blocks_txsize_log2_lookup[tx_size];
+ int64_t this_sse;
+ int tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
+ int shift = (MAX_TX_SCALE - get_tx_scale(xd, tx_type, tx_size)) * 2;
+ tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
+ *out_dist = vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
+ &this_sse, bd) >>
+ shift;
+#else
+ *out_dist =
+ vp10_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse) >>
+ shift;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ *out_sse = this_sse >> shift;
+ } else {
+ const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
+ const int bsw = 4 * num_4x4_blocks_wide_lookup[tx_bsize];
+ const int bsh = 4 * num_4x4_blocks_high_lookup[tx_bsize];
+ const int src_stride = x->plane[plane].src.stride;
+ const int dst_stride = xd->plane[plane].dst.stride;
+ const int src_idx = 4 * (blk_row * src_stride + blk_col);
+ const int dst_idx = 4 * (blk_row * dst_stride + blk_col);
+ const uint8_t *src = &x->plane[plane].src.buf[src_idx];
+ const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
+ const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ const uint16_t eob = p->eobs[block];
+
+ unsigned int tmp;
+
+ assert(cpi != NULL);
+
+ cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
+ *out_sse = (int64_t)tmp * 16;
+
+ if (eob) {
+ const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
+ uint8_t *recon = (uint8_t *)recon16;
+#else
+ DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ const PLANE_TYPE plane_type = plane == 0 ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+
+ INV_TXFM_PARAM inv_txfm_param;
+
+ inv_txfm_param.tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ inv_txfm_param.tx_size = tx_size;
+ inv_txfm_param.eob = eob;
+ inv_txfm_param.lossless = xd->lossless[mbmi->segment_id];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ recon = CONVERT_TO_BYTEPTR(recon);
+ inv_txfm_param.bd = xd->bd;
+ vpx_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
+ NULL, 0, bsw, bsh, xd->bd);
+ highbd_inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
+ } else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ {
+ vpx_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL, 0,
+ bsw, bsh);
+ inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
+ }
+
+ cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, MAX_TX_SIZE, &tmp);
+ }
+
+ *out_dist = (int64_t)tmp * 16; // still the prediction error when eob == 0
+ }
+}
+
+/* Returns the coefficient cost of one transform block by calling
+ * cost_coeffs() with the scan order and costing mode stored in args.
+ *
+ * The above/left entropy context entries for (blk_col, blk_row) are read
+ * from args->t_above / args->t_left. With CONFIG_VAR_TX they are combined
+ * here and then both rewritten to 1 if the block has a non-zero eob and 0
+ * otherwise; in the other configuration pointers to the entries are handed
+ * to cost_coeffs(). */
+static int rate_block(int plane, int block, int blk_row, int blk_col,
+                      TX_SIZE tx_size, struct rdcost_block_args *args) {
+#if CONFIG_VAR_TX
+  const struct macroblock_plane *const p = &args->x->plane[plane];
+  const int ctx =
+      combine_entropy_contexts(args->t_above[blk_col], args->t_left[blk_row]);
+  const int cost =
+      cost_coeffs(args->x, plane, block, ctx, tx_size, args->so->scan,
+                  args->so->neighbors, args->use_fast_coef_costing);
+  const int has_coeffs = p->eobs[block] != 0;
+
+  args->t_above[blk_col] = has_coeffs;
+  args->t_left[blk_row] = has_coeffs;
+  return cost;
+#else
+  return cost_coeffs(args->x, plane, block, &args->t_above[blk_col],
+                     &args->t_left[blk_row], tx_size, args->so->scan,
+                     args->so->neighbors, args->use_fast_coef_costing);
+#endif  // CONFIG_VAR_TX
+}
+
+/* Returns the sum of squares of the residual samples in `diff` (row pitch
+ * diff_stride) covered by a transform of size tx_size.
+ *
+ * The rectangular sizes available with CONFIG_EXT_TX are handled as two
+ * square halves, stacked vertically or placed side by side; every other size
+ * must be below TX_SIZES and is summed as one square whose side is
+ * num_4x4_blocks_wide_txsize_lookup[tx_size] * 4. */
+static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
+                               TX_SIZE tx_size) {
+  switch (tx_size) {
+#if CONFIG_EXT_TX
+    case TX_4X8:
+      return vpx_sum_squares_2d_i16(diff, diff_stride, 4) +
+             vpx_sum_squares_2d_i16(diff + 4 * diff_stride, diff_stride, 4);
+    case TX_8X4:
+      return vpx_sum_squares_2d_i16(diff, diff_stride, 4) +
+             vpx_sum_squares_2d_i16(diff + 4, diff_stride, 4);
+    case TX_8X16:
+      return vpx_sum_squares_2d_i16(diff, diff_stride, 8) +
+             vpx_sum_squares_2d_i16(diff + 8 * diff_stride, diff_stride, 8);
+    case TX_16X8:
+      return vpx_sum_squares_2d_i16(diff, diff_stride, 8) +
+             vpx_sum_squares_2d_i16(diff + 8, diff_stride, 8);
+    case TX_16X32:
+      return vpx_sum_squares_2d_i16(diff, diff_stride, 16) +
+             vpx_sum_squares_2d_i16(diff + 16 * diff_stride, diff_stride, 16);
+    case TX_32X16:
+      return vpx_sum_squares_2d_i16(diff, diff_stride, 16) +
+             vpx_sum_squares_2d_i16(diff + 16, diff_stride, 16);
+#endif  // CONFIG_EXT_TX
+    default: break;
+  }
+
+  // Square transform sizes.
+  assert(tx_size < TX_SIZES);
+  return vpx_sum_squares_2d_i16(
+      diff, diff_stride, num_4x4_blocks_wide_txsize_lookup[tx_size] << 2);
+}
+/* Per-transform-block callback (arg is a struct rdcost_block_args *) that
+ * adds the rate, distortion, SSE and RD cost of one block to the running
+ * totals kept in args.
+ *
+ * Intra blocks are first coded with vp10_encode_block_intra(); inter blocks
+ * are forward transformed and quantized, then passed to vp10_optimize_b()
+ * when their eob is non-zero. The RD cost charged for the block is the
+ * smaller of coding the coefficients (rate, dist) and coding nothing
+ * (0, sse).
+ *
+ * args->exit_early is set, and the function returns, as soon as the running
+ * RD cost exceeds args->best_rd; once it is set later invocations return
+ * immediately. args->skippable is cleared when the block has a non-zero
+ * eob. */
+static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
+ struct rdcost_block_args *args = arg;
+ MACROBLOCK *const x = args->x;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int64_t rd1, rd2, rd;
+ int rate;
+ int64_t dist;
+ int64_t sse;
+ int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col),
+ *(args->t_left + blk_row));
+
+ if (args->exit_early) return; // an earlier block already exceeded best_rd
+
+ if (!is_inter_block(mbmi)) {
+ struct encode_b_args intra_arg = {
+ x, NULL, &mbmi->skip, args->t_above, args->t_left, 1
+ };
+ vp10_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, &intra_arg);
+
+ if (args->cpi->sf.use_transform_domain_distortion) {
+ dist_block(args->cpi, x, plane, block, blk_row, blk_col, tx_size, &dist,
+ &sse);
+ } else {
+ // Note that the encode block_intra call above already calls
+ // inv_txfm_add, so we can't just call dist_block here.
+ const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
+ const vpx_variance_fn_t variance = args->cpi->fn_ptr[tx_bsize].vf;
+
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+ const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+
+ const uint8_t *src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
+ const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
+ const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+ unsigned int tmp;
+ sse = sum_squares_2d(diff, diff_stride, tx_size); // energy of the residual
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ sse = (int64_t)sse * 16;
+
+ variance(src, src_stride, dst, dst_stride, &tmp); // source vs. dst pixels
+ dist = (int64_t)tmp * 16;
+ }
+ } else {
+// full forward transform and quantization
+#if CONFIG_NEW_QUANT
+ vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, coeff_ctx);
+#else
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ VP10_XFORM_QUANT_FP);
+#endif // CONFIG_NEW_QUANT
+ if (x->plane[plane].eobs[block])
+ vp10_optimize_b(x, plane, block, tx_size, coeff_ctx);
+ dist_block(args->cpi, x, plane, block, blk_row, blk_col, tx_size, &dist,
+ &sse);
+ }
+
+ rd = RDCOST(x->rdmult, x->rddiv, 0, dist); // distortion-only cost, rate 0
+ if (args->this_rd + rd > args->best_rd) {
+ args->exit_early = 1; // give up before rate_block() is called
+ return;
+ }
+
+ rate = rate_block(plane, block, blk_row, blk_col, tx_size, args);
+ rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist); // code the coefficients
+ rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse); // code nothing
+
+ // TODO(jingning): temporarily enabled only for luma component
+ rd = VPXMIN(rd1, rd2);
+
+ args->this_rate += rate;
+ args->this_dist += dist;
+ args->this_sse += sse;
+ args->this_rd += rd;
+
+ if (args->this_rd > args->best_rd) {
+ args->exit_early = 1;
+ return;
+ }
+
+ args->skippable &= !x->plane[plane].eobs[block]; // stays 1 only while eob == 0
+}
+
+/* Runs block_rd_txfm() over every transform block of `plane` in a block of
+ * size bsize, using transform size tx_size, and reports the totals.
+ *
+ * For plane 0 the block's mbmi.tx_size is set to tx_size first. The search
+ * is bounded by ref_best_rd: if it was abandoned early, *rate is INT_MAX,
+ * *distortion and *sse are INT64_MAX and *skippable is 0. Otherwise the
+ * outputs are the accumulated rate, distortion and SSE, and *skippable is
+ * the accumulated skippable flag. */
+static void txfm_rd_in_plane(MACROBLOCK *x, const VP10_COMP *cpi, int *rate,
+                             int64_t *distortion, int *skippable, int64_t *sse,
+                             int64_t ref_best_rd, int plane, BLOCK_SIZE bsize,
+                             TX_SIZE tx_size, int use_fast_coef_casting) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  struct rdcost_block_args args;
+  TX_TYPE tx_type;
+
+  vp10_zero(args);
+  args.cpi = cpi;
+  args.x = x;
+  args.skippable = 1;
+  args.use_fast_coef_costing = use_fast_coef_casting;
+  args.best_rd = ref_best_rd;
+
+  if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
+
+  vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
+
+  tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
+  args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+
+  vp10_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
+                                          &args);
+
+  if (!args.exit_early) {
+    *rate = args.this_rate;
+    *distortion = args.this_dist;
+    *sse = args.this_sse;
+    *skippable = args.skippable;
+    return;
+  }
+
+  // The search was abandoned: report sentinel values.
+  *rate = INT_MAX;
+  *distortion = INT64_MAX;
+  *sse = INT64_MAX;
+  *skippable = 0;
+}
+
+#if CONFIG_SUPERTX
+/* Supertx variant of the per-plane transform RD evaluation: calls
+ * block_rd_txfm() once, for block 0 at position (0, 0) with the plane's block
+ * size, instead of iterating over all transform blocks.
+ *
+ * For plane 0 the block's mbmi.tx_size is set to tx_size first. If the
+ * evaluation was abandoned against ref_best_rd, *rate is INT_MAX,
+ * *distortion and *sse are INT64_MAX and *skippable is 0. Otherwise the
+ * outputs are the accumulated rate, distortion and SSE, and *skippable is 1
+ * exactly when eobs[0] of the plane is zero. */
+void vp10_txfm_rd_in_plane_supertx(MACROBLOCK *x, const VP10_COMP *cpi,
+                                   int *rate, int64_t *distortion,
+                                   int *skippable, int64_t *sse,
+                                   int64_t ref_best_rd, int plane,
+                                   BLOCK_SIZE bsize, TX_SIZE tx_size,
+                                   int use_fast_coef_casting) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  struct rdcost_block_args args;
+  TX_TYPE tx_type;
+
+#if CONFIG_EXT_TX
+  assert(tx_size < TX_SIZES);
+#endif  // CONFIG_EXT_TX
+
+  vp10_zero(args);
+  args.x = x;
+  args.cpi = cpi;
+  args.use_fast_coef_costing = use_fast_coef_casting;
+  args.best_rd = ref_best_rd;
+
+  if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
+
+  vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
+
+  tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
+  args.so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+
+  block_rd_txfm(plane, 0, 0, 0, get_plane_block_size(bsize, pd), tx_size,
+                &args);
+
+  if (!args.exit_early) {
+    *rate = args.this_rate;
+    *distortion = args.this_dist;
+    *sse = args.this_sse;
+    *skippable = !x->plane[plane].eobs[0];
+    return;
+  }
+
+  // The evaluation was abandoned: report sentinel values.
+  *rate = INT_MAX;
+  *distortion = INT64_MAX;
+  *sse = INT64_MAX;
+  *skippable = 0;
+}
+#endif // CONFIG_SUPERTX
+/* Evaluates the luma plane of a block of size bs with the given transform
+ * type and size and returns the resulting RD cost.
+ *
+ * tx_type and tx_size are written into the block's mode info, then
+ * txfm_rd_in_plane() is run for plane 0, filling *r (rate), *d (distortion),
+ * *s (skippable flag) and *sse. If that search was abandoned
+ * (*r == INT_MAX) INT64_MAX is returned. Otherwise:
+ *   - *r is increased by the transform-type cost taken from
+ *     cpi->inter_tx_type_costs / cpi->intra_tx_type_costs when the
+ *     configuration-specific conditions below hold;
+ *   - the RD cost is formed from the skip-flag costs s0 / s1 and, except for
+ *     skippable inter blocks, the transform-size cost r_tx_size when
+ *     cm->tx_mode == TX_MODE_SELECT;
+ *   - *r is increased by r_tx_size under TX_MODE_SELECT, after the RD cost
+ *     has been computed;
+ *   - for non-lossless, non-skippable inter blocks the returned cost is
+ *     capped by the cost of signalling skip (s1, *sse). */
+static int64_t txfm_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *r, int64_t *d,
+ int *s, int64_t *sse, int64_t ref_best_rd,
+ BLOCK_SIZE bs, TX_TYPE tx_type, int tx_size) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int64_t rd = INT64_MAX;
+ vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
+ int s0, s1;
+ const int is_inter = is_inter_block(mbmi);
+ const int tx_size_ctx = get_tx_size_context(xd);
+ const int tx_size_cat =
+ is_inter ? inter_tx_size_cat_lookup[bs] : intra_tx_size_cat_lookup[bs];
+ const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
+ const int tx_select = cm->tx_mode == TX_MODE_SELECT;
+ const int r_tx_size =
+ cpi->tx_size_cost[tx_size_cat][tx_size_ctx][coded_tx_size];
+
+ assert(skip_prob > 0);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+
+ s0 = vp10_cost_bit(skip_prob, 0); // cost of signalling "not skipped"
+ s1 = vp10_cost_bit(skip_prob, 1); // cost of signalling "skipped"
+
+ mbmi->tx_type = tx_type;
+ mbmi->tx_size = tx_size;
+ txfm_rd_in_plane(x, cpi, r, d, s, sse, ref_best_rd, 0, bs, tx_size,
+ cpi->sf.use_fast_coef_costing);
+ if (*r == INT_MAX) return INT64_MAX; // txfm_rd_in_plane() gave up early
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(tx_size, bs, is_inter) > 1 &&
+ !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ const int ext_tx_set = get_ext_tx_set(tx_size, bs, is_inter);
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ *r += cpi->inter_tx_type_costs
+ [ext_tx_set][txsize_sqr_map[mbmi->tx_size]][mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
+ *r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size][mbmi->mode]
+ [mbmi->tx_type];
+ }
+ }
+#else
+ if (tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
+ !FIXED_TX_TYPE) {
+ if (is_inter) {
+ *r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+ } else {
+ *r += cpi->intra_tx_type_costs[mbmi->tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]]
+ [mbmi->tx_type];
+ }
+ }
+#endif // CONFIG_EXT_TX
+
+ if (*s) {
+ if (is_inter) {
+ rd = RDCOST(x->rdmult, x->rddiv, s1, *sse);
+ } else {
+ rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, *sse);
+ }
+ } else {
+ rd = RDCOST(x->rdmult, x->rddiv, *r + s0 + r_tx_size * tx_select, *d);
+ }
+
+ if (tx_select) *r += r_tx_size; // added to *r only after rd was computed
+
+ if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !(*s))
+ rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, *sse));
+
+ return rd;
+}
+/* For a fixed transform type, searches the transform sizes allowed for a
+ * block of size bs and keeps the one with the lowest RD cost from txfm_yrd().
+ *
+ * Under TX_MODE_SELECT the sizes from max_txsize_lookup[bs] down to 0 are
+ * visited; otherwise only the size given by tx_size_from_tx_mode() is tried.
+ * With CONFIG_EXT_TX && CONFIG_RECT_TX the rectangular size
+ * max_txsize_rect_lookup[bs] is evaluated first when it applies. Sizes are
+ * skipped when the type is not usable for them (default-type restrictions,
+ * the ext-tx set tables, or the pruning mask `prune` for inter blocks).
+ *
+ * Outputs: *rate, *distortion, *skip and *psse describe the best candidate;
+ * they keep the initial values INT_MAX / INT64_MAX / 0 / INT64_MAX when no
+ * candidate improved on INT64_MAX. mbmi->tx_type is set to tx_type and
+ * mbmi->tx_size to the best size found (max_txsize_lookup[bs] if none).
+ * Returns the best RD cost.
+ *
+ * NOTE(review): the local `evaulate_rect_tx` is a misspelling of "evaluate";
+ * left untouched here. */
+static int64_t choose_tx_size_fix_type(VP10_COMP *cpi, BLOCK_SIZE bs,
+ MACROBLOCK *x, int *rate,
+ int64_t *distortion, int *skip,
+ int64_t *psse, int64_t ref_best_rd,
+ TX_TYPE tx_type, int prune) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ int r, s;
+ int64_t d, sse;
+ int64_t rd = INT64_MAX;
+ int n;
+ int start_tx, end_tx;
+ int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
+ const TX_SIZE max_tx_size = max_txsize_lookup[bs];
+ TX_SIZE best_tx_size = max_tx_size;
+ const int tx_select = cm->tx_mode == TX_MODE_SELECT;
+ const int is_inter = is_inter_block(mbmi);
+#if CONFIG_EXT_TX
+#if CONFIG_RECT_TX
+ int evaulate_rect_tx = 0;
+#endif // CONFIG_RECT_TX
+ int ext_tx_set;
+#endif // CONFIG_EXT_TX
+
+ if (tx_select) {
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ evaulate_rect_tx = is_rect_tx_allowed(mbmi);
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+ start_tx = max_tx_size;
+ end_tx = 0;
+ } else {
+ const TX_SIZE chosen_tx_size =
+ tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ evaulate_rect_tx = is_rect_tx(chosen_tx_size);
+ assert(IMPLIES(evaulate_rect_tx, is_rect_tx_allowed(mbmi)));
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+ start_tx = chosen_tx_size;
+ end_tx = chosen_tx_size;
+ }
+
+ *distortion = INT64_MAX;
+ *rate = INT_MAX;
+ *skip = 0;
+ *psse = INT64_MAX;
+
+ mbmi->tx_type = tx_type;
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ if (evaulate_rect_tx) {
+ const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
+ const int ext_tx_set = get_ext_tx_set(rect_tx_size, bs, 1);
+ if (ext_tx_used_inter[ext_tx_set][tx_type]) {
+ rd = txfm_yrd(cpi, x, &r, &d, &s, &sse, ref_best_rd, bs, tx_type,
+ rect_tx_size);
+ best_tx_size = rect_tx_size;
+ best_rd = rd;
+ *distortion = d;
+ *rate = r;
+ *skip = s;
+ *psse = sse;
+ }
+ }
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+
+ last_rd = INT64_MAX;
+ for (n = start_tx; n >= end_tx; --n) {
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ if (is_rect_tx(n)) break;
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+ if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, n)) continue;
+ if (!is_inter && x->use_default_intra_tx_type &&
+ tx_type != get_default_tx_type(0, xd, 0, n))
+ continue;
+ if (is_inter && x->use_default_inter_tx_type &&
+ tx_type != get_default_tx_type(0, xd, 0, n))
+ continue;
+ if (max_tx_size == TX_32X32 && n == TX_4X4) continue; // 4x4 is not tried here
+#if CONFIG_EXT_TX
+ ext_tx_set = get_ext_tx_set(n, bs, is_inter);
+ if (is_inter) {
+ if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
+ if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
+ if (!do_tx_type_search(tx_type, prune)) continue;
+ }
+ } else {
+ if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
+ if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
+ }
+ if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
+ }
+#else // CONFIG_EXT_TX
+ if (n >= TX_32X32 && tx_type != DCT_DCT) continue;
+ if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
+ !do_tx_type_search(tx_type, prune))
+ continue;
+#endif // CONFIG_EXT_TX
+
+ rd = txfm_yrd(cpi, x, &r, &d, &s, &sse, ref_best_rd, bs, tx_type, n);
+
+ // Early termination in transform size search: stop when the evaluation
+ // failed, when a non-DCT type below the first size is skippable, or when a
+ // size below the maximum is worse than the previously evaluated one.
+ if (cpi->sf.tx_size_search_breakout &&
+ (rd == INT64_MAX || (s == 1 && tx_type != DCT_DCT && n < start_tx) ||
+ (n < (int)max_tx_size && rd > last_rd)))
+ break;
+
+ last_rd = rd;
+ if (rd < best_rd) {
+ best_tx_size = n;
+ best_rd = rd;
+ *distortion = d;
+ *rate = r;
+ *skip = s;
+ *psse = sse;
+ }
+ }
+ mbmi->tx_size = best_tx_size;
+
+ return best_rd;
+}
+
+#if CONFIG_EXT_INTER
+/* Estimates the luma RD cost of a block of size bs by evaluating a single
+ * candidate with txfm_yrd(): DCT_DCT at the size max_txsize_lookup[bs].
+ * r, d, s and sse are filled in by txfm_yrd(); its return value is passed
+ * straight through. */
+static int64_t estimate_yrd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bs, MACROBLOCK *x,
+                                   int *r, int64_t *d, int *s, int64_t *sse,
+                                   int64_t ref_best_rd) {
+  const int largest_tx = max_txsize_lookup[bs];
+  return txfm_yrd(cpi, x, r, d, s, sse, ref_best_rd, bs, DCT_DCT, largest_tx);
+}
+#endif // CONFIG_EXT_INTER
+/* Fixes the luma transform size to tx_size_from_tx_mode() for the block and
+ * searches only over transform types.
+ *
+ * Each candidate type that passes the filters (default-type restrictions,
+ * ext-tx set tables, the pruning mask for inter blocks) is evaluated with
+ * txfm_rd_in_plane(); its RD cost is formed from the rate plus the
+ * transform-type cost and the skip-flag costs s0 / s1, and the type with the
+ * lowest cost is kept (DCT_DCT when nothing was evaluated or lossless is
+ * active). The winner is stored in mbmi->tx_type and txfm_rd_in_plane() is
+ * run once more with it to fill *rate, *distortion, *skip and *sse. */
+static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+ int64_t *distortion, int *skip, int64_t *sse,
+ int64_t ref_best_rd, BLOCK_SIZE bs) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ TX_TYPE tx_type, best_tx_type = DCT_DCT;
+ int r, s;
+ int64_t d, psse, this_rd, best_rd = INT64_MAX;
+ vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
+ int s0 = vp10_cost_bit(skip_prob, 0);
+ int s1 = vp10_cost_bit(skip_prob, 1);
+ const int is_inter = is_inter_block(mbmi);
+ int prune = 0;
+#if CONFIG_EXT_TX
+ int ext_tx_set;
+#endif // CONFIG_EXT_TX
+
+ mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
+
+#if CONFIG_EXT_TX
+ ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);
+#endif // CONFIG_EXT_TX
+
+ if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE) // body: the call selected by #if below
+#if CONFIG_EXT_TX
+ prune = prune_tx_types(cpi, bs, x, xd, ext_tx_set);
+#else
+ prune = prune_tx_types(cpi, bs, x, xd, 0);
+#endif
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
+ !xd->lossless[mbmi->segment_id]) {
+ for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+ if (is_inter) {
+ if (x->use_default_inter_tx_type &&
+ tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
+ continue;
+ if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
+ if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
+ if (!do_tx_type_search(tx_type, prune)) continue;
+ }
+ } else {
+ if (x->use_default_intra_tx_type &&
+ tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
+ continue;
+ if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
+ if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
+ }
+ if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
+ }
+
+ mbmi->tx_type = tx_type;
+
+ txfm_rd_in_plane(x, cpi, &r, &d, &s, &psse, ref_best_rd, 0, bs,
+ mbmi->tx_size, cpi->sf.use_fast_coef_costing);
+
+ if (r == INT_MAX) continue; // evaluation was abandoned for this type
+ if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1) {
+ if (is_inter) {
+ if (ext_tx_set > 0)
+ r += cpi->inter_tx_type_costs[ext_tx_set][mbmi->tx_size]
+ [mbmi->tx_type];
+ } else {
+ if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
+ r += cpi->intra_tx_type_costs[ext_tx_set][mbmi->tx_size][mbmi->mode]
+ [mbmi->tx_type];
+ }
+ }
+
+ if (s)
+ this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
+ else
+ this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
+ if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] && !s)
+ this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse));
+
+ if (this_rd < best_rd) {
+ best_rd = this_rd;
+ best_tx_type = mbmi->tx_type;
+ }
+ }
+ }
+
+#else // CONFIG_EXT_TX
+ if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id]) {
+ for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
+ if (!is_inter && x->use_default_intra_tx_type &&
+ tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
+ continue;
+ if (is_inter && x->use_default_inter_tx_type &&
+ tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
+ continue;
+ mbmi->tx_type = tx_type;
+ txfm_rd_in_plane(x, cpi, &r, &d, &s, &psse, ref_best_rd, 0, bs,
+ mbmi->tx_size, cpi->sf.use_fast_coef_costing);
+ if (r == INT_MAX) continue; // evaluation was abandoned for this type
+ if (is_inter) {
+ r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+ if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
+ !do_tx_type_search(tx_type, prune))
+ continue;
+ } else {
+ r += cpi->intra_tx_type_costs[mbmi->tx_size]
+ [intra_mode_to_tx_type_context[mbmi->mode]]
+ [mbmi->tx_type];
+ }
+ if (s)
+ this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
+ else
+ this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
+ if (is_inter && !xd->lossless[mbmi->segment_id] && !s)
+ this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse));
+
+ if (this_rd < best_rd) {
+ best_rd = this_rd;
+ best_tx_type = mbmi->tx_type;
+ }
+ }
+ }
+#endif // CONFIG_EXT_TX
+ mbmi->tx_type = best_tx_type;
+
+ txfm_rd_in_plane(x, cpi, rate, distortion, skip, sse, ref_best_rd, 0, bs,
+ mbmi->tx_size, cpi->sf.use_fast_coef_costing); // re-run with the winner to fill the outputs
+}
+
+/* Forces the luma transform to TX_4X4 / DCT_DCT in the block's mode info and
+ * evaluates plane 0 with txfm_rd_in_plane(), which fills *rate, *distortion,
+ * *skip and *sse. */
+static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+                                    int64_t *distortion, int *skip,
+                                    int64_t *sse, int64_t ref_best_rd,
+                                    BLOCK_SIZE bs) {
+  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
+
+  mbmi->tx_type = DCT_DCT;
+  mbmi->tx_size = TX_4X4;
+
+  txfm_rd_in_plane(x, cpi, rate, distortion, skip, sse, ref_best_rd, 0, bs,
+                   TX_4X4, cpi->sf.use_fast_coef_costing);
+}
+
+/* Jointly searches transform type and size for the luma plane of a block.
+ *
+ * Every transform type from DCT_DCT up to TX_TYPES - 1 is handed to
+ * choose_tx_size_fix_type(), which picks the best size for that type; the
+ * (type, size) pair with the lowest RD cost is written to mbmi->tx_type and
+ * mbmi->tx_size, and its rate, distortion, skip flag and SSE are stored in
+ * the output arguments. If no candidate beats INT64_MAX the outputs keep
+ * INT_MAX / INT64_MAX / 0 / INT64_MAX and the mode info is set to DCT_DCT at
+ * max_txsize_lookup[bs]. */
+static void choose_tx_size_type_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
+                                        int *rate, int64_t *distortion,
+                                        int *skip, int64_t *psse,
+                                        int64_t ref_best_rd, BLOCK_SIZE bs) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const int is_inter = is_inter_block(mbmi);
+  TX_SIZE winner_size = max_txsize_lookup[bs];
+  TX_TYPE winner_type = DCT_DCT;
+  int64_t lowest_rd = INT64_MAX;
+  TX_TYPE tx_type;
+  int prune = 0;
+
+  if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
+    // A negative last argument tells prune_tx_types() that all 1D
+    // transforms should be considered for pruning.
+    prune = prune_tx_types(cpi, bs, x, xd, -1);
+  }
+
+  *rate = INT_MAX;
+  *distortion = INT64_MAX;
+  *psse = INT64_MAX;
+  *skip = 0;
+
+  for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+    int cand_rate, cand_skip;
+    int64_t cand_dist, cand_sse, cand_rd;
+
+#if CONFIG_REF_MV
+    if (tx_type != DCT_DCT && is_inter && mbmi->ref_mv_idx > 0) continue;
+#endif
+    cand_rd =
+        choose_tx_size_fix_type(cpi, bs, x, &cand_rate, &cand_dist, &cand_skip,
+                                &cand_sse, ref_best_rd, tx_type, prune);
+    if (cand_rd >= lowest_rd) continue;
+
+    lowest_rd = cand_rd;
+    winner_type = tx_type;
+    winner_size = mbmi->tx_size;  // size chosen by choose_tx_size_fix_type()
+    *rate = cand_rate;
+    *distortion = cand_dist;
+    *skip = cand_skip;
+    *psse = cand_sse;
+  }
+
+  mbmi->tx_type = winner_type;
+  mbmi->tx_size = winner_size;
+
+#if !CONFIG_EXT_TX
+  assert(mbmi->tx_size < TX_32X32 || mbmi->tx_type == DCT_DCT);
+#endif
+}
+
+/* Picks the luma transform configuration for a block of size bs and reports
+ * its rate, distortion, skip flag and (optionally) SSE.
+ *
+ * The strategy depends on the encoder state:
+ *   - lossless segment                      -> choose_smallest_tx_size()
+ *   - tx_size_search_method USE_LARGESTALL  -> choose_largest_tx_size()
+ *   - anything else                         -> choose_tx_size_type_from_rd()
+ *
+ * psse may be a null pointer when the caller does not need the SSE. */
+static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+                            int64_t *distortion, int *skip, int64_t *psse,
+                            BLOCK_SIZE bs, int64_t ref_best_rd) {
+  MACROBLOCKD *xd = &x->e_mbd;
+  int64_t scratch_sse;
+
+  assert(bs == xd->mi[0]->mbmi.sb_type);
+
+  // The helpers always write an SSE; give them somewhere to put it.
+  if (!psse) psse = &scratch_sse;
+
+  if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+    choose_smallest_tx_size(cpi, x, rate, distortion, skip, psse, ref_best_rd,
+                            bs);
+    return;
+  }
+
+  if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
+    choose_largest_tx_size(cpi, x, rate, distortion, skip, psse, ref_best_rd,
+                           bs);
+    return;
+  }
+
+  choose_tx_size_type_from_rd(cpi, x, rate, distortion, skip, psse,
+                              ref_best_rd, bs);
+}
+
+/* Returns 1 when the directional intra mode `mode` can be skipped because
+ * the best intra mode found so far is not one of the two modes paired with
+ * it below, and 0 otherwise (always 0 for modes not listed):
+ *   D117_PRED is kept only after V_PRED or D135_PRED
+ *   D63_PRED  is kept only after V_PRED or D45_PRED
+ *   D207_PRED is kept only after H_PRED or D45_PRED
+ *   D153_PRED is kept only after H_PRED or D135_PRED */
+static int conditional_skipintra(PREDICTION_MODE mode,
+                                 PREDICTION_MODE best_intra_mode) {
+  const int best_is_v = best_intra_mode == V_PRED;
+  const int best_is_h = best_intra_mode == H_PRED;
+  const int best_is_d45 = best_intra_mode == D45_PRED;
+  const int best_is_d135 = best_intra_mode == D135_PRED;
+
+  if (mode == D117_PRED) return !(best_is_v || best_is_d135);
+  if (mode == D63_PRED) return !(best_is_v || best_is_d45);
+  if (mode == D207_PRED) return !(best_is_h || best_is_d45);
+  if (mode == D153_PRED) return !(best_is_h || best_is_d135);
+  return 0;
+}
+// Plane-0 palette RD search; returns best palette's non-token rate, else 0.
+static int rd_pick_palette_intra_sby(
+ VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int palette_ctx,
+ int dc_mode_cost, PALETTE_MODE_INFO *palette_mode_info,
+ uint8_t *best_palette_color_map, TX_SIZE *best_tx, TX_TYPE *best_tx_type,
+ PREDICTION_MODE *mode_selected, int64_t *best_rd) {
+ int rate_overhead = 0;  // stays 0 unless a palette beats *best_rd
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mic = xd->mi[0];
+ const int rows = 4 * num_4x4_blocks_high_lookup[bsize];
+ const int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
+ int this_rate, this_rate_tokenonly, s, colors, n;
+ int64_t this_distortion, this_rd;
+ const int src_stride = x->plane[0].src.stride;
+ const uint8_t *const src = x->plane[0].src.buf;
+
+ assert(cpi->common.allow_screen_content_tools);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->common.use_highbitdepth)
+ colors = vp10_count_colors_highbd(src, src_stride, rows, cols,
+ cpi->common.bit_depth);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ colors = vp10_count_colors(src, src_stride, rows, cols);
+ palette_mode_info->palette_size[0] = 0;
+#if CONFIG_EXT_INTRA
+ mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+#endif // CONFIG_EXT_INTRA
+
+ if (colors > 1 && colors <= 64) {  // otherwise no palette is tried
+ int r, c, i, j, k;
+ const int max_itr = 50;
+ int color_ctx, color_idx = 0;
+ int color_order[PALETTE_MAX_SIZE];
+ float *const data = x->palette_buffer->kmeans_data_buf;
+ float centroids[PALETTE_MAX_SIZE];
+ uint8_t *const color_map = xd->plane[0].color_index_map;
+ float lb, ub, val;
+ MB_MODE_INFO *const mbmi = &mic->mbmi;
+ PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
+ if (cpi->common.use_highbitdepth)
+ lb = ub = src16[0];
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ lb = ub = src[0];  // seed min/max with the first pixel
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->common.use_highbitdepth) {
+ for (r = 0; r < rows; ++r) {
+ for (c = 0; c < cols; ++c) {
+ val = src16[r * src_stride + c];
+ data[r * cols + c] = val;
+ if (val < lb)
+ lb = val;
+ else if (val > ub)
+ ub = val;
+ }
+ }
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ for (r = 0; r < rows; ++r) {
+ for (c = 0; c < cols; ++c) {
+ val = src[r * src_stride + c];
+ data[r * cols + c] = val;  // packed copy, stride == cols
+ if (val < lb)
+ lb = val;
+ else if (val > ub)
+ ub = val;
+ }
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ mbmi->mode = DC_PRED;
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+#endif // CONFIG_EXT_INTRA
+
+ // NOTE(review): this size check runs only after data[] was filled above.
+ if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return 0;
+
+ for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
+ --n) {  // largest palette first
+ for (i = 0; i < n; ++i)  // midpoints of n equal bins over [lb, ub]
+ centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
+ vp10_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
+ k = vp10_remove_duplicates(centroids, n);  // presumably distinct count
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->common.use_highbitdepth)
+ for (i = 0; i < k; ++i)
+ pmi->palette_colors[i] =
+ clip_pixel_highbd((int)centroids[i], cpi->common.bit_depth);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ for (i = 0; i < k; ++i)
+ pmi->palette_colors[i] = clip_pixel((int)centroids[i]);
+ pmi->palette_size[0] = k;
+
+ vp10_calc_indices(data, centroids, color_map, rows * cols, k, 1);
+
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
+ bsize, *best_rd);
+ if (this_rate_tokenonly == INT_MAX) continue;  // candidate flagged unusable
+
+ this_rate =
+ this_rate_tokenonly + dc_mode_cost +
+ cpi->common.bit_depth * k * vp10_cost_bit(128, 0) +
+ cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - 2] +
+ write_uniform_cost(k, color_map[0]) +
+ vp10_cost_bit(
+ vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx],
+ 1);
+ for (i = 0; i < rows; ++i) {
+ // Pixel (0, 0) is skipped: it was costed via write_uniform_cost above.
+ for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
+ color_ctx = vp10_get_palette_color_context(color_map, cols, i, j, k,
+ color_order);
+ for (r = 0; r < k; ++r)
+ if (color_map[i * cols + j] == color_order[r]) {
+ color_idx = r;
+ break;
+ }
+ assert(color_idx >= 0 && color_idx < k);
+ this_rate += cpi->palette_y_color_cost[k - 2][color_ctx][color_idx];
+ }
+ }
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ *palette_mode_info = *pmi;
+ memcpy(best_palette_color_map, color_map,
+ rows * cols * sizeof(color_map[0]));
+ *mode_selected = DC_PRED;
+ *best_tx = mbmi->tx_size;
+ *best_tx_type = mbmi->tx_type;
+ rate_overhead = this_rate - this_rate_tokenonly;  // all but token rate
+ }
+ }
+ }
+ return rate_overhead;
+}
+// RD-searches DC_PRED..TM_PRED for one sub-block of plane 0; returns best RD.
+static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, int row,
+ int col, PREDICTION_MODE *best_mode,
+ const int *bmode_costs, ENTROPY_CONTEXT *a,
+ ENTROPY_CONTEXT *l, int *bestrate,
+ int *bestratey, int64_t *bestdistortion,
+ BLOCK_SIZE bsize, int64_t rd_thresh) {
+ PREDICTION_MODE mode;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int64_t best_rd = rd_thresh;  // a mode must beat the caller's threshold
+ struct macroblock_plane *p = &x->plane[0];
+ struct macroblockd_plane *pd = &xd->plane[0];
+ const int src_stride = p->src.stride;
+ const int dst_stride = pd->dst.stride;
+ const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
+ uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4];
+ ENTROPY_CONTEXT ta[2], tempa[2];
+ ENTROPY_CONTEXT tl[2], templ[2];
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ uint8_t best_dst[8 * 8];  // reconstruction of the best mode so far
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint16_t best_dst16[8 * 8];
+#endif
+
+ memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
+ memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
+ xd->mi[0]->mbmi.tx_size = TX_4X4;
+ xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
+ int64_t this_rd;
+ int ratey = 0;
+ int64_t distortion = 0;
+ int rate = bmode_costs[mode];
+
+ if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
+
+ // Only do the oblique modes if the best so far is
+ // one of the neighboring directional modes
+ if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
+ if (conditional_skipintra(mode, *best_mode)) continue;
+ }
+
+ memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
+ memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
+
+ for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
+ for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
+ const int block = (row + idy) * 2 + (col + idx);
+ const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
+ uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
+ int16_t *const src_diff =
+ vp10_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
+ xd->mi[0]->bmi[block].as_mode = mode;
+ vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride, dst,
+ dst_stride, col + idx, row + idy, 0);
+ vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst,
+ dst_stride, xd->bd);
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+#if CONFIG_VAR_TX | CONFIG_NEW_QUANT
+ const int coeff_ctx =
+ combine_entropy_contexts(*(tempa + idx), *(templ + idy));
+#endif // CONFIG_VAR_TX | CONFIG_NEW_QUANT
+#if CONFIG_NEW_QUANT
+ vp10_xform_quant_fp_nuq(x, 0, block, row + idy, col + idx,
+ BLOCK_8X8, TX_4X4, coeff_ctx);
+#else
+ vp10_xform_quant(x, 0, block, row + idy, col + idx, BLOCK_8X8,
+ TX_4X4, VP10_XFORM_QUANT_FP);
+#endif // CONFIG_NEW_QUANT
+#if CONFIG_VAR_TX
+ ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
+ so->neighbors, cpi->sf.use_fast_coef_costing);
+ *(tempa + idx) = !(p->eobs[block] == 0);
+ *(templ + idy) = !(p->eobs[block] == 0);
+#else
+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
+ so->scan, so->neighbors,
+ cpi->sf.use_fast_coef_costing);
+#endif // CONFIG_VAR_TX
+ if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
+ goto next_highbd;
+ vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
+ dst_stride, p->eobs[block], xd->bd,
+ DCT_DCT, 1);
+ } else {
+ int64_t dist;
+ unsigned int tmp;
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+ const int coeff_ctx =
+ combine_entropy_contexts(*(tempa + idx), *(templ + idy));
+#if CONFIG_NEW_QUANT
+ vp10_xform_quant_fp_nuq(x, 0, block, row + idy, col + idx,
+ BLOCK_8X8, TX_4X4, coeff_ctx);
+#else
+ vp10_xform_quant(x, 0, block, row + idy, col + idx, BLOCK_8X8,
+ TX_4X4, VP10_XFORM_QUANT_FP);
+#endif // CONFIG_NEW_QUANT
+ vp10_optimize_b(x, 0, block, TX_4X4, coeff_ctx);
+#if CONFIG_VAR_TX
+ ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
+ so->neighbors, cpi->sf.use_fast_coef_costing);
+ *(tempa + idx) = !(p->eobs[block] == 0);
+ *(templ + idy) = !(p->eobs[block] == 0);
+#else
+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
+ so->scan, so->neighbors,
+ cpi->sf.use_fast_coef_costing);
+#endif // CONFIG_VAR_TX
+ vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
+ dst_stride, p->eobs[block], xd->bd,
+ tx_type, 0);
+ cpi->fn_ptr[BLOCK_4X4].vf(src, src_stride, dst, dst_stride, &tmp);
+ dist = (int64_t)tmp << 4;
+ distortion += dist;
+ if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
+ goto next_highbd;
+ }
+ }
+ }
+
+ rate += ratey;
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+
+ if (this_rd < best_rd) {
+ *bestrate = rate;
+ *bestratey = ratey;
+ *bestdistortion = distortion;
+ best_rd = this_rd;
+ *best_mode = mode;
+ memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
+ memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
+ for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
+ memcpy(best_dst16 + idy * 8,
+ CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
+ num_4x4_blocks_wide * 4 * sizeof(uint16_t));
+ }
+ }
+ next_highbd : {}
+ }
+
+ if (best_rd >= rd_thresh) return best_rd;  // no mode won; dst not restored
+
+ for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
+ memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
+ best_dst16 + idy * 8, num_4x4_blocks_wide * 4 * sizeof(uint16_t));
+ }
+
+ return best_rd;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
+ int64_t this_rd;
+ int ratey = 0;
+ int64_t distortion = 0;
+ int rate = bmode_costs[mode];
+
+ if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
+
+ // Only do the oblique modes if the best so far is
+ // one of the neighboring directional modes
+ if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
+ if (conditional_skipintra(mode, *best_mode)) continue;
+ }
+
+ memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
+ memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
+
+ for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
+ for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
+ const int block = (row + idy) * 2 + (col + idx);
+ const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
+ uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
+ int16_t *const src_diff =
+ vp10_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
+ xd->mi[0]->bmi[block].as_mode = mode;
+ vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride, dst,
+ dst_stride, col + idx, row + idy, 0);
+ vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
+
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+#if CONFIG_VAR_TX | CONFIG_NEW_QUANT
+ const int coeff_ctx =
+ combine_entropy_contexts(*(tempa + idx), *(templ + idy));
+#endif // CONFIG_VAR_TX | CONFIG_NEW_QUANT
+#if CONFIG_NEW_QUANT
+ vp10_xform_quant_fp_nuq(x, 0, block, row + idy, col + idx, BLOCK_8X8,
+ TX_4X4, coeff_ctx);
+#else
+ vp10_xform_quant(x, 0, block, row + idy, col + idx, BLOCK_8X8, TX_4X4,
+ VP10_XFORM_QUANT_B);
+#endif // CONFIG_NEW_QUANT
+#if CONFIG_VAR_TX
+ ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
+ so->neighbors, cpi->sf.use_fast_coef_costing);
+ *(tempa + idx) = !(p->eobs[block] == 0);
+ *(templ + idy) = !(p->eobs[block] == 0);
+#else
+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
+ so->scan, so->neighbors,
+ cpi->sf.use_fast_coef_costing);
+#endif
+ if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
+ goto next;
+ vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
+ dst_stride, p->eobs[block], DCT_DCT, 1);
+ } else {
+ int64_t dist;
+ unsigned int tmp;
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4);
+ const scan_order *so = get_scan(TX_4X4, tx_type, 0);
+ const int coeff_ctx =
+ combine_entropy_contexts(*(tempa + idx), *(templ + idy));
+#if CONFIG_NEW_QUANT
+ vp10_xform_quant_fp_nuq(x, 0, block, row + idy, col + idx, BLOCK_8X8,
+ TX_4X4, coeff_ctx);
+#else
+ vp10_xform_quant(x, 0, block, row + idy, col + idx, BLOCK_8X8, TX_4X4,
+ VP10_XFORM_QUANT_FP);
+#endif // CONFIG_NEW_QUANT
+ vp10_optimize_b(x, 0, block, TX_4X4, coeff_ctx);
+#if CONFIG_VAR_TX
+ ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan,
+ so->neighbors, cpi->sf.use_fast_coef_costing);
+ *(tempa + idx) = !(p->eobs[block] == 0);
+ *(templ + idy) = !(p->eobs[block] == 0);
+#else
+ ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
+ so->scan, so->neighbors,
+ cpi->sf.use_fast_coef_costing);
+#endif
+ vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
+ dst_stride, p->eobs[block], tx_type, 0);
+ cpi->fn_ptr[BLOCK_4X4].vf(src, src_stride, dst, dst_stride, &tmp);
+ dist = (int64_t)tmp << 4;
+ distortion += dist;
+ // To use the pixel domain distortion, the step below needs to be
+ // put behind the inv txfm. Compared to calculating the distortion
+ // in the frequency domain, the overhead of encoding effort is low.
+ if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
+ goto next;
+ }
+ }
+ }
+
+ rate += ratey;
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
+
+ if (this_rd < best_rd) {
+ *bestrate = rate;
+ *bestratey = ratey;
+ *bestdistortion = distortion;
+ best_rd = this_rd;
+ *best_mode = mode;
+ memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
+ memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
+ for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
+ memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
+ num_4x4_blocks_wide * 4);
+ }
+ next : {}
+ }
+
+ if (best_rd >= rd_thresh) return best_rd;  // no mode won; dst not restored
+
+ for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)  // restore winner's pixels
+ memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
+ num_4x4_blocks_wide * 4);
+
+ return best_rd;
+}
+// Picks a mode per sub-block of an 8x8; returns total RD, INT64_MAX on bail.
+static int64_t rd_pick_intra_sub_8x8_y_mode(VP10_COMP *cpi, MACROBLOCK *mb,
+ int *rate, int *rate_y,
+ int64_t *distortion,
+ int64_t best_rd) {
+ int i, j;
+ const MACROBLOCKD *const xd = &mb->e_mbd;
+ MODE_INFO *const mic = xd->mi[0];
+ const MODE_INFO *above_mi = xd->above_mi;
+ const MODE_INFO *left_mi = xd->left_mi;
+ const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ int idx, idy;
+ int cost = 0;
+ int64_t total_distortion = 0;
+ int tot_rate_y = 0;
+ int64_t total_rd = 0;  // RD spent on the sub-blocks decided so far
+ const int *bmode_costs = cpi->mbmode_cost[0];
+
+#if CONFIG_EXT_INTRA
+ mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mic->mbmi.intra_filter = INTRA_FILTER_LINEAR;
+#endif // CONFIG_EXT_INTRA
+
+ // TODO(any): Add search of the tx_type to improve rd performance at the
+ // expense of speed.
+ mic->mbmi.tx_type = DCT_DCT;
+ mic->mbmi.tx_size = TX_4X4;
+
+ // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
+ for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
+ for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
+ PREDICTION_MODE best_mode = DC_PRED;
+ int r = INT_MAX, ry = INT_MAX;
+ int64_t d = INT64_MAX, this_rd = INT64_MAX;
+ i = idy * 2 + idx;
+ if (cpi->common.frame_type == KEY_FRAME) {
+ const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, i);
+ const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, i);
+
+ bmode_costs = cpi->y_mode_costs[A][L];  // costs keyed on neighbor modes
+ }
+
+ this_rd = rd_pick_intra4x4block(
+ cpi, mb, idy, idx, &best_mode, bmode_costs,
+ xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
+ &ry, &d, bsize, best_rd - total_rd);
+ if (this_rd >= best_rd - total_rd) return INT64_MAX;  // over remaining budget
+
+ total_rd += this_rd;
+ cost += r;
+ total_distortion += d;
+ tot_rate_y += ry;
+
+ mic->bmi[i].as_mode = best_mode;
+ for (j = 1; j < num_4x4_blocks_high; ++j)  // copy mode to covered 4x4 below
+ mic->bmi[i + j * 2].as_mode = best_mode;
+ for (j = 1; j < num_4x4_blocks_wide; ++j)  // copy mode to covered 4x4 right
+ mic->bmi[i + j].as_mode = best_mode;
+
+ if (total_rd >= best_rd) return INT64_MAX;
+ }
+ }
+ mic->mbmi.mode = mic->bmi[3].as_mode;  // block mode mirrors 4x4 unit 3
+
+ // Add in the cost of the transform type
+ if (!xd->lossless[mic->mbmi.segment_id]) {
+ int rate_tx_type = 0;
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(TX_4X4, bsize, 0) > 1) {
+ const int eset = get_ext_tx_set(TX_4X4, bsize, 0);
+ rate_tx_type =
+ cpi->intra_tx_type_costs[eset][TX_4X4][mic->mbmi.mode][mic->mbmi
+ .tx_type];
+ }
+#else
+ rate_tx_type = cpi->intra_tx_type_costs
+ [TX_4X4][intra_mode_to_tx_type_context[mic->mbmi.mode]]
+ [mic->mbmi.tx_type];
+#endif
+ assert(mic->mbmi.tx_size == TX_4X4);
+ cost += rate_tx_type;
+ tot_rate_y += rate_tx_type;
+ }
+
+ *rate = cost;
+ *rate_y = tot_rate_y;
+ *distortion = total_distortion;
+
+ return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
+}
+
+#if CONFIG_EXT_INTRA
+// Tries unmasked ext intra modes; returns 1 if one beats *best_rd, else 0.
+static int rd_pick_ext_intra_sby(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+ int *rate_tokenonly, int64_t *distortion,
+ int *skippable, BLOCK_SIZE bsize,
+ int mode_cost, int64_t *best_rd,
+ uint16_t skip_mask) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mic = xd->mi[0];
+ MB_MODE_INFO *mbmi = &mic->mbmi;
+ int this_rate, this_rate_tokenonly, s;
+ int ext_intra_selected_flag = 0;
+ int64_t this_distortion, this_rd;
+ EXT_INTRA_MODE mode;
+ TX_SIZE best_tx_size = TX_4X4;
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ TX_TYPE best_tx_type;  // assigned before use whenever the flag is set
+
+ vp10_zero(ext_intra_mode_info);
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 1;
+ mbmi->mode = DC_PRED;
+ mbmi->palette_mode_info.palette_size[0] = 0;
+
+ for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
+ if (skip_mask & (1 << mode)) continue;  // set bit => mode not searched
+ mbmi->ext_intra_mode_info.ext_intra_mode[0] = mode;
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
+ bsize, *best_rd);
+ if (this_rate_tokenonly == INT_MAX) continue;
+
+ this_rate = this_rate_tokenonly +
+ vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 1) +
+ write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ best_tx_size = mic->mbmi.tx_size;
+ ext_intra_mode_info = mbmi->ext_intra_mode_info;
+ best_tx_type = mic->mbmi.tx_type;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ ext_intra_selected_flag = 1;
+ }
+ }
+
+ if (ext_intra_selected_flag) {  // write the winner's settings back to mbmi
+ mbmi->mode = DC_PRED;
+ mbmi->tx_size = best_tx_size;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] =
+ ext_intra_mode_info.use_ext_intra_mode[0];
+ mbmi->ext_intra_mode_info.ext_intra_mode[0] =
+ ext_intra_mode_info.ext_intra_mode[0];
+ mbmi->tx_type = best_tx_type;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+// Evaluates mbmi's current angle/filter; records it in best_* if RD improves.
+static void pick_intra_angle_routine_sby(
+ VP10_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable, int *best_angle_delta,
+ TX_SIZE *best_tx_size, TX_TYPE *best_tx_type, INTRA_FILTER *best_filter,
+ BLOCK_SIZE bsize, int rate_overhead, int64_t *best_rd) {
+ int this_rate, this_rate_tokenonly, s;
+ int64_t this_distortion, this_rd;
+ MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
+ bsize, *best_rd);
+ if (this_rate_tokenonly == INT_MAX) return;  // outputs left untouched
+
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ *best_angle_delta = mbmi->angle_delta[0];
+ *best_tx_size = mbmi->tx_size;
+ *best_filter = mbmi->intra_filter;
+ *best_tx_type = mbmi->tx_type;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+}
+// Searches angle_delta and intra filter for mbmi->mode; returns the best RD.
+static int64_t rd_pick_intra_angle_sby(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+ int *rate_tokenonly, int64_t *distortion,
+ int *skippable, BLOCK_SIZE bsize,
+ int rate_overhead, int64_t best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mic = xd->mi[0];
+ MB_MODE_INFO *mbmi = &mic->mbmi;
+ int this_rate, this_rate_tokenonly, s;
+ int angle_delta, best_angle_delta = 0, p_angle;
+ const int intra_filter_ctx = vp10_get_pred_context_intra_interp(xd);
+ INTRA_FILTER filter, best_filter = INTRA_FILTER_LINEAR;
+ const double rd_adjust = 1.2;  // slack given to the first (delta 0) probe
+ int64_t this_distortion, this_rd;
+ TX_SIZE best_tx_size = mic->mbmi.tx_size;
+ TX_TYPE best_tx_type = mbmi->tx_type;
+
+ if (ANGLE_FAST_SEARCH) {
+ int deltas_level1[3] = { 0, -2, 2 };  // coarse deltas, tried first
+ int deltas_level2[3][2] = {  // refinements, indexed by winning level1 slot
+ { -1, 1 }, { -3, -1 }, { 1, 3 },
+ };
+ const int level1 = 3, level2 = 2;
+ int i, j, best_i = -1;
+
+ for (i = 0; i < level1; ++i) {
+ mic->mbmi.angle_delta[0] = deltas_level1[i];
+ p_angle =
+ mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+ for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
+ int64_t tmp_best_rd;
+ if ((FILTER_FAST_SEARCH || !vp10_is_intra_filter_switchable(p_angle)) &&
+ filter != INTRA_FILTER_LINEAR)
+ continue;
+ mic->mbmi.intra_filter = filter;
+ tmp_best_rd =
+ (i == 0 && filter == INTRA_FILTER_LINEAR && best_rd < INT64_MAX)
+ ? (int64_t)(best_rd * rd_adjust)
+ : best_rd;
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
+ NULL, bsize, tmp_best_rd);
+ if (this_rate_tokenonly == INT_MAX) {
+ if (i == 0 && filter == INTRA_FILTER_LINEAR)
+ return best_rd;  // first probe failed: abandon this mode
+ else
+ continue;
+ }
+ this_rate = this_rate_tokenonly + rate_overhead +
+ cpi->intra_filter_cost[intra_filter_ctx][filter];
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (i == 0 && filter == INTRA_FILTER_LINEAR && best_rd < INT64_MAX &&
+ this_rd > best_rd * rd_adjust)
+ return best_rd;  // first probe too far off: abandon this mode
+ if (this_rd < best_rd) {
+ best_i = i;
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[0];
+ best_tx_size = mbmi->tx_size;
+ best_filter = mbmi->intra_filter;
+ best_tx_type = mbmi->tx_type;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ }
+
+ if (best_i >= 0) {  // refine only around a level1 delta that improved RD
+ for (j = 0; j < level2; ++j) {
+ mic->mbmi.angle_delta[0] = deltas_level2[best_i][j];
+ p_angle =
+ mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+ for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
+ mic->mbmi.intra_filter = filter;
+ if ((FILTER_FAST_SEARCH ||
+ !vp10_is_intra_filter_switchable(p_angle)) &&
+ filter != INTRA_FILTER_LINEAR)
+ continue;
+ pick_intra_angle_routine_sby(
+ cpi, x, rate, rate_tokenonly, distortion, skippable,
+ &best_angle_delta, &best_tx_size, &best_tx_type, &best_filter,
+ bsize,
+ rate_overhead + cpi->intra_filter_cost[intra_filter_ctx][filter],
+ &best_rd);
+ }
+ }
+ }
+ } else {
+ for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS;
+ ++angle_delta) {  // exhaustive delta search
+ mbmi->angle_delta[0] = angle_delta;
+ p_angle =
+ mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+ for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
+ mic->mbmi.intra_filter = filter;
+ if ((FILTER_FAST_SEARCH || !vp10_is_intra_filter_switchable(p_angle)) &&
+ filter != INTRA_FILTER_LINEAR)
+ continue;
+ pick_intra_angle_routine_sby(
+ cpi, x, rate, rate_tokenonly, distortion, skippable,
+ &best_angle_delta, &best_tx_size, &best_tx_type, &best_filter,
+ bsize,
+ rate_overhead + cpi->intra_filter_cost[intra_filter_ctx][filter],
+ &best_rd);
+ }
+ }
+ }
+
+ if (FILTER_FAST_SEARCH && *rate_tokenonly < INT_MAX) {
+ mbmi->angle_delta[0] = best_angle_delta;  // filters tried at best delta only
+ p_angle = mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+ if (vp10_is_intra_filter_switchable(p_angle)) {
+ for (filter = INTRA_FILTER_LINEAR + 1; filter < INTRA_FILTERS; ++filter) {
+ mic->mbmi.intra_filter = filter;
+ pick_intra_angle_routine_sby(
+ cpi, x, rate, rate_tokenonly, distortion, skippable,
+ &best_angle_delta, &best_tx_size, &best_tx_type, &best_filter,
+ bsize,
+ rate_overhead + cpi->intra_filter_cost[intra_filter_ctx][filter],
+ &best_rd);
+ }
+ }
+ }
+
+ mbmi->tx_size = best_tx_size;  // leave mbmi holding the winning settings
+ mbmi->angle_delta[0] = best_angle_delta;
+ mic->mbmi.intra_filter = best_filter;
+ mbmi->tx_type = best_tx_type;
+ return best_rd;
+}
+
+// Angle bin by sign, integer part and fraction (1/16 units) of |dx| / |dy|.
+static const uint8_t gradient_to_angle_bin[2][7][16] = {
+ {
+ { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+ { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
+ { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
+ },
+ {
+ { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 },
+ { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 },
+ { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
+ { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
+ { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
+ { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
+ { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
+ },
+};
+// Gradient-histogram bin of each intra mode, as used by angle_estimation().
+static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
+ 0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
+};
+// Flags directional modes whose gradient-histogram support is too weak.
+static void angle_estimation(const uint8_t *src, int src_stride, int rows,
+ int cols, uint8_t *directional_mode_skip_mask) {
+ int i, r, c, index, dx, dy, temp, sn, remd, quot;
+ uint64_t hist[DIRECTIONAL_MODES];
+ uint64_t hist_sum = 0;
+
+ memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
+ src += src_stride;  // start at row 1: each pixel needs the one above it
+ for (r = 1; r < rows; ++r) {
+ for (c = 1; c < cols; ++c) {
+ dx = src[c] - src[c - 1];
+ dy = src[c] - src[c - src_stride];
+ temp = dx * dx + dy * dy;  // squared gradient magnitude, used as weight
+ if (dy == 0) {
+ index = 2;  // fixed bin; also avoids dividing by dy below
+ } else {
+ sn = (dx > 0) ^ (dy > 0);
+ dx = abs(dx);
+ dy = abs(dy);
+ remd = dx % dy;
+ quot = dx / dy;
+ remd = remd * 16 / dy;  // fractional part of dx / dy in 1/16 units
+ index = gradient_to_angle_bin[sn][VPXMIN(quot, 6)][VPXMIN(remd, 15)];
+ }
+ hist[index] += temp;
+ }
+ src += src_stride;
+ }
+
+ for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
+ for (i = 0; i < INTRA_MODES; ++i) {
+ if (i != DC_PRED && i != TM_PRED) {
+ int index = mode_to_angle_bin[i];
+ uint64_t score = 2 * hist[index];  // own bin counts double
+ int weight = 2;
+ if (index > 0) {
+ score += hist[index - 1];
+ weight += 1;
+ }
+ if (index < DIRECTIONAL_MODES - 1) {
+ score += hist[index + 1];
+ weight += 1;
+ }
+ if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
+ directional_mode_skip_mask[i] = 1;  // mask entries are only ever set
+ }
+ }
+}
+// 16-bit counterpart of angle_estimation(); src8 goes via CONVERT_TO_SHORTPTR.
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
+ int rows, int cols,
+ uint8_t *directional_mode_skip_mask) {
+ int i, r, c, index, dx, dy, temp, sn, remd, quot;
+ uint64_t hist[DIRECTIONAL_MODES];
+ uint64_t hist_sum = 0;
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+
+ memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
+ src += src_stride;  // start at row 1: each pixel needs the one above it
+ for (r = 1; r < rows; ++r) {
+ for (c = 1; c < cols; ++c) {
+ dx = src[c] - src[c - 1];
+ dy = src[c] - src[c - src_stride];
+ temp = dx * dx + dy * dy;  // squared gradient magnitude, used as weight
+ if (dy == 0) {
+ index = 2;  // fixed bin; also avoids dividing by dy below
+ } else {
+ sn = (dx > 0) ^ (dy > 0);
+ dx = abs(dx);
+ dy = abs(dy);
+ remd = dx % dy;
+ quot = dx / dy;
+ remd = remd * 16 / dy;  // fractional part of dx / dy in 1/16 units
+ index = gradient_to_angle_bin[sn][VPXMIN(quot, 6)][VPXMIN(remd, 15)];
+ }
+ hist[index] += temp;
+ }
+ src += src_stride;
+ }
+
+ for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
+ for (i = 0; i < INTRA_MODES; ++i) {
+ if (i != DC_PRED && i != TM_PRED) {
+ int index = mode_to_angle_bin[i];
+ uint64_t score = 2 * hist[index];  // own bin counts double
+ int weight = 2;
+ if (index > 0) {
+ score += hist[index - 1];
+ weight += 1;
+ }
+ if (index < DIRECTIONAL_MODES - 1) {
+ score += hist[index + 1];
+ weight += 1;
+ }
+ if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
+ directional_mode_skip_mask[i] = 1;  // mask entries are only ever set
+ }
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_INTRA
+
+// This function is used only for intra_only frames
+static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+ int *rate_tokenonly, int64_t *distortion,
+ int *skippable, BLOCK_SIZE bsize,
+ int64_t best_rd) {
+ uint8_t mode_idx;
+ PREDICTION_MODE mode_selected = DC_PRED;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mic = xd->mi[0];
+ int this_rate, this_rate_tokenonly, s;
+ int64_t this_distortion, this_rd;
+ TX_SIZE best_tx = TX_4X4;
+#if CONFIG_EXT_INTRA
+ const int intra_filter_ctx = vp10_get_pred_context_intra_interp(xd);
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ int is_directional_mode, rate_overhead, best_angle_delta = 0;
+ INTRA_FILTER best_filter = INTRA_FILTER_LINEAR;
+ uint8_t directional_mode_skip_mask[INTRA_MODES];
+ uint16_t filter_intra_mode_skip_mask = (1 << FILTER_INTRA_MODES) - 1;
+ const int src_stride = x->plane[0].src.stride;
+ const uint8_t *src = x->plane[0].src.buf;
+ int beat_best_rd = 0;
+#endif // CONFIG_EXT_INTRA
+ TX_TYPE best_tx_type = DCT_DCT;
+ int *bmode_costs;
+ PALETTE_MODE_INFO palette_mode_info;
+ PALETTE_MODE_INFO *const pmi = &mic->mbmi.palette_mode_info;
+ uint8_t *best_palette_color_map =
+ cpi->common.allow_screen_content_tools
+ ? x->palette_buffer->best_palette_color_map
+ : NULL;
+ const int rows = 4 * num_4x4_blocks_high_lookup[bsize];
+ const int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
+ int palette_ctx = 0;
+ const MODE_INFO *above_mi = xd->above_mi;
+ const MODE_INFO *left_mi = xd->left_mi;
+ const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0);
+ const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
+ const PREDICTION_MODE FINAL_MODE_SEARCH = TM_PRED + 1;
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ bmode_costs = cpi->y_mode_costs[A][L];
+
+#if CONFIG_EXT_INTRA
+ ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mic->mbmi.angle_delta[0] = 0;
+ memset(directional_mode_skip_mask, 0,
+ sizeof(directional_mode_skip_mask[0]) * INTRA_MODES);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ highbd_angle_estimation(src, src_stride, rows, cols,
+ directional_mode_skip_mask);
+ else
+#endif
+ angle_estimation(src, src_stride, rows, cols, directional_mode_skip_mask);
+#endif // CONFIG_EXT_INTRA
+ palette_mode_info.palette_size[0] = 0;
+ pmi->palette_size[0] = 0;
+ if (above_mi)
+ palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ if (left_mi)
+ palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+
+ if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
+ x->use_default_intra_tx_type = 1;
+ else
+ x->use_default_intra_tx_type = 0;
+
+ /* Y Search for intra prediction mode */
+ for (mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
+ if (mode_idx == FINAL_MODE_SEARCH) {
+ if (x->use_default_intra_tx_type == 0) break;
+ mic->mbmi.mode = mode_selected;
+ x->use_default_intra_tx_type = 0;
+ } else {
+ mic->mbmi.mode = mode_idx;
+ }
+#if CONFIG_EXT_INTRA
+ is_directional_mode =
+ (mic->mbmi.mode != DC_PRED && mic->mbmi.mode != TM_PRED);
+ if (is_directional_mode && directional_mode_skip_mask[mic->mbmi.mode])
+ continue;
+ if (is_directional_mode) {
+ rate_overhead = bmode_costs[mic->mbmi.mode] +
+ write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
+ this_rate_tokenonly = INT_MAX;
+ this_rd = rd_pick_intra_angle_sby(cpi, x, &this_rate,
+ &this_rate_tokenonly, &this_distortion,
+ &s, bsize, rate_overhead, best_rd);
+ } else {
+ mic->mbmi.angle_delta[0] = 0;
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
+ bsize, best_rd);
+ }
+#else
+ super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
+ bsize, best_rd);
+#endif // CONFIG_EXT_INTRA
+
+ if (this_rate_tokenonly == INT_MAX) continue;
+
+ this_rate = this_rate_tokenonly + bmode_costs[mic->mbmi.mode];
+
+ if (!xd->lossless[xd->mi[0]->mbmi.segment_id]) {
+ // super_block_yrd above includes the cost of the tx_size in the
+ // tokenonly rate, but for intra blocks, tx_size is always coded
+ // (prediction granularity), so we account for it in the full rate,
+ // not the tokenonly rate.
+ this_rate_tokenonly -=
+ cpi->tx_size_cost[max_tx_size -
+ TX_8X8][get_tx_size_context(xd)][mic->mbmi.tx_size];
+ }
+ if (cpi->common.allow_screen_content_tools && mic->mbmi.mode == DC_PRED)
+ this_rate += vp10_cost_bit(
+ vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
+#if CONFIG_EXT_INTRA
+ if (mic->mbmi.mode == DC_PRED && ALLOW_FILTER_INTRA_MODES)
+ this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[0], 0);
+ if (is_directional_mode) {
+ int p_angle;
+ this_rate +=
+ write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mic->mbmi.angle_delta[0]);
+ p_angle = mode_to_angle_map[mic->mbmi.mode] +
+ mic->mbmi.angle_delta[0] * ANGLE_STEP;
+ if (vp10_is_intra_filter_switchable(p_angle))
+ this_rate +=
+ cpi->intra_filter_cost[intra_filter_ctx][mic->mbmi.intra_filter];
+ }
+#endif // CONFIG_EXT_INTRA
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+#if CONFIG_EXT_INTRA
+ if (best_rd == INT64_MAX || this_rd < (best_rd + (best_rd >> 4))) {
+ filter_intra_mode_skip_mask ^= (1 << mic->mbmi.mode);
+ }
+#endif // CONFIG_EXT_INTRA
+
+ if (this_rd < best_rd) {
+ mode_selected = mic->mbmi.mode;
+ best_rd = this_rd;
+ best_tx = mic->mbmi.tx_size;
+#if CONFIG_EXT_INTRA
+ best_angle_delta = mic->mbmi.angle_delta[0];
+ best_filter = mic->mbmi.intra_filter;
+ beat_best_rd = 1;
+#endif // CONFIG_EXT_INTRA
+ best_tx_type = mic->mbmi.tx_type;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+
+ if (cpi->common.allow_screen_content_tools)
+ rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED],
+ &palette_mode_info, best_palette_color_map,
+ &best_tx, &best_tx_type, &mode_selected,
+ &best_rd);
+
+#if CONFIG_EXT_INTRA
+ if (ALLOW_FILTER_INTRA_MODES && beat_best_rd) {
+ if (rd_pick_ext_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
+ skippable, bsize, bmode_costs[DC_PRED], &best_rd,
+ filter_intra_mode_skip_mask)) {
+ mode_selected = mic->mbmi.mode;
+ best_tx = mic->mbmi.tx_size;
+ ext_intra_mode_info = mic->mbmi.ext_intra_mode_info;
+ best_tx_type = mic->mbmi.tx_type;
+ }
+ }
+
+ mic->mbmi.ext_intra_mode_info.use_ext_intra_mode[0] =
+ ext_intra_mode_info.use_ext_intra_mode[0];
+ if (ext_intra_mode_info.use_ext_intra_mode[0]) {
+ mic->mbmi.ext_intra_mode_info.ext_intra_mode[0] =
+ ext_intra_mode_info.ext_intra_mode[0];
+ palette_mode_info.palette_size[0] = 0;
+ }
+#endif // CONFIG_EXT_INTRA
+
+ mic->mbmi.mode = mode_selected;
+ mic->mbmi.tx_size = best_tx;
+#if CONFIG_EXT_INTRA
+ mic->mbmi.angle_delta[0] = best_angle_delta;
+ mic->mbmi.intra_filter = best_filter;
+#endif // CONFIG_EXT_INTRA
+ mic->mbmi.tx_type = best_tx_type;
+ pmi->palette_size[0] = palette_mode_info.palette_size[0];
+ if (palette_mode_info.palette_size[0] > 0) {
+ memcpy(pmi->palette_colors, palette_mode_info.palette_colors,
+ PALETTE_MAX_SIZE * sizeof(palette_mode_info.palette_colors[0]));
+ memcpy(xd->plane[0].color_index_map, best_palette_color_map,
+ rows * cols * sizeof(best_palette_color_map[0]));
+ }
+
+ return best_rd;
+}
+
+#if CONFIG_VAR_TX
+// Evaluates a single transform block of `plane` located at (blk_row, blk_col),
+// both given in 4x4 units (they are multiplied by 4 to index pixels below).
+//
+// The block is forward transformed and quantized, the coefficients are run
+// through vp10_optimize_b(), and the results are accumulated into the output
+// arguments:
+//   *rate  += coefficient cost reported by cost_coeffs()
+//   *dist  += 16 * error between the source and the reconstruction
+//   *bsse  += 16 * sum of squared residual samples
+//   *skip  &= (the block has no coefficients, i.e. eob == 0)
+// All four outputs are updated in place and are never reset here, so the
+// caller must initialize them.
+void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
+ int blk_row, int blk_col, int plane, int block,
+ int plane_bsize, int coeff_ctx, int *rate,
+ int64_t *dist, int64_t *bsse, int *skip) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ int64_t tmp;
+ tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+ TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+ const scan_order *const scan_order =
+ get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
+
+ BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
+ // bh is taken from the width lookup and is used below as both the width and
+ // the height (in pixels) of the transform block.
+ int bh = 4 * num_4x4_blocks_wide_lookup[txm_bsize];
+ int src_stride = p->src.stride;
+ uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col];
+ uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
+#if CONFIG_VP9_HIGHBITDEPTH
+ // One 16-bit scratch buffer serves both paths; rec_buffer is pointed at it
+ // further down, either via CONVERT_TO_BYTEPTR or as plain bytes.
+ DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
+ uint8_t *rec_buffer;
+#else
+ DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+ const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+
+ // Limits of the plane block in 4x4 units; reduced below when
+ // mb_to_bottom_edge / mb_to_right_edge are negative.
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+#if CONFIG_EXT_TX
+ assert(tx_size < TX_SIZES);
+#endif // CONFIG_EXT_TX
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+#if CONFIG_NEW_QUANT
+ vp10_xform_quant_fp_nuq(x, plane, block, blk_row, blk_col, plane_bsize,
+ tx_size, coeff_ctx);
+#else
+ vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
+ VP10_XFORM_QUANT_FP);
+#endif // CONFIG_NEW_QUANT
+
+ vp10_optimize_b(x, plane, block, tx_size, coeff_ctx);
+
+// TODO(any): Use dist_block to compute distortion
+ // Copy the bh x bh pixels at dst into the scratch buffer (stride
+ // MAX_TX_SIZE); the inverse transform further down adds onto this copy.
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16);
+ vpx_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL,
+ 0, NULL, 0, bh, bh, xd->bd);
+ } else {
+ rec_buffer = (uint8_t *)rec_buffer16;
+ vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0,
+ NULL, 0, bh, bh);
+ }
+#else
+ vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0, NULL,
+ 0, bh, bh);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // Sum of squared residual samples. When the transform block reaches past
+ // the limits computed above, only the in-range part is summed, in 8x8 pixel
+ // tiles (idx/idy advance by two 4x4 units per step).
+ if (blk_row + (bh >> 2) > max_blocks_high ||
+ blk_col + (bh >> 2) > max_blocks_wide) {
+ int idx, idy;
+ int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
+ int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
+ tmp = 0;
+ for (idy = 0; idy < blocks_height; idy += 2) {
+ for (idx = 0; idx < blocks_width; idx += 2) {
+ const int16_t *d = diff + 4 * idy * diff_stride + 4 * idx;
+ tmp += vpx_sum_squares_2d_i16(d, diff_stride, 8);
+ }
+ }
+ } else {
+ tmp = vpx_sum_squares_2d_i16(diff, diff_stride, bh);
+ }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ // Scale the high bit-depth sum down by 2 * (bd - 8) bits.
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ *bsse += tmp * 16;
+
+ // Reconstruct only when there are coefficients. With eob == 0 `tmp` keeps
+ // the residual energy from above, so *dist receives the same amount as
+ // *bsse.
+ if (p->eobs[block] > 0) {
+ INV_TXFM_PARAM inv_txfm_param;
+ inv_txfm_param.tx_type = tx_type;
+ inv_txfm_param.tx_size = tx_size;
+ inv_txfm_param.eob = p->eobs[block];
+ inv_txfm_param.lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ inv_txfm_param.bd = xd->bd;
+ highbd_inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param);
+ } else {
+ inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param);
+ }
+#else // CONFIG_VP9_HIGHBITDEPTH
+ inv_txfm_add(dqcoeff, rec_buffer, MAX_TX_SIZE, &inv_txfm_param);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // Compare source and reconstruction using the value the variance function
+ // writes through its last argument (presumably the SSE - confirm against
+ // the vf implementations). The same 8x8 tiling as above is used when the
+ // block reaches past the limits.
+ if ((bh >> 2) + blk_col > max_blocks_wide ||
+ (bh >> 2) + blk_row > max_blocks_high) {
+ int idx, idy;
+ unsigned int this_dist;
+ int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row);
+ int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col);
+ tmp = 0;
+ for (idy = 0; idy < blocks_height; idy += 2) {
+ for (idx = 0; idx < blocks_width; idx += 2) {
+ uint8_t *const s = src + 4 * idy * src_stride + 4 * idx;
+ uint8_t *const r = rec_buffer + 4 * idy * MAX_TX_SIZE + 4 * idx;
+ cpi->fn_ptr[BLOCK_8X8].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);
+ tmp += this_dist;
+ }
+ }
+ } else {
+ uint32_t this_dist;
+ cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE,
+ &this_dist);
+ tmp = this_dist;
+ }
+ }
+ *dist += tmp * 16;
+ *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size, scan_order->scan,
+ scan_order->neighbors, 0);
+ *skip &= (p->eobs[block] == 0);
+}
+
+// Recursively chooses the transform partitioning for the block at
+// (blk_row, blk_col) of `plane`: either one transform of `tx_size`, or a split
+// into four blocks of size `tx_size - 1`, whichever has the lower RD cost.
+//
+// Outputs: *rate, *dist, *bsse and *skip are overwritten with the figures of
+// the chosen option. *is_cost_valid is cleared when ref_best_rd is negative on
+// entry (in which case the other outputs are left untouched) or when the
+// chosen option has an RD cost of INT64_MAX.
+//
+// Side effects when the unsplit option wins: the entropy contexts in ta/tl,
+// the transform contexts in tx_above/tx_left, mbmi->inter_tx_size,
+// mbmi->tx_size and x->blk_skip are updated for the area covered.
+static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x, int blk_row,
+ int blk_col, int plane, int block, TX_SIZE tx_size,
+ BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta,
+ ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above,
+ TXFM_CONTEXT *tx_left, int *rate, int64_t *dist,
+ int64_t *bsse, int *skip, int64_t ref_best_rd,
+ int *is_cost_valid) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ // View of mbmi->inter_tx_size whose [0][0] element is the entry for this
+ // block, so the area covered can be filled with relative indices.
+ TX_SIZE (*const inter_tx_size)
+ [MAX_MIB_SIZE] =
+ (TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
+ const int bw = num_4x4_blocks_wide_lookup[plane_bsize];
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = bw;
+ int64_t this_rd = INT64_MAX;
+ ENTROPY_CONTEXT *pta = ta + blk_col;
+ ENTROPY_CONTEXT *ptl = tl + blk_row;
+ ENTROPY_CONTEXT stxa = 0, stxl = 0;
+ int coeff_ctx, i;
+ int ctx = txfm_partition_context(tx_above + (blk_col >> 1),
+ tx_left + (blk_row >> 1), tx_size);
+
+ // The split option starts with the cost of coding bit 1 of the partition
+ // flag; the unsplit option below adds the cost of bit 0 instead.
+ int64_t sum_dist = 0, sum_bsse = 0;
+ int64_t sum_rd = INT64_MAX;
+ int sum_rate = vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);
+ int all_skip = 1;
+ int tmp_eob = 0;
+ int zero_blk_rate;
+
+#if CONFIG_EXT_TX
+ assert(tx_size < TX_SIZES);
+#endif // CONFIG_EXT_TX
+
+ if (ref_best_rd < 0) {
+ *is_cost_valid = 0;
+ return;
+ }
+
+ // Collapse the per-4x4 entropy contexts covered by this transform into a
+ // single zero/non-zero flag by reading 1, 2, 4 or 8 of them as one integer.
+ switch (tx_size) {
+ case TX_4X4:
+ stxa = pta[0];
+ stxl = ptl[0];
+ break;
+ case TX_8X8:
+ stxa = !!*(const uint16_t *)&pta[0];
+ stxl = !!*(const uint16_t *)&ptl[0];
+ break;
+ case TX_16X16:
+ stxa = !!*(const uint32_t *)&pta[0];
+ stxl = !!*(const uint32_t *)&ptl[0];
+ break;
+ case TX_32X32:
+ stxa = !!*(const uint64_t *)&pta[0];
+ stxl = !!*(const uint64_t *)&ptl[0];
+ break;
+ default: assert(0 && "Invalid transform size."); break;
+ }
+ coeff_ctx = combine_entropy_contexts(stxa, stxl);
+
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ *rate = 0;
+ *dist = 0;
+ *bsse = 0;
+ *skip = 1;
+
+ // Blocks entirely outside the adjusted limits contribute nothing.
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+
+ zero_blk_rate =
+ x->token_costs[tx_size][pd->plane_type][1][0][0][coeff_ctx][EOB_TOKEN];
+
+ // Option 1: code the whole block with a single transform of tx_size. When
+ // tx_mode is not TX_MODE_SELECT this is only evaluated for TX_4X4, leaving
+ // this_rd at INT64_MAX for larger sizes.
+ if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
+ inter_tx_size[0][0] = tx_size;
+ vp10_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
+ plane_bsize, coeff_ctx, rate, dist, bsse, skip);
+
+ // Unless the segment is lossless, force an empty block when coding the
+ // coefficients is no cheaper than signalling an immediate end-of-block,
+ // or when the block already has no coefficients.
+ if ((RDCOST(x->rdmult, x->rddiv, *rate, *dist) >=
+ RDCOST(x->rdmult, x->rddiv, zero_blk_rate, *bsse) ||
+ *skip == 1) &&
+ !xd->lossless[mbmi->segment_id]) {
+ *rate = zero_blk_rate;
+ *dist = *bsse;
+ *skip = 1;
+ x->blk_skip[plane][blk_row * bw + blk_col] = 1;
+ p->eobs[block] = 0;
+ } else {
+ x->blk_skip[plane][blk_row * bw + blk_col] = 0;
+ *skip = 0;
+ }
+
+ if (tx_size > TX_4X4)
+ *rate += vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
+ this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
+ tmp_eob = p->eobs[block];
+ }
+
+ // Option 2: split into four quadrants of the next smaller transform size and
+ // recurse, passing down the RD budget that is still left.
+ if (tx_size > TX_4X4) {
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ int bsl = b_height_log2_lookup[bsize];
+ int sub_step = num_4x4_blocks_txsize_lookup[tx_size - 1];
+ int i;
+ int this_rate;
+ int64_t this_dist;
+ int64_t this_bsse;
+ int this_skip;
+ int this_cost_valid = 1;
+ int64_t tmp_rd = 0;
+
+#if CONFIG_EXT_TX
+ assert(tx_size < TX_SIZES);
+#endif // CONFIG_EXT_TX
+ --bsl;
+ for (i = 0; i < 4 && this_cost_valid; ++i) {
+ int offsetr = (i >> 1) << bsl;
+ int offsetc = (i & 0x01) << bsl;
+ select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
+ block + i * sub_step, tx_size - 1, plane_bsize, ta, tl,
+ tx_above, tx_left, &this_rate, &this_dist, &this_bsse,
+ &this_skip, ref_best_rd - tmp_rd, &this_cost_valid);
+ sum_rate += this_rate;
+ sum_dist += this_dist;
+ sum_bsse += this_bsse;
+ all_skip &= this_skip;
+ tmp_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ // Stop as soon as the partial split cost exceeds the unsplit cost.
+ if (this_rd < tmp_rd) break;
+ }
+ if (this_cost_valid) sum_rd = tmp_rd;
+ }
+
+ if (this_rd < sum_rd) {
+ // The unsplit option wins: publish its contexts and transform size over
+ // the whole area it covers. The recursion above may have overwritten
+ // these while evaluating the split.
+ int idx, idy;
+ for (i = 0; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i)
+ pta[i] = !(tmp_eob == 0);
+ for (i = 0; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i)
+ ptl[i] = !(tmp_eob == 0);
+ txfm_partition_update(tx_above + (blk_col >> 1), tx_left + (blk_row >> 1),
+ tx_size);
+ inter_tx_size[0][0] = tx_size;
+ for (idy = 0; idy < num_4x4_blocks_high_txsize_lookup[tx_size] / 2; ++idy)
+ for (idx = 0; idx < num_4x4_blocks_wide_txsize_lookup[tx_size] / 2; ++idx)
+ inter_tx_size[idy][idx] = tx_size;
+ mbmi->tx_size = tx_size;
+ if (this_rd == INT64_MAX) *is_cost_valid = 0;
+ x->blk_skip[plane][blk_row * bw + blk_col] = *skip;
+ } else {
+ // The split option wins (or neither option produced a valid cost).
+ *rate = sum_rate;
+ *dist = sum_dist;
+ *bsse = sum_bsse;
+ *skip = all_skip;
+ if (sum_rd == INT64_MAX) *is_cost_valid = 0;
+ }
+}
+
+// Searches the transform partitioning of plane 0 by running select_tx_block()
+// on every largest-transform unit of the block and summing the per-unit
+// results into *rate, *distortion, *sse and *skippable.
+//
+// The outputs are replaced by INT_MAX / INT64_MAX / INT64_MAX / 0 when
+// ref_best_rd is negative, when select_tx_block() reports an invalid cost, or
+// when the cheaper of "code the block" and "skip the block" still costs more
+// than ref_best_rd.
+static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+                            int64_t *distortion, int *skippable, int64_t *sse,
+                            BLOCK_SIZE bsize, int64_t ref_best_rd) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  int is_cost_valid = (ref_best_rd >= 0);
+  int64_t spent_rd = 0;
+  int64_t total_rd;
+
+  *rate = 0;
+  *distortion = 0;
+  *sse = 0;
+  *skippable = 1;
+
+  if (is_cost_valid) {
+    const struct macroblockd_plane *const pd = &xd->plane[0];
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+    const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+    const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+    // Side length, in 4x4 units, of one largest-transform unit.
+    const int unit = num_4x4_blocks_wide_lookup[txsize_to_bsize[max_tx_size]];
+    const int block_step = 1 << (max_tx_size * 2);
+    ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
+    ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
+    TXFM_CONTEXT tx_above[MAX_MIB_SIZE];
+    TXFM_CONTEXT tx_left[MAX_MIB_SIZE];
+    // Per-unit results. These deliberately live outside the loops:
+    // select_tx_block() leaves them untouched when its budget is negative.
+    int unit_rate = 0, unit_skip = 1;
+    int64_t unit_dist = 0, unit_sse = 0;
+    int block = 0;
+    int row, col;
+
+    // Work on local copies of the contexts so the search does not disturb
+    // the ones stored in xd.
+    vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl);
+    memcpy(tx_above, xd->above_txfm_context,
+           sizeof(TXFM_CONTEXT) * (blocks_wide >> 1));
+    memcpy(tx_left, xd->left_txfm_context,
+           sizeof(TXFM_CONTEXT) * (blocks_high >> 1));
+
+    for (row = 0; row < blocks_high; row += unit) {
+      for (col = 0; col < blocks_wide; col += unit) {
+        select_tx_block(cpi, x, row, col, 0, block, max_tx_size, plane_bsize,
+                        ctxa, ctxl, tx_above, tx_left, &unit_rate, &unit_dist,
+                        &unit_sse, &unit_skip, ref_best_rd - spent_rd,
+                        &is_cost_valid);
+        *rate += unit_rate;
+        *distortion += unit_dist;
+        *sse += unit_sse;
+        *skippable &= unit_skip;
+        // Charge the cheaper of coding and skipping this unit against the
+        // budget handed to the next one.
+        spent_rd += VPXMIN(RDCOST(x->rdmult, x->rddiv, unit_rate, unit_dist),
+                           RDCOST(x->rdmult, x->rddiv, 0, unit_sse));
+        block += block_step;
+      }
+    }
+  }
+
+  total_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
+                    RDCOST(x->rdmult, x->rddiv, 0, *sse));
+  if (total_rd > ref_best_rd) is_cost_valid = 0;
+
+  if (is_cost_valid) return;
+
+  // No usable result: report sentinel values.
+  *rate = INT_MAX;
+  *distortion = INT64_MAX;
+  *sse = INT64_MAX;
+  *skippable = 0;
+}
+
+// Runs the luma transform-size search with the transform type fixed to
+// `tx_type` (stored into mbmi->tx_type) and returns the resulting RD cost.
+//
+// *rate, *dist, *skippable and *sse receive the results of inter_block_yrd();
+// the transform-type signalling cost is then added to *rate where applicable.
+// Returns INT64_MAX when inter_block_yrd() reports no valid cost.
+static int64_t select_tx_size_fix_type(const VP10_COMP *cpi, MACROBLOCK *x,
+                                       int *rate, int64_t *dist, int *skippable,
+                                       int64_t *sse, BLOCK_SIZE bsize,
+                                       int64_t ref_best_rd, TX_TYPE tx_type) {
+  const VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+  const int is_inter = is_inter_block(mbmi);
+#if CONFIG_EXT_TX
+  const int ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);
+#endif  // CONFIG_EXT_TX
+  const vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
+  const int no_skip_cost = vp10_cost_bit(skip_prob, 0);
+  const int skip_cost = vp10_cost_bit(skip_prob, 1);
+  int lossless;
+  int64_t rd;
+
+  mbmi->tx_type = tx_type;
+  inter_block_yrd(cpi, x, rate, dist, skippable, sse, bsize, ref_best_rd);
+  if (*rate == INT_MAX) return INT64_MAX;
+
+  lossless = xd->lossless[mbmi->segment_id];
+
+  // Add the cost of signalling the transform type, when it is signalled.
+#if CONFIG_EXT_TX
+  if (get_ext_tx_types(max_tx_size, bsize, is_inter) > 1 && !lossless) {
+    if (is_inter) {
+      if (ext_tx_set > 0) {
+        *rate +=
+            cpi->inter_tx_type_costs[ext_tx_set][max_tx_size][mbmi->tx_type];
+      }
+    } else if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX) {
+      *rate += cpi->intra_tx_type_costs[ext_tx_set][max_tx_size][mbmi->mode]
+                                       [mbmi->tx_type];
+    }
+  }
+#else   // CONFIG_EXT_TX
+  if (max_tx_size < TX_32X32 && !lossless) {
+    if (is_inter) {
+      *rate += cpi->inter_tx_type_costs[max_tx_size][mbmi->tx_type];
+    } else {
+      *rate += cpi->intra_tx_type_costs
+                   [max_tx_size][intra_mode_to_tx_type_context[mbmi->mode]]
+                   [mbmi->tx_type];
+    }
+  }
+#endif  // CONFIG_EXT_TX
+
+  // A skippable block only pays for the skip flag and keeps the full SSE.
+  if (*skippable) return RDCOST(x->rdmult, x->rddiv, skip_cost, *sse);
+
+  rd = RDCOST(x->rdmult, x->rddiv, *rate + no_skip_cost, *dist);
+  // Lossy inter blocks may still be cheaper to code as skipped.
+  if (is_inter && !lossless)
+    rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, skip_cost, *sse));
+
+  return rd;
+}
+
+// Tries every permitted transform type for the luma plane of the block, runs
+// select_tx_size_fix_type() for each, and keeps the one with the lowest RD
+// cost.
+//
+// On success *rate, *distortion, *skippable and *sse describe the winner, and
+// mbmi->tx_type, mbmi->tx_size, mbmi->inter_tx_size and x->blk_skip[0] are
+// set to the winner's values.
+//
+// When no candidate yields a valid cost the outputs stay at INT_MAX /
+// INT64_MAX / 0 / INT64_MAX, mbmi->tx_type is set to DCT_DCT and
+// mbmi->tx_size to TX_SIZES. In that case mbmi->inter_tx_size and
+// x->blk_skip[0] are left as the last trial wrote them: the local snapshots
+// were never filled, so copying them back would publish uninitialized stack
+// contents.
+static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+                               int64_t *distortion, int *skippable,
+                               int64_t *sse, BLOCK_SIZE bsize,
+                               int64_t ref_best_rd) {
+  const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  int64_t rd = INT64_MAX;
+  int64_t best_rd = INT64_MAX;
+  TX_TYPE tx_type, best_tx_type = DCT_DCT;
+  const int is_inter = is_inter_block(mbmi);
+  TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
+  TX_SIZE best_tx = TX_SIZES;
+  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
+  const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
+  int idx, idy;
+  int prune = 0;
+  // Set once a candidate has been recorded, i.e. once the snapshots above
+  // hold defined data.
+  int found = 0;
+#if CONFIG_EXT_TX
+  int ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);
+#endif  // CONFIG_EXT_TX
+
+  if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
+#if CONFIG_EXT_TX
+    prune = prune_tx_types(cpi, bsize, x, xd, ext_tx_set);
+#else
+    prune = prune_tx_types(cpi, bsize, x, xd, 0);
+#endif
+
+  *distortion = INT64_MAX;
+  *rate = INT_MAX;
+  *skippable = 0;
+  *sse = INT64_MAX;
+
+  for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+    int this_rate = 0;
+    int this_skip = 1;
+    int64_t this_dist = 0;
+    int64_t this_sse = 0;
+    // Skip transform types that are not allowed or were pruned for this block.
+#if CONFIG_EXT_TX
+    if (is_inter) {
+      if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
+      if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
+        if (!do_tx_type_search(tx_type, prune)) continue;
+      }
+    } else {
+      if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
+        if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
+      }
+      if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
+    }
+#else   // CONFIG_EXT_TX
+    if (max_tx_size >= TX_32X32 && tx_type != DCT_DCT) continue;
+    if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
+        !do_tx_type_search(tx_type, prune))
+      continue;
+#endif  // CONFIG_EXT_TX
+    if (is_inter && x->use_default_inter_tx_type &&
+        tx_type != get_default_tx_type(0, xd, 0, max_tx_size))
+      continue;
+
+    rd = select_tx_size_fix_type(cpi, x, &this_rate, &this_dist, &this_skip,
+                                 &this_sse, bsize, ref_best_rd, tx_type);
+
+    if (rd < best_rd) {
+      // Snapshot everything the next trial will overwrite.
+      found = 1;
+      best_rd = rd;
+      *distortion = this_dist;
+      *rate = this_rate;
+      *skippable = this_skip;
+      *sse = this_sse;
+      best_tx_type = mbmi->tx_type;
+      best_tx = mbmi->tx_size;
+      memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
+      for (idy = 0; idy < xd->n8_h; ++idy)
+        for (idx = 0; idx < xd->n8_w; ++idx)
+          best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
+    }
+  }
+
+  mbmi->tx_type = best_tx_type;
+  mbmi->tx_size = best_tx;
+
+  // Restore the winner's per-block data only if a winner exists; otherwise the
+  // snapshots are indeterminate.
+  if (found) {
+    for (idy = 0; idy < xd->n8_h; ++idy)
+      for (idx = 0; idx < xd->n8_w; ++idx)
+        mbmi->inter_tx_size[idy][idx] = best_tx_size[idy][idx];
+    memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
+  }
+}
+
+// Accumulates rate and distortion for the transform block at
+// (blk_row, blk_col) of `plane`, following the transform partitioning already
+// stored in mbmi->inter_tx_size.
+//
+// If `tx_size` equals the stored size for this position (mapped through
+// get_uv_tx_size_impl() for planes other than 0), the block is evaluated with
+// vp10_tx_block_rd_b() and the entropy contexts in above_ctx/left_ctx are
+// updated. Otherwise the function recurses into the four quadrants with
+// `tx_size - 1`. Positions outside the adjusted block limits are ignored.
+//
+// *rate, *dist, *bsse and *skip are accumulated into, never reset, so the
+// caller must initialize them.
+static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x, int blk_row,
+ int blk_col, int plane, int block, TX_SIZE tx_size,
+ BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx,
+ ENTROPY_CONTEXT *left_ctx, int *rate, int64_t *dist,
+ int64_t *bsse, int *skip) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ struct macroblock_plane *const p = &x->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+ const int tx_row = blk_row >> (1 - pd->subsampling_y);
+ const int tx_col = blk_col >> (1 - pd->subsampling_x);
+ TX_SIZE plane_tx_size;
+ int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+ int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+#if CONFIG_EXT_TX
+ assert(tx_size < TX_SIZES);
+#endif // CONFIG_EXT_TX
+
+ // Reduce the limits (in 4x4 units) when mb_to_bottom_edge /
+ // mb_to_right_edge are negative.
+ if (xd->mb_to_bottom_edge < 0)
+ max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+ if (xd->mb_to_right_edge < 0)
+ max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+ if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+
+ plane_tx_size = plane ? get_uv_tx_size_impl(
+ mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0)
+ : mbmi->inter_tx_size[tx_row][tx_col];
+
+ if (tx_size == plane_tx_size) {
+ int coeff_ctx, i;
+ ENTROPY_CONTEXT *ta = above_ctx + blk_col;
+ ENTROPY_CONTEXT *tl = left_ctx + blk_row;
+ // Collapse the per-4x4 contexts covered by this transform into ta[0] /
+ // tl[0] by reading 2, 4 or 8 of them as one integer. The whole span is
+ // rewritten after the block has been evaluated.
+ switch (tx_size) {
+ case TX_4X4: break;
+ case TX_8X8:
+ ta[0] = !!*(const uint16_t *)&ta[0];
+ tl[0] = !!*(const uint16_t *)&tl[0];
+ break;
+ case TX_16X16:
+ ta[0] = !!*(const uint32_t *)&ta[0];
+ tl[0] = !!*(const uint32_t *)&tl[0];
+ break;
+ case TX_32X32:
+ ta[0] = !!*(const uint64_t *)&ta[0];
+ tl[0] = !!*(const uint64_t *)&tl[0];
+ break;
+ default: assert(0 && "Invalid transform size."); break;
+ }
+ coeff_ctx = combine_entropy_contexts(ta[0], tl[0]);
+ vp10_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
+ plane_bsize, coeff_ctx, rate, dist, bsse, skip);
+ // Mark every 4x4 position covered by the transform as zero / non-zero
+ // according to whether the block ended up with coefficients.
+ for (i = 0; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i)
+ ta[i] = !(p->eobs[block] == 0);
+ for (i = 0; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i)
+ tl[i] = !(p->eobs[block] == 0);
+ } else {
+ // The stored partitioning is finer than tx_size: visit the four quadrants
+ // in raster order with the next smaller transform size.
+ int bsl = b_width_log2_lookup[bsize];
+ int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
+ int i;
+
+ assert(bsl > 0);
+ --bsl;
+
+ for (i = 0; i < 4; ++i) {
+ int offsetr = (i >> 1) << bsl;
+ int offsetc = (i & 0x01) << bsl;
+ tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane,
+ block + i * step, tx_size - 1, plane_bsize, above_ctx,
+ left_ctx, rate, dist, bsse, skip);
+ }
+ }
+}
+
+// Computes rate and distortion of planes 1..MAX_MB_PLANE-1 using the transform
+// partitioning stored in the block's mode info (via tx_block_rd()).
+//
+// Return value 0: early termination triggered, no valid rd cost available;
+//              1: rd cost values are valid.
+//
+// On a return of 1, *rate, *distortion, *sse and *skippable hold the totals
+// over the planes processed. On a return of 0 they are set to INT_MAX,
+// INT64_MAX, INT64_MAX and 0. The search stops early when ref_best_rd is
+// negative or when the running RD cost exceeds ref_best_rd.
+static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+                            int64_t *distortion, int *skippable, int64_t *sse,
+                            BLOCK_SIZE bsize, int64_t ref_best_rd) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  int plane;
+  int is_cost_valid = 1;
+  int64_t this_rd;
+
+  if (ref_best_rd < 0) is_cost_valid = 0;
+
+  // Refresh the residual of the planes to be evaluated (inter blocks only).
+  if (is_inter_block(mbmi) && is_cost_valid) {
+    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
+      vp10_subtract_plane(x, bsize, plane);
+  }
+
+  *rate = 0;
+  *distortion = 0;
+  *sse = 0;
+  *skippable = 1;
+
+  // With a negative budget the result is already known to be invalid, so do
+  // not spend a transform/quantization pass on a residual that was never
+  // refreshed.
+  for (plane = 1; is_cost_valid && plane < MAX_MB_PLANE; ++plane) {
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+    const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+    BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]];
+    int bh = num_4x4_blocks_wide_lookup[txb_size];
+    int idx, idy;
+    int block = 0;
+    int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
+    int pnrate = 0, pnskip = 1;
+    int64_t pndist = 0, pnsse = 0;
+    ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE];
+    ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE];
+
+    vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl);
+
+    // Walk the plane in units of the largest transform; tx_block_rd()
+    // descends to the stored transform sizes and accumulates into pn*.
+    for (idy = 0; idy < mi_height; idy += bh) {
+      for (idx = 0; idx < mi_width; idx += bh) {
+        tx_block_rd(cpi, x, idy, idx, plane, block,
+                    max_txsize_lookup[plane_bsize], plane_bsize, ta, tl,
+                    &pnrate, &pndist, &pnsse, &pnskip);
+        block += step;
+      }
+    }
+
+    if (pnrate == INT_MAX) {
+      is_cost_valid = 0;
+      break;
+    }
+
+    *rate += pnrate;
+    *distortion += pndist;
+    *sse += pnsse;
+    *skippable &= pnskip;
+
+    // Compare the cheaper of coding and skipping against the budget.
+    this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion),
+                     RDCOST(x->rdmult, x->rddiv, 0, *sse));
+
+    if (this_rd > ref_best_rd) {
+      is_cost_valid = 0;
+      break;
+    }
+  }
+
+  if (!is_cost_valid) {
+    // reset cost value
+    *rate = INT_MAX;
+    *distortion = INT64_MAX;
+    *sse = INT64_MAX;
+    *skippable = 0;
+  }
+
+  return is_cost_valid;
+}
+#endif // CONFIG_VAR_TX
+
+// Computes rate and distortion of planes 1..MAX_MB_PLANE-1 with a single
+// transform size per plane (get_uv_tx_size()), using txfm_rd_in_plane().
+//
+// Return value 0: early termination triggered, no valid rd cost available;
+//              1: rd cost values are valid.
+//
+// On a return of 0 the outputs are set to INT_MAX, INT64_MAX, INT64_MAX and 0.
+static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+                            int64_t *distortion, int *skippable, int64_t *sse,
+                            BLOCK_SIZE bsize, int64_t ref_best_rd) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
+  int plane_rate = 0, plane_skip = 1;
+  int64_t plane_dist = 0, plane_sse = 0;
+  int is_cost_valid = (ref_best_rd >= 0);
+  int plane;
+
+  // Refresh the residual of the planes to be evaluated (inter blocks only).
+  if (is_inter_block(mbmi) && is_cost_valid) {
+    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
+      vp10_subtract_plane(x, bsize, plane);
+  }
+
+  *rate = 0;
+  *distortion = 0;
+  *sse = 0;
+  *skippable = 1;
+
+  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+    txfm_rd_in_plane(x, cpi, &plane_rate, &plane_dist, &plane_skip, &plane_sse,
+                     ref_best_rd, plane, bsize, uv_tx_size,
+                     cpi->sf.use_fast_coef_costing);
+    if (plane_rate == INT_MAX) {
+      is_cost_valid = 0;
+      break;
+    }
+
+    *rate += plane_rate;
+    *distortion += plane_dist;
+    *sse += plane_sse;
+    *skippable &= plane_skip;
+
+    // Give up once both coding and skipping are over budget.
+    if (RDCOST(x->rdmult, x->rddiv, *rate, *distortion) > ref_best_rd &&
+        RDCOST(x->rdmult, x->rddiv, 0, *sse) > ref_best_rd) {
+      is_cost_valid = 0;
+      break;
+    }
+  }
+
+  if (is_cost_valid) return 1;
+
+  // No usable result: report sentinel values.
+  *rate = INT_MAX;
+  *distortion = INT64_MAX;
+  *sse = INT64_MAX;
+  *skippable = 0;
+  return 0;
+}
+
+// Searches palette coding for the two chroma planes of an intra block.
+//
+// For each palette size n, from min(colors, PALETTE_MAX_SIZE) down to 2, the
+// (U, V) sample pairs are clustered with vp10_k_means(), the centroids are
+// stored as palette colors in the block's mode info, and the RD cost is
+// measured with super_block_uvrd(). Whenever a candidate beats *best_rd, the
+// outputs *best_rd, *palette_mode_info, best_palette_color_map,
+// *mode_selected, *rate, *rate_tokenonly, *distortion and *skippable are
+// updated.
+//
+// Nothing is searched when the chroma block holds more than
+// PALETTE_MAX_BLOCK_SIZE samples, or when the color count is not in 2..64.
+static void rd_pick_palette_intra_sbuv(
+ VP10_COMP *cpi, MACROBLOCK *x, int dc_mode_cost,
+ PALETTE_MODE_INFO *palette_mode_info, uint8_t *best_palette_color_map,
+ PREDICTION_MODE *mode_selected, int64_t *best_rd, int *rate,
+ int *rate_tokenonly, int64_t *distortion, int *skippable) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const BLOCK_SIZE bsize = mbmi->sb_type;
+ // Chroma block dimensions in samples, after subsampling.
+ const int rows =
+ (4 * num_4x4_blocks_high_lookup[bsize]) >> (xd->plane[1].subsampling_y);
+ const int cols =
+ (4 * num_4x4_blocks_wide_lookup[bsize]) >> (xd->plane[1].subsampling_x);
+ int this_rate, this_rate_tokenonly, s;
+ int64_t this_distortion, this_rd;
+ int colors_u, colors_v, colors;
+ // The stride of plane 1 is used for both chroma planes below.
+ const int src_stride = x->plane[1].src.stride;
+ const uint8_t *const src_u = x->plane[1].src.buf;
+ const uint8_t *const src_v = x->plane[2].src.buf;
+
+ if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return;
+
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->common.use_highbitdepth) {
+ colors_u = vp10_count_colors_highbd(src_u, src_stride, rows, cols,
+ cpi->common.bit_depth);
+ colors_v = vp10_count_colors_highbd(src_v, src_stride, rows, cols,
+ cpi->common.bit_depth);
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ colors_u = vp10_count_colors(src_u, src_stride, rows, cols);
+ colors_v = vp10_count_colors(src_v, src_stride, rows, cols);
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // Use the larger of the two per-plane color counts.
+ colors = colors_u > colors_v ? colors_u : colors_v;
+ if (colors > 1 && colors <= 64) {
+ int r, c, n, i, j;
+ const int max_itr = 50;
+ int color_ctx, color_idx = 0;
+ int color_order[PALETTE_MAX_SIZE];
+ int64_t this_sse;
+ // Lower/upper bound and current value of the samples in each plane.
+ float lb_u, ub_u, val_u;
+ float lb_v, ub_v, val_v;
+ float *const data = x->palette_buffer->kmeans_data_buf;
+ float centroids[2 * PALETTE_MAX_SIZE];
+ uint8_t *const color_map = xd->plane[1].color_index_map;
+ PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);
+ uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v);
+ if (cpi->common.use_highbitdepth) {
+ lb_u = src_u16[0];
+ ub_u = src_u16[0];
+ lb_v = src_v16[0];
+ ub_v = src_v16[0];
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ lb_u = src_u[0];
+ ub_u = src_u[0];
+ lb_v = src_v[0];
+ ub_v = src_v[0];
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ mbmi->uv_mode = DC_PRED;
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
+ // Gather the samples as interleaved (U, V) pairs for the 2-D k-means and
+ // track the value range of each plane.
+ for (r = 0; r < rows; ++r) {
+ for (c = 0; c < cols; ++c) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->common.use_highbitdepth) {
+ val_u = src_u16[r * src_stride + c];
+ val_v = src_v16[r * src_stride + c];
+ data[(r * cols + c) * 2] = val_u;
+ data[(r * cols + c) * 2 + 1] = val_v;
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ val_u = src_u[r * src_stride + c];
+ val_v = src_v[r * src_stride + c];
+ data[(r * cols + c) * 2] = val_u;
+ data[(r * cols + c) * 2 + 1] = val_v;
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (val_u < lb_u)
+ lb_u = val_u;
+ else if (val_u > ub_u)
+ ub_u = val_u;
+ if (val_v < lb_v)
+ lb_v = val_v;
+ else if (val_v > ub_v)
+ ub_v = val_v;
+ }
+ }
+
+ for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
+ --n) {
+ // Seed the n centroids at the midpoints of n equal slices of each
+ // plane's value range.
+ for (i = 0; i < n; ++i) {
+ centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
+ centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
+ }
+ vp10_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
+ pmi->palette_size[1] = n;
+ // Store the clipped centroids as palette colors: i == 1 takes the first
+ // component of each centroid, i == 2 the second.
+ for (i = 1; i < 3; ++i) {
+ for (j = 0; j < n; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->common.use_highbitdepth)
+ pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd(
+ (int)centroids[j * 2 + i - 1], cpi->common.bit_depth);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
+ clip_pixel((int)centroids[j * 2 + i - 1]);
+ }
+ }
+
+ super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
+ &this_sse, bsize, *best_rd);
+ if (this_rate_tokenonly == INT_MAX) continue;
+ // Side-information cost: the DC mode, 2 * n palette colors at bit_depth
+ // bits each, the palette size, the first color index, and the flag that
+ // enables the chroma palette.
+ this_rate =
+ this_rate_tokenonly + dc_mode_cost +
+ 2 * cpi->common.bit_depth * n * vp10_cost_bit(128, 0) +
+ cpi->palette_uv_size_cost[bsize - BLOCK_8X8][n - 2] +
+ write_uniform_cost(n, color_map[0]) +
+ vp10_cost_bit(
+ vp10_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 1);
+
+ // Cost of every remaining color index; the first sample (0, 0) was
+ // already costed with write_uniform_cost() above.
+ // NOTE(review): color_idx is not reset per sample, so if no entry of
+ // color_order matches it keeps the value from the previous sample.
+ for (i = 0; i < rows; ++i) {
+ for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
+ color_ctx = vp10_get_palette_color_context(color_map, cols, i, j, n,
+ color_order);
+ for (r = 0; r < n; ++r)
+ if (color_map[i * cols + j] == color_order[r]) {
+ color_idx = r;
+ break;
+ }
+ assert(color_idx >= 0 && color_idx < n);
+ this_rate += cpi->palette_uv_color_cost[n - 2][color_ctx][color_idx];
+ }
+ }
+
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ *palette_mode_info = *pmi;
+ memcpy(best_palette_color_map, color_map,
+ rows * cols * sizeof(best_palette_color_map[0]));
+ *mode_selected = DC_PRED;
+ *rate = this_rate;
+ *distortion = this_distortion;
+ *rate_tokenonly = this_rate_tokenonly;
+ *skippable = s;
+ }
+ }
+ }
+}
+
+#if CONFIG_EXT_INTRA
+// Tries every filter-intra mode (0..FILTER_INTRA_MODES-1) on the chroma planes
+// with uv_mode forced to DC_PRED and the chroma palette disabled.
+//
+// Whenever a mode beats *best_rd, the outputs *best_rd, *rate,
+// *rate_tokenonly, *distortion and *skippable are updated. On success the
+// winning mode is left in mbmi->ext_intra_mode_info (index 1).
+// Return 1 if an ext intra mode is selected; return 0 otherwise.
+static int rd_pick_ext_intra_sbuv(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+                                  int *rate_tokenonly, int64_t *distortion,
+                                  int *skippable, BLOCK_SIZE bsize,
+                                  int64_t *best_rd) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  EXT_INTRA_MODE_INFO best_info;
+  EXT_INTRA_MODE mode;
+  int found = 0;
+
+  vp10_zero(best_info);
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 1;
+  mbmi->uv_mode = DC_PRED;
+  mbmi->palette_mode_info.palette_size[1] = 0;
+
+  for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
+    int tokenonly_rate, total_rate, skip;
+    int64_t dist, sse, rd;
+
+    mbmi->ext_intra_mode_info.ext_intra_mode[1] = mode;
+    if (super_block_uvrd(cpi, x, &tokenonly_rate, &dist, &skip, &sse, bsize,
+                         *best_rd)) {
+      // Coefficients + ext-intra flag + uv mode + the mode index itself.
+      total_rate = tokenonly_rate +
+                   vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 1) +
+                   cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
+                   write_uniform_cost(FILTER_INTRA_MODES, mode);
+      rd = RDCOST(x->rdmult, x->rddiv, total_rate, dist);
+      if (rd < *best_rd) {
+        *best_rd = rd;
+        *rate = total_rate;
+        *rate_tokenonly = tokenonly_rate;
+        *distortion = dist;
+        *skippable = skip;
+        best_info = mbmi->ext_intra_mode_info;
+        found = 1;
+      }
+    }
+  }
+
+  if (!found) return 0;
+
+  // Put the winning mode back; the loop left the last one tried in place.
+  mbmi->uv_mode = DC_PRED;
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+      best_info.use_ext_intra_mode[1];
+  mbmi->ext_intra_mode_info.ext_intra_mode[1] = best_info.ext_intra_mode[1];
+  return 1;
+}
+// RD-tests the current mbmi->angle_delta[1]; on a win updates the outputs.
+static void pick_intra_angle_routine_sbuv(VP10_COMP *cpi, MACROBLOCK *x,
+ int *rate, int *rate_tokenonly,
+ int64_t *distortion, int *skippable,
+ int *best_angle_delta,
+ BLOCK_SIZE bsize, int rate_overhead,
+ int64_t *best_rd) {
+ MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
+ int this_rate_tokenonly, this_rate, s;
+ int64_t this_distortion, this_sse, this_rd;
+ // A zero return from super_block_uvrd() leaves every output untouched.
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
+ &this_sse, bsize, *best_rd))
+ return;
+ // rate_overhead is the caller-supplied signalling cost for this candidate.
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (this_rd < *best_rd) {
+ *best_rd = this_rd;
+ *best_angle_delta = mbmi->angle_delta[1];
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+}
+// Picks mbmi->angle_delta[1] by RD; returns 0 if no candidate was accepted.
+static int rd_pick_intra_angle_sbuv(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+ int *rate_tokenonly, int64_t *distortion,
+ int *skippable, BLOCK_SIZE bsize,
+ int rate_overhead, int64_t best_rd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ int this_rate_tokenonly, this_rate, s;
+ int64_t this_distortion, this_sse, this_rd;
+ int angle_delta, best_angle_delta = 0;
+ const double rd_adjust = 1.2;
+ // INT_MAX in *rate_tokenonly means "no candidate accepted" (see return).
+ *rate_tokenonly = INT_MAX;
+ if (ANGLE_FAST_SEARCH) {
+ int deltas_level1[3] = { 0, -2, 2 };
+ int deltas_level2[3][2] = {
+ { -1, 1 }, { -3, -1 }, { 1, 3 },
+ };
+ const int level1 = 3, level2 = 2;
+ int i, j, best_i = -1;
+ // Level 1: coarse deltas. Delta 0 gets a x1.2 bound; if it fails, stop.
+ for (i = 0; i < level1; ++i) {
+ int64_t tmp_best_rd;
+ mbmi->angle_delta[1] = deltas_level1[i];
+ tmp_best_rd = (i == 0 && best_rd < INT64_MAX)
+ ? (int64_t)(best_rd * rd_adjust)
+ : best_rd;
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
+ &this_sse, bsize, tmp_best_rd)) {
+ if (i == 0)
+ break;
+ else
+ continue;
+ }
+ this_rate = this_rate_tokenonly + rate_overhead;
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+ if (i == 0 && best_rd < INT64_MAX && this_rd > best_rd * rd_adjust) break;
+ if (this_rd < best_rd) {
+ best_i = i;
+ best_rd = this_rd;
+ best_angle_delta = mbmi->angle_delta[1];
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ // Level 2: refine with the two deltas paired with the level-1 winner.
+ if (best_i >= 0) {
+ for (j = 0; j < level2; ++j) {
+ mbmi->angle_delta[1] = deltas_level2[best_i][j];
+ pick_intra_angle_routine_sbuv(cpi, x, rate, rate_tokenonly, distortion,
+ skippable, &best_angle_delta, bsize,
+ rate_overhead, &best_rd);
+ }
+ }
+ } else {
+ for (angle_delta = -MAX_ANGLE_DELTAS; angle_delta <= MAX_ANGLE_DELTAS;
+ ++angle_delta) {
+ mbmi->angle_delta[1] = angle_delta;
+ pick_intra_angle_routine_sbuv(cpi, x, rate, rate_tokenonly, distortion,
+ skippable, &best_angle_delta, bsize,
+ rate_overhead, &best_rd);
+ }
+ }
+ // Leave the winning delta (0 if none won) in mbmi for the caller.
+ mbmi->angle_delta[1] = best_angle_delta;
+ return *rate_tokenonly != INT_MAX;
+}
+#endif // CONFIG_EXT_INTRA
+// Chooses the best UV intra mode (plus palette/ext-intra); returns its RD.
+static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+ int *rate_tokenonly, int64_t *distortion,
+ int *skippable, BLOCK_SIZE bsize,
+ TX_SIZE max_tx_size) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ PREDICTION_MODE mode;
+ PREDICTION_MODE mode_selected = DC_PRED;
+ int64_t best_rd = INT64_MAX, this_rd;
+ int this_rate_tokenonly, this_rate, s;
+ int64_t this_distortion, this_sse;
+ const int rows =
+ (4 * num_4x4_blocks_high_lookup[bsize]) >> (xd->plane[1].subsampling_y);
+ const int cols =
+ (4 * num_4x4_blocks_wide_lookup[bsize]) >> (xd->plane[1].subsampling_x);
+ PALETTE_MODE_INFO palette_mode_info;
+ PALETTE_MODE_INFO *const pmi = &xd->mi[0]->mbmi.palette_mode_info;
+ uint8_t *best_palette_color_map = NULL;
+#if CONFIG_EXT_INTRA
+ int is_directional_mode, rate_overhead, best_angle_delta = 0;
+ EXT_INTRA_MODE_INFO ext_intra_mode_info;
+ // ext_intra_mode_info is left uninitialized apart from the [1] flag below.
+ ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
+ palette_mode_info.palette_size[1] = 0;
+ pmi->palette_size[1] = 0;
+ for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
+ if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue;
+
+ mbmi->uv_mode = mode;
+#if CONFIG_EXT_INTRA
+ is_directional_mode = (mode != DC_PRED && mode != TM_PRED);
+ rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
+ write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0);
+ mbmi->angle_delta[1] = 0;
+ if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode) {
+ if (!rd_pick_intra_angle_sbuv(cpi, x, &this_rate, &this_rate_tokenonly,
+ &this_distortion, &s, bsize, rate_overhead,
+ best_rd))
+ continue;
+ } else {
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
+ &this_sse, bsize, best_rd))
+ continue;
+ }
+ this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mbmi->mode][mode];
+ if (mbmi->sb_type >= BLOCK_8X8 && is_directional_mode)
+ this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+ if (mbmi->sb_type >= BLOCK_8X8 && mode == DC_PRED &&
+ ALLOW_FILTER_INTRA_MODES)
+ this_rate += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1], 0);
+#else
+ if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
+ &this_sse, bsize, best_rd))
+ continue;
+ this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[mbmi->mode][mode];
+#endif // CONFIG_EXT_INTRA
+ if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8 &&
+ mode == DC_PRED)
+ this_rate += vp10_cost_bit(
+ vp10_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);
+
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < best_rd) {
+ mode_selected = mode;
+#if CONFIG_EXT_INTRA
+ best_angle_delta = mbmi->angle_delta[1];
+#endif // CONFIG_EXT_INTRA
+ best_rd = this_rd;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ *skippable = s;
+ }
+ }
+ // Palette search (screen content, >= 8x8) may replace the winner above.
+ if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8) {
+ best_palette_color_map = x->palette_buffer->best_palette_color_map;
+ rd_pick_palette_intra_sbuv(
+ cpi, x, cpi->intra_uv_mode_cost[mbmi->mode][DC_PRED],
+ &palette_mode_info, best_palette_color_map, &mode_selected, &best_rd,
+ rate, rate_tokenonly, distortion, skippable);
+ }
+
+#if CONFIG_EXT_INTRA
+ if (mbmi->sb_type >= BLOCK_8X8 && ALLOW_FILTER_INTRA_MODES) {
+ if (rd_pick_ext_intra_sbuv(cpi, x, rate, rate_tokenonly, distortion,
+ skippable, bsize, &best_rd)) {
+ mode_selected = mbmi->uv_mode;
+ ext_intra_mode_info = mbmi->ext_intra_mode_info;
+ }
+ }
+ // Commit the ext-intra decision; choosing ext-intra clears the UV palette.
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ ext_intra_mode_info.use_ext_intra_mode[1];
+ if (ext_intra_mode_info.use_ext_intra_mode[1]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ ext_intra_mode_info.ext_intra_mode[1];
+ palette_mode_info.palette_size[1] = 0;
+ }
+ mbmi->angle_delta[1] = best_angle_delta;
+#endif // CONFIG_EXT_INTRA
+ mbmi->uv_mode = mode_selected;
+ pmi->palette_size[1] = palette_mode_info.palette_size[1];
+ if (palette_mode_info.palette_size[1] > 0) {
+ memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
+ palette_mode_info.palette_colors + PALETTE_MAX_SIZE,
+ 2 * PALETTE_MAX_SIZE * sizeof(palette_mode_info.palette_colors[0]));
+ memcpy(xd->plane[1].color_index_map, best_palette_color_map,
+ rows * cols * sizeof(best_palette_color_map[0]));
+ }
+
+ return best_rd;
+}
+// Evaluates UV with DC_PRED only (no mode search) and returns its RD cost.
+static int64_t rd_sbuv_dcpred(const VP10_COMP *cpi, MACROBLOCK *x, int *rate,
+ int *rate_tokenonly, int64_t *distortion,
+ int *skippable, BLOCK_SIZE bsize) {
+ int64_t unused;
+ // INT64_MAX is passed as the RD bound; the return flag is not checked.
+ x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
+ super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused,
+ bsize, INT64_MAX);
+ *rate = *rate_tokenonly +
+ cpi->intra_uv_mode_cost[x->e_mbd.mi[0]->mbmi.mode][DC_PRED];
+ return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
+}
+// Selects the UV intra mode; outputs its rate, distortion, skip and mode.
+static void choose_intra_uv_mode(VP10_COMP *cpi, MACROBLOCK *const x,
+ PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
+ TX_SIZE max_tx_size, int *rate_uv,
+ int *rate_uv_tokenonly, int64_t *dist_uv,
+ int *skip_uv, PREDICTION_MODE *mode_uv) {
+ // Use an estimated rd for uv_intra based on DC_PRED if the
+ // appropriate speed flag is set.
+ if (cpi->sf.use_uv_intra_rd_estimate) {
+ rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
+ bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
+ // Else do a proper rd search for each possible transform size that may
+ // be considered in the main rd loop.
+ } else {
+ (void)ctx;
+ rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
+ skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
+ max_tx_size);
+ }
+ *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
+}
+// Returns the rate cost of signalling inter prediction mode `mode`.
+static int cost_mv_ref(const VP10_COMP *cpi, PREDICTION_MODE mode,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ int is_compound,
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ int16_t mode_context) {
+#if CONFIG_REF_MV
+ int mode_cost = 0;
+#if CONFIG_EXT_INTER
+ int16_t mode_ctx =
+ is_compound ? mode_context : (mode_context & NEWMV_CTX_MASK);
+#else
+ int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
+#endif // CONFIG_EXT_INTER
+ int16_t is_all_zero_mv = mode_context & (1 << ALL_ZERO_FLAG_OFFSET);
+
+ assert(is_inter_mode(mode));
+ // Single-ref cost is a chain of binary choices: NEWMV, ZEROMV, then refmv.
+#if CONFIG_EXT_INTER
+ if (is_compound) {
+ return cpi
+ ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
+ } else {
+ if (mode == NEWMV || mode == NEWFROMNEARMV) {
+#else
+ if (mode == NEWMV) {
+#endif // CONFIG_EXT_INTER
+ mode_cost = cpi->newmv_mode_cost[mode_ctx][0];
+#if CONFIG_EXT_INTER
+ if (!is_compound)
+ mode_cost += cpi->new2mv_mode_cost[mode == NEWFROMNEARMV];
+#endif // CONFIG_EXT_INTER
+ return mode_cost;
+ } else {
+ mode_cost = cpi->newmv_mode_cost[mode_ctx][1];
+ mode_ctx = (mode_context >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
+ // All-zero-MV flag set: stop after the "not NEWMV" cost.
+ if (is_all_zero_mv) return mode_cost;
+
+ if (mode == ZEROMV) {
+ mode_cost += cpi->zeromv_mode_cost[mode_ctx][0];
+ return mode_cost;
+ } else {
+ mode_cost += cpi->zeromv_mode_cost[mode_ctx][1];
+ mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
+ // Any set skip flag replaces the refmv context with a fixed index (6-8).
+ if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) mode_ctx = 6;
+ if (mode_context & (1 << SKIP_NEARMV_OFFSET)) mode_ctx = 7;
+ if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) mode_ctx = 8;
+
+ mode_cost += cpi->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
+ return mode_cost;
+ }
+ }
+#if CONFIG_EXT_INTER
+ }
+#endif // CONFIG_EXT_INTER
+#else
+ assert(is_inter_mode(mode));
+#if CONFIG_EXT_INTER
+ if (is_inter_compound_mode(mode)) {
+ return cpi
+ ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
+ } else {
+#endif // CONFIG_EXT_INTER
+ return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
+#if CONFIG_EXT_INTER
+ }
+#endif // CONFIG_EXT_INTER
+#endif
+}
+// Sets sub-block i's MVs for `mode` in bmi[]; returns mode cost + MV cost.
+static int set_and_cost_bmi_mvs(
+ VP10_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, int i, PREDICTION_MODE mode,
+ int_mv this_mv[2], int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME],
+ int_mv seg_mvs[TOTAL_REFS_PER_FRAME],
+#if CONFIG_EXT_INTER
+ int_mv compound_seg_newmvs[2],
+#endif // CONFIG_EXT_INTER
+ int_mv *best_ref_mv[2], const int *mvjcost, int *mvcost[2]) {
+ MODE_INFO *const mic = xd->mi[0];
+ const MB_MODE_INFO *const mbmi = &mic->mbmi;
+ const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+ int thismvcost = 0;
+ int idx, idy;
+ const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
+ const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
+ const int is_compound = has_second_ref(mbmi);
+ int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];
+ // Resolve this_mv[] per mode; only modes with a NEW component add MV cost.
+ switch (mode) {
+ case NEWMV:
+#if CONFIG_EXT_INTER
+ case NEWFROMNEARMV:
+#endif // CONFIG_EXT_INTER
+ this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
+#if CONFIG_EXT_INTER
+ if (!cpi->common.allow_high_precision_mv ||
+ !vp10_use_mv_hp(&best_ref_mv[0]->as_mv))
+ lower_mv_precision(&this_mv[0].as_mv, 0);
+#endif // CONFIG_EXT_INTER
+ // NOTE(review): with REF_MV, this_mv[0] is re-read from seg_mvs below.
+#if CONFIG_REF_MV
+ for (idx = 0; idx < 1 + is_compound; ++idx) {
+ this_mv[idx] = seg_mvs[mbmi->ref_frame[idx]];
+ vp10_set_mvcost(x, mbmi->ref_frame[idx]);
+ thismvcost +=
+ vp10_mv_bit_cost(&this_mv[idx].as_mv, &best_ref_mv[idx]->as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT_SUB);
+ }
+ (void)mvjcost;
+ (void)mvcost;
+#else
+ thismvcost += vp10_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
+ mvjcost, mvcost, MV_COST_WEIGHT_SUB);
+#if !CONFIG_EXT_INTER
+ if (is_compound) {
+ this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
+ thismvcost +=
+ vp10_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv, mvjcost,
+ mvcost, MV_COST_WEIGHT_SUB);
+ }
+#endif // !CONFIG_EXT_INTER
+#endif
+ break;
+ case NEARMV:
+ case NEARESTMV:
+ this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
+ if (is_compound)
+ this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
+ break;
+ case ZEROMV:
+ this_mv[0].as_int = 0;
+ if (is_compound) this_mv[1].as_int = 0;
+ break;
+#if CONFIG_EXT_INTER
+ case NEW_NEWMV:
+ if (compound_seg_newmvs[0].as_int == INVALID_MV ||
+ compound_seg_newmvs[1].as_int == INVALID_MV) {
+ this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
+ this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
+ } else {
+ this_mv[0].as_int = compound_seg_newmvs[0].as_int;
+ this_mv[1].as_int = compound_seg_newmvs[1].as_int;
+ }
+ if (!cpi->common.allow_high_precision_mv ||
+ !vp10_use_mv_hp(&best_ref_mv[0]->as_mv))
+ lower_mv_precision(&this_mv[0].as_mv, 0);
+ if (!cpi->common.allow_high_precision_mv ||
+ !vp10_use_mv_hp(&best_ref_mv[1]->as_mv))
+ lower_mv_precision(&this_mv[1].as_mv, 0);
+ thismvcost += vp10_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
+ mvjcost, mvcost, MV_COST_WEIGHT_SUB);
+ thismvcost += vp10_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
+ mvjcost, mvcost, MV_COST_WEIGHT_SUB);
+ break;
+ case NEW_NEARMV:
+ case NEW_NEARESTMV:
+ this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
+ if (!cpi->common.allow_high_precision_mv ||
+ !vp10_use_mv_hp(&best_ref_mv[0]->as_mv))
+ lower_mv_precision(&this_mv[0].as_mv, 0);
+ thismvcost += vp10_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
+ mvjcost, mvcost, MV_COST_WEIGHT_SUB);
+ this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
+ break;
+ case NEAR_NEWMV:
+ case NEAREST_NEWMV:
+ this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
+ this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
+ if (!cpi->common.allow_high_precision_mv ||
+ !vp10_use_mv_hp(&best_ref_mv[1]->as_mv))
+ lower_mv_precision(&this_mv[1].as_mv, 0);
+ thismvcost += vp10_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
+ mvjcost, mvcost, MV_COST_WEIGHT_SUB);
+ break;
+ case NEAREST_NEARMV:
+ case NEAR_NEARESTMV:
+ case NEAREST_NEARESTMV:
+ case NEAR_NEARMV:
+ this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
+ this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
+ break;
+ case ZERO_ZEROMV:
+ this_mv[0].as_int = 0;
+ this_mv[1].as_int = 0;
+ break;
+#endif // CONFIG_EXT_INTER
+ default: break;
+ }
+ // Record the chosen MVs and mode in bmi[i].
+ mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
+ if (is_compound) mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
+
+ mic->bmi[i].as_mode = mode;
+
+#if CONFIG_REF_MV
+ if (mode == NEWMV) {
+ mic->bmi[i].pred_mv_s8[0].as_int = best_ref_mv[0]->as_int;
+ if (is_compound) mic->bmi[i].pred_mv_s8[1].as_int = best_ref_mv[1]->as_int;
+ } else {
+ mic->bmi[i].pred_mv_s8[0].as_int = this_mv[0].as_int;
+ if (is_compound) mic->bmi[i].pred_mv_s8[1].as_int = this_mv[1].as_int;
+ }
+#endif
+ // Replicate entry i over every 4x4 unit the partition covers (2-wide grid).
+ for (idy = 0; idy < num_4x4_blocks_high; ++idy)
+ for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
+ memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i]));
+
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (is_compound)
+ mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
+ else
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, mbmi->sb_type, i);
+#endif
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ return cost_mv_ref(cpi, mode, is_compound, mode_ctx) + thismvcost;
+#else
+ return cost_mv_ref(cpi, mode, mode_ctx) + thismvcost;
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+}
+// Y-plane RD of sub-block i; returns INT64_MAX once RD reaches best_yrd.
+static int64_t encode_inter_mb_segment(VP10_COMP *cpi, MACROBLOCK *x,
+ int64_t best_yrd, int i, int *labelyrate,
+ int64_t *distortion, int64_t *sse,
+ ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
+ int ir, int ic, int mi_row, int mi_col) {
+ int k;
+ MACROBLOCKD *xd = &x->e_mbd;
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ struct macroblock_plane *const p = &x->plane[0];
+ MODE_INFO *const mi = xd->mi[0];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
+ const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+ const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
+ int idx, idy;
+ const uint8_t *const src =
+ &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
+ uint8_t *const dst =
+ &pd->dst.buf[vp10_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)];
+ int64_t thisdistortion = 0, thissse = 0;
+ int thisrate = 0;
+ TX_SIZE tx_size = mi->mbmi.tx_size;
+
+ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, tx_size);
+ const scan_order *so = get_scan(tx_size, tx_type, 1);
+ const int num_4x4_w = num_4x4_blocks_wide_txsize_lookup[tx_size];
+ const int num_4x4_h = num_4x4_blocks_high_txsize_lookup[tx_size];
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+ assert(IMPLIES(xd->lossless[mi->mbmi.segment_id], tx_size == TX_4X4));
+ assert(IMPLIES(!xd->lossless[mi->mbmi.segment_id],
+ tx_size == max_txsize_rect_lookup[mi->mbmi.sb_type]));
+#else
+ assert(tx_size == TX_4X4);
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+ assert(tx_type == DCT_DCT);
+ // Build the inter prediction for this sub-block, then the residual.
+ vp10_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vpx_highbd_subtract_block(
+ height, width,
+ vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8, src,
+ p->src.stride, dst, pd->dst.stride, xd->bd);
+ } else {
+ vpx_subtract_block(height, width, vp10_raster_block_offset_int16(
+ BLOCK_8X8, i, p->src_diff),
+ 8, src, p->src.stride, dst, pd->dst.stride);
+ }
+#else
+ vpx_subtract_block(height, width,
+ vp10_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
+ 8, src, p->src.stride, dst, pd->dst.stride);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // Transform, quantize and cost each transform block; exit early on RD.
+ k = i;
+ for (idy = 0; idy < height / 4; idy += num_4x4_h) {
+ for (idx = 0; idx < width / 4; idx += num_4x4_w) {
+ int64_t dist, ssz, rd, rd1, rd2;
+ int block;
+ int coeff_ctx;
+ k += (idy * 2 + idx);  // NOTE(review): cumulative, not k = i + ...; confirm
+ if (tx_size == TX_4X4)
+ block = k;
+ else
+ block = (i ? 2 : 0);
+ coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)), *(tl + (k >> 1)));
+#if CONFIG_NEW_QUANT
+ vp10_xform_quant_fp_nuq(x, 0, block, idy + (i >> 1), idx + (i & 0x01),
+ BLOCK_8X8, tx_size, coeff_ctx);
+#else
+ vp10_xform_quant(x, 0, block, idy + (i >> 1), idx + (i & 0x01), BLOCK_8X8,
+ tx_size, VP10_XFORM_QUANT_FP);
+#endif // CONFIG_NEW_QUANT
+ if (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0)
+ vp10_optimize_b(x, 0, block, tx_size, coeff_ctx);
+ dist_block(cpi, x, 0, block, idy + (i >> 1), idx + (i & 0x1), tx_size,
+ &dist, &ssz);
+ thisdistortion += dist;
+ thissse += ssz;
+#if CONFIG_VAR_TX
+ thisrate += cost_coeffs(x, 0, block, coeff_ctx, tx_size, so->scan,
+ so->neighbors, cpi->sf.use_fast_coef_costing);
+ *(ta + (k & 1)) = !(p->eobs[block] == 0);
+ *(tl + (k >> 1)) = !(p->eobs[block] == 0);
+#else
+ thisrate +=
+ cost_coeffs(x, 0, block, ta + (k & 1), tl + (k >> 1), tx_size,
+ so->scan, so->neighbors, cpi->sf.use_fast_coef_costing);
+#if CONFIG_EXT_TX
+ if (tx_size == TX_8X4) {
+ *(ta + (k & 1) + 1) = *(ta + (k & 1));
+ }
+ if (tx_size == TX_4X8) {
+ *(tl + (k >> 1) + 1) = *(tl + (k >> 1));
+ }
+#endif // CONFIG_EXT_TX
+#endif // CONFIG_VAR_TX
+ rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion);
+ rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse);
+ rd = VPXMIN(rd1, rd2);
+ if (rd >= best_yrd) return INT64_MAX;
+ }
+ }
+ // Reached only without an early exit; report totals to the caller.
+ *distortion = thisdistortion;
+ *labelyrate = thisrate;
+ *sse = thissse;
+
+ return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
+}
+// RD statistics record; BEST_SEG_INFO stores a [4][mode] table of these.
+typedef struct {
+ int eobs;
+ int brate;
+ int byrate;
+ int64_t bdist;
+ int64_t bsse;
+ int64_t brdcost;
+ int_mv mvs[2];
+#if CONFIG_REF_MV
+ int_mv pred_mv[2];
+#endif
+#if CONFIG_EXT_INTER
+ int_mv ref_mv[2];
+#endif // CONFIG_EXT_INTER
+ ENTROPY_CONTEXT ta[2];
+ ENTROPY_CONTEXT tl[2];
+} SEG_RDSTAT;
+// Holds ref-MV pointers, RD totals, modes[4] and rdstat[4][mode] entries.
+typedef struct {
+ int_mv *ref_mv[2];
+ int_mv mvp;
+
+ int64_t segment_rd;
+ int r;
+ int64_t d;
+ int64_t sse;
+ int segment_yrate;
+ PREDICTION_MODE modes[4];
+#if CONFIG_EXT_INTER
+ SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
+#else
+ SEG_RDSTAT rdstat[4][INTER_MODES];
+#endif // CONFIG_EXT_INTER
+ int mvthresh;
+} BEST_SEG_INFO;
+// Returns 1 if mv, with components >> 3, is outside x's mv_row/col limits.
+static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
+ return (mv->row >> 3) < x->mv_row_min || (mv->row >> 3) > x->mv_row_max ||
+ (mv->col >> 3) < x->mv_col_min || (mv->col >> 3) > x->mv_col_max;
+}
+// Advances the Y src and pre[] buffer pointers to sub-block i of an 8x8.
+static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
+ MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
+ struct macroblock_plane *const p = &x->plane[0];
+ struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
+ // pre[0] is asserted 8-byte aligned; pre[1] shifts only with a second ref.
+ p->src.buf =
+ &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
+ assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
+ pd->pre[0].buf =
+ &pd->pre[0]
+ .buf[vp10_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)];
+ if (has_second_ref(mbmi))
+ pd->pre[1].buf =
+ &pd->pre[1]
+ .buf[vp10_raster_block_offset(BLOCK_8X8, i, pd->pre[1].stride)];
+}
+// Restores the Y-plane src and pre[] buffer descriptors from saved copies.
+static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
+ struct buf_2d orig_pre[2]) {
+ MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
+ x->plane[0].src = orig_src;
+ x->e_mbd.plane[0].pre[0] = orig_pre[0];
+ if (has_second_ref(mbmi)) x->e_mbd.plane[0].pre[1] = orig_pre[1];
+}
+
+// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
+// TODO(aconverse): Find out if this is still productive then clean up or remove
+static int check_best_zero_mv(
+ const VP10_COMP *cpi, const int16_t mode_context[TOTAL_REFS_PER_FRAME],
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
+ const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block) {
+ // Returns 0 only when this_mode yields zero motion and loses on cost; else 1.
+#if !CONFIG_EXT_INTER
+ assert(ref_frames[1] != INTRA_FRAME); // Just sanity check
+#endif
+
+ if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
+ frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
+ (ref_frames[1] <= INTRA_FRAME ||
+ frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
+#if CONFIG_REF_MV
+ int16_t rfc =
+ vp10_mode_context_analyzer(mode_context, ref_frames, bsize, block);
+#else
+ int16_t rfc = mode_context[ref_frames[0]];
+#endif
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ int c1 = cost_mv_ref(cpi, NEARMV, ref_frames[1] > INTRA_FRAME, rfc);
+ int c2 = cost_mv_ref(cpi, NEARESTMV, ref_frames[1] > INTRA_FRAME, rfc);
+ int c3 = cost_mv_ref(cpi, ZEROMV, ref_frames[1] > INTRA_FRAME, rfc);
+#else
+ int c1 = cost_mv_ref(cpi, NEARMV, rfc);
+ int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
+ int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+
+#if !CONFIG_REF_MV
+ (void)bsize;
+ (void)block;
+#endif
+ // Ties favor NEAR/NEAREST: ZEROMV must be strictly cheaper than a zero one.
+ if (this_mode == NEARMV) {
+ if (c1 > c3) return 0;
+ } else if (this_mode == NEARESTMV) {
+ if (c2 > c3) return 0;
+ } else {
+ assert(this_mode == ZEROMV);
+ if (ref_frames[1] <= INTRA_FRAME) {
+ if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
+ (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
+ return 0;
+ } else {
+ if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
+ frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
+ (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
+ frame_mv[NEARMV][ref_frames[1]].as_int == 0))
+ return 0;
+ }
+ }
+ }
+#if CONFIG_EXT_INTER
+ else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAREST_NEARMV ||
+ this_mode == NEAR_NEARESTMV || this_mode == NEAR_NEARMV ||
+ this_mode == ZERO_ZEROMV) &&
+ frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
+ frame_mv[this_mode][ref_frames[1]].as_int == 0) {
+#if CONFIG_REF_MV
+ int16_t rfc = compound_mode_context[ref_frames[0]];
+ int c1 = cost_mv_ref(cpi, NEAREST_NEARMV, 1, rfc);
+ int c2 = cost_mv_ref(cpi, NEAREST_NEARESTMV, 1, rfc);
+ int c3 = cost_mv_ref(cpi, ZERO_ZEROMV, 1, rfc);
+ int c4 = cost_mv_ref(cpi, NEAR_NEARESTMV, 1, rfc);
+ int c5 = cost_mv_ref(cpi, NEAR_NEARMV, 1, rfc);
+#else
+ int16_t rfc = mode_context[ref_frames[0]];
+ int c1 = cost_mv_ref(cpi, NEAREST_NEARMV, rfc);
+ int c2 = cost_mv_ref(cpi, NEAREST_NEARESTMV, rfc);
+ int c3 = cost_mv_ref(cpi, ZERO_ZEROMV, rfc);
+ int c4 = cost_mv_ref(cpi, NEAR_NEARESTMV, rfc);
+ int c5 = cost_mv_ref(cpi, NEAR_NEARMV, rfc);
+#endif
+ // Same tie rule for the compound modes, with ZERO_ZEROMV as the zero mode.
+ if (this_mode == NEAREST_NEARMV) {
+ if (c1 > c3) return 0;
+ } else if (this_mode == NEAREST_NEARESTMV) {
+ if (c2 > c3) return 0;
+ } else if (this_mode == NEAR_NEARESTMV) {
+ if (c4 > c3) return 0;
+ } else if (this_mode == NEAR_NEARMV) {
+ if (c5 > c3) return 0;
+ } else {
+ assert(this_mode == ZERO_ZEROMV);
+ if ((c3 >= c2 && frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0 &&
+ frame_mv[NEAREST_NEARESTMV][ref_frames[1]].as_int == 0) ||
+ (c3 >= c1 && frame_mv[NEAREST_NEARMV][ref_frames[0]].as_int == 0 &&
+ frame_mv[NEAREST_NEARMV][ref_frames[1]].as_int == 0) ||
+ (c3 >= c5 && frame_mv[NEAR_NEARMV][ref_frames[0]].as_int == 0 &&
+ frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0) ||
+ (c3 >= c4 && frame_mv[NEAR_NEARESTMV][ref_frames[0]].as_int == 0 &&
+ frame_mv[NEAR_NEARESTMV][ref_frames[1]].as_int == 0))
+ return 0;
+ }
+ }
+#endif // CONFIG_EXT_INTER
+ return 1;
+}
+// Alternately refines the two compound MVs in frame_mv[]; sets *rate_mv.
+static void joint_motion_search(VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+ int_mv *frame_mv, int mi_row, int mi_col,
+#if CONFIG_EXT_INTER
+ int_mv *ref_mv_sub8x8[2],
+#endif
+ int_mv single_newmv[TOTAL_REFS_PER_FRAME],
+ int *rate_mv, const int block) {
+ const VP10_COMMON *const cm = &cpi->common;
+ const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
+ const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ const int refs[2] = { mbmi->ref_frame[0],
+ mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
+ int_mv ref_mv[2];
+ int ite, ref;
+#if CONFIG_DUAL_FILTER
+ INTERP_FILTER interp_filter[4] = {
+ mbmi->interp_filter[0], mbmi->interp_filter[1], mbmi->interp_filter[2],
+ mbmi->interp_filter[3],
+ };
+#else
+ const INTERP_FILTER interp_filter = mbmi->interp_filter;
+#endif
+ struct scale_factors sf;
+
+ // Do joint motion search in compound mode to get more accurate mv.
+ struct buf_2d backup_yv12[2][MAX_MB_PLANE];
+ int last_besterr[2] = { INT_MAX, INT_MAX };
+ const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
+ vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
+ vp10_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
+ };
+
+// Prediction buffer from second frame.
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
+ uint8_t *second_pred;
+#else
+ DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ // Per reference: pick the predictor MV, swap in scaled planes, seed frame_mv.
+ for (ref = 0; ref < 2; ++ref) {
+#if CONFIG_EXT_INTER
+ if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL)
+ ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int;
+ else
+#endif // CONFIG_EXT_INTER
+ ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
+
+ if (scaled_ref_frame[ref]) {
+ int i;
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_yv12[ref][i] = xd->plane[i].pre[ref];
+ vp10_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
+ NULL);
+ }
+
+ frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
+ }
+
+// Since we have scaled the reference frames to match the size of the current
+// frame we must use a unit scaling factor during mode selection.
+#if CONFIG_VP9_HIGHBITDEPTH
+ vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
+ cm->height, cm->use_highbitdepth);
+#else
+ vp10_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
+ cm->height);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // Allow joint search multiple times iteratively for each reference frame
+ // and break out of the search loop if it couldn't find a better mv.
+ for (ite = 0; ite < 4; ite++) {
+ struct buf_2d ref_yv12[2];
+ int bestsme = INT_MAX;
+ int sadpb = x->sadperbit16;
+ MV *const best_mv = &x->best_mv.as_mv;
+ int search_range = 3;
+
+ int tmp_col_min = x->mv_col_min;
+ int tmp_col_max = x->mv_col_max;
+ int tmp_row_min = x->mv_row_min;
+ int tmp_row_max = x->mv_row_max;
+ int id = ite % 2; // Even iterations search in the first reference frame,
+ // odd iterations search in the second. The predictor
+ // found for the 'other' reference frame is factored in.
+
+ // Initialized here because of compiler problem in Visual Studio.
+ ref_yv12[0] = xd->plane[0].pre[0];
+ ref_yv12[1] = xd->plane[0].pre[1];
+
+#if CONFIG_DUAL_FILTER
+ // reload the filter types
+ interp_filter[0] =
+ (id == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0];
+ interp_filter[1] =
+ (id == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1];
+#endif
+
+// Get the prediction block from the 'other' reference frame.
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
+ vp10_highbd_build_inter_predictor(
+ ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
+ &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, interp_filter,
+ MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd);
+ } else {
+ second_pred = (uint8_t *)second_pred_alloc_16;
+ vp10_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride,
+ second_pred, pw, &frame_mv[refs[!id]].as_mv,
+ &sf, pw, ph, 0, interp_filter, MV_PRECISION_Q3,
+ mi_col * MI_SIZE, mi_row * MI_SIZE);
+ }
+#else
+ vp10_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride,
+ second_pred, pw, &frame_mv[refs[!id]].as_mv, &sf,
+ pw, ph, 0, interp_filter, MV_PRECISION_Q3,
+ mi_col * MI_SIZE, mi_row * MI_SIZE);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // Do compound motion search on the current reference frame.
+ if (id) xd->plane[0].pre[0] = ref_yv12[id];
+ vp10_set_mv_search_range(x, &ref_mv[id].as_mv);
+
+ // Use the mv result from the single mode as mv predictor.
+ *best_mv = frame_mv[refs[id]].as_mv;
+
+ best_mv->col >>= 3;
+ best_mv->row >>= 3;
+
+#if CONFIG_REF_MV
+ vp10_set_mvcost(x, refs[id]);
+#endif
+
+ // Small-range full-pixel motion search.
+ bestsme =
+ vp10_refining_search_8p_c(x, sadpb, search_range, &cpi->fn_ptr[bsize],
+ &ref_mv[id].as_mv, second_pred);
+ if (bestsme < INT_MAX)
+ bestsme = vp10_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
+ second_pred, &cpi->fn_ptr[bsize], 1);
+
+ x->mv_col_min = tmp_col_min;
+ x->mv_col_max = tmp_col_max;
+ x->mv_row_min = tmp_row_min;
+ x->mv_row_max = tmp_row_max;
+
+ if (bestsme < INT_MAX) {
+ int dis; /* TODO: use dis in distortion calculation later. */
+ unsigned int sse;
+ if (cpi->sf.use_upsampled_references) {
+ // Use up-sampled reference frames.
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ struct buf_2d backup_pred = pd->pre[0];
+ const YV12_BUFFER_CONFIG *upsampled_ref =
+ get_upsampled_ref(cpi, refs[id]);
+
+ // Set pred for Y plane
+ setup_pred_plane(&pd->pre[0], upsampled_ref->y_buffer,
+ upsampled_ref->y_crop_width,
+ upsampled_ref->y_crop_height, upsampled_ref->y_stride,
+ (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
+ pd->subsampling_y);
+
+ // If bsize < BLOCK_8X8, adjust pred pointer for this block
+ if (bsize < BLOCK_8X8)
+ pd->pre[0].buf =
+ &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, block,
+ pd->pre[0].stride))
+ << 3];
+
+ bestsme = cpi->find_fractional_mv_step(
+ x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
+ x->errorperbit, &cpi->fn_ptr[bsize], 0,
+ cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
+ &dis, &sse, second_pred, pw, ph, 1);
+
+ // Restore the reference frames.
+ pd->pre[0] = backup_pred;
+ } else {
+ (void)block;
+ bestsme = cpi->find_fractional_mv_step(
+ x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
+ x->errorperbit, &cpi->fn_ptr[bsize], 0,
+ cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
+ &dis, &sse, second_pred, pw, ph, 0);
+ }
+ }
+
+ // Restore the pointer to the first (possibly scaled) prediction buffer.
+ if (id) xd->plane[0].pre[0] = ref_yv12[0];
+
+ if (bestsme < last_besterr[id]) {
+ frame_mv[refs[id]].as_mv = *best_mv;
+ last_besterr[id] = bestsme;
+ } else {
+ break;
+ }
+ }
+ // The MV rate is summed over both references below.
+ *rate_mv = 0;
+ // NOTE(review): sub8x8 path below derefs ref_mv_sub8x8 without a NULL check.
+ for (ref = 0; ref < 2; ++ref) {
+ if (scaled_ref_frame[ref]) {
+ // Restore the prediction frame pointers to their unscaled versions.
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[ref] = backup_yv12[ref][i];
+ }
+#if CONFIG_REF_MV
+ vp10_set_mvcost(x, refs[ref]);
+#endif
+#if CONFIG_EXT_INTER
+ if (bsize >= BLOCK_8X8)
+#endif // CONFIG_EXT_INTER
+ *rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+#if CONFIG_EXT_INTER
+ else
+ *rate_mv += vp10_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
+ &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
+ x->mvcost, MV_COST_WEIGHT);
+#endif // CONFIG_EXT_INTER
+ }
+}
+
+// Rate-distortion search for a block coded below 8x8: for every label
+// (4x4, 4x8 or 8x4 sub-block) it tries the allowed inter modes and keeps the
+// cheapest one. Per-label statistics go to bsi_buf[filter_idx]. Returns the
+// summed segment RD cost, or INT64_MAX when the search is abandoned.
+static int64_t rd_pick_best_sub8x8_mode(
+    VP10_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv,
+    int_mv *second_best_ref_mv, int64_t best_rd, int *returntotrate,
+    int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse,
+    int mvthresh,
+#if CONFIG_EXT_INTER
+    int_mv seg_mvs[4][2][TOTAL_REFS_PER_FRAME],
+    int_mv compound_seg_newmvs[4][2],
+#else
+    int_mv seg_mvs[4][TOTAL_REFS_PER_FRAME],
+#endif  // CONFIG_EXT_INTER
+    BEST_SEG_INFO *bsi_buf, int filter_idx, int mi_row, int mi_col) {
+  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
+#if CONFIG_REF_MV
+  int_mv tmp_ref_mv[2];
+#endif
+  MACROBLOCKD *xd = &x->e_mbd;
+  MODE_INFO *mi = xd->mi[0];
+  MB_MODE_INFO *mbmi = &mi->mbmi;
+  int mode_idx;
+  int k, br = 0, idx, idy;
+  int64_t bd = 0, block_sse = 0;
+  PREDICTION_MODE this_mode;
+  VP10_COMMON *cm = &cpi->common;
+  struct macroblock_plane *const p = &x->plane[0];
+  struct macroblockd_plane *const pd = &xd->plane[0];
+  const int label_count = 4;
+  int64_t this_segment_rd = 0;
+  int label_mv_thresh;
+  int segmentyrate = 0;
+  const BLOCK_SIZE bsize = mbmi->sb_type;
+  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
+  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+  ENTROPY_CONTEXT t_above[2], t_left[2];
+  int subpelmv = 1, have_ref = 0;
+  const int has_second_rf = has_second_ref(mbmi);
+  const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
+  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+#if CONFIG_EXT_TX && CONFIG_RECT_TX
+  mbmi->tx_size =
+      xd->lossless[mbmi->segment_id] ? TX_4X4 : max_txsize_rect_lookup[bsize];
+#else
+  mbmi->tx_size = TX_4X4;
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
+
+  vp10_zero(*bsi);
+
+  bsi->segment_rd = best_rd;
+  bsi->ref_mv[0] = best_ref_mv;
+  bsi->ref_mv[1] = second_best_ref_mv;
+  bsi->mvp.as_int = best_ref_mv->as_int;
+  bsi->mvthresh = mvthresh;
+
+  for (idx = 0; idx < 4; ++idx) bsi->modes[idx] = ZEROMV;
+
+  // Flag the stored mvs and mv predictors of every label as INVALID_MV.
+#if CONFIG_REF_MV
+  for (idx = 0; idx < 4; ++idx) {
+    for (k = NEARESTMV; k <= NEWMV; ++k) {
+      bsi->rdstat[idx][INTER_OFFSET(k)].pred_mv[0].as_int = INVALID_MV;
+      bsi->rdstat[idx][INTER_OFFSET(k)].pred_mv[1].as_int = INVALID_MV;
+
+      bsi->rdstat[idx][INTER_OFFSET(k)].mvs[0].as_int = INVALID_MV;
+      bsi->rdstat[idx][INTER_OFFSET(k)].mvs[1].as_int = INVALID_MV;
+    }
+  }
+#endif  // CONFIG_REF_MV
+
+  memcpy(t_above, pd->above_context, sizeof(t_above));
+  memcpy(t_left, pd->left_context, sizeof(t_left));
+
+  // Per-label mv search threshold. A multiplier of 64 would make it so big
+  // that mvs are rarely checked on segments; 1 keeps it near the macroblock one.
+  label_mv_thresh = 1 * bsi->mvthresh / label_count;
+
+  // Segmentation method overheads
+  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
+    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
+      // TODO(jingning,rbultje): rewrite the rate-distortion optimization
+      // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
+      int_mv mode_mv[MB_MODE_COUNT][2];
+      int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
+      PREDICTION_MODE mode_selected = ZEROMV;
+      int64_t best_rd = INT64_MAX;
+      const int i = idy * 2 + idx;
+      int ref;
+#if CONFIG_REF_MV
+      CANDIDATE_MV ref_mv_stack[2][MAX_REF_MV_STACK_SIZE];
+      uint8_t ref_mv_count[2];
+#endif
+#if CONFIG_EXT_INTER
+      int mv_idx;
+      int_mv ref_mvs_sub8x8[2][2];
+#endif  // CONFIG_EXT_INTER
+
+      for (ref = 0; ref < 1 + has_second_rf; ++ref) {
+        const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+#if CONFIG_EXT_INTER
+        int_mv mv_ref_list[MAX_MV_REF_CANDIDATES];
+        vp10_update_mv_context(xd, mi, frame, mv_ref_list, i, mi_row, mi_col,
+                               NULL);
+#endif  // CONFIG_EXT_INTER
+        frame_mv[ZEROMV][frame].as_int = 0;
+        vp10_append_sub8x8_mvs_for_idx(cm, xd, i, ref, mi_row, mi_col,
+#if CONFIG_REF_MV
+                                       ref_mv_stack[ref], &ref_mv_count[ref],
+#endif
+#if CONFIG_EXT_INTER
+                                       mv_ref_list,
+#endif  // CONFIG_EXT_INTER
+                                       &frame_mv[NEARESTMV][frame],
+                                       &frame_mv[NEARMV][frame]);
+
+#if CONFIG_REF_MV
+        tmp_ref_mv[ref] = frame_mv[NEARESTMV][mbmi->ref_frame[ref]];
+        lower_mv_precision(&tmp_ref_mv[ref].as_mv, cm->allow_high_precision_mv);
+        bsi->ref_mv[ref] = &tmp_ref_mv[ref];
+        mbmi_ext->ref_mvs[frame][0] = tmp_ref_mv[ref];
+#endif
+
+#if CONFIG_EXT_INTER
+        mv_ref_list[0].as_int = frame_mv[NEARESTMV][frame].as_int;
+        mv_ref_list[1].as_int = frame_mv[NEARMV][frame].as_int;
+        vp10_find_best_ref_mvs(cm->allow_high_precision_mv, mv_ref_list,
+                               &ref_mvs_sub8x8[0][ref],
+                               &ref_mvs_sub8x8[1][ref]);
+
+        if (has_second_rf) {
+          frame_mv[ZERO_ZEROMV][frame].as_int = 0;
+          frame_mv[NEAREST_NEARESTMV][frame].as_int =
+              frame_mv[NEARESTMV][frame].as_int;
+
+          if (ref == 0) {
+            frame_mv[NEAREST_NEARMV][frame].as_int =
+                frame_mv[NEARESTMV][frame].as_int;
+            frame_mv[NEAR_NEARESTMV][frame].as_int =
+                frame_mv[NEARMV][frame].as_int;
+            frame_mv[NEAREST_NEWMV][frame].as_int =
+                frame_mv[NEARESTMV][frame].as_int;
+            frame_mv[NEAR_NEWMV][frame].as_int = frame_mv[NEARMV][frame].as_int;
+            frame_mv[NEAR_NEARMV][frame].as_int =
+                frame_mv[NEARMV][frame].as_int;
+          } else if (ref == 1) {
+            frame_mv[NEAREST_NEARMV][frame].as_int =
+                frame_mv[NEARMV][frame].as_int;
+            frame_mv[NEAR_NEARESTMV][frame].as_int =
+                frame_mv[NEARESTMV][frame].as_int;
+            frame_mv[NEW_NEARESTMV][frame].as_int =
+                frame_mv[NEARESTMV][frame].as_int;
+            frame_mv[NEW_NEARMV][frame].as_int = frame_mv[NEARMV][frame].as_int;
+            frame_mv[NEAR_NEARMV][frame].as_int =
+                frame_mv[NEARMV][frame].as_int;
+          }
+        }
+#endif  // CONFIG_EXT_INTER
+      }
+
+      // search for the best motion vector on this segment
+#if CONFIG_EXT_INTER
+      for (this_mode = (has_second_rf ? NEAREST_NEARESTMV : NEARESTMV);
+           this_mode <= (has_second_rf ? NEW_NEWMV : NEWFROMNEARMV);
+           ++this_mode)
+#else
+      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode)
+#endif  // CONFIG_EXT_INTER
+      {
+        const struct buf_2d orig_src = x->plane[0].src;
+        struct buf_2d orig_pre[2];
+        // This flag controls if the motion estimation will kick off. When it
+        // is set to a non-zero value, the encoder will force motion estimation.
+        int run_mv_search = 0;
+
+        mode_idx = INTER_OFFSET(this_mode);
+#if CONFIG_EXT_INTER
+        mv_idx = (this_mode == NEWFROMNEARMV) ? 1 : 0;
+
+        for (ref = 0; ref < 1 + has_second_rf; ++ref)
+          bsi->ref_mv[ref]->as_int = ref_mvs_sub8x8[mv_idx][ref].as_int;
+#endif  // CONFIG_EXT_INTER
+        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
+        if (!(inter_mode_mask & (1 << this_mode))) continue;
+
+#if CONFIG_REF_MV
+        run_mv_search = 2;
+#if !CONFIG_EXT_INTER
+        if (filter_idx > 0 && this_mode == NEWMV) {
+          BEST_SEG_INFO *ref_bsi = bsi_buf;
+          SEG_RDSTAT *ref_rdstat = &ref_bsi->rdstat[i][mode_idx];
+
+          if (has_second_rf) {
+            if (seg_mvs[i][mbmi->ref_frame[0]].as_int ==
+                    ref_rdstat->mvs[0].as_int &&
+                ref_rdstat->mvs[0].as_int != INVALID_MV)
+              if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int)
+                --run_mv_search;
+
+            if (seg_mvs[i][mbmi->ref_frame[1]].as_int ==
+                    ref_rdstat->mvs[1].as_int &&
+                ref_rdstat->mvs[1].as_int != INVALID_MV)
+              if (bsi->ref_mv[1]->as_int == ref_rdstat->pred_mv[1].as_int)
+                --run_mv_search;
+          } else {
+            if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int &&
+                ref_rdstat->mvs[0].as_int != INVALID_MV) {
+              run_mv_search = 0;
+              seg_mvs[i][mbmi->ref_frame[0]].as_int = ref_rdstat->mvs[0].as_int;
+            }
+          }
+
+          if (run_mv_search != 0 && filter_idx > 1) {
+            ref_bsi = bsi_buf + 1;
+            ref_rdstat = &ref_bsi->rdstat[i][mode_idx];
+            run_mv_search = 2;
+
+            if (has_second_rf) {
+              if (seg_mvs[i][mbmi->ref_frame[0]].as_int ==
+                      ref_rdstat->mvs[0].as_int &&
+                  ref_rdstat->mvs[0].as_int != INVALID_MV)
+                if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int)
+                  --run_mv_search;
+
+              if (seg_mvs[i][mbmi->ref_frame[1]].as_int ==
+                      ref_rdstat->mvs[1].as_int &&
+                  ref_rdstat->mvs[1].as_int != INVALID_MV)
+                if (bsi->ref_mv[1]->as_int == ref_rdstat->pred_mv[1].as_int)
+                  --run_mv_search;
+            } else {
+              if (bsi->ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int &&
+                  ref_rdstat->mvs[0].as_int != INVALID_MV) {
+                run_mv_search = 0;
+                seg_mvs[i][mbmi->ref_frame[0]].as_int =
+                    ref_rdstat->mvs[0].as_int;
+              }
+            }
+          }
+        }
+#endif  // CONFIG_EXT_INTER
+#endif  // CONFIG_REF_MV
+
+        if (!check_best_zero_mv(cpi, mbmi_ext->mode_context,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+                                mbmi_ext->compound_mode_context,
+#endif  // CONFIG_REF_MV && CONFIG_EXT_INTER
+                                frame_mv, this_mode, mbmi->ref_frame, bsize, i))
+          continue;
+
+        memcpy(orig_pre, pd->pre, sizeof(orig_pre));
+        memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
+               sizeof(bsi->rdstat[i][mode_idx].ta));
+        memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
+               sizeof(bsi->rdstat[i][mode_idx].tl));
+
+        // motion search for newmv (single predictor case only)
+        if (!has_second_rf &&
+#if CONFIG_EXT_INTER
+            have_newmv_in_inter_mode(this_mode) &&
+            (seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV ||
+             vp10_use_mv_hp(&bsi->ref_mv[0]->as_mv) == 0)
+#else
+            this_mode == NEWMV &&
+            (seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV ||
+             run_mv_search)
+#endif  // CONFIG_EXT_INTER
+                ) {
+          int step_param = 0;
+          int bestsme = INT_MAX;
+          int sadpb = x->sadperbit4;
+          MV mvp_full;
+          int max_mv;
+          int cost_list[5];
+          int tmp_col_min = x->mv_col_min;
+          int tmp_col_max = x->mv_col_max;
+          int tmp_row_min = x->mv_row_min;
+          int tmp_row_max = x->mv_row_max;
+
+          // Stop if the best so far is good enough that a new motion search
+          if (best_rd < label_mv_thresh) break;  // cannot be justified.
+
+          if (cpi->oxcf.mode != BEST) {
+#if CONFIG_EXT_INTER
+            bsi->mvp.as_int = bsi->ref_mv[0]->as_int;
+#else
+// use previous block's result as next block's MV predictor.
+#if !CONFIG_REF_MV
+            if (i > 0) {
+              bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
+              if (i == 2) bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
+            }
+#endif
+#endif  // CONFIG_EXT_INTER
+          }
+          if (i == 0)
+            max_mv = x->max_mv_context[mbmi->ref_frame[0]];
+          else
+            max_mv =
+                VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
+
+          if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
+            // Take wtd average of the step_params based on the last frame's
+            // max mv magnitude and the best ref mvs of the current block for
+            // the given reference.
+            step_param =
+                (vp10_init_search_range(max_mv) + cpi->mv_step_param) / 2;
+          } else {
+            step_param = cpi->mv_step_param;
+          }
+
+#if CONFIG_REF_MV
+          mvp_full.row = bsi->ref_mv[0]->as_mv.row >> 3;
+          mvp_full.col = bsi->ref_mv[0]->as_mv.col >> 3;
+#else
+          mvp_full.row = bsi->mvp.as_mv.row >> 3;
+          mvp_full.col = bsi->mvp.as_mv.col >> 3;
+#endif
+
+          if (cpi->sf.adaptive_motion_search) {
+            mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
+            mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
+            step_param = VPXMAX(step_param, 8);
+          }
+
+          // adjust src pointer for this block
+          mi_buf_shift(x, i);
+
+          vp10_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
+
+          x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;
+
+#if CONFIG_REF_MV
+          vp10_set_mvcost(x, mbmi->ref_frame[0]);
+#endif
+          bestsme = vp10_full_pixel_search(
+              cpi, x, bsize, &mvp_full, step_param, sadpb,
+              cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
+              &bsi->ref_mv[0]->as_mv, INT_MAX, 1);
+
+          x->mv_col_min = tmp_col_min;
+          x->mv_col_max = tmp_col_max;
+          x->mv_row_min = tmp_row_min;
+          x->mv_row_max = tmp_row_max;
+
+          if (bestsme < INT_MAX) {
+            int distortion;
+            if (cpi->sf.use_upsampled_references) {
+              int best_mv_var;
+              const int try_second =
+                  x->second_best_mv.as_int != INVALID_MV &&
+                  x->second_best_mv.as_int != x->best_mv.as_int;
+              const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
+              const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
+              // Use up-sampled reference frames.
+              struct macroblockd_plane *const pd = &xd->plane[0];
+              struct buf_2d backup_pred = pd->pre[0];
+              const YV12_BUFFER_CONFIG *upsampled_ref =
+                  get_upsampled_ref(cpi, mbmi->ref_frame[0]);
+
+              // Set pred for Y plane
+              setup_pred_plane(
+                  &pd->pre[0], upsampled_ref->y_buffer,
+                  upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
+                  upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL,
+                  pd->subsampling_x, pd->subsampling_y);
+
+              // adjust pred pointer for this block
+              pd->pre[0].buf =
+                  &pd->pre[0].buf[(vp10_raster_block_offset(BLOCK_8X8, i,
+                                                            pd->pre[0].stride))
+                                  << 3];
+
+              best_mv_var = cpi->find_fractional_mv_step(
+                  x, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv,
+                  x->errorperbit, &cpi->fn_ptr[bsize],
+                  cpi->sf.mv.subpel_force_stop,
+                  cpi->sf.mv.subpel_iters_per_step,
+                  cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
+                  &distortion, &x->pred_sse[mbmi->ref_frame[0]], NULL, pw, ph,
+                  1);
+
+              if (try_second) {
+                int this_var;
+                MV best_mv = x->best_mv.as_mv;
+                const MV ref_mv = bsi->ref_mv[0]->as_mv;
+                const int minc = VPXMAX(x->mv_col_min * 8, ref_mv.col - MV_MAX);
+                const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv.col + MV_MAX);
+                const int minr = VPXMAX(x->mv_row_min * 8, ref_mv.row - MV_MAX);
+                const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv.row + MV_MAX);
+
+                x->best_mv = x->second_best_mv;
+                if (x->best_mv.as_mv.row * 8 <= maxr &&
+                    x->best_mv.as_mv.row * 8 >= minr &&
+                    x->best_mv.as_mv.col * 8 <= maxc &&
+                    x->best_mv.as_mv.col * 8 >= minc) {
+                  this_var = cpi->find_fractional_mv_step(
+                      x, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv,
+                      x->errorperbit, &cpi->fn_ptr[bsize],
+                      cpi->sf.mv.subpel_force_stop,
+                      cpi->sf.mv.subpel_iters_per_step,
+                      cond_cost_list(cpi, cost_list), x->nmvjointcost,
+                      x->mvcost, &distortion, &x->pred_sse[mbmi->ref_frame[0]],
+                      NULL, pw, ph, 1);
+                  if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
+                  x->best_mv.as_mv = best_mv;
+                }
+              }
+
+              // Restore the reference frames.
+              pd->pre[0] = backup_pred;
+            } else {
+              cpi->find_fractional_mv_step(
+                  x, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv,
+                  x->errorperbit, &cpi->fn_ptr[bsize],
+                  cpi->sf.mv.subpel_force_stop,
+                  cpi->sf.mv.subpel_iters_per_step,
+                  cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
+                  &distortion, &x->pred_sse[mbmi->ref_frame[0]], NULL, 0, 0, 0);
+            }
+
+// save motion search result for use in compound prediction
+#if CONFIG_EXT_INTER
+            seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
+#else
+            seg_mvs[i][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv;
+#endif  // CONFIG_EXT_INTER
+          }
+
+          if (cpi->sf.adaptive_motion_search)
+            x->pred_mv[mbmi->ref_frame[0]] = x->best_mv.as_mv;
+
+#if CONFIG_EXT_INTER
+          mode_mv[this_mode][0] = x->best_mv;
+#else
+          mode_mv[NEWMV][0] = x->best_mv;
+#endif  // CONFIG_EXT_INTER
+
+          // restore src pointers
+          mi_buf_restore(x, orig_src, orig_pre);
+        }
+
+        if (has_second_rf) {
+#if CONFIG_EXT_INTER
+          if (seg_mvs[i][mv_idx][mbmi->ref_frame[1]].as_int == INVALID_MV ||
+              seg_mvs[i][mv_idx][mbmi->ref_frame[0]].as_int == INVALID_MV)
+#else
+          if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
+              seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
+#endif  // CONFIG_EXT_INTER
+            continue;
+        }
+
+#if CONFIG_DUAL_FILTER
+        (void)run_mv_search;
+#endif
+
+        if (has_second_rf &&
+#if CONFIG_EXT_INTER
+            this_mode == NEW_NEWMV &&
+#else
+            this_mode == NEWMV &&
+#endif  // CONFIG_EXT_INTER
+#if CONFIG_DUAL_FILTER
+            (mbmi->interp_filter[0] == EIGHTTAP_REGULAR || run_mv_search))
+#else
+            (mbmi->interp_filter == EIGHTTAP_REGULAR || run_mv_search))
+#endif
+        {
+          // adjust src pointers
+          mi_buf_shift(x, i);
+          if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
+            int rate_mv;
+            joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row,
+                                mi_col,
+#if CONFIG_EXT_INTER
+                                bsi->ref_mv, seg_mvs[i][mv_idx],
+#else
+                                seg_mvs[i],
+#endif  // CONFIG_EXT_INTER
+                                &rate_mv, i);
+#if CONFIG_EXT_INTER
+            compound_seg_newmvs[i][0].as_int =
+                frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
+            compound_seg_newmvs[i][1].as_int =
+                frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
+#else
+            seg_mvs[i][mbmi->ref_frame[0]].as_int =
+                frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
+            seg_mvs[i][mbmi->ref_frame[1]].as_int =
+                frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
+#endif  // CONFIG_EXT_INTER
+          }
+          // restore src pointers
+          mi_buf_restore(x, orig_src, orig_pre);
+        }
+
+        bsi->rdstat[i][mode_idx].brate = set_and_cost_bmi_mvs(
+            cpi, x, xd, i, this_mode, mode_mv[this_mode], frame_mv,
+#if CONFIG_EXT_INTER
+            seg_mvs[i][mv_idx], compound_seg_newmvs[i],
+#else
+            seg_mvs[i],
+#endif  // CONFIG_EXT_INTER
+            bsi->ref_mv, x->nmvjointcost, x->mvcost);
+
+        for (ref = 0; ref < 1 + has_second_rf; ++ref) {
+          bsi->rdstat[i][mode_idx].mvs[ref].as_int =
+              mode_mv[this_mode][ref].as_int;
+          if (num_4x4_blocks_wide > 1)
+            bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
+                mode_mv[this_mode][ref].as_int;
+          if (num_4x4_blocks_high > 1)
+            bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
+                mode_mv[this_mode][ref].as_int;
+#if CONFIG_REF_MV
+          bsi->rdstat[i][mode_idx].pred_mv[ref].as_int =
+              mi->bmi[i].pred_mv_s8[ref].as_int;
+          if (num_4x4_blocks_wide > 1)
+            bsi->rdstat[i + 1][mode_idx].pred_mv[ref].as_int =
+                mi->bmi[i].pred_mv_s8[ref].as_int;
+          if (num_4x4_blocks_high > 1)
+            bsi->rdstat[i + 2][mode_idx].pred_mv[ref].as_int =
+                mi->bmi[i].pred_mv_s8[ref].as_int;
+#endif
+#if CONFIG_EXT_INTER
+          bsi->rdstat[i][mode_idx].ref_mv[ref].as_int =
+              bsi->ref_mv[ref]->as_int;
+          if (num_4x4_blocks_wide > 1)
+            bsi->rdstat[i + 1][mode_idx].ref_mv[ref].as_int =
+                bsi->ref_mv[ref]->as_int;
+          if (num_4x4_blocks_high > 1)
+            bsi->rdstat[i + 2][mode_idx].ref_mv[ref].as_int =
+                bsi->ref_mv[ref]->as_int;
+#endif  // CONFIG_EXT_INTER
+        }
+
+        // Trap vectors that reach beyond the UMV borders
+        if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
+            (has_second_rf && mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
+          continue;
+
+        if (filter_idx > 0) {
+          BEST_SEG_INFO *ref_bsi = bsi_buf;
+          subpelmv = 0;
+          have_ref = 1;
+
+          for (ref = 0; ref < 1 + has_second_rf; ++ref) {
+            subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
+#if CONFIG_EXT_INTER
+            if (have_newmv_in_inter_mode(this_mode))
+              have_ref &= ((mode_mv[this_mode][ref].as_int ==
+                            ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int) &&
+                           (bsi->ref_mv[ref]->as_int ==
+                            ref_bsi->rdstat[i][mode_idx].ref_mv[ref].as_int));
+            else
+#endif  // CONFIG_EXT_INTER
+              have_ref &= mode_mv[this_mode][ref].as_int ==
+                          ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
+          }
+
+          have_ref &= ref_bsi->rdstat[i][mode_idx].brate > 0;
+
+          if (filter_idx > 1 && !subpelmv && !have_ref) {
+            ref_bsi = bsi_buf + 1;
+            have_ref = 1;
+            for (ref = 0; ref < 1 + has_second_rf; ++ref)
+#if CONFIG_EXT_INTER
+              if (have_newmv_in_inter_mode(this_mode))
+                have_ref &= ((mode_mv[this_mode][ref].as_int ==
+                              ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int) &&
+                             (bsi->ref_mv[ref]->as_int ==
+                              ref_bsi->rdstat[i][mode_idx].ref_mv[ref].as_int));
+              else
+#endif  // CONFIG_EXT_INTER
+                have_ref &= mode_mv[this_mode][ref].as_int ==
+                            ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
+
+            have_ref &= ref_bsi->rdstat[i][mode_idx].brate > 0;
+          }
+
+          if (!subpelmv && have_ref &&
+              ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
+#if CONFIG_REF_MV
+            bsi->rdstat[i][mode_idx].byrate =
+                ref_bsi->rdstat[i][mode_idx].byrate;
+            bsi->rdstat[i][mode_idx].bdist = ref_bsi->rdstat[i][mode_idx].bdist;
+            bsi->rdstat[i][mode_idx].bsse = ref_bsi->rdstat[i][mode_idx].bsse;
+            bsi->rdstat[i][mode_idx].brate +=
+                ref_bsi->rdstat[i][mode_idx].byrate;
+            bsi->rdstat[i][mode_idx].eobs = ref_bsi->rdstat[i][mode_idx].eobs;
+
+            bsi->rdstat[i][mode_idx].brdcost =
+                RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate,
+                       bsi->rdstat[i][mode_idx].bdist);
+
+            memcpy(bsi->rdstat[i][mode_idx].ta, ref_bsi->rdstat[i][mode_idx].ta,
+                   sizeof(bsi->rdstat[i][mode_idx].ta));
+            memcpy(bsi->rdstat[i][mode_idx].tl, ref_bsi->rdstat[i][mode_idx].tl,
+                   sizeof(bsi->rdstat[i][mode_idx].tl));
+#else
+            memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
+                   sizeof(SEG_RDSTAT));
+#endif
+            if (num_4x4_blocks_wide > 1)
+              bsi->rdstat[i + 1][mode_idx].eobs =
+                  ref_bsi->rdstat[i + 1][mode_idx].eobs;
+            if (num_4x4_blocks_high > 1)
+              bsi->rdstat[i + 2][mode_idx].eobs =
+                  ref_bsi->rdstat[i + 2][mode_idx].eobs;
+
+            if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
+#if CONFIG_REF_MV
+              // If the NEWMV mode is using the same motion vector as the
+              // NEARESTMV mode, skip the rest of the rate-distortion
+              // calculations and use the inferred motion vector modes.
+              if (this_mode == NEWMV) {
+                if (has_second_rf) {
+                  if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+                          bsi->ref_mv[0]->as_int &&
+                      bsi->rdstat[i][mode_idx].mvs[1].as_int ==
+                          bsi->ref_mv[1]->as_int)
+                    continue;
+                } else {
+                  if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+                      bsi->ref_mv[0]->as_int)
+                    continue;
+                }
+              }
+#endif
+              mode_selected = this_mode;
+              best_rd = bsi->rdstat[i][mode_idx].brdcost;
+            }
+            continue;
+          }
+        }
+
+        bsi->rdstat[i][mode_idx].brdcost = encode_inter_mb_segment(
+            cpi, x, bsi->segment_rd - this_segment_rd, i,
+            &bsi->rdstat[i][mode_idx].byrate, &bsi->rdstat[i][mode_idx].bdist,
+            &bsi->rdstat[i][mode_idx].bsse, bsi->rdstat[i][mode_idx].ta,
+            bsi->rdstat[i][mode_idx].tl, idy, idx, mi_row, mi_col);
+
+        if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
+          bsi->rdstat[i][mode_idx].brdcost +=
+              RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0);
+          bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
+          bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
+          if (num_4x4_blocks_wide > 1)
+            bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
+          if (num_4x4_blocks_high > 1)
+            bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
+        }
+
+        if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
+#if CONFIG_REF_MV
+          // If the NEWMV mode is using the same motion vector as the
+          // NEARESTMV mode, skip the rest of the rate-distortion calculations
+          // and use the inferred motion vector modes.
+          if (this_mode == NEWMV) {
+            if (has_second_rf) {
+              if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+                      bsi->ref_mv[0]->as_int &&
+                  bsi->rdstat[i][mode_idx].mvs[1].as_int ==
+                      bsi->ref_mv[1]->as_int)
+                continue;
+            } else {
+              if (bsi->rdstat[i][mode_idx].mvs[0].as_int ==
+                  bsi->ref_mv[0]->as_int)
+                continue;
+            }
+          }
+#endif
+          mode_selected = this_mode;
+          best_rd = bsi->rdstat[i][mode_idx].brdcost;
+        }
+      } /*for each 4x4 mode*/
+
+      if (best_rd == INT64_MAX) {
+        int iy, midx;
+        for (iy = i + 1; iy < 4; ++iy)
+#if CONFIG_EXT_INTER
+          for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx)
+#else
+          for (midx = 0; midx < INTER_MODES; ++midx)
+#endif  // CONFIG_EXT_INTER
+            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
+        bsi->segment_rd = INT64_MAX;
+        return INT64_MAX;
+      }
+
+      mode_idx = INTER_OFFSET(mode_selected);
+      memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
+      memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
+
+#if CONFIG_EXT_INTER
+      mv_idx = (mode_selected == NEWFROMNEARMV) ? 1 : 0;
+      bsi->ref_mv[0]->as_int = bsi->rdstat[i][mode_idx].ref_mv[0].as_int;
+      if (has_second_rf)
+        bsi->ref_mv[1]->as_int = bsi->rdstat[i][mode_idx].ref_mv[1].as_int;
+#endif  // CONFIG_EXT_INTER
+      set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
+                           frame_mv,
+#if CONFIG_EXT_INTER
+                           seg_mvs[i][mv_idx], compound_seg_newmvs[i],
+#else
+                           seg_mvs[i],
+#endif  // CONFIG_EXT_INTER
+                           bsi->ref_mv, x->nmvjointcost, x->mvcost);
+
+      br += bsi->rdstat[i][mode_idx].brate;
+      bd += bsi->rdstat[i][mode_idx].bdist;
+      block_sse += bsi->rdstat[i][mode_idx].bsse;
+      segmentyrate += bsi->rdstat[i][mode_idx].byrate;
+      this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
+
+      if (this_segment_rd > bsi->segment_rd) {
+        int iy, midx;
+        for (iy = i + 1; iy < 4; ++iy)
+#if CONFIG_EXT_INTER
+          for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx)
+#else
+          for (midx = 0; midx < INTER_MODES; ++midx)
+#endif  // CONFIG_EXT_INTER
+            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
+        bsi->segment_rd = INT64_MAX;
+        return INT64_MAX;
+      }
+    }
+  } /* for each label */
+
+  bsi->r = br;
+  bsi->d = bd;
+  bsi->segment_yrate = segmentyrate;
+  bsi->segment_rd = this_segment_rd;
+  bsi->sse = block_sse;
+
+  // update the coding decisions
+  for (k = 0; k < 4; ++k) bsi->modes[k] = mi->bmi[k].as_mode;
+
+  if (bsi->segment_rd > best_rd) return INT64_MAX;
+  /* set it to the best */
+  for (idx = 0; idx < 4; idx++) {
+    mode_idx = INTER_OFFSET(bsi->modes[idx]);
+    mi->bmi[idx].as_mv[0].as_int = bsi->rdstat[idx][mode_idx].mvs[0].as_int;
+    if (has_second_ref(mbmi))
+      mi->bmi[idx].as_mv[1].as_int = bsi->rdstat[idx][mode_idx].mvs[1].as_int;
+#if CONFIG_REF_MV
+    mi->bmi[idx].pred_mv_s8[0] = bsi->rdstat[idx][mode_idx].pred_mv[0];
+    if (has_second_ref(mbmi))
+      mi->bmi[idx].pred_mv_s8[1] = bsi->rdstat[idx][mode_idx].pred_mv[1];
+#endif
+#if CONFIG_EXT_INTER
+    mi->bmi[idx].ref_mv[0].as_int = bsi->rdstat[idx][mode_idx].ref_mv[0].as_int;
+    if (has_second_rf)
+      mi->bmi[idx].ref_mv[1].as_int =
+          bsi->rdstat[idx][mode_idx].ref_mv[1].as_int;
+#endif  // CONFIG_EXT_INTER
+    x->plane[0].eobs[idx] = bsi->rdstat[idx][mode_idx].eobs;
+    mi->bmi[idx].as_mode = bsi->modes[idx];
+  }
+
+  // Hand the totals of the chosen segmentation back to the caller.
+  *returntotrate = bsi->r;
+  *returndistortion = bsi->d;
+  *returnyrate = bsi->segment_yrate;
+  *skippable = vp10_is_skippable_in_plane(x, BLOCK_8X8, 0);
+  *psse = bsi->sse;
+  mbmi->mode = bsi->modes[3];
+
+  return bsi->segment_rd;
+}
+
+// Estimates the cost of signalling each reference frame for the current block.
+//
+// Inputs:
+//   cm, xd     - handed to the vp10_get_*_prob() helpers that supply the
+//                probabilities.
+//   segment_id - segment whose SEG_LVL_REF_FRAME feature is checked.
+// Outputs:
+//   ref_costs_single[] - cost per frame when a single reference is used
+//                        (index INTRA_FRAME holds the intra cost).
+//   ref_costs_comp[]   - cost per frame when compound prediction is used.
+//   *comp_mode_p       - reference-mode probability, or 128 when
+//                        cm->reference_mode is not REFERENCE_MODE_SELECT.
+static void estimate_ref_frame_costs(const VP10_COMMON *cm,
+                                     const MACROBLOCKD *xd, int segment_id,
+                                     unsigned int *ref_costs_single,
+                                     unsigned int *ref_costs_comp,
+                                     vpx_prob *comp_mode_p) {
+  vpx_prob intra_inter_p;
+
+  // SEG_LVL_REF_FRAME is active for this segment: zero every cost, report 128
+  // as the reference-mode probability and stop.
+  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+    memset(ref_costs_single, 0,
+           TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single));
+    memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp));
+    *comp_mode_p = 128;
+    return;
+  }
+
+  intra_inter_p = vp10_get_intra_inter_prob(cm, xd);
+
+  // The reference-mode probability is only looked up when
+  // cm->reference_mode is REFERENCE_MODE_SELECT.
+  *comp_mode_p = (cm->reference_mode == REFERENCE_MODE_SELECT)
+                     ? vp10_get_reference_mode_prob(cm, xd)
+                     : 128;
+
+  // Intra costs only the intra/inter bit; every inter cost below starts from
+  // the opposite value of that bit.
+  ref_costs_single[INTRA_FRAME] = vp10_cost_bit(intra_inter_p, 0);
+
+  // Single-reference costs.
+  if (cm->reference_mode != COMPOUND_REFERENCE) {
+    const vpx_prob p1 = vp10_get_pred_prob_single_ref_p1(cm, xd);
+    const vpx_prob p2 = vp10_get_pred_prob_single_ref_p2(cm, xd);
+#if CONFIG_EXT_REFS
+    const vpx_prob p3 = vp10_get_pred_prob_single_ref_p3(cm, xd);
+    const vpx_prob p4 = vp10_get_pred_prob_single_ref_p4(cm, xd);
+    const vpx_prob p5 = vp10_get_pred_prob_single_ref_p5(cm, xd);
+#endif  // CONFIG_EXT_REFS
+    const unsigned int inter_cost = vp10_cost_bit(intra_inter_p, 1);
+
+#if CONFIG_EXT_REFS
+    // p1 = 0: LAST/LAST2/LAST3/GOLDEN, then p3 splits {LAST, LAST2} (0) from
+    // {LAST3, GOLDEN} (1); p4 and p5 pick the frame within each pair.
+    ref_costs_single[LAST_FRAME] = inter_cost + vp10_cost_bit(p1, 0) +
+                                   vp10_cost_bit(p3, 0) + vp10_cost_bit(p4, 0);
+    ref_costs_single[LAST2_FRAME] = inter_cost + vp10_cost_bit(p1, 0) +
+                                    vp10_cost_bit(p3, 0) + vp10_cost_bit(p4, 1);
+    ref_costs_single[LAST3_FRAME] = inter_cost + vp10_cost_bit(p1, 0) +
+                                    vp10_cost_bit(p3, 1) + vp10_cost_bit(p5, 0);
+    ref_costs_single[GOLDEN_FRAME] =
+        inter_cost + vp10_cost_bit(p1, 0) + vp10_cost_bit(p3, 1) +
+        vp10_cost_bit(p5, 1);
+
+    // p1 = 1: BWDREF/ALTREF, told apart by p2.
+    ref_costs_single[BWDREF_FRAME] =
+        inter_cost + vp10_cost_bit(p1, 1) + vp10_cost_bit(p2, 0);
+    ref_costs_single[ALTREF_FRAME] =
+        inter_cost + vp10_cost_bit(p1, 1) + vp10_cost_bit(p2, 1);
+#else
+    // p1 = 0: LAST. p1 = 1: GOLDEN/ALTREF, told apart by p2.
+    ref_costs_single[LAST_FRAME] = inter_cost + vp10_cost_bit(p1, 0);
+    ref_costs_single[GOLDEN_FRAME] =
+        inter_cost + vp10_cost_bit(p1, 1) + vp10_cost_bit(p2, 0);
+    ref_costs_single[ALTREF_FRAME] =
+        inter_cost + vp10_cost_bit(p1, 1) + vp10_cost_bit(p2, 1);
+#endif  // CONFIG_EXT_REFS
+  } else {
+    // reference_mode == COMPOUND_REFERENCE: every single-reference cost is
+    // the constant 512.
+    ref_costs_single[LAST_FRAME] = 512;
+#if CONFIG_EXT_REFS
+    ref_costs_single[LAST2_FRAME] = 512;
+    ref_costs_single[LAST3_FRAME] = 512;
+    ref_costs_single[BWDREF_FRAME] = 512;
+#endif  // CONFIG_EXT_REFS
+    ref_costs_single[GOLDEN_FRAME] = 512;
+    ref_costs_single[ALTREF_FRAME] = 512;
+  }
+
+  // Compound-reference costs.
+  if (cm->reference_mode != SINGLE_REFERENCE) {
+    const vpx_prob comp_p = vp10_get_pred_prob_comp_ref_p(cm, xd);
+#if CONFIG_EXT_REFS
+    const vpx_prob comp_p1 = vp10_get_pred_prob_comp_ref_p1(cm, xd);
+    const vpx_prob comp_p2 = vp10_get_pred_prob_comp_ref_p2(cm, xd);
+    const vpx_prob bwd_p = vp10_get_pred_prob_comp_bwdref_p(cm, xd);
+#endif  // CONFIG_EXT_REFS
+    const unsigned int inter_cost = vp10_cost_bit(intra_inter_p, 1);
+
+#if CONFIG_EXT_REFS
+    // comp_p splits {LAST, LAST2} (0) from {LAST3, GOLDEN} (1); comp_p1 and
+    // comp_p2 pick the frame within each pair.
+    ref_costs_comp[LAST_FRAME] = inter_cost + vp10_cost_bit(comp_p, 0) +
+                                 vp10_cost_bit(comp_p1, 1);
+    ref_costs_comp[LAST2_FRAME] = inter_cost + vp10_cost_bit(comp_p, 0) +
+                                  vp10_cost_bit(comp_p1, 0);
+    ref_costs_comp[LAST3_FRAME] = inter_cost + vp10_cost_bit(comp_p, 1) +
+                                  vp10_cost_bit(comp_p2, 0);
+    ref_costs_comp[GOLDEN_FRAME] = inter_cost + vp10_cost_bit(comp_p, 1) +
+                                   vp10_cost_bit(comp_p2, 1);
+
+    // NOTE(zoeliu): BWDREF and ALTREF each add an extra cost by coding 1
+    // more bit.
+    ref_costs_comp[BWDREF_FRAME] = vp10_cost_bit(bwd_p, 0);
+    ref_costs_comp[ALTREF_FRAME] = vp10_cost_bit(bwd_p, 1);
+#else
+    // comp_p = 0: LAST. comp_p = 1: GOLDEN.
+    ref_costs_comp[LAST_FRAME] = inter_cost + vp10_cost_bit(comp_p, 0);
+    ref_costs_comp[GOLDEN_FRAME] = inter_cost + vp10_cost_bit(comp_p, 1);
+#endif  // CONFIG_EXT_REFS
+  } else {
+    // reference_mode == SINGLE_REFERENCE: every compound cost written here is
+    // the constant 512.
+    ref_costs_comp[LAST_FRAME] = 512;
+#if CONFIG_EXT_REFS
+    ref_costs_comp[LAST2_FRAME] = 512;
+    ref_costs_comp[LAST3_FRAME] = 512;
+    ref_costs_comp[BWDREF_FRAME] = 512;
+    ref_costs_comp[ALTREF_FRAME] = 512;
+#endif  // CONFIG_EXT_REFS
+    ref_costs_comp[GOLDEN_FRAME] = 512;
+  }
+}
+
+// Records the outcome of the current mode search in |ctx| so that it can be
+// reinstated later if the block ends up being coded this way.
+//   mode_index      stored as ctx->best_mode_index
+//   comp_pred_diff  values indexed by reference mode; each is narrowed to int
+//   skippable       stored verbatim as ctx->skippable
+static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
+                                 int mode_index,
+                                 int64_t comp_pred_diff[REFERENCE_MODES],
+                                 int skippable) {
+  // By-value copies of the block's mode info and its extension data.
+  ctx->mic = *x->e_mbd.mi[0];
+  ctx->mbmi_ext = *x->mbmi_ext;
+
+  ctx->best_mode_index = mode_index;
+  ctx->skippable = skippable;
+  ctx->skip = x->skip;
+
+  ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
+  ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
+  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
+}
+
+// Prepares inter prediction from |ref_frame| for the block at
+// (mi_row, mi_col):
+//   1. points yv12_mb[ref_frame] at the reference frame's planes,
+//   2. collects the candidate MV list for the reference,
+//   3. derives the nearest/near MVs into frame_nearest_mv[ref_frame] and
+//      frame_near_mv[ref_frame],
+//   4. for unscaled references and blocks of at least 8x8, runs
+//      vp10_mv_pred() on the luma plane.
+static void setup_buffer_inter(
+    VP10_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
+    BLOCK_SIZE block_size, int mi_row, int mi_col,
+    int_mv frame_nearest_mv[TOTAL_REFS_PER_FRAME],
+    int_mv frame_near_mv[TOTAL_REFS_PER_FRAME],
+    struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]) {
+  const VP10_COMMON *cm = &cpi->common;
+  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO_EXT *const ext = x->mbmi_ext;
+  int_mv *const mv_candidates = ext->ref_mvs[ref_frame];
+  // frame_refs[] is indexed from 0 while ref_frame is offset by one.
+  const struct scale_factors *const scale = &cm->frame_refs[ref_frame - 1].sf;
+
+  assert(yv12 != NULL);
+
+  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
+  // use the UV scaling factors.
+  vp10_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, scale,
+                        scale);
+
+  // Build the initial, ordered list of candidate vectors from the neighbours.
+  vp10_find_mv_refs(
+      cm, xd, xd->mi[0], ref_frame,
+#if CONFIG_REF_MV
+      &ext->ref_mv_count[ref_frame], ext->ref_mv_stack[ref_frame],
+#if CONFIG_EXT_INTER
+      ext->compound_mode_context,
+#endif  // CONFIG_EXT_INTER
+#endif  // CONFIG_REF_MV
+      mv_candidates, mi_row, mi_col, NULL, NULL, ext->mode_context);
+
+  // Candidate refinement that is carried out at both encoder and decoder.
+  vp10_find_best_ref_mvs(cm->allow_high_precision_mv, mv_candidates,
+                         &frame_nearest_mv[ref_frame],
+                         &frame_near_mv[ref_frame]);
+
+  // The remaining refinement is encoder-only: it tests the top few candidates
+  // in full and picks the best as the centre point for subsequent searches.
+  // The current implementation doesn't support scaling.
+  if (vp10_is_scaled(scale) || block_size < BLOCK_8X8) return;
+
+  vp10_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
+               block_size);
+}
+
+// Full-pixel motion search followed by sub-pixel refinement against a single
+// reference frame of the current block.
+//
+// Outputs:
+//   x->best_mv       the selected motion vector
+//   *rate_mv         cost of coding x->best_mv relative to the reference MV
+//   x->pred_sse[ref] written by the sub-pixel search
+//   x->pred_mv[ref]  updated when adaptive motion search is enabled
+//
+// With CONFIG_EXT_INTER, ref_idx selects which of the block's reference frames
+// is searched and mv_idx selects the ref_mvs[] entry used as the reference MV;
+// otherwise reference 0 / entry 0 are used.
+//
+// Early-out: with adaptive motion search enabled, if this reference's
+// predicted SAD is much worse than another reference's, the search is skipped:
+// x->best_mv is set to INVALID_MV, x->pred_mv[ref] is zeroed and *rate_mv is
+// left untouched.
+//
+// The MV search limits in |x| and the pre[] planes of |xd| are modified
+// temporarily and restored before returning.
+static void single_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
+                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
+#if CONFIG_EXT_INTER
+                                 int ref_idx, int mv_idx,
+#endif  // CONFIG_EXT_INTER
+                                 int *rate_mv) {
+  MACROBLOCKD *xd = &x->e_mbd;
+  const VP10_COMMON *cm = &cpi->common;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
+  int bestsme = INT_MAX;
+  int step_param;
+  int sadpb = x->sadperbit16;
+  MV mvp_full;
+#if CONFIG_EXT_INTER
+  int ref = mbmi->ref_frame[ref_idx];
+  MV ref_mv = x->mbmi_ext->ref_mvs[ref][mv_idx].as_mv;
+#else
+  int ref = mbmi->ref_frame[0];
+  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
+  int ref_idx = 0;
+#endif  // CONFIG_EXT_INTER
+
+  // Saved so the limits narrowed by vp10_set_mv_search_range() can be undone.
+  int tmp_col_min = x->mv_col_min;
+  int tmp_col_max = x->mv_col_max;
+  int tmp_row_min = x->mv_row_min;
+  int tmp_row_max = x->mv_row_max;
+  int cost_list[5];
+
+  const YV12_BUFFER_CONFIG *scaled_ref_frame =
+      vp10_get_scaled_ref_frame(cpi, ref);
+
+  // Candidate starting points; the one used is chosen through
+  // x->mv_best_ref_index[ref].
+  MV pred_mv[3];
+  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
+  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
+  pred_mv[2] = x->pred_mv[ref];
+
+#if CONFIG_REF_MV
+  vp10_set_mvcost(x, ref);
+#endif
+
+  if (scaled_ref_frame) {
+    int i;
+    // Swap out the reference frame for a version that's been scaled to
+    // match the resolution of the current frame, allowing the existing
+    // motion search code to be used without additional modifications.
+    for (i = 0; i < MAX_MB_PLANE; i++)
+      backup_yv12[i] = xd->plane[i].pre[ref_idx];
+
+    vp10_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
+  }
+
+  // Work out the size of the first step in the mv step search.
+  // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc.
+  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
+    // Take wtd average of the step_params based on the last frame's
+    // max mv magnitude and that based on the best ref mvs of the current
+    // block for the given reference.
+    step_param =
+        (vp10_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
+        2;
+  } else {
+    step_param = cpi->mv_step_param;
+  }
+
+  if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
+    int boffset =
+        2 * (b_width_log2_lookup[cm->sb_size] -
+             VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
+    step_param = VPXMAX(step_param, boffset);
+  }
+
+  if (cpi->sf.adaptive_motion_search) {
+    int bwl = b_width_log2_lookup[bsize];
+    int bhl = b_height_log2_lookup[bsize];
+    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
+
+    if (tlevel < 5) step_param += 2;
+
+    // prev_mv_sad is not setup for dynamically scaled frames.
+    if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
+      int i;
+      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
+        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
+          x->pred_mv[ref].row = 0;
+          x->pred_mv[ref].col = 0;
+          x->best_mv.as_int = INVALID_MV;
+
+          if (scaled_ref_frame) {
+            // Distinct index name: must not shadow the frame loop's |i|.
+            int j;
+            for (j = 0; j < MAX_MB_PLANE; ++j)
+              xd->plane[j].pre[ref_idx] = backup_yv12[j];
+          }
+          return;
+        }
+      }
+    }
+  }
+
+  vp10_set_mv_search_range(x, &ref_mv);
+
+  mvp_full = pred_mv[x->mv_best_ref_index[ref]];
+
+  // Shift the starting point right by 3 to get the full-pixel position.
+  mvp_full.col >>= 3;
+  mvp_full.row >>= 3;
+
+  x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;
+
+  bestsme = vp10_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
+                                   cond_cost_list(cpi, cost_list), &ref_mv,
+                                   INT_MAX, 1);
+
+  x->mv_col_min = tmp_col_min;
+  x->mv_col_max = tmp_col_max;
+  x->mv_row_min = tmp_row_min;
+  x->mv_row_max = tmp_row_max;
+
+  if (bestsme < INT_MAX) {
+    int dis; /* TODO: use dis in distortion calculation later. */
+    if (cpi->sf.use_upsampled_references) {
+      int best_mv_var;
+      const int try_second = x->second_best_mv.as_int != INVALID_MV &&
+                             x->second_best_mv.as_int != x->best_mv.as_int;
+      const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
+      const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
+      // Use up-sampled reference frames.
+      struct macroblockd_plane *const pd = &xd->plane[0];
+      struct buf_2d backup_pred = pd->pre[ref_idx];
+      const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
+
+      // Set pred for Y plane
+      setup_pred_plane(&pd->pre[ref_idx], upsampled_ref->y_buffer,
+                       upsampled_ref->y_crop_width,
+                       upsampled_ref->y_crop_height, upsampled_ref->y_stride,
+                       (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
+                       pd->subsampling_y);
+
+      best_mv_var = cpi->find_fractional_mv_step(
+          x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
+          &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
+          cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
+          x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph, 1);
+
+      if (try_second) {
+        const int minc = VPXMAX(x->mv_col_min * 8, ref_mv.col - MV_MAX);
+        const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv.col + MV_MAX);
+        const int minr = VPXMAX(x->mv_row_min * 8, ref_mv.row - MV_MAX);
+        const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv.row + MV_MAX);
+        // Refined winner so far; x->best_mv is reused as scratch below.
+        MV best_mv = x->best_mv.as_mv;
+
+        // The sub-pixel search starts from x->best_mv, so load the second
+        // best full-pixel candidate into it.
+        x->best_mv = x->second_best_mv;
+        if (x->best_mv.as_mv.row * 8 <= maxr &&
+            x->best_mv.as_mv.row * 8 >= minr &&
+            x->best_mv.as_mv.col * 8 <= maxc &&
+            x->best_mv.as_mv.col * 8 >= minc) {
+          const int this_var = cpi->find_fractional_mv_step(
+              x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
+              &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
+              cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
+              x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph,
+              1);
+          if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
+        }
+        // Always reinstate the winner. Previously this was done only inside
+        // the range check, so an out-of-range second candidate left the
+        // unrefined full-pixel vector in x->best_mv and discarded the refined
+        // result of the first search.
+        x->best_mv.as_mv = best_mv;
+      }
+
+      // Restore the reference frames.
+      pd->pre[ref_idx] = backup_pred;
+    } else {
+      cpi->find_fractional_mv_step(
+          x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
+          &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
+          cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
+          x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0, 0);
+    }
+  }
+  *rate_mv = vp10_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost,
+                              x->mvcost, MV_COST_WEIGHT);
+
+  if (cpi->sf.adaptive_motion_search) x->pred_mv[ref] = x->best_mv.as_mv;
+
+  if (scaled_ref_frame) {
+    int i;
+    for (i = 0; i < MAX_MB_PLANE; i++)
+      xd->plane[i].pre[ref_idx] = backup_yv12[i];
+  }
+}
+
+// Points the destination buffer and stride of every plane of |xd| back at the
+// supplied originals.
+static INLINE void restore_dst_buf(MACROBLOCKD *xd,
+                                   uint8_t *orig_dst[MAX_MB_PLANE],
+                                   int orig_dst_stride[MAX_MB_PLANE]) {
+  int plane = 0;
+  while (plane < MAX_MB_PLANE) {
+    struct macroblockd_plane *const pd = &xd->plane[plane];
+    pd->dst.stride = orig_dst_stride[plane];
+    pd->dst.buf = orig_dst[plane];
+    ++plane;
+  }
+}
+
+#if CONFIG_OBMC
+// OBMC counterpart of the single-reference motion search: an OBMC full-pixel
+// diamond search followed by OBMC sub-pixel refinement, both driven by the
+// caller-supplied |wsrc| and |mask| buffers.
+//
+//   tmp_mv   receives the selected vector (INVALID_MV on the early-out)
+//   pred_mv  starting point; shifted right by 3 to form the full-pixel start
+//   rate_mv  receives the cost of coding *tmp_mv relative to the reference MV
+//            (not written on the early-out)
+//
+// The MV search limits in |x| and the pre[] planes of |xd| are modified
+// temporarily and restored before returning.
+static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
+                                      BLOCK_SIZE bsize, int mi_row, int mi_col,
+                                      const int32_t *wsrc, const int32_t *mask,
+#if CONFIG_EXT_INTER
+                                      int ref_idx, int mv_idx,
+#endif  // CONFIG_EXT_INTER
+                                      int_mv *tmp_mv, int_mv pred_mv,
+                                      int *rate_mv) {
+  const VP10_COMMON *cm = &cpi->common;
+  MACROBLOCKD *xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_EXT_INTER
+  const int ref = mbmi->ref_frame[ref_idx];
+  MV ref_mv = x->mbmi_ext->ref_mvs[ref][mv_idx].as_mv;
+#else
+  const int ref_idx = 0;
+  const int ref = mbmi->ref_frame[0];
+  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
+#endif  // CONFIG_EXT_INTER
+  // Search limits as they were on entry; reinstated after the search.
+  const int saved_col_min = x->mv_col_min;
+  const int saved_col_max = x->mv_col_max;
+  const int saved_row_min = x->mv_row_min;
+  const int saved_row_max = x->mv_row_max;
+  const int sad_per_bit = x->sadperbit16;
+  const YV12_BUFFER_CONFIG *const scaled_ref =
+      vp10_get_scaled_ref_frame(cpi, ref);
+  struct buf_2d saved_pre[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
+  int search_err = INT_MAX;
+  int first_step;
+  MV start_mv;
+
+#if CONFIG_REF_MV
+  vp10_set_mvcost(x, ref);
+#endif
+
+  if (scaled_ref) {
+    int plane;
+    // Substitute a copy of the reference that has been scaled to the current
+    // frame's resolution so the unmodified search code can run on it.
+    for (plane = 0; plane < MAX_MB_PLANE; plane++)
+      saved_pre[plane] = xd->plane[plane].pre[ref_idx];
+
+    vp10_setup_pre_planes(xd, ref_idx, scaled_ref, mi_row, mi_col, NULL);
+  }
+
+  // Size of the first step of the mv step search.
+  // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc.
+  first_step = cpi->mv_step_param;
+  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
+    // Average of the configured step and the one derived from
+    // x->max_mv_context[ref].
+    first_step =
+        (vp10_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
+        2;
+  }
+
+  if (cpi->sf.adaptive_motion_search) {
+    const int bwl = b_width_log2_lookup[bsize];
+    const int bhl = b_height_log2_lookup[bsize];
+    int tlevel;
+
+    if (bsize < cm->sb_size) {
+      const int boffset =
+          2 * (b_width_log2_lookup[cm->sb_size] - VPXMIN(bhl, bwl));
+      first_step = VPXMAX(first_step, boffset);
+    }
+
+    tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
+    if (tlevel < 5) first_step += 2;
+
+    // prev_mv_sad is not setup for dynamically scaled frames.
+    if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
+      int frame;
+      for (frame = LAST_FRAME; frame <= ALTREF_FRAME && cm->show_frame;
+           ++frame) {
+        if ((x->pred_mv_sad[ref] >> 3) <= x->pred_mv_sad[frame]) continue;
+
+        // Another reference predicts far better: give up on this one.
+        x->pred_mv[ref].row = 0;
+        x->pred_mv[ref].col = 0;
+        tmp_mv->as_int = INVALID_MV;
+
+        if (scaled_ref) {
+          int plane;
+          for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+            xd->plane[plane].pre[ref_idx] = saved_pre[plane];
+        }
+        return;
+      }
+    }
+  }
+
+  vp10_set_mv_search_range(x, &ref_mv);
+
+  start_mv = pred_mv.as_mv;
+  start_mv.col >>= 3;
+  start_mv.row >>= 3;
+
+  search_err = vp10_obmc_full_pixel_diamond(
+      cpi, x, wsrc, mask, &start_mv, first_step, sad_per_bit,
+      MAX_MVSEARCH_STEPS - 1 - first_step, 1, &cpi->fn_ptr[bsize], &ref_mv,
+      &tmp_mv->as_mv, ref_idx);
+
+  x->mv_col_min = saved_col_min;
+  x->mv_col_max = saved_col_max;
+  x->mv_row_min = saved_row_min;
+  x->mv_row_max = saved_row_max;
+
+  if (search_err < INT_MAX) {
+    int dis;
+    vp10_find_best_obmc_sub_pixel_tree_up(
+        cpi, x, wsrc, mask, mi_row, mi_col, &tmp_mv->as_mv, &ref_mv,
+        cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
+        cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
+        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], ref_idx,
+        cpi->sf.use_upsampled_references);
+  }
+  *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
+                              x->mvcost, MV_COST_WEIGHT);
+
+  if (scaled_ref) {
+    int plane;
+    for (plane = 0; plane < MAX_MB_PLANE; plane++)
+      xd->plane[plane].pre[ref_idx] = saved_pre[plane];
+  }
+}
+#endif // CONFIG_OBMC
+
+#if CONFIG_EXT_INTER
+// Masked motion search for one reference of a compound block: a masked
+// full-pixel diamond search followed by masked sub-pixel refinement, both
+// weighted by |mask| (row pitch |mask_stride|).
+//
+//   tmp_mv   receives the selected vector (INVALID_MV on the early-out)
+//   rate_mv  receives the cost of coding *tmp_mv relative to the reference MV
+//            (not written on the early-out)
+//   ref_idx  which of the block's reference frames is searched
+//   mv_idx   which ref_mvs[] entry is used as the reference MV
+//
+// The MV search limits in |x| and the pre[] planes of |xd| are modified
+// temporarily and restored on every exit path.
+static void do_masked_motion_search(VP10_COMP *cpi, MACROBLOCK *x,
+                                    const uint8_t *mask, int mask_stride,
+                                    BLOCK_SIZE bsize, int mi_row, int mi_col,
+                                    int_mv *tmp_mv, int *rate_mv, int ref_idx,
+                                    int mv_idx) {
+  MACROBLOCKD *xd = &x->e_mbd;
+  const VP10_COMMON *cm = &cpi->common;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
+  int bestsme = INT_MAX;
+  int step_param;
+  int sadpb = x->sadperbit16;
+  MV mvp_full;
+  int ref = mbmi->ref_frame[ref_idx];
+  MV ref_mv = x->mbmi_ext->ref_mvs[ref][mv_idx].as_mv;
+
+  // Saved so the limits narrowed by vp10_set_mv_search_range() can be undone.
+  int tmp_col_min = x->mv_col_min;
+  int tmp_col_max = x->mv_col_max;
+  int tmp_row_min = x->mv_row_min;
+  int tmp_row_max = x->mv_row_max;
+
+  const YV12_BUFFER_CONFIG *scaled_ref_frame =
+      vp10_get_scaled_ref_frame(cpi, ref);
+
+  // Candidate starting points; the one used is chosen through
+  // x->mv_best_ref_index[ref].
+  MV pred_mv[3];
+  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
+  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
+  pred_mv[2] = x->pred_mv[ref];
+
+#if CONFIG_REF_MV
+  vp10_set_mvcost(x, ref);
+#endif
+
+  if (scaled_ref_frame) {
+    int i;
+    // Swap out the reference frame for a version that's been scaled to
+    // match the resolution of the current frame, allowing the existing
+    // motion search code to be used without additional modifications.
+    for (i = 0; i < MAX_MB_PLANE; i++)
+      backup_yv12[i] = xd->plane[i].pre[ref_idx];
+
+    vp10_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
+  }
+
+  vp10_set_mv_search_range(x, &ref_mv);
+
+  // Work out the size of the first step in the mv step search.
+  // 0 here is maximum length first step. 1 is MAX >> 1 etc.
+  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
+    // Take wtd average of the step_params based on the last frame's
+    // max mv magnitude and that based on the best ref mvs of the current
+    // block for the given reference.
+    step_param =
+        (vp10_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
+        2;
+  } else {
+    step_param = cpi->mv_step_param;
+  }
+
+  // TODO(debargha): is show_frame needed here?
+  if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size && cm->show_frame) {
+    int boffset =
+        2 * (b_width_log2_lookup[cm->sb_size] -
+             VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
+    step_param = VPXMAX(step_param, boffset);
+  }
+
+  if (cpi->sf.adaptive_motion_search) {
+    int bwl = b_width_log2_lookup[bsize];
+    int bhl = b_height_log2_lookup[bsize];
+    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
+
+    if (tlevel < 5) step_param += 2;
+
+    // prev_mv_sad is not setup for dynamically scaled frames.
+    if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
+      int i;
+      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
+        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
+          x->pred_mv[ref].row = 0;
+          x->pred_mv[ref].col = 0;
+          tmp_mv->as_int = INVALID_MV;
+
+          // The search range was already narrowed above, so it has to be
+          // put back here as well; otherwise the restricted limits would
+          // leak into whatever search runs next on this macroblock.
+          x->mv_col_min = tmp_col_min;
+          x->mv_col_max = tmp_col_max;
+          x->mv_row_min = tmp_row_min;
+          x->mv_row_max = tmp_row_max;
+
+          if (scaled_ref_frame) {
+            // Distinct index name: must not shadow the frame loop's |i|.
+            int j;
+            for (j = 0; j < MAX_MB_PLANE; ++j)
+              xd->plane[j].pre[ref_idx] = backup_yv12[j];
+          }
+          return;
+        }
+      }
+    }
+  }
+
+  mvp_full = pred_mv[x->mv_best_ref_index[ref]];
+
+  // Shift the starting point right by 3 to get the full-pixel position.
+  mvp_full.col >>= 3;
+  mvp_full.row >>= 3;
+
+  bestsme = vp10_masked_full_pixel_diamond(
+      cpi, x, mask, mask_stride, &mvp_full, step_param, sadpb,
+      MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
+      &tmp_mv->as_mv, ref_idx);
+
+  x->mv_col_min = tmp_col_min;
+  x->mv_col_max = tmp_col_max;
+  x->mv_row_min = tmp_row_min;
+  x->mv_row_max = tmp_row_max;
+
+  if (bestsme < INT_MAX) {
+    int dis; /* TODO: use dis in distortion calculation later. */
+    vp10_find_best_masked_sub_pixel_tree_up(
+        cpi, x, mask, mask_stride, mi_row, mi_col, &tmp_mv->as_mv, &ref_mv,
+        cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
+        cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
+        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], ref_idx,
+        cpi->sf.use_upsampled_references);
+  }
+  *rate_mv = vp10_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
+                              x->mvcost, MV_COST_WEIGHT);
+
+  if (cpi->sf.adaptive_motion_search && cm->show_frame)
+    x->pred_mv[ref] = tmp_mv->as_mv;
+
+  if (scaled_ref_frame) {
+    int i;
+    for (i = 0; i < MAX_MB_PLANE; i++)
+      xd->plane[i].pre[ref_idx] = backup_yv12[i];
+  }
+}
+
+// Runs the masked motion search for one or both references of a wedge
+// compound block. The mask for reference 0 is the wedge identified by
+// (wedge_index, wedge_sign); reference 1 uses the same wedge with the sign
+// inverted. Results go to tmp_mv[k] / rate_mv[k] for each searched
+// reference k, using mv_idx[k] as that reference's MV index.
+//
+// NOTE: which values: 0 - 0 only, 1 - 1 only, 2 - both
+static void do_masked_motion_search_indexed(VP10_COMP *cpi, MACROBLOCK *x,
+                                            int wedge_index, int wedge_sign,
+                                            BLOCK_SIZE bsize, int mi_row,
+                                            int mi_col, int_mv *tmp_mv,
+                                            int *rate_mv, int mv_idx[2],
+                                            int which) {
+  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
+  // The mask is looked up with the block's own sb_type, while the stride is
+  // derived from |bsize|.
+  const BLOCK_SIZE sb_type = mbmi->sb_type;
+  const int mask_stride = 4 * num_4x4_blocks_wide_lookup[bsize];
+  const int search_ref0 = (which == 0 || which == 2);
+  const int search_ref1 = (which == 1 || which == 2);
+  const uint8_t *mask =
+      vp10_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
+
+  if (search_ref0) {
+    do_masked_motion_search(cpi, x, mask, mask_stride, bsize, mi_row, mi_col,
+                            &tmp_mv[0], &rate_mv[0], 0, mv_idx[0]);
+  }
+
+  if (search_ref1) {
+    // The second reference is weighted by the complementary mask.
+    mask = vp10_get_contiguous_soft_mask(wedge_index, !wedge_sign, sb_type);
+    do_masked_motion_search(cpi, x, mask, mask_stride, bsize, mi_row, mi_col,
+                            &tmp_mv[1], &rate_mv[1], 1, mv_idx[1]);
+  }
+}
+#endif // CONFIG_EXT_INTER
+
+// In some situations we want to discount the apparent cost of a new motion
+// vector. Where there is a subtle motion field, and especially where there is
+// low spatial complexity, it can be hard to cover the cost of a new motion
+// vector in a single block even if that vector reduces distortion. Once
+// established, however, the vector may be usable through the nearest and near
+// mv modes to reduce distortion in subsequent blocks and also improve visual
+// quality.
+//
+// Returns 1 when all of the following hold, 0 otherwise:
+//   - the source frame is not an alt-ref frame,
+//   - |this_mode| is NEWMV with a non-zero |this_mv|,
+//   - both the NEARESTMV and the NEARMV vector for |ref_frame| are either
+//     zero or INVALID_MV.
+static int discount_newmv_test(const VP10_COMP *cpi, int this_mode,
+                               int_mv this_mv,
+                               int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
+                               int ref_frame) {
+  if (cpi->rc.is_src_frame_alt_ref) return 0;
+  if (this_mode != NEWMV) return 0;
+  if (this_mv.as_int == 0) return 0;
+
+  if (mode_mv[NEARESTMV][ref_frame].as_int != 0 &&
+      mode_mv[NEARESTMV][ref_frame].as_int != INVALID_MV)
+    return 0;
+
+  if (mode_mv[NEARMV][ref_frame].as_int != 0 &&
+      mode_mv[NEARMV][ref_frame].as_int != INVALID_MV)
+    return 0;
+
+  return 1;
+}
+
+#define LEFT_TOP_MARGIN ((VPX_ENC_BORDER_IN_PIXELS - VPX_INTERP_EXTEND) << 3)
+#define RIGHT_BOTTOM_MARGIN \
+  ((VPX_ENC_BORDER_IN_PIXELS - VPX_INTERP_EXTEND) << 3)
+
+// Clamps |mv| to the block's distances to the frame edges, widened by
+// LEFT_TOP_MARGIN on the left/top and RIGHT_BOTTOM_MARGIN on the right/bottom.
+// TODO(jingning): this mv clamping function should be block size dependent.
+static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
+  const int left_limit = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
+  const int right_limit = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
+  const int top_limit = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
+  const int bottom_limit = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
+
+  clamp_mv(mv, left_limit, right_limit, top_limit, bottom_limit);
+}
+
+#if CONFIG_EXT_INTER
+// Cheap estimate of the wedge sign for a pair of luma predictors.
+//
+// The source block is compared against each predictor in four regions whose
+// origins are the block's top-left corner and the points half a block to the
+// right, half a block down, and both. The per-region SSE values are then
+// combined into a "top-left" and a "bottom-right" score; the function returns
+// 1 when their sum is positive and 0 otherwise.
+//
+//   pred0/stride0  first predictor and its row pitch
+//   pred1/stride1  second predictor and its row pitch
+//
+// NOTE(review): the variance function is taken from fn_ptr[bsize - BLOCK_8X8];
+// presumably that entry covers the half-width/half-height region - confirm
+// against the fn_ptr table layout.
+static int estimate_wedge_sign(const VP10_COMP *cpi, const MACROBLOCK *x,
+                               const BLOCK_SIZE bsize, const uint8_t *pred0,
+                               int stride0, const uint8_t *pred1, int stride1) {
+  const struct macroblock_plane *const p = &x->plane[0];
+  const uint8_t *src = p->src.buf;
+  const int src_stride = p->src.stride;
+  const int f_index = bsize - BLOCK_8X8;
+  const int bw = 4 << (b_width_log2_lookup[bsize]);
+  const int bh = 4 << (b_height_log2_lookup[bsize]);
+  const uint8_t *pred[2];
+  int stride[2];
+  uint32_t esq[2][4];
+  int64_t tl, br;
+  int i, q;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+    pred0 = CONVERT_TO_BYTEPTR(pred0);
+    pred1 = CONVERT_TO_BYTEPTR(pred1);
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  pred[0] = pred0;
+  pred[1] = pred1;
+  stride[0] = stride0;
+  stride[1] = stride1;
+
+  for (i = 0; i < 2; ++i) {
+    for (q = 0; q < 4; ++q) {
+      // q: 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right.
+      const int row = (q >> 1) * (bh / 2);
+      const int col = (q & 1) * (bw / 2);
+      // Each predictor is walked with its OWN stride. The lower two regions
+      // of pred1 used to be read with stride0, which is only correct when
+      // both predictors happen to share a pitch.
+      // Only the SSE output is needed; the returned variance is discarded.
+      (void)cpi->fn_ptr[f_index].vf(src + row * src_stride + col, src_stride,
+                                    pred[i] + row * stride[i] + col, stride[i],
+                                    &esq[i][q]);
+    }
+  }
+
+  // Widen every term before adding so that three 32-bit SSE values cannot
+  // wrap around in unsigned arithmetic.
+  tl = ((int64_t)esq[0][0] + (int64_t)esq[0][1] + (int64_t)esq[0][2]) -
+       ((int64_t)esq[1][0] + (int64_t)esq[1][1] + (int64_t)esq[1][2]);
+  br = ((int64_t)esq[1][3] + (int64_t)esq[1][1] + (int64_t)esq[1][2]) -
+       ((int64_t)esq[0][3] + (int64_t)esq[0][1] + (int64_t)esq[0][2]);
+  return (tl + br > 0);
+}
+#endif // CONFIG_EXT_INTER
+
+#if !CONFIG_DUAL_FILTER
+// Predicts an interpolation filter for the current block without running a
+// filter search. SWITCHABLE is returned when no prediction was made.
+//
+// The prediction is built up in three stages, later stages overriding earlier
+// ones:
+//   1. With cb_pred_filter_search on, and on blocks selected by a chessboard
+//      pattern over (mi_row, mi_col) and the frame number, take the above
+//      neighbour's filter if the mode does not code a new MV or if the above
+//      and left neighbours agree.
+//   2. For compound prediction with adaptive_mode_search on, reuse the filter
+//      recorded in |single_filter| for the corresponding single-reference
+//      modes when both references agree.
+//   3. Unless the frame filter is BILINEAR, force EIGHTTAP_REGULAR for low
+//      variance sources (and, with CONFIG_EXT_INTERP, when no interpolation
+//      is needed on a SWITCHABLE frame).
+static INTERP_FILTER predict_interp_filter(
+    const VP10_COMP *cpi, const MACROBLOCK *x, const BLOCK_SIZE bsize,
+    const int mi_row, const int mi_col,
+    INTERP_FILTER (*single_filter)[TOTAL_REFS_PER_FRAME]) {
+  const VP10_COMMON *cm = &cpi->common;
+  const MACROBLOCKD *xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  const int this_mode = mbmi->mode;
+  const int first_ref = mbmi->ref_frame[0];
+  // A negative second reference is mapped to index 0.
+  const int second_ref = mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1];
+  INTERP_FILTER best_filter = SWITCHABLE;
+  int use_neighbour_filters = 0;
+
+  if (cpi->sf.cb_pred_filter_search) {
+    const int bsl = mi_width_log2_lookup[bsize];
+    use_neighbour_filters = (((mi_row + mi_col) >> bsl) +
+                             get_chessboard_index(cm->current_video_frame)) &
+                            0x1;
+  }
+
+  if (use_neighbour_filters) {
+    INTERP_FILTER above = SWITCHABLE, left = SWITCHABLE;
+    if (xd->up_available) above = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
+    if (xd->left_available) left = xd->mi[-1]->mbmi.interp_filter;
+
+#if CONFIG_EXT_INTER
+    if (above == left || (this_mode != NEWMV && this_mode != NEWFROMNEARMV &&
+                          this_mode != NEW_NEWMV))
+#else
+    if (above == left || this_mode != NEWMV)
+#endif  // CONFIG_EXT_INTER
+      best_filter = above;
+  }
+
+  if (has_second_ref(mbmi) && cpi->sf.adaptive_mode_search) {
+    // Single-reference modes whose recorded filters are compared for the
+    // first and second reference respectively.
+    int mode0 = this_mode;
+    int mode1 = this_mode;
+#if CONFIG_EXT_INTER
+    switch (this_mode) {
+      case NEAREST_NEARESTMV:
+        mode0 = NEARESTMV;
+        mode1 = NEARESTMV;
+        break;
+      case NEAREST_NEARMV:
+        mode0 = NEARESTMV;
+        mode1 = NEARMV;
+        break;
+      case NEAR_NEARESTMV:
+        mode0 = NEARMV;
+        mode1 = NEARESTMV;
+        break;
+      case NEAR_NEARMV:
+        mode0 = NEARMV;
+        mode1 = NEARMV;
+        break;
+      case ZERO_ZEROMV:
+        mode0 = ZEROMV;
+        mode1 = ZEROMV;
+        break;
+      case NEW_NEWMV:
+        mode0 = NEWMV;
+        mode1 = NEWMV;
+        break;
+      case NEAREST_NEWMV:
+        mode0 = NEARESTMV;
+        mode1 = NEWMV;
+        break;
+      case NEAR_NEWMV:
+        mode0 = NEARMV;
+        mode1 = NEWMV;
+        break;
+      case NEW_NEARESTMV:
+        mode0 = NEWMV;
+        mode1 = NEARESTMV;
+        break;
+      case NEW_NEARMV:
+        mode0 = NEWMV;
+        mode1 = NEARMV;
+        break;
+      default:
+        // Any other mode indexes the table with itself for both references.
+        break;
+    }
+#endif  // CONFIG_EXT_INTER
+    if (single_filter[mode0][first_ref] == single_filter[mode1][second_ref])
+      best_filter = single_filter[mode0][first_ref];
+  }
+
+  if (cm->interp_filter != BILINEAR) {
+    if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
+      best_filter = EIGHTTAP_REGULAR;
+    }
+#if CONFIG_EXT_INTERP
+    else if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) {
+      best_filter = EIGHTTAP_REGULAR;
+    }
+#endif
+  }
+  return best_filter;
+}
+#endif  // !CONFIG_DUAL_FILTER
+
+#if CONFIG_EXT_INTER
+// Choose the best wedge index and sign.
+//
+// For every wedge available at |bsize| the sign is derived from the
+// residuals of the two predictors |p0| and |p1| (both with row pitch equal to
+// the block width), the SSE of the blended prediction is computed from those
+// residuals, and a modelled RD cost is formed. The wedge with the lowest cost
+// is reported through *best_wedge_index / *best_wedge_sign and its cost is
+// returned.
+static int64_t pick_wedge(const VP10_COMP *const cpi, const MACROBLOCK *const x,
+                          const BLOCK_SIZE bsize, const uint8_t *const p0,
+                          const uint8_t *const p1, int *const best_wedge_sign,
+                          int *const best_wedge_index) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const src = &x->plane[0].src;
+  const int bw = 4 * num_4x4_blocks_wide_lookup[bsize];
+  const int bh = 4 * num_4x4_blocks_high_lookup[bsize];
+  const int N = bw * bh;
+  const int num_wedges = (1 << get_wedge_bits_lookup(bsize));
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
+  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
+#else
+  const int bd_round = 0;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  int64_t lowest_rd = INT64_MAX;
+  int64_t sign_limit;
+  int idx;
+
+  // r0 = src - p0, r1 = src - p1, d10 = p1 - p0; ds is filled in by
+  // vp10_wedge_compute_delta_squares() from r0 and r1.
+  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (hbd) {
+    vpx_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
+                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
+    vpx_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
+                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
+    vpx_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
+                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
+  } else  // NOLINT
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  {
+    vpx_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
+    vpx_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
+    vpx_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
+  }
+
+  sign_limit = ((int64_t)vpx_sum_squares_i16(r0, N) -
+                (int64_t)vpx_sum_squares_i16(r1, N)) *
+               (1 << WEDGE_WEIGHT_BITS) / 2;
+
+  vp10_wedge_compute_delta_squares(ds, r0, r1, N);
+
+  for (idx = 0; idx < num_wedges; ++idx) {
+    const uint8_t *mask;
+    uint64_t sse;
+    int64_t dist, rd;
+    int rate, sign;
+
+    // The sign is decided with the sign-0 mask; the SSE is then measured
+    // with the mask for the chosen sign.
+    mask = vp10_get_contiguous_soft_mask(idx, 0, bsize);
+    sign = vp10_wedge_sign_from_residuals(ds, mask, N, sign_limit);
+
+    mask = vp10_get_contiguous_soft_mask(idx, sign, bsize);
+    sse = vp10_wedge_sse_from_residuals(r1, d10, mask, N);
+    sse = ROUND_POWER_OF_TWO(sse, bd_round);
+
+    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
+    rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+
+    if (rd >= lowest_rd) continue;
+
+    lowest_rd = rd;
+    *best_wedge_sign = sign;
+    *best_wedge_index = idx;
+  }
+
+  return lowest_rd;
+}
+
+// Choose the best wedge index for the specified sign.
+//
+// Same search as pick_wedge(), except that the sign is supplied by the caller
+// through |wedge_sign| instead of being derived per wedge. The winning index
+// is reported through *best_wedge_index and its modelled RD cost is returned.
+static int64_t pick_wedge_fixed_sign(
+    const VP10_COMP *const cpi, const MACROBLOCK *const x,
+    const BLOCK_SIZE bsize, const uint8_t *const p0, const uint8_t *const p1,
+    const int wedge_sign, int *const best_wedge_index) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const src = &x->plane[0].src;
+  const int bw = 4 * num_4x4_blocks_wide_lookup[bsize];
+  const int bh = 4 * num_4x4_blocks_high_lookup[bsize];
+  const int N = bw * bh;
+  const int num_wedges = (1 << get_wedge_bits_lookup(bsize));
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
+  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
+#else
+  const int bd_round = 0;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  int64_t lowest_rd = INT64_MAX;
+  int idx;
+
+  // r1 = src - p1, d10 = p1 - p0.
+  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (hbd) {
+    vpx_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
+                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
+    vpx_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
+                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
+  } else  // NOLINT
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  {
+    vpx_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
+    vpx_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
+  }
+
+  for (idx = 0; idx < num_wedges; ++idx) {
+    const uint8_t *const mask =
+        vp10_get_contiguous_soft_mask(idx, wedge_sign, bsize);
+    uint64_t sse = vp10_wedge_sse_from_residuals(r1, d10, mask, N);
+    int64_t dist, rd;
+    int rate;
+
+    sse = ROUND_POWER_OF_TWO(sse, bd_round);
+
+    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
+    rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+
+    if (rd >= lowest_rd) continue;
+
+    lowest_rd = rd;
+    *best_wedge_index = idx;
+  }
+
+  return lowest_rd;
+}
+
+// Selects the inter-inter wedge for the current block and stores the chosen
+// index and sign in its mode info. Returns the modelled RD cost of the choice.
+//
+// With fast_wedge_sign_estimate the sign is estimated once up front and only
+// the index is searched; otherwise index and sign are searched together.
+static int64_t pick_interinter_wedge(const VP10_COMP *const cpi,
+                                     const MACROBLOCK *const x,
+                                     const BLOCK_SIZE bsize,
+                                     const uint8_t *const p0,
+                                     const uint8_t *const p1) {
+  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
+  int chosen_sign = 0;
+  int chosen_index = -1;
+  int64_t rd;
+
+  assert(is_interinter_wedge_used(bsize));
+
+  if (!cpi->sf.fast_wedge_sign_estimate) {
+    rd = pick_wedge(cpi, x, bsize, p0, p1, &chosen_sign, &chosen_index);
+  } else {
+    // Both predictors are passed with a row pitch equal to the block width.
+    const int bw = 4 * num_4x4_blocks_wide_lookup[bsize];
+    chosen_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
+    rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, chosen_sign,
+                               &chosen_index);
+  }
+
+  mbmi->interinter_wedge_index = chosen_index;
+  mbmi->interinter_wedge_sign = chosen_sign;
+  return rd;
+}
+
+// Selects the inter-intra wedge for the current block. The sign is always 0,
+// so only the index is searched. The result is stored in the block's mode
+// info and the modelled RD cost of the choice is returned.
+static int64_t pick_interintra_wedge(const VP10_COMP *const cpi,
+                                     const MACROBLOCK *const x,
+                                     const BLOCK_SIZE bsize,
+                                     const uint8_t *const p0,
+                                     const uint8_t *const p1) {
+  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
+  int chosen_index = -1;
+  int64_t rd;
+
+  assert(is_interintra_wedge_used(bsize));
+
+  rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, 0, &chosen_index);
+
+  mbmi->interintra_wedge_index = chosen_index;
+  mbmi->interintra_wedge_sign = 0;
+  return rd;
+}
+#endif // CONFIG_EXT_INTER
+
+static int64_t handle_inter_mode(
+ VP10_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2,
+ int64_t *distortion, int *skippable, int *rate_y, int *rate_uv,
+ int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
+ int mi_col,
+#if CONFIG_OBMC
+ uint8_t *dst_buf1[3], int dst_stride1[3], uint8_t *dst_buf2[3],
+ int dst_stride2[3], const int32_t *const wsrc, const int32_t *const mask2d,
+#endif // CONFIG_OBMC
+#if CONFIG_EXT_INTER
+ int_mv single_newmvs[2][TOTAL_REFS_PER_FRAME],
+ int single_newmvs_rate[2][TOTAL_REFS_PER_FRAME],
+ int *compmode_interintra_cost, int *compmode_wedge_cost,
+ int64_t (*const modelled_rd)[TOTAL_REFS_PER_FRAME],
+#else
+ int_mv single_newmv[TOTAL_REFS_PER_FRAME],
+#endif // CONFIG_EXT_INTER
+ INTERP_FILTER (*single_filter)[TOTAL_REFS_PER_FRAME],
+ int (*single_skippable)[TOTAL_REFS_PER_FRAME], int64_t *psse,
+ const int64_t ref_best_rd) {
+ VP10_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+ const int is_comp_pred = has_second_ref(mbmi);
+ const int this_mode = mbmi->mode;
+ int_mv *frame_mv = mode_mv[this_mode];
+ int i;
+ int refs[2] = { mbmi->ref_frame[0],
+ (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
+ int_mv cur_mv[2];
+ int rate_mv = 0;
+#if CONFIG_EXT_INTER
+ const int bw = 4 * num_4x4_blocks_wide_lookup[bsize];
+ int mv_idx = (this_mode == NEWFROMNEARMV) ? 1 : 0;
+ int_mv single_newmv[TOTAL_REFS_PER_FRAME];
+ const unsigned int *const interintra_mode_cost =
+ cpi->interintra_mode_cost[size_group_lookup[bsize]];
+ const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
+#if CONFIG_REF_MV
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+#endif
+#endif // CONFIG_EXT_INTER
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf_[MAX_MB_PLANE * MAX_SB_SQUARE]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ uint8_t *tmp_buf;
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ int allow_motvar =
+#if CONFIG_EXT_INTER
+ !is_comp_interintra_pred &&
+#endif // CONFIG_EXT_INTER
+ is_motvar_allowed(mbmi);
+ int rate2_nocoeff = 0, best_rate2 = INT_MAX, best_skippable, best_xskip,
+ best_disable_skip = 0;
+ int best_rate_y, best_rate_uv;
+#if CONFIG_VAR_TX
+ uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
+#endif // CONFIG_VAR_TX
+ int64_t best_distortion = INT64_MAX;
+ MB_MODE_INFO best_mbmi;
+#if CONFIG_EXT_INTER
+ int rate2_bmc_nocoeff;
+ int rate_mv_bmc;
+ MB_MODE_INFO best_bmc_mbmi;
+#endif // CONFIG_EXT_INTER
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+ int pred_exists = 0;
+ int intpel_mv;
+ int64_t rd, tmp_rd, best_rd = INT64_MAX;
+ int best_needs_copy = 0;
+ uint8_t *orig_dst[MAX_MB_PLANE];
+ int orig_dst_stride[MAX_MB_PLANE];
+ int rs = 0;
+#if CONFIG_DUAL_FILTER
+ // Index use case:
+ // {0, 1} -> (vertical, horizontal) filter types for the first ref frame
+ // {2, 3} -> (vertical, horizontal) filter types for the second ref frame
+ INTERP_FILTER best_filter[4] = {
+ SWITCHABLE, SWITCHABLE, SWITCHABLE, SWITCHABLE,
+ };
+#else
+ INTERP_FILTER best_filter = SWITCHABLE;
+#endif
+
+ int skip_txfm_sb = 0;
+ int64_t skip_sse_sb = INT64_MAX;
+ int64_t distortion_y = 0, distortion_uv = 0;
+ int16_t mode_ctx = mbmi_ext->mode_context[refs[0]];
+
+#if CONFIG_EXT_INTER
+ *compmode_interintra_cost = 0;
+ mbmi->use_wedge_interintra = 0;
+ *compmode_wedge_cost = 0;
+ mbmi->use_wedge_interinter = 0;
+
+ // is_comp_interintra_pred implies !is_comp_pred
+ assert(!is_comp_interintra_pred || (!is_comp_pred));
+ // is_comp_interintra_pred implies is_interintra_allowed(mbmi->sb_type)
+ assert(!is_comp_interintra_pred || is_interintra_allowed(mbmi));
+#endif // CONFIG_EXT_INTER
+
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (is_comp_pred)
+ mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
+ else
+#endif // CONFIG_EXT_INTER
+ mode_ctx = vp10_mode_context_analyzer(mbmi_ext->mode_context,
+ mbmi->ref_frame, bsize, -1);
+#endif
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf_);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ tmp_buf = tmp_buf_;
+
+ if (is_comp_pred) {
+ if (frame_mv[refs[0]].as_int == INVALID_MV ||
+ frame_mv[refs[1]].as_int == INVALID_MV)
+ return INT64_MAX;
+ }
+
+ if (have_newmv_in_inter_mode(this_mode)) {
+ if (is_comp_pred) {
+#if CONFIG_EXT_INTER
+ for (i = 0; i < 2; ++i) {
+ single_newmv[refs[i]].as_int = single_newmvs[mv_idx][refs[i]].as_int;
+ }
+
+ if (this_mode == NEW_NEWMV) {
+ frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
+ frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
+
+ if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
+ joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL,
+ single_newmv, &rate_mv, 0);
+ } else {
+#if CONFIG_REF_MV
+ vp10_set_mvcost(x, mbmi->ref_frame[0]);
+#endif // CONFIG_REF_MV
+ rate_mv = vp10_mv_bit_cost(
+ &frame_mv[refs[0]].as_mv, &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+#if CONFIG_REF_MV
+ vp10_set_mvcost(x, mbmi->ref_frame[1]);
+#endif // CONFIG_REF_MV
+ rate_mv += vp10_mv_bit_cost(
+ &frame_mv[refs[1]].as_mv, &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ }
+ } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
+ frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
+ rate_mv = vp10_mv_bit_cost(&frame_mv[refs[1]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ } else {
+ frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
+ rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ }
+#else
+ // Initialize mv using single prediction mode result.
+ frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
+ frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
+
+ if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
+ joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col,
+ single_newmv, &rate_mv, 0);
+ } else {
+#if CONFIG_REF_MV
+ vp10_set_mvcost(x, mbmi->ref_frame[0]);
+#endif // CONFIG_REF_MV
+ rate_mv = vp10_mv_bit_cost(&frame_mv[refs[0]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+#if CONFIG_REF_MV
+ vp10_set_mvcost(x, mbmi->ref_frame[1]);
+#endif // CONFIG_REF_MV
+ rate_mv += vp10_mv_bit_cost(&frame_mv[refs[1]].as_mv,
+ &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ }
+#endif // CONFIG_EXT_INTER
+ } else {
+#if CONFIG_EXT_INTER
+ if (is_comp_interintra_pred) {
+ x->best_mv = single_newmvs[mv_idx][refs[0]];
+ rate_mv = single_newmvs_rate[mv_idx][refs[0]];
+ } else {
+ single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, mv_idx,
+ &rate_mv);
+ single_newmvs[mv_idx][refs[0]] = x->best_mv;
+ single_newmvs_rate[mv_idx][refs[0]] = rate_mv;
+ }
+#else
+ single_motion_search(cpi, x, bsize, mi_row, mi_col, &rate_mv);
+ single_newmv[refs[0]] = x->best_mv;
+#endif // CONFIG_EXT_INTER
+
+ if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;
+
+ frame_mv[refs[0]] = x->best_mv;
+ xd->mi[0]->bmi[0].as_mv[0] = x->best_mv;
+
+ // Estimate the rate implications of a new mv but discount this
+ // under certain circumstances where we want to help initiate a weak
+ // motion field, where the distortion gain for a single block may not
+ // be enough to overcome the cost of a new mv.
+ if (discount_newmv_test(cpi, this_mode, x->best_mv, mode_mv, refs[0])) {
+ rate_mv = VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
+ }
+ }
+ *rate2 += rate_mv;
+ }
+
+ for (i = 0; i < is_comp_pred + 1; ++i) {
+ cur_mv[i] = frame_mv[refs[i]];
+// Clip "next_nearest" so that it does not extend too far out of image
+#if CONFIG_EXT_INTER
+ if (this_mode != NEWMV && this_mode != NEWFROMNEARMV)
+#else
+ if (this_mode != NEWMV)
+#endif // CONFIG_EXT_INTER
+ clamp_mv2(&cur_mv[i].as_mv, xd);
+
+ if (mv_check_bounds(x, &cur_mv[i].as_mv)) return INT64_MAX;
+ mbmi->mv[i].as_int = cur_mv[i].as_int;
+ }
+
+#if CONFIG_REF_MV
+#if CONFIG_EXT_INTER
+ if (this_mode == NEAREST_NEARESTMV) {
+#else
+ if (this_mode == NEARESTMV && is_comp_pred) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+#endif // CONFIG_EXT_INTER
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
+ cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
+
+ for (i = 0; i < 2; ++i) {
+ clamp_mv2(&cur_mv[i].as_mv, xd);
+ if (mv_check_bounds(x, &cur_mv[i].as_mv)) return INT64_MAX;
+ mbmi->mv[i].as_int = cur_mv[i].as_int;
+ }
+ }
+ }
+
+#if CONFIG_EXT_INTER
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
+ if (this_mode == NEAREST_NEWMV || this_mode == NEAREST_NEARMV) {
+ cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
+
+ lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
+ clamp_mv2(&cur_mv[0].as_mv, xd);
+ if (mv_check_bounds(x, &cur_mv[0].as_mv)) return INT64_MAX;
+ mbmi->mv[0].as_int = cur_mv[0].as_int;
+ }
+
+ if (this_mode == NEW_NEARESTMV || this_mode == NEAR_NEARESTMV) {
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
+
+ lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
+ clamp_mv2(&cur_mv[1].as_mv, xd);
+ if (mv_check_bounds(x, &cur_mv[1].as_mv)) return INT64_MAX;
+ mbmi->mv[1].as_int = cur_mv[1].as_int;
+ }
+ }
+
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
+ if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARESTMV ||
+ this_mode == NEAR_NEARMV) {
+ cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][1].this_mv;
+
+ lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
+ clamp_mv2(&cur_mv[0].as_mv, xd);
+ if (mv_check_bounds(x, &cur_mv[0].as_mv)) return INT64_MAX;
+ mbmi->mv[0].as_int = cur_mv[0].as_int;
+ }
+
+ if (this_mode == NEW_NEARMV || this_mode == NEAREST_NEARMV ||
+ this_mode == NEAR_NEARMV) {
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][1].comp_mv;
+
+ lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
+ clamp_mv2(&cur_mv[1].as_mv, xd);
+ if (mv_check_bounds(x, &cur_mv[1].as_mv)) return INT64_MAX;
+ mbmi->mv[1].as_int = cur_mv[1].as_int;
+ }
+ }
+#else
+ if (this_mode == NEARMV && is_comp_pred) {
+ uint8_t ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
+ int ref_mv_idx = mbmi->ref_mv_idx + 1;
+ cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
+ cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
+
+ for (i = 0; i < 2; ++i) {
+ clamp_mv2(&cur_mv[i].as_mv, xd);
+ if (mv_check_bounds(x, &cur_mv[i].as_mv)) return INT64_MAX;
+ mbmi->mv[i].as_int = cur_mv[i].as_int;
+ }
+ }
+ }
+#endif // CONFIG_EXT_INTER
+#endif // CONFIG_REF_MV
+
+ // do first prediction into the destination buffer. Do the next
+ // prediction into a temporary buffer. Then keep track of which one
+ // of these currently holds the best predictor, and use the other
+ // one for future predictions. In the end, copy from tmp_buf to
+ // dst if necessary.
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ orig_dst[i] = xd->plane[i].dst.buf;
+ orig_dst_stride[i] = xd->plane[i].dst.stride;
+ }
+
+ // We don't include the cost of the second reference here, because there
+ // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
+ // words if you present them in that order, the second one is always known
+ // if the first is known.
+ //
+ // Under some circumstances we discount the cost of new mv mode to encourage
+ // initiation of a motion field.
+ if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv,
+ refs[0])) {
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode, is_comp_pred, mode_ctx),
+ cost_mv_ref(cpi, NEARESTMV, is_comp_pred, mode_ctx));
+#else
+ *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode, mode_ctx),
+ cost_mv_ref(cpi, NEARESTMV, mode_ctx));
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ } else {
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ *rate2 += cost_mv_ref(cpi, this_mode, is_comp_pred, mode_ctx);
+#else
+ *rate2 += cost_mv_ref(cpi, this_mode, mode_ctx);
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ }
+
+ if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
+#if CONFIG_EXT_INTER
+ mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV
+#else
+ mbmi->mode != NEARESTMV
+#endif // CONFIG_EXT_INTER
+ )
+ return INT64_MAX;
+
+ pred_exists = 0;
+ // Are all MVs integer pel for Y and UV
+ intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
+ if (is_comp_pred) intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
+
+#if !CONFIG_DUAL_FILTER
+ best_filter =
+ predict_interp_filter(cpi, x, bsize, mi_row, mi_col, single_filter);
+#endif
+
+ if (cm->interp_filter != BILINEAR) {
+ int newbest;
+ int tmp_rate_sum = 0;
+ int64_t tmp_dist_sum = 0;
+
+#if CONFIG_DUAL_FILTER
+#if CONFIG_EXT_INTERP
+ for (i = 0; i < 25; ++i) {
+#else
+ for (i = 0; i < 9; ++i) {
+#endif
+#else
+ for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
+#endif
+ int j;
+ int64_t rs_rd;
+ int tmp_skip_sb = 0;
+ int64_t tmp_skip_sse = INT64_MAX;
+
+#if CONFIG_DUAL_FILTER
+ mbmi->interp_filter[0] = filter_sets[i][0];
+ mbmi->interp_filter[1] = filter_sets[i][1];
+ mbmi->interp_filter[2] = filter_sets[i][0];
+ mbmi->interp_filter[3] = filter_sets[i][1];
+#else
+ mbmi->interp_filter = i;
+#endif
+ rs = vp10_get_switchable_rate(cpi, xd);
+ rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
+
+ if (i > 0 && intpel_mv && IsInterpolatingFilter(i)) {
+ rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
+ if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
+ } else {
+ int rate_sum = 0;
+ int64_t dist_sum = 0;
+
+ if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
+ (cpi->sf.interp_filter_search_mask & (1 << i))) {
+ rate_sum = INT_MAX;
+ dist_sum = INT64_MAX;
+ continue;
+ }
+
+ if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) ||
+#if CONFIG_EXT_INTER
+ is_comp_interintra_pred ||
+#endif // CONFIG_EXT_INTER
+ (cm->interp_filter != SWITCHABLE &&
+ (
+#if CONFIG_DUAL_FILTER
+ cm->interp_filter == mbmi->interp_filter[0]
+#else
+ cm->interp_filter == mbmi->interp_filter
+#endif
+ || (i == 0 && intpel_mv && IsInterpolatingFilter(i))))) {
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ } else {
+ for (j = 0; j < MAX_MB_PLANE; j++) {
+ xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
+ xd->plane[j].dst.stride = MAX_SB_SIZE;
+ }
+ }
+ vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &rate_sum,
+ &dist_sum, &tmp_skip_sb, &tmp_skip_sse);
+
+ rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
+ if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
+
+ if (i == 0 && intpel_mv && IsInterpolatingFilter(i)) {
+ tmp_rate_sum = rate_sum;
+ tmp_dist_sum = dist_sum;
+ }
+ }
+
+ if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
+ if (rd / 2 > ref_best_rd) {
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ return INT64_MAX;
+ }
+ }
+ newbest = i == 0 || rd < best_rd;
+
+ if (newbest) {
+ best_rd = rd;
+#if CONFIG_DUAL_FILTER
+ best_filter[0] = mbmi->interp_filter[0];
+ best_filter[1] = mbmi->interp_filter[1];
+ best_filter[2] = mbmi->interp_filter[2];
+ best_filter[3] = mbmi->interp_filter[3];
+#else
+ best_filter = mbmi->interp_filter;
+#endif
+ if (cm->interp_filter == SWITCHABLE && i &&
+ !(intpel_mv && IsInterpolatingFilter(i)))
+ best_needs_copy = !best_needs_copy;
+ }
+
+ if ((cm->interp_filter == SWITCHABLE && newbest) ||
+ (cm->interp_filter != SWITCHABLE &&
+#if CONFIG_DUAL_FILTER
+ cm->interp_filter == mbmi->interp_filter[0])) {
+#else
+ cm->interp_filter == mbmi->interp_filter)) {
+#endif
+ pred_exists = 1;
+ tmp_rd = best_rd;
+
+ skip_txfm_sb = tmp_skip_sb;
+ skip_sse_sb = tmp_skip_sse;
+ } else {
+ pred_exists = 0;
+ }
+ }
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ }
+
+// Set the appropriate filter
+#if CONFIG_DUAL_FILTER
+ mbmi->interp_filter[0] =
+ cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter[0];
+ mbmi->interp_filter[1] =
+ cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter[1];
+ if (mbmi->ref_frame[1] > INTRA_FRAME) {
+ mbmi->interp_filter[2] =
+ cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter[2];
+ mbmi->interp_filter[3] =
+ cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter[3];
+ }
+#else
+ mbmi->interp_filter =
+ cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter;
+#endif
+ rs = cm->interp_filter == SWITCHABLE ? vp10_get_switchable_rate(cpi, xd) : 0;
+
+#if CONFIG_EXT_INTER
+#if CONFIG_OBMC
+ best_bmc_mbmi = *mbmi;
+ rate_mv_bmc = rate_mv;
+ rate2_bmc_nocoeff = *rate2;
+ if (cm->interp_filter == SWITCHABLE) rate2_bmc_nocoeff += rs;
+#endif // CONFIG_OBMC
+
+ if (is_comp_pred && is_interinter_wedge_used(bsize)) {
+ int rate_sum, rs;
+ int64_t dist_sum;
+ int64_t best_rd_nowedge = INT64_MAX;
+ int64_t best_rd_wedge = INT64_MAX;
+ int tmp_skip_txfm_sb;
+ int64_t tmp_skip_sse_sb;
+
+ rs = vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0);
+ mbmi->use_wedge_interinter = 0;
+ vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ vp10_subtract_plane(x, bsize, 0);
+ rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
+ &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
+ if (rd != INT64_MAX)
+ rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
+ best_rd_nowedge = rd;
+
+ // Disable wedge search if source variance is small
+ if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
+ best_rd_nowedge / 3 < ref_best_rd) {
+ uint8_t pred0[2 * MAX_SB_SQUARE];
+ uint8_t pred1[2 * MAX_SB_SQUARE];
+ uint8_t *preds0[1] = { pred0 };
+ uint8_t *preds1[1] = { pred1 };
+ int strides[1] = { bw };
+
+ mbmi->use_wedge_interinter = 1;
+ rs = vp10_cost_literal(get_interinter_wedge_bits(bsize)) +
+ vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
+
+ vp10_build_inter_predictors_for_planes_single_buf(
+ xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
+ vp10_build_inter_predictors_for_planes_single_buf(
+ xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
+
+ // Choose the best wedge
+ best_rd_wedge = pick_interinter_wedge(cpi, x, bsize, pred0, pred1);
+ best_rd_wedge += RDCOST(x->rdmult, x->rddiv, rs + rate_mv, 0);
+
+ if (have_newmv_in_inter_mode(this_mode)) {
+ int_mv tmp_mv[2];
+ int rate_mvs[2], tmp_rate_mv = 0;
+ if (this_mode == NEW_NEWMV) {
+ int mv_idxs[2] = { 0, 0 };
+ do_masked_motion_search_indexed(
+ cpi, x, mbmi->interinter_wedge_index, mbmi->interinter_wedge_sign,
+ bsize, mi_row, mi_col, tmp_mv, rate_mvs, mv_idxs, 2);
+ tmp_rate_mv = rate_mvs[0] + rate_mvs[1];
+ mbmi->mv[0].as_int = tmp_mv[0].as_int;
+ mbmi->mv[1].as_int = tmp_mv[1].as_int;
+ } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
+ int mv_idxs[2] = { 0, 0 };
+ do_masked_motion_search_indexed(
+ cpi, x, mbmi->interinter_wedge_index, mbmi->interinter_wedge_sign,
+ bsize, mi_row, mi_col, tmp_mv, rate_mvs, mv_idxs, 0);
+ tmp_rate_mv = rate_mvs[0];
+ mbmi->mv[0].as_int = tmp_mv[0].as_int;
+ } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
+ int mv_idxs[2] = { 0, 0 };
+ do_masked_motion_search_indexed(
+ cpi, x, mbmi->interinter_wedge_index, mbmi->interinter_wedge_sign,
+ bsize, mi_row, mi_col, tmp_mv, rate_mvs, mv_idxs, 1);
+ tmp_rate_mv = rate_mvs[1];
+ mbmi->mv[1].as_int = tmp_mv[1].as_int;
+ }
+ vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
+ &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
+ rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum);
+ if (rd < best_rd_wedge) {
+ best_rd_wedge = rd;
+ } else {
+ mbmi->mv[0].as_int = cur_mv[0].as_int;
+ mbmi->mv[1].as_int = cur_mv[1].as_int;
+ tmp_rate_mv = rate_mv;
+ vp10_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0,
+ strides, preds1, strides);
+ }
+ vp10_subtract_plane(x, bsize, 0);
+ rd =
+ estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
+ &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
+ if (rd != INT64_MAX)
+ rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum,
+ dist_sum);
+ best_rd_wedge = rd;
+
+ if (best_rd_wedge < best_rd_nowedge) {
+ mbmi->use_wedge_interinter = 1;
+ xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
+ xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
+ *rate2 += tmp_rate_mv - rate_mv;
+ rate_mv = tmp_rate_mv;
+ } else {
+ mbmi->use_wedge_interinter = 0;
+ mbmi->mv[0].as_int = cur_mv[0].as_int;
+ mbmi->mv[1].as_int = cur_mv[1].as_int;
+ xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
+ xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
+ }
+ } else {
+ vp10_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0,
+ strides, preds1, strides);
+ vp10_subtract_plane(x, bsize, 0);
+ rd =
+ estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
+ &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
+ if (rd != INT64_MAX)
+ rd = RDCOST(x->rdmult, x->rddiv, rs + rate_mv + rate_sum, dist_sum);
+ best_rd_wedge = rd;
+ if (best_rd_wedge < best_rd_nowedge) {
+ mbmi->use_wedge_interinter = 1;
+ } else {
+ mbmi->use_wedge_interinter = 0;
+ }
+ }
+ }
+ if (ref_best_rd < INT64_MAX &&
+ VPXMIN(best_rd_wedge, best_rd_nowedge) / 3 > ref_best_rd)
+ return INT64_MAX;
+
+ pred_exists = 0;
+ tmp_rd = VPXMIN(best_rd_wedge, best_rd_nowedge);
+
+ if (mbmi->use_wedge_interinter)
+ *compmode_wedge_cost =
+ vp10_cost_literal(get_interinter_wedge_bits(bsize)) +
+ vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 1);
+ else
+ *compmode_wedge_cost =
+ vp10_cost_bit(cm->fc->wedge_interinter_prob[bsize], 0);
+ }
+
+ if (is_comp_interintra_pred) {
+ INTERINTRA_MODE best_interintra_mode = II_DC_PRED;
+ int64_t best_interintra_rd = INT64_MAX;
+ int rmode, rate_sum;
+ int64_t dist_sum;
+ int j;
+ int64_t best_interintra_rd_nowedge = INT64_MAX;
+ int64_t best_interintra_rd_wedge = INT64_MAX;
+ int rwedge;
+ int_mv tmp_mv;
+ int tmp_rate_mv = 0;
+ int tmp_skip_txfm_sb;
+ int64_t tmp_skip_sse_sb;
+ DECLARE_ALIGNED(16, uint8_t, intrapred_[2 * MAX_SB_SQUARE]);
+ uint8_t *intrapred;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ intrapred = CONVERT_TO_BYTEPTR(intrapred_);
+ else
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ intrapred = intrapred_;
+
+ mbmi->ref_frame[1] = NONE;
+ for (j = 0; j < MAX_MB_PLANE; j++) {
+ xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
+ xd->plane[j].dst.stride = bw;
+ }
+ vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ mbmi->ref_frame[1] = INTRA_FRAME;
+ mbmi->use_wedge_interintra = 0;
+
+ for (j = 0; j < INTERINTRA_MODES; ++j) {
+ mbmi->interintra_mode = (INTERINTRA_MODE)j;
+ rmode = interintra_mode_cost[mbmi->interintra_mode];
+ vp10_build_intra_predictors_for_interintra(xd, bsize, 0, intrapred, bw);
+ vp10_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
+ &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
+ rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum);
+ if (rd < best_interintra_rd) {
+ best_interintra_rd = rd;
+ best_interintra_mode = mbmi->interintra_mode;
+ }
+ }
+ mbmi->interintra_mode = best_interintra_mode;
+ rmode = interintra_mode_cost[mbmi->interintra_mode];
+ vp10_build_intra_predictors_for_interintra(xd, bsize, 0, intrapred, bw);
+ vp10_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
+ vp10_subtract_plane(x, bsize, 0);
+ rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
+ &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
+ if (rd != INT64_MAX)
+ rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum);
+ best_interintra_rd = rd;
+
+ if (ref_best_rd < INT64_MAX && best_interintra_rd > 2 * ref_best_rd) {
+ return INT64_MAX;
+ }
+ if (is_interintra_wedge_used(bsize)) {
+ rwedge = vp10_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0);
+ if (rd != INT64_MAX)
+ rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge + rate_sum,
+ dist_sum);
+ best_interintra_rd_nowedge = rd;
+
+ // Disable wedge search if source variance is small
+ if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) {
+ mbmi->use_wedge_interintra = 1;
+
+ rwedge = vp10_cost_literal(get_interintra_wedge_bits(bsize)) +
+ vp10_cost_bit(cm->fc->wedge_interintra_prob[bsize], 1);
+
+ best_interintra_rd_wedge =
+ pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
+
+ best_interintra_rd_wedge +=
+ RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge, 0);
+ // Refine motion vector.
+ if (have_newmv_in_inter_mode(this_mode)) {
+ // get negative of mask
+ const uint8_t *mask = vp10_get_contiguous_soft_mask(
+ mbmi->interintra_wedge_index, 1, bsize);
+ do_masked_motion_search(cpi, x, mask, bw, bsize, mi_row, mi_col,
+ &tmp_mv, &tmp_rate_mv, 0, mv_idx);
+ mbmi->mv[0].as_int = tmp_mv.as_int;
+ vp10_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
+ &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
+ if (rd < best_interintra_rd_wedge) {
+ best_interintra_rd_wedge = rd;
+ } else {
+ tmp_mv.as_int = cur_mv[0].as_int;
+ tmp_rate_mv = rate_mv;
+ }
+ } else {
+ tmp_mv.as_int = cur_mv[0].as_int;
+ tmp_rate_mv = rate_mv;
+ vp10_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
+ }
+ // Evaluate closer to true rd
+ vp10_subtract_plane(x, bsize, 0);
+ rd =
+ estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
+ &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
+ if (rd != INT64_MAX)
+ rd = RDCOST(x->rdmult, x->rddiv,
+ rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
+ best_interintra_rd_wedge = rd;
+ if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
+ mbmi->use_wedge_interintra = 1;
+ best_interintra_rd = best_interintra_rd_wedge;
+ mbmi->mv[0].as_int = tmp_mv.as_int;
+ *rate2 += tmp_rate_mv - rate_mv;
+ rate_mv = tmp_rate_mv;
+ } else {
+ mbmi->use_wedge_interintra = 0;
+ best_interintra_rd = best_interintra_rd_nowedge;
+ mbmi->mv[0].as_int = cur_mv[0].as_int;
+ }
+ } else {
+ mbmi->use_wedge_interintra = 0;
+ best_interintra_rd = best_interintra_rd_nowedge;
+ }
+ }
+
+ pred_exists = 0;
+ tmp_rd = best_interintra_rd;
+ *compmode_interintra_cost =
+ vp10_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 1);
+ *compmode_interintra_cost += interintra_mode_cost[mbmi->interintra_mode];
+ if (is_interintra_wedge_used(bsize)) {
+ *compmode_interintra_cost += vp10_cost_bit(
+ cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra);
+ if (mbmi->use_wedge_interintra) {
+ *compmode_interintra_cost +=
+ vp10_cost_literal(get_interintra_wedge_bits(bsize));
+ }
+ }
+ } else if (is_interintra_allowed(mbmi)) {
+ *compmode_interintra_cost =
+ vp10_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 0);
+ }
+
+#if CONFIG_EXT_INTERP
+ if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) {
+#if CONFIG_DUAL_FILTER
+ for (i = 0; i < 4; ++i) mbmi->interp_filter[i] = EIGHTTAP_REGULAR;
+#else
+ mbmi->interp_filter = EIGHTTAP_REGULAR;
+#endif
+ pred_exists = 0;
+ }
+#endif // CONFIG_EXT_INTERP
+#endif // CONFIG_EXT_INTER
+
+ if (pred_exists) {
+ if (best_needs_copy) {
+ // again temporarily set the buffers to local memory to prevent a memcpy
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].dst.buf = tmp_buf + i * MAX_SB_SQUARE;
+ xd->plane[i].dst.stride = MAX_SB_SIZE;
+ }
+ }
+ rd = tmp_rd;
+ } else {
+ int tmp_rate;
+ int64_t tmp_dist;
+
+ // Handles the special case when a filter that is not in the
+ // switchable list (ex. bilinear) is indicated at the frame level, or
+ // skip condition holds.
+ vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
+ &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
+ rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
+ }
+
+#if CONFIG_DUAL_FILTER
+ if (!is_comp_pred) single_filter[this_mode][refs[0]] = mbmi->interp_filter[0];
+#else
+ if (!is_comp_pred) single_filter[this_mode][refs[0]] = mbmi->interp_filter;
+#endif
+
+#if CONFIG_EXT_INTER
+ if (modelled_rd != NULL) {
+ if (is_comp_pred) {
+ const int mode0 = compound_ref0_mode(this_mode);
+ const int mode1 = compound_ref1_mode(this_mode);
+ int64_t mrd =
+ VPXMIN(modelled_rd[mode0][refs[0]], modelled_rd[mode1][refs[1]]);
+ if (rd / 4 * 3 > mrd && ref_best_rd < INT64_MAX) {
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ return INT64_MAX;
+ }
+ } else if (!is_comp_interintra_pred) {
+ modelled_rd[this_mode][refs[0]] = rd;
+ }
+ }
+#endif // CONFIG_EXT_INTER
+
+ if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
+ // if current pred_error modeled rd is substantially more than the best
+ // so far, do not bother doing full rd
+ if (rd / 2 > ref_best_rd) {
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ return INT64_MAX;
+ }
+ }
+
+ if (cm->interp_filter == SWITCHABLE) *rate2 += rs;
+#if CONFIG_OBMC
+ rate2_nocoeff = *rate2;
+#endif // CONFIG_OBMC
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ best_rd = INT64_MAX;
+ for (mbmi->motion_variation = SIMPLE_TRANSLATION;
+ mbmi->motion_variation < (allow_motvar ? MOTION_VARIATIONS : 1);
+ mbmi->motion_variation++) {
+ int64_t tmp_rd;
+#if CONFIG_EXT_INTER
+ int tmp_rate2 = mbmi->motion_variation != SIMPLE_TRANSLATION
+ ? rate2_bmc_nocoeff
+ : rate2_nocoeff;
+#else
+ int tmp_rate2 = rate2_nocoeff;
+#endif // CONFIG_EXT_INTER
+#if CONFIG_EXT_INTERP
+#if CONFIG_DUAL_FILTER
+ INTERP_FILTER obmc_interp_filter[2][2] = {
+ { mbmi->interp_filter[0], mbmi->interp_filter[1] }, // obmc == 0
+ { mbmi->interp_filter[0], mbmi->interp_filter[1] } // obmc == 1
+ };
+#else
+ INTERP_FILTER obmc_interp_filter[2] = {
+ mbmi->interp_filter, // obmc == 0
+ mbmi->interp_filter // obmc == 1
+ };
+#endif // CONFIG_DUAL_FILTER
+#endif // CONFIG_EXT_INTERP
+
+#if CONFIG_OBMC
+ int tmp_rate;
+ int64_t tmp_dist;
+ if (mbmi->motion_variation == OBMC_CAUSAL) {
+#if CONFIG_EXT_INTER
+ *mbmi = best_bmc_mbmi;
+ mbmi->motion_variation = OBMC_CAUSAL;
+#endif // CONFIG_EXT_INTER
+ if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
+ int_mv tmp_mv;
+ int_mv pred_mv;
+ int tmp_rate_mv = 0;
+
+ pred_mv.as_int = mbmi->mv[0].as_int;
+ single_motion_search_obmc(cpi, x, bsize, mi_row, mi_col, wsrc, mask2d,
+#if CONFIG_EXT_INTER
+ 0, mv_idx,
+#endif // CONFIG_EXT_INTER
+ &tmp_mv, pred_mv, &tmp_rate_mv);
+ mbmi->mv[0].as_int = tmp_mv.as_int;
+ if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
+ tmp_rate_mv = VPXMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
+ }
+#if CONFIG_EXT_INTER
+ tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
+#else
+ tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
+#endif // CONFIG_EXT_INTER
+#if CONFIG_EXT_INTERP
+#if CONFIG_DUAL_FILTER
+ if (!has_subpel_mv_component(xd->mi[0], xd, 0))
+ obmc_interp_filter[1][0] = mbmi->interp_filter[0] = EIGHTTAP_REGULAR;
+ if (!has_subpel_mv_component(xd->mi[0], xd, 1))
+ obmc_interp_filter[1][1] = mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
+#else
+ if (!vp10_is_interp_needed(xd))
+ obmc_interp_filter[1] = mbmi->interp_filter = EIGHTTAP_REGULAR;
+#endif // CONFIG_DUAL_FILTER
+ // This is not quite correct with CONFIG_DUAL_FILTER when a filter
+ // is needed in only one direction
+ if (!vp10_is_interp_needed(xd)) tmp_rate2 -= rs;
+#endif // CONFIG_EXT_INTERP
+ vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+#if CONFIG_EXT_INTER
+ } else {
+ vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+#endif // CONFIG_EXT_INTER
+ }
+ vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1,
+ dst_stride1, dst_buf2, dst_stride2);
+ model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
+ &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
+ }
+#endif // CONFIG_OBMC
+
+#if CONFIG_WARPED_MOTION
+ if (mbmi->motion_variation == WARPED_CAUSAL) {
+ // TODO(yuec): Add code
+ }
+#endif // CONFIG_WARPED_MOTION
+ x->skip = 0;
+
+ *rate2 = tmp_rate2;
+ if (allow_motvar) *rate2 += cpi->motvar_cost[bsize][mbmi->motion_variation];
+ *distortion = 0;
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ if (!skip_txfm_sb) {
+ int skippable_y, skippable_uv;
+ int64_t sseuv = INT64_MAX;
+ int64_t rdcosty = INT64_MAX;
+
+ // Y cost and distortion
+ vp10_subtract_plane(x, bsize, 0);
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
+ select_tx_type_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
+ bsize, ref_best_rd);
+ } else {
+ int idx, idy;
+ super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse,
+ bsize, ref_best_rd);
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
+ memset(x->blk_skip[0], skippable_y,
+ sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
+ }
+#else
+ super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize,
+ ref_best_rd);
+#endif // CONFIG_VAR_TX
+
+ if (*rate_y == INT_MAX) {
+ *rate2 = INT_MAX;
+ *distortion = INT64_MAX;
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ if (mbmi->motion_variation != SIMPLE_TRANSLATION) {
+ continue;
+ } else {
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ return INT64_MAX;
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ }
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ }
+
+ *rate2 += *rate_y;
+ *distortion += distortion_y;
+
+ rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
+ rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
+
+#if CONFIG_VAR_TX
+ if (!inter_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
+ &sseuv, bsize, ref_best_rd - rdcosty))
+#else
+ if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
+ &sseuv, bsize, ref_best_rd - rdcosty))
+#endif // CONFIG_VAR_TX
+ {
+ *rate2 = INT_MAX;
+ *distortion = INT64_MAX;
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ continue;
+#else
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ return INT64_MAX;
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ }
+
+ *psse += sseuv;
+ *rate2 += *rate_uv;
+ *distortion += distortion_uv;
+ *skippable = skippable_y && skippable_uv;
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ if (*skippable) {
+ *rate2 -= *rate_uv + *rate_y;
+ *rate_y = 0;
+ *rate_uv = 0;
+ *rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ mbmi->skip = 0;
+ // here mbmi->skip temporarily plays a role as what this_skip2 does
+ } else if (!xd->lossless[mbmi->segment_id] &&
+ (RDCOST(x->rdmult, x->rddiv,
+ *rate_y + *rate_uv +
+ vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0),
+ *distortion) >=
+ RDCOST(x->rdmult, x->rddiv,
+ vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1),
+ *psse))) {
+ *rate2 -= *rate_uv + *rate_y;
+ *rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ *distortion = *psse;
+ *rate_y = 0;
+ *rate_uv = 0;
+ mbmi->skip = 1;
+ } else {
+ *rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
+ mbmi->skip = 0;
+ }
+ *disable_skip = 0;
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ } else {
+ x->skip = 1;
+ *disable_skip = 1;
+
+// The cost of skip bit needs to be added.
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ mbmi->skip = 0;
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ *rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+
+ *distortion = skip_sse_sb;
+ *psse = skip_sse_sb;
+ *rate_y = 0;
+ *rate_uv = 0;
+ *skippable = 1;
+ }
+
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ tmp_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
+ if (mbmi->motion_variation == SIMPLE_TRANSLATION || (tmp_rd < best_rd)) {
+#if CONFIG_EXT_INTERP
+#if CONFIG_DUAL_FILTER
+ mbmi->interp_filter[0] = obmc_interp_filter[mbmi->motion_variation][0];
+ mbmi->interp_filter[1] = obmc_interp_filter[mbmi->motion_variation][1];
+#else
+ mbmi->interp_filter = obmc_interp_filter[mbmi->motion_variation];
+#endif // CONFIG_DUAL_FILTER
+#endif // CONFIG_EXT_INTERP
+ best_mbmi = *mbmi;
+ best_rd = tmp_rd;
+ best_rate2 = *rate2;
+ best_rate_y = *rate_y;
+ best_rate_uv = *rate_uv;
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(best_blk_skip[i], x->blk_skip[i],
+ sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
+#endif // CONFIG_VAR_TX
+ best_distortion = *distortion;
+ best_skippable = *skippable;
+ best_xskip = x->skip;
+ best_disable_skip = *disable_skip;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ x->recon_variance = vp10_high_get_sby_perpixel_variance(
+ cpi, &xd->plane[0].dst, bsize, xd->bd);
+ } else {
+ x->recon_variance =
+ vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
+ }
+#else
+ x->recon_variance =
+ vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ }
+
+ if (best_rd == INT64_MAX) {
+ *rate2 = INT_MAX;
+ *distortion = INT64_MAX;
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ return INT64_MAX;
+ }
+ *mbmi = best_mbmi;
+ *rate2 = best_rate2;
+ *rate_y = best_rate_y;
+ *rate_uv = best_rate_uv;
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(x->blk_skip[i], best_blk_skip[i],
+ sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
+#endif // CONFIG_VAR_TX
+ *distortion = best_distortion;
+ *skippable = best_skippable;
+ x->skip = best_xskip;
+ *disable_skip = best_disable_skip;
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+
+ if (!is_comp_pred) single_skippable[this_mode][refs[0]] = *skippable;
+
+#if !(CONFIG_OBMC || CONFIG_WARPED_MOTION)
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ x->recon_variance = vp10_high_get_sby_perpixel_variance(
+ cpi, &xd->plane[0].dst, bsize, xd->bd);
+ } else {
+ x->recon_variance =
+ vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
+ }
+#else
+ x->recon_variance =
+ vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // !(CONFIG_OBMC || CONFIG_WARPED_MOTION)
+
+ restore_dst_buf(xd, orig_dst, orig_dst_stride);
+ return 0; // The rate-distortion cost will be re-calculated by caller.
+}
+
+// Runs the intra luma search followed by the intra chroma search for the
+// current block and writes the combined rate, distortion and RD cost to
+// rd_cost. A snapshot of the resulting mode info is stored in ctx.
+//
+// If the luma search returns a cost that is not below best_rd, only
+// rd_cost->rate is written (INT_MAX) and the function returns early.
+void vp10_rd_pick_intra_mode_sb(VP10_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
+                                BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
+                                int64_t best_rd) {
+  VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  struct macroblockd_plane *const pd = xd->plane;
+  int y_rate = 0, uv_rate = 0, y_rate_tokens = 0, uv_rate_tokens = 0;
+  int y_skip = 0, uv_skip = 0;
+  int64_t y_dist = 0, uv_dist = 0;
+  TX_SIZE max_uv_tx_size;
+  int luma_not_better;
+  int all_skipped;
+  int total_rate;
+
+  ctx->skip = 0;
+  xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
+  xd->mi[0]->mbmi.ref_frame[1] = NONE;
+
+  // Blocks of 8x8 and larger use the whole-block luma search; smaller blocks
+  // use the sub-8x8 search, which does not report a skip flag (y_skip stays
+  // 0).
+  if (bsize >= BLOCK_8X8) {
+    luma_not_better =
+        rd_pick_intra_sby_mode(cpi, x, &y_rate, &y_rate_tokens, &y_dist,
+                               &y_skip, bsize, best_rd) >= best_rd;
+  } else {
+    luma_not_better =
+        rd_pick_intra_sub_8x8_y_mode(cpi, x, &y_rate, &y_rate_tokens, &y_dist,
+                                     best_rd) >= best_rd;
+  }
+  if (luma_not_better) {
+    rd_cost->rate = INT_MAX;
+    return;
+  }
+
+  max_uv_tx_size = get_uv_tx_size_impl(
+      xd->mi[0]->mbmi.tx_size, bsize, pd[1].subsampling_x, pd[1].subsampling_y);
+  rd_pick_intra_sbuv_mode(cpi, x, &uv_rate, &uv_rate_tokens, &uv_dist,
+                          &uv_skip, VPXMAX(BLOCK_8X8, bsize), max_uv_tx_size);
+
+  // When both luma and chroma are skippable the token-only rates are removed
+  // and the skip flag is costed as 1; otherwise it is costed as 0.
+  all_skipped = y_skip && uv_skip;
+  total_rate = y_rate + uv_rate;
+  if (all_skipped) {
+    total_rate -= y_rate_tokens;
+    total_rate -= uv_rate_tokens;
+  }
+  total_rate += vp10_cost_bit(vp10_get_skip_prob(cm, xd), all_skipped);
+
+  rd_cost->rate = total_rate;
+  rd_cost->dist = y_dist + uv_dist;
+
+  ctx->mic = *xd->mi[0];
+  ctx->mbmi_ext = *x->mbmi_ext;
+  rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+}
+
+// Biases an rd value according to how the variance of the reconstruction
+// compares with the variance of the source. *this_rd is increased by a
+// percentage of itself; the percentage is limited by the caps defined below.
+#define LOW_VAR_THRESH 16
+#define VLOW_ADJ_MAX 25
+#define VHIGH_ADJ_MAX 8
+static void rd_variance_adjustment(MACROBLOCK *x, int64_t *this_rd,
+                                   MV_REFERENCE_FRAME ref_frame,
+                                   unsigned int source_variance) {
+  const unsigned int recon_variance = x->recon_variance;
+  unsigned int variance_gap = 0;
+  int64_t mismatch_pct = 0;
+  int64_t penalty_cap;
+  int64_t penalty_pct;
+
+  // Nothing to adjust when the cost is already INT64_MAX.
+  if (*this_rd == INT64_MAX) return;
+
+  // The gap and mismatch measures are only computed when the combined
+  // variance exceeds the threshold; otherwise both remain zero.
+  if ((source_variance + recon_variance) > LOW_VAR_THRESH) {
+    const int64_t src = source_variance;
+    const int64_t rec = recon_variance;
+
+    if (source_variance > recon_variance)
+      variance_gap = source_variance - recon_variance;
+    else
+      variance_gap = recon_variance - source_variance;
+
+    mismatch_pct = 100 - ((200 * src * rec) / ((src * src) + (rec * rec)));
+  }
+
+  if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
+      (source_variance > recon_variance)) {
+    // Source variance above the threshold with an intra reference: discourage
+    // intra modes whose predictor has low spatial complexity compared to the
+    // source.
+    penalty_cap = VLOW_ADJ_MAX;
+  } else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
+             (recon_variance > source_variance)) {
+    // Very low source variance: discourage false texture or motion trails.
+    penalty_cap = VHIGH_ADJ_MAX;
+  } else {
+    // Neither case applies, so the cost is left as it is.
+    return;
+  }
+
+  penalty_pct = VPXMIN(variance_gap, VPXMIN(penalty_cap, mismatch_pct));
+  *this_rd += (*this_rd * penalty_pct) / 100;
+}
+
+// Returns 1 if the second-pass stats for this frame report any inactive rows
+// or columns (an internal image edge, e.g. formatting bars), 0 otherwise.
+// Always 0 outside pass 2.
+int vp10_internal_image_edge(VP10_COMP *cpi) {
+  if (cpi->oxcf.pass != 2) return 0;
+  if (cpi->twopass.this_frame_stats.inactive_zone_rows > 0) return 1;
+  return cpi->twopass.this_frame_stats.inactive_zone_cols > 0;
+}
+
+// Returns 1 if a horizontal edge of the active image lies within the mi rows
+// [mi_row, mi_row + mi_step), 0 otherwise. The edges are normally the real
+// top and bottom of the frame; in pass 2 they are moved inwards by any
+// formatting bars recorded in the frame stats.
+int vp10_active_h_edge(VP10_COMP *cpi, int mi_row, int mi_step) {
+  const int span_end = mi_row + mi_step;
+  int first_active_row = 0;
+  int end_active_row = cpi->common.mi_rows;
+
+  if (cpi->oxcf.pass == 2) {
+    // The inactive region is given in MBs, not mi units, hence the factor of
+    // two. The image edge is in the following MB row.
+    const int inactive_mi =
+        (int)(cpi->twopass.this_frame_stats.inactive_zone_rows * 2);
+
+    first_active_row += inactive_mi;
+    end_active_row -= inactive_mi;
+    end_active_row = VPXMAX(first_active_row, end_active_row);
+  }
+
+  if ((first_active_row >= mi_row) && (first_active_row < span_end)) return 1;
+  if ((end_active_row >= mi_row) && (end_active_row < span_end)) return 1;
+  return 0;
+}
+
+// Returns 1 if a vertical edge of the active image lies within the mi columns
+// [mi_col, mi_col + mi_step), 0 otherwise. The edges are normally the real
+// left and right of the frame; in pass 2 they are moved inwards by any
+// formatting bars recorded in the frame stats.
+int vp10_active_v_edge(VP10_COMP *cpi, int mi_col, int mi_step) {
+  const int span_end = mi_col + mi_step;
+  int first_active_col = 0;
+  int end_active_col = cpi->common.mi_cols;
+
+  if (cpi->oxcf.pass == 2) {
+    // The inactive region is given in MBs, not mi units, hence the factor of
+    // two. The image edge is in the following MB column.
+    const int inactive_mi =
+        (int)(cpi->twopass.this_frame_stats.inactive_zone_cols * 2);
+
+    first_active_col += inactive_mi;
+    end_active_col -= inactive_mi;
+    end_active_col = VPXMAX(first_active_col, end_active_col);
+  }
+
+  if ((first_active_col >= mi_col) && (first_active_col < span_end)) return 1;
+  if ((end_active_col >= mi_col) && (end_active_col < span_end)) return 1;
+  return 0;
+}
+
+// Returns 1 if the super block starting at (mi_row, mi_col) touches either a
+// horizontal or a vertical edge of the active image, 0 otherwise. The super
+// block extent in mi units is taken from cpi->common.mib_size.
+int vp10_active_edge_sb(VP10_COMP *cpi, int mi_row, int mi_col) {
+  const int sb_mi_size = cpi->common.mib_size;
+
+  if (vp10_active_h_edge(cpi, mi_row, sb_mi_size)) return 1;
+  return vp10_active_v_edge(cpi, mi_col, sb_mi_size) != 0;
+}
+
+// Rebuilds the chroma color index map of the current block. The U and V
+// source samples are gathered as interleaved (U, V) pairs, the block's UV
+// palette colors are gathered as interleaved centroids, and both are handed
+// to vp10_calc_indices(), which fills xd->plane[1].color_index_map.
+static void restore_uv_color_map(VP10_COMP *cpi, MACROBLOCK *x) {
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+  const BLOCK_SIZE bsize = mbmi->sb_type;
+  // Chroma block dimensions in samples, after subsampling.
+  const int uv_rows =
+      (4 * num_4x4_blocks_high_lookup[bsize]) >> (xd->plane[1].subsampling_y);
+  const int uv_cols =
+      (4 * num_4x4_blocks_wide_lookup[bsize]) >> (xd->plane[1].subsampling_x);
+  // The plane-1 stride is used to address both the U and the V source.
+  int uv_stride = x->plane[1].src.stride;
+  const uint8_t *const src_u = x->plane[1].src.buf;
+  const uint8_t *const src_v = x->plane[2].src.buf;
+  float *const data = x->palette_buffer->kmeans_data_buf;
+  float centroids[2 * PALETTE_MAX_SIZE];
+  uint8_t *const color_map = xd->plane[1].color_index_map;
+  int row, col, color;
+#if CONFIG_VP9_HIGHBITDEPTH
+  const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
+  const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  (void)cpi;
+
+  // Gather the source samples as interleaved (U, V) pairs.
+  for (row = 0; row < uv_rows; ++row) {
+    for (col = 0; col < uv_cols; ++col) {
+      float *const uv_pair = data + (row * uv_cols + col) * 2;
+      const int src_pos = row * uv_stride + col;
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (cpi->common.use_highbitdepth) {
+        uv_pair[0] = src_u16[src_pos];
+        uv_pair[1] = src_v16[src_pos];
+        continue;
+      }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+      uv_pair[0] = src_u[src_pos];
+      uv_pair[1] = src_v[src_pos];
+    }
+  }
+
+  // Gather the palette as interleaved (U, V) centroids. palette_colors holds
+  // one PALETTE_MAX_SIZE-long row per plane; rows 1 and 2 are read here.
+  for (color = 0; color < pmi->palette_size[1]; ++color) {
+    centroids[color * 2] = pmi->palette_colors[PALETTE_MAX_SIZE + color];
+    centroids[color * 2 + 1] =
+        pmi->palette_colors[2 * PALETTE_MAX_SIZE + color];
+  }
+
+  vp10_calc_indices(data, centroids, color_map, uv_rows * uv_cols,
+                    pmi->palette_size[1], 2);
+}
+
+#if CONFIG_EXT_INTRA
+// Evaluates an ext-intra luma candidate for the current block and updates the
+// caller's best-mode bookkeeping when it wins.
+//
+// The luma search is run through rd_pick_ext_intra_sby() with DC_PRED as the
+// base mode. The chroma result is taken from the per-transform-size caches
+// (rate_uv_intra, rate_uv_tokenonly, dist_uv, skip_uv, mode_uv, pmi_uv,
+// ext_intra_mode_info_uv, uv_angle_delta), which are filled on demand here.
+// The function returns without touching the outputs if the luma search
+// reports failure. Otherwise *best_intra_rd / *best_intra_mode and
+// best_pred_rd[] are refreshed, and if the resulting cost is below *best_rd
+// the overall best (rd_cost, *best_rd, *best_mbmode, *best_mode_index,
+// *best_skip2, *best_mode_skippable and, with CONFIG_SUPERTX,
+// *returnrate_nocoef) is replaced.
+static void pick_ext_intra_interframe(
+ VP10_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
+ int *rate_uv_intra, int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv,
+ PREDICTION_MODE *mode_uv, EXT_INTRA_MODE_INFO *ext_intra_mode_info_uv,
+ PALETTE_MODE_INFO *pmi_uv, int8_t *uv_angle_delta, int palette_ctx,
+ int skip_mask, unsigned int *ref_costs_single, int64_t *best_rd,
+ int64_t *best_intra_rd, PREDICTION_MODE *best_intra_mode,
+ int *best_mode_index, int *best_skip2, int *best_mode_skippable,
+#if CONFIG_SUPERTX
+ int *returnrate_nocoef,
+#endif // CONFIG_SUPERTX
+ int64_t *best_pred_rd, MB_MODE_INFO *best_mbmode, RD_COST *rd_cost) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ int rate2 = 0, rate_y = INT_MAX, skippable = 0, rate_uv, rate_dummy, i;
+ int dc_mode_index;
+ const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
+ int64_t distortion2 = 0, distortion_y = 0, this_rd = *best_rd, distortion_uv;
+ TX_SIZE uv_tx;
+
+ // Locate the (DC_PRED, INTRA_FRAME) entry in vp10_mode_order; its index is
+ // what gets reported as the best mode index if this candidate wins.
+ for (i = 0; i < MAX_MODES; ++i)
+ if (vp10_mode_order[i].mode == DC_PRED &&
+ vp10_mode_order[i].ref_frame[0] == INTRA_FRAME)
+ break;
+ dc_mode_index = i;
+ assert(i < MAX_MODES);
+
+ // TODO(huisu): use skip_mask for further speedup.
+ (void)skip_mask;
+ mbmi->mode = DC_PRED;
+ mbmi->uv_mode = DC_PRED;
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->ref_frame[1] = NONE;
+ // Give up if the ext-intra luma search returns 0 or leaves rate_y at its
+ // initial INT_MAX.
+ if (!rd_pick_ext_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
+ &skippable, bsize, intra_mode_cost[mbmi->mode],
+ &this_rd, 0))
+ return;
+ if (rate_y == INT_MAX) return;
+
+ uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, xd->plane[1].subsampling_x,
+ xd->plane[1].subsampling_y);
+ // A cached rate of INT_MAX is treated as "no chroma result yet for this
+ // transform size": run the chroma search and store its outcome.
+ if (rate_uv_intra[uv_tx] == INT_MAX) {
+ choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
+ &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
+ &skip_uv[uv_tx], &mode_uv[uv_tx]);
+ if (cm->allow_screen_content_tools) pmi_uv[uv_tx] = *pmi;
+ ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
+ uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
+ }
+
+ // Copy the cached chroma decision for this transform size into the current
+ // mode info.
+ rate_uv = rate_uv_tokenonly[uv_tx];
+ distortion_uv = dist_uv[uv_tx];
+ skippable = skippable && skip_uv[uv_tx];
+ mbmi->uv_mode = mode_uv[uv_tx];
+ if (cm->allow_screen_content_tools) {
+ pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
+ memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
+ pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
+ 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
+ }
+ mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1];
+ if (ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
+ }
+
+ // Accumulate the total rate: luma rate, luma mode cost, chroma token rate
+ // and chroma mode cost, followed by the side-information costs below.
+ rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
+ cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
+ if (cpi->common.allow_screen_content_tools && mbmi->mode == DC_PRED)
+ rate2 += vp10_cost_bit(
+ vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
+
+ if (!xd->lossless[mbmi->segment_id]) {
+ // super_block_yrd above includes the cost of the tx_size in the
+ // tokenonly rate, but for intra blocks, tx_size is always coded
+ // (prediction granularity), so we account for it in the full rate,
+ // not the tokenonly rate.
+ rate_y -= cpi->tx_size_cost[max_tx_size -
+ TX_8X8][get_tx_size_context(xd)][mbmi->tx_size];
+ }
+
+ // Cost of the luma ext-intra flag and of the luma ext-intra mode index.
+ rate2 += vp10_cost_bit(cm->fc->ext_intra_probs[0],
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0]);
+ rate2 += write_uniform_cost(FILTER_INTRA_MODES,
+ mbmi->ext_intra_mode_info.ext_intra_mode[0]);
+ // Chroma modes other than DC_PRED and TM_PRED also pay for an angle delta.
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
+ rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+ }
+ // Cost of the chroma ext-intra flag and, when set, of its mode index.
+ if (mbmi->mode == DC_PRED) {
+ rate2 += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1],
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1])
+ rate2 += write_uniform_cost(FILTER_INTRA_MODES,
+ mbmi->ext_intra_mode_info.ext_intra_mode[1]);
+ }
+ distortion2 = distortion_y + distortion_uv;
+ // Encode plane 0 and measure the variance of xd->plane[0].dst; the result
+ // feeds rd_variance_adjustment() below through x->recon_variance.
+ vp10_encode_intra_block_plane(x, bsize, 0, 0);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ x->recon_variance = vp10_high_get_sby_perpixel_variance(
+ cpi, &xd->plane[0].dst, bsize, xd->bd);
+ } else {
+ x->recon_variance =
+ vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
+ }
+#else
+ x->recon_variance =
+ vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ rate2 += ref_costs_single[INTRA_FRAME];
+
+ // When the block is skippable, drop the luma/chroma rates and pay for a
+ // skip flag of 1; otherwise pay for a skip flag of 0.
+ if (skippable) {
+ rate2 -= (rate_y + rate_uv);
+ rate_y = 0;
+ rate_uv = 0;
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ } else {
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
+ }
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ rd_variance_adjustment(x, &this_rd, INTRA_FRAME, x->source_variance);
+
+ // Track the best intra cost seen so far and the per-reference-mode minima.
+ if (this_rd < *best_intra_rd) {
+ *best_intra_rd = this_rd;
+ *best_intra_mode = mbmi->mode;
+ }
+ for (i = 0; i < REFERENCE_MODES; ++i)
+ best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
+
+ // This candidate beats the overall best: publish it to the caller.
+ if (this_rd < *best_rd) {
+ *best_mode_index = dc_mode_index;
+ mbmi->mv[0].as_int = 0;
+ rd_cost->rate = rate2;
+#if CONFIG_SUPERTX
+ // Rate without coefficients: remove the luma/chroma rates (unless x->skip
+ // is set) and the skip and intra/inter flag costs.
+ if (x->skip)
+ *returnrate_nocoef = rate2;
+ else
+ *returnrate_nocoef = rate2 - rate_y - rate_uv;
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_skip_prob(cm, xd), skippable);
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd),
+ mbmi->ref_frame[0] != INTRA_FRAME);
+#endif // CONFIG_SUPERTX
+ rd_cost->dist = distortion2;
+ rd_cost->rdcost = this_rd;
+ *best_rd = this_rd;
+ *best_mbmode = *mbmi;
+ *best_skip2 = 0;
+ *best_mode_skippable = skippable;
+ }
+}
+#endif // CONFIG_EXT_INTRA
+
+#if CONFIG_OBMC
+// Forward declaration: vp10_rd_pick_inter_mode_sb() below calls this function
+// with the luma buffers and strides of the above and left predictions. Being
+// static, it must be defined elsewhere in this translation unit.
+// NOTE(review): mask_buf and wsrc_buf are presumably outputs (the caller
+// passes its local mask2d_buf and weighted_src_buf) -- confirm against the
+// definition.
+static void calc_target_weighted_pred(
+ const VP10_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
+ int mi_row, int mi_col, const uint8_t *above, int above_stride,
+ const uint8_t *left, int left_stride, int32_t *mask_buf, int32_t *wsrc_buf);
+#endif // CONFIG_OBMC
+
+void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, TileDataEnc *tile_data,
+ MACROBLOCK *x, int mi_row, int mi_col,
+ RD_COST *rd_cost,
+#if CONFIG_SUPERTX
+ int *returnrate_nocoef,
+#endif // CONFIG_SUPERTX
+ BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far) {
+ VP10_COMMON *const cm = &cpi->common;
+ RD_OPT *const rd_opt = &cpi->rd;
+ SPEED_FEATURES *const sf = &cpi->sf;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+ const struct segmentation *const seg = &cm->seg;
+ PREDICTION_MODE this_mode;
+ MV_REFERENCE_FRAME ref_frame, second_ref_frame;
+ unsigned char segment_id = mbmi->segment_id;
+ int comp_pred, i, k;
+ int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
+ struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
+#if CONFIG_EXT_INTER
+ int_mv single_newmvs[2][TOTAL_REFS_PER_FRAME] = { { { 0 } }, { { 0 } } };
+ int single_newmvs_rate[2][TOTAL_REFS_PER_FRAME] = { { 0 }, { 0 } };
+ int64_t modelled_rd[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
+#else
+ int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
+#endif // CONFIG_EXT_INTER
+ INTERP_FILTER single_inter_filter[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
+ int single_skippable[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
+ static const int flag_list[TOTAL_REFS_PER_FRAME] = {
+ 0,
+ VPX_LAST_FLAG,
+#if CONFIG_EXT_REFS
+ VPX_LAST2_FLAG,
+ VPX_LAST3_FLAG,
+#endif // CONFIG_EXT_REFS
+ VPX_GOLD_FLAG,
+#if CONFIG_EXT_REFS
+ VPX_BWD_FLAG,
+#endif // CONFIG_EXT_REFS
+ VPX_ALT_FLAG
+ };
+ int64_t best_rd = best_rd_so_far;
+ int best_rate_y = INT_MAX, best_rate_uv = INT_MAX;
+ int64_t best_pred_diff[REFERENCE_MODES];
+ int64_t best_pred_rd[REFERENCE_MODES];
+ MB_MODE_INFO best_mbmode;
+ int best_mode_skippable = 0;
+ int midx, best_mode_index = -1;
+ unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
+ unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
+ vpx_prob comp_mode_p;
+ int64_t best_intra_rd = INT64_MAX;
+ unsigned int best_pred_sse = UINT_MAX;
+ PREDICTION_MODE best_intra_mode = DC_PRED;
+ int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
+ int64_t dist_uv[TX_SIZES];
+ int skip_uv[TX_SIZES];
+ PREDICTION_MODE mode_uv[TX_SIZES];
+ PALETTE_MODE_INFO pmi_uv[TX_SIZES];
+#if CONFIG_EXT_INTRA
+ EXT_INTRA_MODE_INFO ext_intra_mode_info_uv[TX_SIZES];
+ int8_t uv_angle_delta[TX_SIZES], dc_skipped = 1;
+ int is_directional_mode, angle_stats_ready = 0;
+ int rate_overhead, rate_dummy;
+ uint8_t directional_mode_skip_mask[INTRA_MODES];
+#endif // CONFIG_EXT_INTRA
+ const int intra_cost_penalty = vp10_get_intra_cost_penalty(
+ cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
+ const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
+ int best_skip2 = 0;
+ uint8_t ref_frame_skip_mask[2] = { 0 };
+#if CONFIG_EXT_INTER
+ uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
+ MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME;
+ int64_t best_single_inter_rd = INT64_MAX;
+#else
+ uint16_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
+#endif // CONFIG_EXT_INTER
+ int mode_skip_start = sf->mode_skip_start + 1;
+ const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
+ const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
+ int64_t mode_threshold[MAX_MODES];
+ int *mode_map = tile_data->mode_map[bsize];
+ const int mode_search_skip_flags = sf->mode_search_skip_flags;
+ const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
+ int palette_ctx = 0;
+ const int rows = 4 * num_4x4_blocks_high_lookup[bsize];
+ const int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
+ const MODE_INFO *above_mi = xd->above_mi;
+ const MODE_INFO *left_mi = xd->left_mi;
+#if CONFIG_OBMC
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
+#else
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, int32_t, weighted_src_buf[MAX_SB_SQUARE]);
+ DECLARE_ALIGNED(16, int32_t, mask2d_buf[MAX_SB_SQUARE]);
+ uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
+ int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+ int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int len = sizeof(uint16_t);
+ dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
+ dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
+ dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_SB_SQUARE * len);
+ dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
+ dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
+ dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_SB_SQUARE * len);
+ } else {
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ dst_buf1[0] = tmp_buf1;
+ dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
+ dst_buf1[2] = tmp_buf1 + 2 * MAX_SB_SQUARE;
+ dst_buf2[0] = tmp_buf2;
+ dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
+ dst_buf2[2] = tmp_buf2 + 2 * MAX_SB_SQUARE;
+#if CONFIG_VP9_HIGHBITDEPTH
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_OBMC
+
+ vp10_zero(best_mbmode);
+ vp10_zero(pmi_uv);
+
+ if (cm->allow_screen_content_tools) {
+ if (above_mi)
+ palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ if (left_mi)
+ palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+ }
+
+#if CONFIG_EXT_INTRA
+ memset(directional_mode_skip_mask, 0,
+ sizeof(directional_mode_skip_mask[0]) * INTRA_MODES);
+#endif // CONFIG_EXT_INTRA
+
+ estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
+ &comp_mode_p);
+
+ for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
+ for (i = 0; i < TX_SIZES; i++) rate_uv_intra[i] = INT_MAX;
+ for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
+ for (i = 0; i < MB_MODE_COUNT; ++i) {
+ for (k = 0; k < TOTAL_REFS_PER_FRAME; ++k) {
+ single_inter_filter[i][k] = SWITCHABLE;
+ single_skippable[i][k] = 0;
+ }
+ }
+
+ rd_cost->rate = INT_MAX;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = INT_MAX;
+#endif // CONFIG_SUPERTX
+
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ x->pred_mv_sad[ref_frame] = INT_MAX;
+ x->mbmi_ext->mode_context[ref_frame] = 0;
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ x->mbmi_ext->compound_mode_context[ref_frame] = 0;
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ if (cpi->ref_frame_flags & flag_list[ref_frame]) {
+ assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
+ setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
+ frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
+ }
+ frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
+ frame_mv[ZEROMV][ref_frame].as_int = 0;
+#if CONFIG_EXT_INTER
+ frame_mv[NEWFROMNEARMV][ref_frame].as_int = INVALID_MV;
+ frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
+ frame_mv[ZERO_ZEROMV][ref_frame].as_int = 0;
+#endif // CONFIG_EXT_INTER
+ }
+
+#if CONFIG_REF_MV
+ for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
+ MODE_INFO *const mi = xd->mi[0];
+ int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
+ x->mbmi_ext->mode_context[ref_frame] = 0;
+ vp10_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
+ mbmi_ext->ref_mv_stack[ref_frame],
+#if CONFIG_EXT_INTER
+ mbmi_ext->compound_mode_context,
+#endif // CONFIG_EXT_INTER
+ candidates, mi_row, mi_col, NULL, NULL,
+ mbmi_ext->mode_context);
+ }
+#endif // CONFIG_REF_MV
+
+#if CONFIG_OBMC
+ vp10_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1,
+ dst_width1, dst_height1, dst_stride1);
+ vp10_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
+ dst_width2, dst_height2, dst_stride2);
+ vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+ calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, dst_buf1[0],
+ dst_stride1[0], dst_buf2[0], dst_stride2[0],
+ mask2d_buf, weighted_src_buf);
+#endif // CONFIG_OBMC
+
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
+// Skip checking missing references in both single and compound reference
+// modes. Note that a mode will be skipped iff both reference frames
+// are masked out.
+#if CONFIG_EXT_REFS
+ if (ref_frame == BWDREF_FRAME || ref_frame == ALTREF_FRAME) {
+ ref_frame_skip_mask[0] |= (1 << ref_frame);
+ ref_frame_skip_mask[1] |= ((1 << ref_frame) | 0x01);
+ } else {
+#endif // CONFIG_EXT_REFS
+ ref_frame_skip_mask[0] |= (1 << ref_frame);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+#if CONFIG_EXT_REFS
+ }
+#endif // CONFIG_EXT_REFS
+ } else {
+ for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
+ // Skip fixed mv modes for poor references
+ if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
+ mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
+ break;
+ }
+ }
+ }
+ // If the segment reference frame feature is enabled....
+ // then do nothing if the current ref frame is not allowed..
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
+ ref_frame_skip_mask[0] |= (1 << ref_frame);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ }
+ }
+
+ // Disable this drop out case if the ref frame
+ // segment level feature is enabled for this segment. This is to
+ // prevent the possibility that we end up unable to pick any mode.
+ if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
+ // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
+ // unless ARNR filtering is enabled in which case we want
+ // an unfiltered alternative. We allow near/nearest as well
+ // because they may result in zero-zero MVs but be cheaper.
+ if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
+ ref_frame_skip_mask[0] = (1 << LAST_FRAME) |
+#if CONFIG_EXT_REFS
+ (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
+ (1 << BWDREF_FRAME) |
+#endif // CONFIG_EXT_REFS
+ (1 << GOLDEN_FRAME);
+ ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
+ // TODO(zoeliu): To further explore whether following needs to be done for
+ // BWDREF_FRAME as well.
+ mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
+ if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
+ mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
+ if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
+ mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
+#if CONFIG_EXT_INTER
+ if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != 0)
+ mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
+ if (frame_mv[NEAREST_NEARMV][ALTREF_FRAME].as_int != 0)
+ mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARMV);
+ if (frame_mv[NEAR_NEARESTMV][ALTREF_FRAME].as_int != 0)
+ mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARESTMV);
+ if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != 0)
+ mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV);
+#endif // CONFIG_EXT_INTER
+ }
+ }
+
+ if (cpi->rc.is_src_frame_alt_ref) {
+ if (sf->alt_ref_search_fp) {
+ assert(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]);
+ mode_skip_mask[ALTREF_FRAME] = 0;
+ ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
+ ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
+ }
+ }
+
+ if (sf->alt_ref_search_fp)
+ if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
+ if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
+ mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
+
+ if (sf->adaptive_mode_search) {
+ if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
+ cpi->rc.frames_since_golden >= 3)
+ if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
+ mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
+ }
+
+ if (bsize > sf->max_intra_bsize) {
+ ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
+ ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
+ }
+
+ mode_skip_mask[INTRA_FRAME] |=
+ ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
+
+ for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
+ for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
+ mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
+
+ midx = sf->schedule_mode_search ? mode_skip_start : 0;
+ while (midx > 4) {
+ uint8_t end_pos = 0;
+ for (i = 5; i < midx; ++i) {
+ if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
+ uint8_t tmp = mode_map[i];
+ mode_map[i] = mode_map[i - 1];
+ mode_map[i - 1] = tmp;
+ end_pos = i;
+ }
+ }
+ midx = end_pos;
+ }
+
+ if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
+ x->use_default_intra_tx_type = 1;
+ else
+ x->use_default_intra_tx_type = 0;
+
+ if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
+ x->use_default_inter_tx_type = 1;
+ else
+ x->use_default_inter_tx_type = 0;
+
+#if CONFIG_EXT_INTER
+ for (i = 0; i < MB_MODE_COUNT; ++i)
+ for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
+ modelled_rd[i][ref_frame] = INT64_MAX;
+#endif // CONFIG_EXT_INTER
+
+ for (midx = 0; midx < MAX_MODES; ++midx) {
+ int mode_index;
+ int mode_excluded = 0;
+ int64_t this_rd = INT64_MAX;
+ int disable_skip = 0;
+ int compmode_cost = 0;
+#if CONFIG_EXT_INTER
+ int compmode_interintra_cost = 0;
+ int compmode_wedge_cost = 0;
+#endif // CONFIG_EXT_INTER
+ int rate2 = 0, rate_y = 0, rate_uv = 0;
+ int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
+ int skippable = 0;
+ int this_skip2 = 0;
+ int64_t total_sse = INT64_MAX;
+ int early_term = 0;
+#if CONFIG_REF_MV
+ uint8_t ref_frame_type;
+#endif
+
+ mode_index = mode_map[midx];
+ this_mode = vp10_mode_order[mode_index].mode;
+ ref_frame = vp10_mode_order[mode_index].ref_frame[0];
+ second_ref_frame = vp10_mode_order[mode_index].ref_frame[1];
+
+#if CONFIG_EXT_INTER
+ if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) {
+ // Mode must be compatible
+ assert(is_interintra_allowed_mode(this_mode));
+
+ if (!is_interintra_allowed_bsize(bsize)) continue;
+ }
+
+ if (is_inter_compound_mode(this_mode)) {
+ frame_mv[this_mode][ref_frame].as_int =
+ frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
+ frame_mv[this_mode][second_ref_frame].as_int =
+ frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;
+ }
+#endif // CONFIG_EXT_INTER
+
+ // Look at the reference frame of the best mode so far and set the
+ // skip mask to look at a subset of the remaining modes.
+ if (midx == mode_skip_start && best_mode_index >= 0) {
+ switch (best_mbmode.ref_frame[0]) {
+ case INTRA_FRAME: break;
+ case LAST_FRAME:
+ ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+#if CONFIG_EXT_REFS
+ case LAST2_FRAME:
+ ref_frame_skip_mask[0] |= LAST2_FRAME_MODE_MASK;
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+ case LAST3_FRAME:
+ ref_frame_skip_mask[0] |= LAST3_FRAME_MODE_MASK;
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+#endif // CONFIG_EXT_REFS
+ case GOLDEN_FRAME:
+ ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+#if CONFIG_EXT_REFS
+ case BWDREF_FRAME:
+ ref_frame_skip_mask[0] |= BWDREF_FRAME_MODE_MASK;
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+#endif // CONFIG_EXT_REFS
+ case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALTREF_FRAME_MODE_MASK;
+#if CONFIG_EXT_REFS
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+#endif // CONFIG_EXT_REFS
+ break;
+ case NONE:
+ case TOTAL_REFS_PER_FRAME:
+ assert(0 && "Invalid Reference frame");
+ break;
+ }
+ }
+
+ if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
+ (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
+ continue;
+
+ if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;
+
+ // Test best rd so far against threshold for trying this mode.
+ if (best_mode_skippable && sf->schedule_mode_search)
+ mode_threshold[mode_index] <<= 1;
+
+ if (best_rd < mode_threshold[mode_index]) continue;
+
+ comp_pred = second_ref_frame > INTRA_FRAME;
+ if (comp_pred) {
+ if (!cpi->allow_comp_inter_inter) continue;
+
+ // Skip compound inter modes if ARF is not available.
+ if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
+
+ // Do not allow compound prediction if the segment level reference frame
+ // feature is in use as in this case there can only be one reference.
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
+
+ if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
+ best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
+ continue;
+
+ mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
+ } else {
+ if (ref_frame != INTRA_FRAME)
+ mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
+ }
+
+ if (ref_frame == INTRA_FRAME) {
+ if (sf->adaptive_mode_search)
+ if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
+ continue;
+
+ if (this_mode != DC_PRED) {
+ // Disable intra modes other than DC_PRED for blocks with low variance
+ // Threshold for intra skipping based on source variance
+ // TODO(debargha): Specialize the threshold for super block sizes
+ const unsigned int skip_intra_var_thresh = 64;
+ if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
+ x->source_variance < skip_intra_var_thresh)
+ continue;
+ // Only search the oblique modes if the best so far is
+ // one of the neighboring directional modes
+ if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
+ (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
+ if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
+ continue;
+ }
+ if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
+ if (conditional_skipintra(this_mode, best_intra_mode)) continue;
+ }
+ }
+ } else {
+ const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
+ if (!check_best_zero_mv(cpi, mbmi_ext->mode_context,
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ mbmi_ext->compound_mode_context,
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ frame_mv, this_mode, ref_frames, bsize, -1))
+ continue;
+ }
+
+ mbmi->mode = this_mode;
+ mbmi->uv_mode = DC_PRED;
+ mbmi->ref_frame[0] = ref_frame;
+ mbmi->ref_frame[1] = second_ref_frame;
+ pmi->palette_size[0] = 0;
+ pmi->palette_size[1] = 0;
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
+ // Evaluate all sub-pel filters irrespective of whether we can use
+ // them for this frame.
+#if CONFIG_DUAL_FILTER
+ for (i = 0; i < 4; ++i) {
+ mbmi->interp_filter[i] = cm->interp_filter == SWITCHABLE
+ ? EIGHTTAP_REGULAR
+ : cm->interp_filter;
+ }
+#else
+ mbmi->interp_filter =
+ cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR : cm->interp_filter;
+#endif
+ mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
+ mbmi->motion_variation = SIMPLE_TRANSLATION;
+
+ x->skip = 0;
+ set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
+
+ // Select prediction reference frames.
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
+ if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
+ }
+
+#if CONFIG_EXT_INTER
+ mbmi->interintra_mode = (PREDICTION_MODE)(DC_PRED - 1);
+#endif // CONFIG_EXT_INTER
+
+ if (ref_frame == INTRA_FRAME) {
+ TX_SIZE uv_tx;
+ struct macroblockd_plane *const pd = &xd->plane[1];
+#if CONFIG_EXT_INTRA
+ is_directional_mode = (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED);
+ if (is_directional_mode) {
+ if (!angle_stats_ready) {
+ const int src_stride = x->plane[0].src.stride;
+ const uint8_t *src = x->plane[0].src.buf;
+ const int rows = 4 * num_4x4_blocks_high_lookup[bsize];
+ const int cols = 4 * num_4x4_blocks_wide_lookup[bsize];
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ highbd_angle_estimation(src, src_stride, rows, cols,
+ directional_mode_skip_mask);
+ else
+#endif
+ angle_estimation(src, src_stride, rows, cols,
+ directional_mode_skip_mask);
+ angle_stats_ready = 1;
+ }
+ if (directional_mode_skip_mask[mbmi->mode]) continue;
+ rate_overhead = write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1, 0) +
+ intra_mode_cost[mbmi->mode];
+ rate_y = INT_MAX;
+ this_rd =
+ rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
+ &skippable, bsize, rate_overhead, best_rd);
+ } else {
+ mbmi->angle_delta[0] = 0;
+ super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize,
+ best_rd);
+ }
+#else
+ super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize,
+ best_rd);
+#endif // CONFIG_EXT_INTRA
+
+ if (rate_y == INT_MAX) continue;
+
+#if CONFIG_EXT_INTRA
+ if (mbmi->mode == DC_PRED) dc_skipped = 0;
+#endif // CONFIG_EXT_INTRA
+
+ uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x,
+ pd->subsampling_y);
+ if (rate_uv_intra[uv_tx] == INT_MAX) {
+ choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
+ &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
+ &skip_uv[uv_tx], &mode_uv[uv_tx]);
+ if (cm->allow_screen_content_tools) pmi_uv[uv_tx] = *pmi;
+#if CONFIG_EXT_INTRA
+ ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
+ uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
+#endif // CONFIG_EXT_INTRA
+ }
+
+ rate_uv = rate_uv_tokenonly[uv_tx];
+ distortion_uv = dist_uv[uv_tx];
+ skippable = skippable && skip_uv[uv_tx];
+ mbmi->uv_mode = mode_uv[uv_tx];
+ if (cm->allow_screen_content_tools) {
+ pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
+ memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
+ pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
+ 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
+ }
+#if CONFIG_EXT_INTRA
+ mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1];
+ if (ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
+ }
+#endif // CONFIG_EXT_INTRA
+
+ rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
+ cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
+ if (cpi->common.allow_screen_content_tools && mbmi->mode == DC_PRED)
+ rate2 += vp10_cost_bit(
+ vp10_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx],
+ 0);
+
+ if (!xd->lossless[mbmi->segment_id]) {
+ // super_block_yrd above includes the cost of the tx_size in the
+ // tokenonly rate, but for intra blocks, tx_size is always coded
+ // (prediction granularity), so we account for it in the full rate,
+ // not the tokenonly rate.
+ rate_y -= cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(
+ xd)][mbmi->tx_size];
+ }
+#if CONFIG_EXT_INTRA
+ if (is_directional_mode) {
+ int p_angle;
+ const int intra_filter_ctx = vp10_get_pred_context_intra_interp(xd);
+ rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[0]);
+ p_angle =
+ mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
+ if (vp10_is_intra_filter_switchable(p_angle))
+ rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
+ }
+
+ if (mbmi->mode == DC_PRED && ALLOW_FILTER_INTRA_MODES) {
+ rate2 += vp10_cost_bit(cm->fc->ext_intra_probs[0],
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[0]) {
+ rate2 += write_uniform_cost(
+ FILTER_INTRA_MODES, mbmi->ext_intra_mode_info.ext_intra_mode[0]);
+ }
+ }
+
+ if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
+ rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTAS + 1,
+ MAX_ANGLE_DELTAS + mbmi->angle_delta[1]);
+ }
+
+ if (ALLOW_FILTER_INTRA_MODES && mbmi->mode == DC_PRED) {
+ rate2 += vp10_cost_bit(cpi->common.fc->ext_intra_probs[1],
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1]);
+ if (mbmi->ext_intra_mode_info.use_ext_intra_mode[1])
+ rate2 += write_uniform_cost(
+ FILTER_INTRA_MODES, mbmi->ext_intra_mode_info.ext_intra_mode[1]);
+ }
+#endif // CONFIG_EXT_INTRA
+ if (this_mode != DC_PRED && this_mode != TM_PRED)
+ rate2 += intra_cost_penalty;
+ distortion2 = distortion_y + distortion_uv;
+ vp10_encode_intra_block_plane(x, bsize, 0, 1);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ x->recon_variance = vp10_high_get_sby_perpixel_variance(
+ cpi, &xd->plane[0].dst, bsize, xd->bd);
+ } else {
+ x->recon_variance =
+ vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
+ }
+#else
+ x->recon_variance =
+ vp10_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ } else {
+#if CONFIG_REF_MV
+ int_mv backup_ref_mv[2];
+
+ backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
+ if (comp_pred) backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
+#endif
+#if CONFIG_EXT_INTER
+ if (second_ref_frame == INTRA_FRAME) {
+ if (best_single_inter_ref != ref_frame) continue;
+ mbmi->interintra_mode = best_intra_mode;
+#if CONFIG_EXT_INTRA
+ // TODO(debargha|geza.lore):
+ // Should we use ext_intra modes for interintra?
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+ mbmi->angle_delta[0] = 0;
+ mbmi->angle_delta[1] = 0;
+ mbmi->intra_filter = INTRA_FILTER_LINEAR;
+#endif // CONFIG_EXT_INTRA
+ }
+#endif // CONFIG_EXT_INTER
+#if CONFIG_REF_MV
+ mbmi->ref_mv_idx = 0;
+ ref_frame_type = vp10_ref_frame_type(mbmi->ref_frame);
+
+ if (this_mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
+ int ref;
+ for (ref = 0; ref < 1 + comp_pred; ++ref) {
+ int_mv this_mv =
+ (ref == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv
+ : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+ mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
+ }
+ }
+#endif
+ this_rd = handle_inter_mode(
+ cpi, x, bsize, &rate2, &distortion2, &skippable, &rate_y, &rate_uv,
+ &disable_skip, frame_mv, mi_row, mi_col,
+#if CONFIG_OBMC
+ dst_buf1, dst_stride1, dst_buf2, dst_stride2, weighted_src_buf,
+ mask2d_buf,
+#endif // CONFIG_OBMC
+#if CONFIG_EXT_INTER
+ single_newmvs, single_newmvs_rate, &compmode_interintra_cost,
+ &compmode_wedge_cost, modelled_rd,
+#else
+ single_newmv,
+#endif // CONFIG_EXT_INTER
+ single_inter_filter, single_skippable, &total_sse, best_rd);
+
+#if CONFIG_REF_MV
+ // TODO(jingning): This needs some refactoring to improve code quality
+ // and reduce redundant steps.
+ if ((mbmi->mode == NEARMV &&
+ mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
+ (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
+ int_mv backup_mv = frame_mv[NEARMV][ref_frame];
+ MB_MODE_INFO backup_mbmi = *mbmi;
+ int backup_skip = x->skip;
+ int64_t tmp_ref_rd = this_rd;
+ int ref_idx;
+
+ // TODO(jingning): This should be deprecated shortly.
+ int idx_offset = (mbmi->mode == NEARMV) ? 1 : 0;
+ int ref_set =
+ VPXMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);
+
+ uint8_t drl_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx_offset);
+ // Dummy
+ int_mv backup_fmv[2];
+ backup_fmv[0] = frame_mv[NEWMV][ref_frame];
+ if (comp_pred) backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];
+
+ rate2 += cpi->drl_mode_cost0[drl_ctx][0];
+
+ if (this_rd < INT64_MAX) {
+ if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
+ RDCOST(x->rdmult, x->rddiv, 0, total_sse))
+ tmp_ref_rd =
+ RDCOST(x->rdmult, x->rddiv,
+ rate2 + vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0),
+ distortion2);
+ else
+ tmp_ref_rd =
+ RDCOST(x->rdmult, x->rddiv,
+ rate2 + vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1) -
+ rate_y - rate_uv,
+ total_sse);
+ }
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(x->blk_skip_drl[i], x->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif
+
+ for (ref_idx = 0; ref_idx < ref_set; ++ref_idx) {
+ int64_t tmp_alt_rd = INT64_MAX;
+ int tmp_rate = 0, tmp_rate_y = 0, tmp_rate_uv = 0;
+ int tmp_skip = 1;
+ int64_t tmp_dist = 0, tmp_sse = 0;
+ int dummy_disable_skip = 0;
+ int ref;
+ int_mv cur_mv;
+
+ mbmi->ref_mv_idx = 1 + ref_idx;
+
+ for (ref = 0; ref < 1 + comp_pred; ++ref) {
+ int_mv this_mv =
+ (ref == 0)
+ ? mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
+ .this_mv
+ : mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
+ .comp_mv;
+ clamp_mv_ref(&this_mv.as_mv, xd->n8_w << 3, xd->n8_h << 3, xd);
+ mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
+ }
+
+ cur_mv =
+ mbmi_ext->ref_mv_stack[ref_frame][mbmi->ref_mv_idx + idx_offset]
+ .this_mv;
+ clamp_mv2(&cur_mv.as_mv, xd);
+
+ if (!mv_check_bounds(x, &cur_mv.as_mv)) {
+ INTERP_FILTER
+ dummy_single_inter_filter[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME] = {
+ { 0 }
+ };
+ int dummy_single_skippable[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME] = {
+ { 0 }
+ };
+ int dummy_disable_skip = 0;
+#if CONFIG_EXT_INTER
+ int_mv dummy_single_newmvs[2][TOTAL_REFS_PER_FRAME] = { { { 0 } },
+ { { 0 } } };
+ int dummy_single_newmvs_rate[2][TOTAL_REFS_PER_FRAME] = { { 0 },
+ { 0 } };
+ int dummy_compmode_interintra_cost = 0;
+ int dummy_compmode_wedge_cost = 0;
+#else
+ int_mv dummy_single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
+#endif
+
+ frame_mv[NEARMV][ref_frame] = cur_mv;
+ tmp_alt_rd = handle_inter_mode(
+ cpi, x, bsize, &tmp_rate, &tmp_dist, &tmp_skip, &tmp_rate_y,
+ &tmp_rate_uv, &dummy_disable_skip, frame_mv, mi_row, mi_col,
+#if CONFIG_OBMC
+ dst_buf1, dst_stride1, dst_buf2, dst_stride2, weighted_src_buf,
+ mask2d_buf,
+#endif // CONFIG_OBMC
+#if CONFIG_EXT_INTER
+ dummy_single_newmvs, dummy_single_newmvs_rate,
+ &dummy_compmode_interintra_cost, &dummy_compmode_wedge_cost,
+ NULL,
+#else
+ dummy_single_newmv,
+#endif
+ dummy_single_inter_filter, dummy_single_skippable, &tmp_sse,
+ best_rd);
+ }
+
+ for (i = 0; i < mbmi->ref_mv_idx; ++i) {
+ uint8_t drl1_ctx = 0;
+ drl1_ctx = vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
+ i + idx_offset);
+ tmp_rate += cpi->drl_mode_cost0[drl1_ctx][1];
+ }
+
+ if (mbmi_ext->ref_mv_count[ref_frame_type] >
+ mbmi->ref_mv_idx + idx_offset + 1 &&
+ ref_idx < ref_set - 1) {
+ uint8_t drl1_ctx =
+ vp10_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
+ mbmi->ref_mv_idx + idx_offset);
+ tmp_rate += cpi->drl_mode_cost0[drl1_ctx][0];
+ }
+
+ if (tmp_alt_rd < INT64_MAX) {
+#if CONFIG_OBMC
+ tmp_alt_rd = RDCOST(x->rdmult, x->rddiv, tmp_rate, tmp_dist);
+#else
+ if (RDCOST(x->rdmult, x->rddiv, tmp_rate_y + tmp_rate_uv,
+ tmp_dist) < RDCOST(x->rdmult, x->rddiv, 0, tmp_sse))
+ tmp_alt_rd = RDCOST(
+ x->rdmult, x->rddiv,
+ tmp_rate + vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0),
+ tmp_dist);
+ else
+ tmp_alt_rd = RDCOST(
+ x->rdmult, x->rddiv,
+ tmp_rate + vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1) -
+ tmp_rate_y - tmp_rate_uv,
+ tmp_sse);
+#endif // CONFIG_OBMC
+ }
+
+ if (tmp_ref_rd > tmp_alt_rd) {
+ rate2 = tmp_rate;
+ disable_skip = dummy_disable_skip;
+ distortion2 = tmp_dist;
+ skippable = tmp_skip;
+ rate_y = tmp_rate_y;
+ rate_uv = tmp_rate_uv;
+ total_sse = tmp_sse;
+ this_rd = tmp_alt_rd;
+ tmp_ref_rd = tmp_alt_rd;
+ backup_mbmi = *mbmi;
+ backup_skip = x->skip;
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(x->blk_skip_drl[i], x->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif
+ } else {
+ *mbmi = backup_mbmi;
+ x->skip = backup_skip;
+ }
+ }
+
+ frame_mv[NEARMV][ref_frame] = backup_mv;
+ frame_mv[NEWMV][ref_frame] = backup_fmv[0];
+ if (comp_pred) frame_mv[NEWMV][second_ref_frame] = backup_fmv[1];
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(x->blk_skip[i], x->blk_skip_drl[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif
+ }
+ mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0];
+ if (comp_pred) mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1];
+#endif // CONFIG_REF_MV
+
+ if (this_rd == INT64_MAX) continue;
+
+ compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);
+
+ if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
+ }
+
+#if CONFIG_EXT_INTER
+ rate2 += compmode_interintra_cost;
+ if (cm->reference_mode != SINGLE_REFERENCE && comp_pred)
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ if (mbmi->motion_variation == SIMPLE_TRANSLATION)
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+ rate2 += compmode_wedge_cost;
+#endif // CONFIG_EXT_INTER
+
+ // Estimate the reference frame signaling cost and add it
+ // to the rolling cost variable.
+ if (comp_pred) {
+ rate2 += ref_costs_comp[ref_frame];
+#if CONFIG_EXT_REFS
+ rate2 += ref_costs_comp[second_ref_frame];
+#endif // CONFIG_EXT_REFS
+ } else {
+ rate2 += ref_costs_single[ref_frame];
+ }
+
+#if CONFIG_OBMC
+ if (ref_frame == INTRA_FRAME) {
+#else
+ if (!disable_skip) {
+#endif // CONFIG_OBMC
+ if (skippable) {
+ // Back out the coefficient coding costs
+ rate2 -= (rate_y + rate_uv);
+ rate_y = 0;
+ rate_uv = 0;
+ // Cost the skip mb case
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
+ if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
+ RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
+ // Add in the cost of the no skip flag.
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
+ } else {
+ // FIXME(rbultje) make this work for splitmv also
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ distortion2 = total_sse;
+ assert(total_sse >= 0);
+ rate2 -= (rate_y + rate_uv);
+ this_skip2 = 1;
+ rate_y = 0;
+ rate_uv = 0;
+ }
+ } else {
+ // Add in the cost of the no skip flag.
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
+ }
+
+ // Calculate the final RD estimate for this mode.
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+#if CONFIG_OBMC
+ } else {
+ this_skip2 = mbmi->skip;
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ if (this_skip2) {
+ rate_y = 0;
+ rate_uv = 0;
+ }
+#endif // CONFIG_OBMC
+ }
+
+ // Apply an adjustment to the rd value based on the similarity of the
+ // source variance and reconstructed variance.
+ rd_variance_adjustment(x, &this_rd, ref_frame, x->source_variance);
+
+ if (ref_frame == INTRA_FRAME) {
+ // Keep record of best intra rd
+ if (this_rd < best_intra_rd) {
+ best_intra_rd = this_rd;
+ best_intra_mode = mbmi->mode;
+ }
+#if CONFIG_EXT_INTER
+ } else if (second_ref_frame == NONE) {
+ if (this_rd < best_single_inter_rd) {
+ best_single_inter_rd = this_rd;
+ best_single_inter_ref = mbmi->ref_frame[0];
+ }
+#endif // CONFIG_EXT_INTER
+ }
+
+ if (!disable_skip && ref_frame == INTRA_FRAME) {
+ for (i = 0; i < REFERENCE_MODES; ++i)
+ best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
+ }
+
+ // Did this mode help, i.e. is it the new best mode?
+ if (this_rd < best_rd || x->skip) {
+ if (!mode_excluded) {
+ // Note index of best mode so far
+ best_mode_index = mode_index;
+
+ if (ref_frame == INTRA_FRAME) {
+ /* required for left and above block mv */
+ mbmi->mv[0].as_int = 0;
+ } else {
+ best_pred_sse = x->pred_sse[ref_frame];
+ }
+
+ rd_cost->rate = rate2;
+#if CONFIG_SUPERTX
+ if (x->skip)
+ *returnrate_nocoef = rate2;
+ else
+ *returnrate_nocoef = rate2 - rate_y - rate_uv;
+ *returnrate_nocoef -=
+ vp10_cost_bit(vp10_get_skip_prob(cm, xd),
+ disable_skip || skippable || this_skip2);
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd),
+ mbmi->ref_frame[0] != INTRA_FRAME);
+#if CONFIG_OBMC || CONFIG_WARPED_MOTION
+ if (is_inter_block(mbmi) && is_motvar_allowed(mbmi))
+ *returnrate_nocoef -= cpi->motvar_cost[bsize][mbmi->motion_variation];
+#endif // CONFIG_OBMC || CONFIG_WARPED_MOTION
+#endif // CONFIG_SUPERTX
+ rd_cost->dist = distortion2;
+ rd_cost->rdcost = this_rd;
+ best_rd = this_rd;
+ best_mbmode = *mbmi;
+ best_skip2 = this_skip2;
+ best_mode_skippable = skippable;
+ best_rate_y = rate_y + vp10_cost_bit(vp10_get_skip_prob(cm, xd),
+ this_skip2 || skippable);
+ best_rate_uv = rate_uv;
+
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(ctx->blk_skip[i], x->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif
+
+ // TODO(debargha): enhance this test with a better distortion prediction
+ // based on qp, activity mask and history
+ if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
+ (mode_index > MIN_EARLY_TERM_INDEX)) {
+ int qstep = xd->plane[0].dequant[1];
+ // TODO(debargha): Enhance this by specializing for each mode_index
+ int scale = 4;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ qstep >>= (xd->bd - 8);
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (x->source_variance < UINT_MAX) {
+ const int var_adjust = (x->source_variance < 16);
+ scale -= var_adjust;
+ }
+ if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) {
+ early_term = 1;
+ }
+ }
+ }
+ }
+
+ /* keep record of best compound/single-only prediction */
+ if (!disable_skip && ref_frame != INTRA_FRAME) {
+ int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
+
+ if (cm->reference_mode == REFERENCE_MODE_SELECT) {
+ single_rate = rate2 - compmode_cost;
+ hybrid_rate = rate2;
+ } else {
+ single_rate = rate2;
+ hybrid_rate = rate2 + compmode_cost;
+ }
+
+ single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
+ hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
+
+ if (!comp_pred) {
+ if (single_rd < best_pred_rd[SINGLE_REFERENCE])
+ best_pred_rd[SINGLE_REFERENCE] = single_rd;
+ } else {
+ if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
+ best_pred_rd[COMPOUND_REFERENCE] = single_rd;
+ }
+ if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
+ best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
+ }
+
+ if (early_term) break;
+
+ if (x->skip && !comp_pred) break;
+ }
+
+ if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
+ ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
+ is_inter_mode(best_mbmode.mode)) ||
+ (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
+ !is_inter_mode(best_mbmode.mode)))) {
+ int rate_y = 0, rate_uv = 0;
+ int64_t dist_y = 0, dist_uv = 0;
+ int skip_y = 0, skip_uv = 0, skip_blk = 0;
+ int64_t sse_y = 0, sse_uv = 0;
+
+ x->use_default_inter_tx_type = 0;
+ x->use_default_intra_tx_type = 0;
+
+ *mbmi = best_mbmode;
+
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+
+ // Select prediction reference frames.
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
+ if (has_second_ref(mbmi))
+ xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
+ }
+
+ if (is_inter_mode(mbmi->mode)) {
+ vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+#if CONFIG_OBMC
+ if (mbmi->motion_variation == OBMC_CAUSAL)
+ vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1,
+ dst_stride1, dst_buf2, dst_stride2);
+#endif // CONFIG_OBMC
+ vp10_subtract_plane(x, bsize, 0);
+#if CONFIG_VAR_TX
+ if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
+ select_tx_type_yrd(cpi, x, &rate_y, &dist_y, &skip_y, &sse_y, bsize,
+ INT64_MAX);
+ } else {
+ int idx, idy;
+ super_block_yrd(cpi, x, &rate_y, &dist_y, &skip_y, &sse_y, bsize,
+ INT64_MAX);
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
+ memset(x->blk_skip[0], skip_y,
+ sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
+ }
+
+ inter_block_uvrd(cpi, x, &rate_uv, &dist_uv, &skip_uv, &sse_uv, bsize,
+ INT64_MAX);
+#else
+ super_block_yrd(cpi, x, &rate_y, &dist_y, &skip_y, &sse_y, bsize,
+ INT64_MAX);
+ super_block_uvrd(cpi, x, &rate_uv, &dist_uv, &skip_uv, &sse_uv, bsize,
+ INT64_MAX);
+#endif // CONFIG_VAR_TX
+ } else {
+ super_block_yrd(cpi, x, &rate_y, &dist_y, &skip_y, &sse_y, bsize,
+ INT64_MAX);
+ super_block_uvrd(cpi, x, &rate_uv, &dist_uv, &skip_uv, &sse_uv, bsize,
+ INT64_MAX);
+ }
+
+ if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, (dist_y + dist_uv)) >
+ RDCOST(x->rdmult, x->rddiv, 0, (sse_y + sse_uv))) {
+ skip_blk = 1;
+ rate_y = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ rate_uv = 0;
+ dist_y = sse_y;
+ dist_uv = sse_uv;
+ } else {
+ skip_blk = 0;
+ rate_y += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
+ }
+
+ if (RDCOST(x->rdmult, x->rddiv, best_rate_y + best_rate_uv, rd_cost->dist) >
+ RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, (dist_y + dist_uv))) {
+#if CONFIG_VAR_TX
+ int idx, idy;
+#endif
+ best_mbmode.tx_type = mbmi->tx_type;
+ best_mbmode.tx_size = mbmi->tx_size;
+#if CONFIG_VAR_TX
+ for (idy = 0; idy < xd->n8_h; ++idy)
+ for (idx = 0; idx < xd->n8_w; ++idx)
+ best_mbmode.inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
+
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memcpy(ctx->blk_skip[i], x->blk_skip[i],
+ sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif
+ rd_cost->rate += (rate_y + rate_uv - best_rate_y - best_rate_uv);
+ rd_cost->dist = dist_y + dist_uv;
+ rd_cost->rdcost =
+ RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+ best_skip2 = skip_blk;
+ }
+ }
+
+ // Only try palette mode when the best mode so far is an intra mode.
+ if (cm->allow_screen_content_tools && !is_inter_mode(best_mbmode.mode)) {
+ PREDICTION_MODE mode_selected;
+ int rate2 = 0, rate_y = 0;
+#if CONFIG_SUPERTX
+ int best_rate_nocoef;
+#endif
+ int64_t distortion2 = 0, distortion_y = 0, dummy_rd = best_rd, this_rd;
+ int skippable = 0, rate_overhead = 0;
+ TX_SIZE best_tx_size, uv_tx;
+ TX_TYPE best_tx_type;
+ PALETTE_MODE_INFO palette_mode_info;
+ uint8_t *const best_palette_color_map =
+ x->palette_buffer->best_palette_color_map;
+ uint8_t *const color_map = xd->plane[0].color_index_map;
+
+ mbmi->mode = DC_PRED;
+ mbmi->uv_mode = DC_PRED;
+ mbmi->ref_frame[0] = INTRA_FRAME;
+ mbmi->ref_frame[1] = NONE;
+ palette_mode_info.palette_size[0] = 0;
+ rate_overhead = rd_pick_palette_intra_sby(
+ cpi, x, bsize, palette_ctx, intra_mode_cost[DC_PRED],
+ &palette_mode_info, best_palette_color_map, &best_tx_size,
+ &best_tx_type, &mode_selected, &dummy_rd);
+ if (palette_mode_info.palette_size[0] == 0) goto PALETTE_EXIT;
+
+ pmi->palette_size[0] = palette_mode_info.palette_size[0];
+ if (palette_mode_info.palette_size[0] > 0) {
+ memcpy(pmi->palette_colors, palette_mode_info.palette_colors,
+ PALETTE_MAX_SIZE * sizeof(palette_mode_info.palette_colors[0]));
+ memcpy(color_map, best_palette_color_map,
+ rows * cols * sizeof(best_palette_color_map[0]));
+ }
+ super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize,
+ best_rd);
+ if (rate_y == INT_MAX) goto PALETTE_EXIT;
+ uv_tx =
+ get_uv_tx_size_impl(mbmi->tx_size, bsize, xd->plane[1].subsampling_x,
+ xd->plane[1].subsampling_y);
+ if (rate_uv_intra[uv_tx] == INT_MAX) {
+ choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
+ &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
+ &skip_uv[uv_tx], &mode_uv[uv_tx]);
+ pmi_uv[uv_tx] = *pmi;
+#if CONFIG_EXT_INTRA
+ ext_intra_mode_info_uv[uv_tx] = mbmi->ext_intra_mode_info;
+ uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
+#endif // CONFIG_EXT_INTRA
+ }
+ mbmi->uv_mode = mode_uv[uv_tx];
+ pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
+ if (pmi->palette_size[1] > 0)
+ memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
+ pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
+ 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
+#if CONFIG_EXT_INTRA
+ mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] =
+ ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1];
+ if (ext_intra_mode_info_uv[uv_tx].use_ext_intra_mode[1]) {
+ mbmi->ext_intra_mode_info.ext_intra_mode[1] =
+ ext_intra_mode_info_uv[uv_tx].ext_intra_mode[1];
+ }
+#endif // CONFIG_EXT_INTRA
+ skippable = skippable && skip_uv[uv_tx];
+ distortion2 = distortion_y + dist_uv[uv_tx];
+ rate2 = rate_y + rate_overhead + rate_uv_intra[uv_tx];
+ rate2 += ref_costs_single[INTRA_FRAME];
+
+ if (skippable) {
+ rate2 -= (rate_y + rate_uv_tokenonly[uv_tx]);
+#if CONFIG_SUPERTX
+ best_rate_nocoef = rate2;
+#endif
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ } else {
+#if CONFIG_SUPERTX
+ best_rate_nocoef = rate2 - (rate_y + rate_uv_tokenonly[uv_tx]);
+#endif
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
+ }
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ if (this_rd < best_rd) {
+ best_mode_index = 3;
+ mbmi->mv[0].as_int = 0;
+ rd_cost->rate = rate2;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = best_rate_nocoef;
+#endif
+ rd_cost->dist = distortion2;
+ rd_cost->rdcost = this_rd;
+ best_rd = this_rd;
+ best_mbmode = *mbmi;
+ best_skip2 = 0;
+ best_mode_skippable = skippable;
+ }
+ }
+PALETTE_EXIT:
+
+#if CONFIG_EXT_INTRA
+ // TODO(huisu): ext-intra is turned off in lossless mode for now to
+ // avoid a unit test failure
+ if (!xd->lossless[mbmi->segment_id] && ALLOW_FILTER_INTRA_MODES &&
+ mbmi->palette_mode_info.palette_size[0] == 0 && !dc_skipped &&
+ best_mode_index >= 0 && best_intra_rd < (best_rd + (best_rd >> 3))) {
+ pick_ext_intra_interframe(
+ cpi, x, ctx, bsize, rate_uv_intra, rate_uv_tokenonly, dist_uv, skip_uv,
+ mode_uv, ext_intra_mode_info_uv, pmi_uv, uv_angle_delta, palette_ctx, 0,
+ ref_costs_single, &best_rd, &best_intra_rd, &best_intra_mode,
+ &best_mode_index, &best_skip2, &best_mode_skippable,
+#if CONFIG_SUPERTX
+ returnrate_nocoef,
+#endif // CONFIG_SUPERTX
+ best_pred_rd, &best_mbmode, rd_cost);
+ }
+#endif // CONFIG_EXT_INTRA
+
+ // The inter modes' rate costs are not calculated precisely in some cases.
+ // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
+ // ZEROMV. Here, checks are added for those cases, and the mode decisions
+ // are corrected.
+ if (best_mbmode.mode == NEWMV
+#if CONFIG_EXT_INTER
+ || best_mbmode.mode == NEWFROMNEARMV || best_mbmode.mode == NEW_NEWMV
+#endif // CONFIG_EXT_INTER
+ ) {
+ const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
+ best_mbmode.ref_frame[1] };
+ int comp_pred_mode = refs[1] > INTRA_FRAME;
+#if CONFIG_REF_MV
+ const uint8_t rf_type = vp10_ref_frame_type(best_mbmode.ref_frame);
+ if (!comp_pred_mode) {
+ int i;
+ int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
+ ? VPXMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
+ : INT_MAX;
+
+ for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
+ int_mv cur_mv = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
+ if (cur_mv.as_int == best_mbmode.mv[0].as_int) {
+ best_mbmode.mode = NEARMV;
+ best_mbmode.ref_mv_idx = i;
+ }
+ }
+
+ if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
+ best_mbmode.mode = NEARESTMV;
+ else if (best_mbmode.mv[0].as_int == 0)
+ best_mbmode.mode = ZEROMV;
+ } else {
+ int_mv nearestmv[2];
+ int_mv nearmv[2];
+
+#if CONFIG_EXT_INTER
+ if (mbmi_ext->ref_mv_count[rf_type] > 1) {
+ nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv;
+ nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
+ } else {
+ nearmv[0] = frame_mv[NEARMV][refs[0]];
+ nearmv[1] = frame_mv[NEARMV][refs[1]];
+ }
+#else
+ int i;
+ int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
+ ? VPXMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
+ : INT_MAX;
+
+ for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
+ nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
+ nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
+
+ if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
+ nearmv[1].as_int == best_mbmode.mv[1].as_int) {
+ best_mbmode.mode = NEARMV;
+ best_mbmode.ref_mv_idx = i;
+ }
+ }
+#endif
+ if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
+ nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
+ nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
+ } else {
+ nearestmv[0] = frame_mv[NEARESTMV][refs[0]];
+ nearestmv[1] = frame_mv[NEARESTMV][refs[1]];
+ }
+
+ if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
+ nearestmv[1].as_int == best_mbmode.mv[1].as_int)
+#if CONFIG_EXT_INTER
+ best_mbmode.mode = NEAREST_NEARESTMV;
+ else if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
+ nearmv[1].as_int == best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEAREST_NEARMV;
+ else if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
+ nearestmv[1].as_int == best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEAR_NEARESTMV;
+ else if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
+ nearmv[1].as_int == best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEAR_NEARMV;
+ else if (best_mbmode.mv[0].as_int == 0 && best_mbmode.mv[1].as_int == 0)
+ best_mbmode.mode = ZERO_ZEROMV;
+#else
+ best_mbmode.mode = NEARESTMV;
+ else if (best_mbmode.mv[0].as_int == 0 && best_mbmode.mv[1].as_int == 0)
+ best_mbmode.mode = ZEROMV;
+#endif // CONFIG_EXT_INTER
+ }
+#else
+#if CONFIG_EXT_INTER
+ if (!comp_pred_mode) {
+#endif // CONFIG_EXT_INTER
+ if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
+ ((comp_pred_mode &&
+ frame_mv[NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int) ||
+ !comp_pred_mode))
+ best_mbmode.mode = NEARESTMV;
+ else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
+ ((comp_pred_mode &&
+ frame_mv[NEARMV][refs[1]].as_int ==
+ best_mbmode.mv[1].as_int) ||
+ !comp_pred_mode))
+ best_mbmode.mode = NEARMV;
+ else if (best_mbmode.mv[0].as_int == 0 &&
+ ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) ||
+ !comp_pred_mode))
+ best_mbmode.mode = ZEROMV;
+#if CONFIG_EXT_INTER
+ } else {
+ const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
+ best_mbmode.ref_frame[1] };
+
+ if (frame_mv[NEAREST_NEARESTMV][refs[0]].as_int ==
+ best_mbmode.mv[0].as_int &&
+ frame_mv[NEAREST_NEARESTMV][refs[1]].as_int ==
+ best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEAREST_NEARESTMV;
+ else if (frame_mv[NEAREST_NEARMV][refs[0]].as_int ==
+ best_mbmode.mv[0].as_int &&
+ frame_mv[NEAREST_NEARMV][refs[1]].as_int ==
+ best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEAREST_NEARMV;
+ else if (frame_mv[NEAR_NEARESTMV][refs[0]].as_int ==
+ best_mbmode.mv[0].as_int &&
+ frame_mv[NEAR_NEARESTMV][refs[1]].as_int ==
+ best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEAR_NEARESTMV;
+ else if (frame_mv[NEAR_NEARMV][refs[0]].as_int ==
+ best_mbmode.mv[0].as_int &&
+ frame_mv[NEAR_NEARMV][refs[1]].as_int ==
+ best_mbmode.mv[1].as_int)
+ best_mbmode.mode = NEAR_NEARMV;
+ else if (best_mbmode.mv[0].as_int == 0 && best_mbmode.mv[1].as_int == 0)
+ best_mbmode.mode = ZERO_ZEROMV;
+ }
+#endif // CONFIG_EXT_INTER
+#endif
+ }
+
+#if CONFIG_REF_MV
+ if (best_mbmode.ref_frame[0] > INTRA_FRAME && best_mbmode.mv[0].as_int == 0 &&
+#if CONFIG_EXT_INTER
+ (best_mbmode.ref_frame[1] <= INTRA_FRAME)
+#else
+ (best_mbmode.ref_frame[1] == NONE || best_mbmode.mv[1].as_int == 0)
+#endif // CONFIG_EXT_INTER
+ ) {
+ int16_t mode_ctx = mbmi_ext->mode_context[best_mbmode.ref_frame[0]];
+#if !CONFIG_EXT_INTER
+ if (best_mbmode.ref_frame[1] > NONE)
+ mode_ctx &= (mbmi_ext->mode_context[best_mbmode.ref_frame[1]] | 0x00ff);
+#endif // !CONFIG_EXT_INTER
+
+ if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) best_mbmode.mode = ZEROMV;
+ }
+#endif
+
+ if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
+ rd_cost->rate = INT_MAX;
+ rd_cost->rdcost = INT64_MAX;
+ return;
+ }
+
+ // If we used an estimate for the uv intra rd in the loop above...
+ if (sf->use_uv_intra_rd_estimate) {
+ // Do Intra UV best rd mode selection if best mode choice above was intra.
+ if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
+ TX_SIZE uv_tx_size;
+ *mbmi = best_mbmode;
+ uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
+ rd_pick_intra_sbuv_mode(
+ cpi, x, &rate_uv_intra[uv_tx_size], &rate_uv_tokenonly[uv_tx_size],
+ &dist_uv[uv_tx_size], &skip_uv[uv_tx_size],
+ bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, uv_tx_size);
+ }
+ }
+
+#if CONFIG_DUAL_FILTER
+ assert((cm->interp_filter == SWITCHABLE) ||
+ (cm->interp_filter == best_mbmode.interp_filter[0]) ||
+ !is_inter_block(&best_mbmode));
+ assert((cm->interp_filter == SWITCHABLE) ||
+ (cm->interp_filter == best_mbmode.interp_filter[1]) ||
+ !is_inter_block(&best_mbmode));
+ if (best_mbmode.ref_frame[1] > INTRA_FRAME) {
+ assert((cm->interp_filter == SWITCHABLE) ||
+ (cm->interp_filter == best_mbmode.interp_filter[2]) ||
+ !is_inter_block(&best_mbmode));
+ assert((cm->interp_filter == SWITCHABLE) ||
+ (cm->interp_filter == best_mbmode.interp_filter[3]) ||
+ !is_inter_block(&best_mbmode));
+ }
+#else
+ assert((cm->interp_filter == SWITCHABLE) ||
+ (cm->interp_filter == best_mbmode.interp_filter) ||
+ !is_inter_block(&best_mbmode));
+#endif
+
+ if (!cpi->rc.is_src_frame_alt_ref)
+ vp10_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
+ sf->adaptive_rd_thresh, bsize, best_mode_index);
+
+ // macroblock modes
+ *mbmi = best_mbmode;
+ x->skip |= best_skip2;
+
+#if CONFIG_REF_MV
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
+ if (mbmi->mode != NEWMV)
+ mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int;
+ else
+ mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+ }
+#endif
+
+ for (i = 0; i < REFERENCE_MODES; ++i) {
+ if (best_pred_rd[i] == INT64_MAX)
+ best_pred_diff[i] = INT_MIN;
+ else
+ best_pred_diff[i] = best_rd - best_pred_rd[i];
+ }
+
+ x->skip |= best_mode_skippable;
+
+ assert(best_mode_index >= 0);
+
+ store_coding_context(x, ctx, best_mode_index, best_pred_diff,
+ best_mode_skippable);
+
+ if (cm->allow_screen_content_tools && pmi->palette_size[1] > 0) {
+ restore_uv_color_map(cpi, x);
+ }
+}
+
+void vp10_rd_pick_inter_mode_sb_seg_skip(VP10_COMP *cpi, TileDataEnc *tile_data,
+                                         MACROBLOCK *x, RD_COST *rd_cost,
+                                         BLOCK_SIZE bsize,
+                                         PICK_MODE_CONTEXT *ctx,
+                                         int64_t best_rd_so_far) {
+  // RD pick for a block whose segment has SEG_LVL_SKIP active. The mode is
+  // fixed (ZEROMV, LAST_FRAME, x->skip = 1), so only signaling rate is costed.
+  // rd_cost receives rate/dist/rdcost, or rate = INT_MAX and
+  // rdcost = INT64_MAX when the cost is not below best_rd_so_far.
+  VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  unsigned char segment_id = mbmi->segment_id;
+  const int comp_pred = 0;  // ref_frame[1] is NONE: single reference only.
+  int i;
+  int64_t best_pred_diff[REFERENCE_MODES];
+  unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
+  unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
+  vpx_prob comp_mode_p;
+  INTERP_FILTER best_filter = SWITCHABLE;
+  int64_t this_rd = INT64_MAX;
+  int rate2 = 0;
+  const int64_t distortion2 = 0;  // Distortion is fixed at zero for this mode.
+
+  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
+                           &comp_mode_p);
+  for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
+  for (i = LAST_FRAME; i < TOTAL_REFS_PER_FRAME; ++i)
+    x->pred_mv_sad[i] = INT_MAX;
+  rd_cost->rate = INT_MAX;
+  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
+
+  mbmi->palette_mode_info.palette_size[0] = 0;
+  mbmi->palette_mode_info.palette_size[1] = 0;
+#if CONFIG_EXT_INTRA
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+  mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif  // CONFIG_EXT_INTRA
+  mbmi->mode = ZEROMV;
+  mbmi->motion_variation = SIMPLE_TRANSLATION;
+  mbmi->uv_mode = DC_PRED;
+  mbmi->ref_frame[0] = LAST_FRAME;
+  mbmi->ref_frame[1] = NONE;
+  mbmi->mv[0].as_int = 0;
+#if CONFIG_REF_MV
+  mbmi->ref_mv_idx = 0;
+  mbmi->pred_mv[0].as_int = 0;
+#endif
+  x->skip = 1;
+
+  if (cm->interp_filter != BILINEAR) {
+    best_filter = EIGHTTAP_REGULAR;  // Used when no rate search is run.
+    if (cm->interp_filter == SWITCHABLE &&
+#if CONFIG_EXT_INTERP
+        vp10_is_interp_needed(xd) &&
+#endif  // CONFIG_EXT_INTERP
+        x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
+      int rs, best_rs = INT_MAX;  // Keep the filter that is cheapest to signal.
+      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
+#if CONFIG_DUAL_FILTER
+        int k;
+        for (k = 0; k < 4; ++k) mbmi->interp_filter[k] = i;
+#else
+        mbmi->interp_filter = i;
+#endif
+        rs = vp10_get_switchable_rate(cpi, xd);
+        if (rs < best_rs) {
+          best_rs = rs;
+#if CONFIG_DUAL_FILTER
+          best_filter = mbmi->interp_filter[0];
+#else
+          best_filter = mbmi->interp_filter;
+#endif
+        }
+      }
+    }
+  }
+  // Set the appropriate filter (all four entries under CONFIG_DUAL_FILTER).
+  if (cm->interp_filter == SWITCHABLE) {
+#if CONFIG_DUAL_FILTER
+    for (i = 0; i < 4; ++i) mbmi->interp_filter[i] = best_filter;
+#else
+    mbmi->interp_filter = best_filter;
+#endif
+    rate2 += vp10_get_switchable_rate(cpi, xd);
+  } else {
+#if CONFIG_DUAL_FILTER
+    for (i = 0; i < 4; ++i) mbmi->interp_filter[i] = cm->interp_filter;
+#else
+    mbmi->interp_filter = cm->interp_filter;
+#endif
+  }
+
+  if (cm->reference_mode == REFERENCE_MODE_SELECT)
+    rate2 += vp10_cost_bit(comp_mode_p, comp_pred);
+
+  // Estimate the reference frame signaling cost and add it
+  // to the rolling cost variable.
+  rate2 += ref_costs_single[LAST_FRAME];
+  this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+
+  rd_cost->rate = rate2;
+  rd_cost->dist = distortion2;
+  rd_cost->rdcost = this_rd;
+
+  if (this_rd >= best_rd_so_far) {
+    rd_cost->rate = INT_MAX;
+    rd_cost->rdcost = INT64_MAX;
+    return;
+  }
+
+#if CONFIG_DUAL_FILTER
+  assert((cm->interp_filter == SWITCHABLE) ||
+         (cm->interp_filter == mbmi->interp_filter[0]));
+#else
+  assert((cm->interp_filter == SWITCHABLE) ||
+         (cm->interp_filter == mbmi->interp_filter));
+#endif
+
+  vp10_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
+                             cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
+
+  vp10_zero(best_pred_diff);
+
+  store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, 0);
+}
+
+void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
+ TileDataEnc *tile_data,
+ struct macroblock *x, int mi_row,
+ int mi_col, struct RD_COST *rd_cost,
+#if CONFIG_SUPERTX
+ int *returnrate_nocoef,
+#endif // CONFIG_SUPERTX
+ BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far) {
+ VP10_COMMON *const cm = &cpi->common;
+ RD_OPT *const rd_opt = &cpi->rd;
+ SPEED_FEATURES *const sf = &cpi->sf;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const struct segmentation *const seg = &cm->seg;
+ MV_REFERENCE_FRAME ref_frame, second_ref_frame;
+ unsigned char segment_id = mbmi->segment_id;
+ int comp_pred, i;
+ int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
+ struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
+ static const int flag_list[TOTAL_REFS_PER_FRAME] = {
+ 0,
+ VPX_LAST_FLAG,
+#if CONFIG_EXT_REFS
+ VPX_LAST2_FLAG,
+ VPX_LAST3_FLAG,
+#endif // CONFIG_EXT_REFS
+ VPX_GOLD_FLAG,
+#if CONFIG_EXT_REFS
+ VPX_BWD_FLAG,
+#endif // CONFIG_EXT_REFS
+ VPX_ALT_FLAG
+ };
+ int64_t best_rd = best_rd_so_far;
+ int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
+ int64_t best_pred_diff[REFERENCE_MODES];
+ int64_t best_pred_rd[REFERENCE_MODES];
+ MB_MODE_INFO best_mbmode;
+ int ref_index, best_ref_index = 0;
+ unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
+ unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
+ vpx_prob comp_mode_p;
+#if CONFIG_DUAL_FILTER
+ INTERP_FILTER tmp_best_filter[4] = { 0 };
+#else
+ INTERP_FILTER tmp_best_filter = SWITCHABLE;
+#endif
+ int rate_uv_intra, rate_uv_tokenonly;
+ int64_t dist_uv;
+ int skip_uv;
+ PREDICTION_MODE mode_uv = DC_PRED;
+ const int intra_cost_penalty = vp10_get_intra_cost_penalty(
+ cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
+#if CONFIG_EXT_INTER
+ int_mv seg_mvs[4][2][TOTAL_REFS_PER_FRAME];
+#else
+ int_mv seg_mvs[4][TOTAL_REFS_PER_FRAME];
+#endif // CONFIG_EXT_INTER
+ b_mode_info best_bmodes[4];
+ int best_skip2 = 0;
+ int ref_frame_skip_mask[2] = { 0 };
+ int internal_active_edge =
+ vp10_active_edge_sb(cpi, mi_row, mi_col) && vp10_internal_image_edge(cpi);
+
+#if CONFIG_SUPERTX
+ best_rd_so_far = INT64_MAX;
+ best_rd = best_rd_so_far;
+ best_yrd = best_rd_so_far;
+#endif // CONFIG_SUPERTX
+ vp10_zero(best_mbmode);
+
+#if CONFIG_EXT_INTRA
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[0] = 0;
+ mbmi->ext_intra_mode_info.use_ext_intra_mode[1] = 0;
+#endif // CONFIG_EXT_INTRA
+ mbmi->motion_variation = SIMPLE_TRANSLATION;
+#if CONFIG_EXT_INTER
+ mbmi->use_wedge_interinter = 0;
+ mbmi->use_wedge_interintra = 0;
+#endif // CONFIG_EXT_INTER
+
+ for (i = 0; i < 4; i++) {
+ int j;
+#if CONFIG_EXT_INTER
+ int k;
+
+ for (k = 0; k < 2; k++)
+ for (j = 0; j < TOTAL_REFS_PER_FRAME; j++)
+ seg_mvs[i][k][j].as_int = INVALID_MV;
+#else
+ for (j = 0; j < TOTAL_REFS_PER_FRAME; j++)
+ seg_mvs[i][j].as_int = INVALID_MV;
+#endif // CONFIG_EXT_INTER
+ }
+
+ estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
+ &comp_mode_p);
+
+ for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
+ rate_uv_intra = INT_MAX;
+
+ rd_cost->rate = INT_MAX;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = INT_MAX;
+#endif
+
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
+ x->mbmi_ext->mode_context[ref_frame] = 0;
+#if CONFIG_REF_MV && CONFIG_EXT_INTER
+ x->mbmi_ext->compound_mode_context[ref_frame] = 0;
+#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
+ if (cpi->ref_frame_flags & flag_list[ref_frame]) {
+ setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
+ frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
+ } else {
+ ref_frame_skip_mask[0] |= (1 << ref_frame);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ }
+ frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
+#if CONFIG_EXT_INTER
+ frame_mv[NEWFROMNEARMV][ref_frame].as_int = INVALID_MV;
+#endif // CONFIG_EXT_INTER
+ frame_mv[ZEROMV][ref_frame].as_int = 0;
+ }
+
+ mbmi->palette_mode_info.palette_size[0] = 0;
+ mbmi->palette_mode_info.palette_size[1] = 0;
+
+ for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
+ int mode_excluded = 0;
+ int64_t this_rd = INT64_MAX;
+ int disable_skip = 0;
+ int compmode_cost = 0;
+ int rate2 = 0, rate_y = 0, rate_uv = 0;
+ int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
+ int skippable = 0;
+ int i;
+ int this_skip2 = 0;
+ int64_t total_sse = INT_MAX;
+ int early_term = 0;
+
+ ref_frame = vp10_ref_order[ref_index].ref_frame[0];
+ second_ref_frame = vp10_ref_order[ref_index].ref_frame[1];
+
+ // Look at the reference frame of the best mode so far and set the
+ // skip mask to look at a subset of the remaining modes.
+ if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
+ if (ref_index == 3) {
+ switch (best_mbmode.ref_frame[0]) {
+ case INTRA_FRAME: break;
+ case LAST_FRAME:
+ ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) |
+#if CONFIG_EXT_REFS
+ (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
+ (1 << BWDREF_FRAME) |
+#endif // CONFIG_EXT_REFS
+ (1 << ALTREF_FRAME);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+#if CONFIG_EXT_REFS
+ case LAST2_FRAME:
+ ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << LAST3_FRAME) |
+ (1 << GOLDEN_FRAME) |
+ (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+ case LAST3_FRAME:
+ ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << LAST2_FRAME) |
+ (1 << GOLDEN_FRAME) |
+ (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+#endif // CONFIG_EXT_REFS
+ case GOLDEN_FRAME:
+ ref_frame_skip_mask[0] |= (1 << LAST_FRAME) |
+#if CONFIG_EXT_REFS
+ (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
+ (1 << BWDREF_FRAME) |
+#endif // CONFIG_EXT_REFS
+ (1 << ALTREF_FRAME);
+ ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
+ break;
+#if CONFIG_EXT_REFS
+ case BWDREF_FRAME:
+ ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << LAST2_FRAME) |
+ (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) |
+ (1 << ALTREF_FRAME);
+ ref_frame_skip_mask[1] |= (1 << ALTREF_FRAME) | 0x01;
+ break;
+#endif // CONFIG_EXT_REFS
+ case ALTREF_FRAME:
+ ref_frame_skip_mask[0] |= (1 << LAST_FRAME) |
+#if CONFIG_EXT_REFS
+ (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
+ (1 << BWDREF_FRAME) |
+#endif // CONFIG_EXT_REFS
+ (1 << GOLDEN_FRAME);
+#if CONFIG_EXT_REFS
+ ref_frame_skip_mask[1] |= (1 << BWDREF_FRAME) | 0x01;
+#endif // CONFIG_EXT_REFS
+ break;
+ case NONE:
+ case TOTAL_REFS_PER_FRAME:
+ assert(0 && "Invalid Reference frame");
+ break;
+ }
+ }
+ }
+
+ if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
+ (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
+ continue;
+
+ // Test best rd so far against threshold for trying this mode.
+ if (!internal_active_edge &&
+ rd_less_than_thresh(best_rd,
+ rd_opt->threshes[segment_id][bsize][ref_index],
+ tile_data->thresh_freq_fact[bsize][ref_index]))
+ continue;
+
+ comp_pred = second_ref_frame > INTRA_FRAME;
+ if (comp_pred) {
+ if (!cpi->allow_comp_inter_inter) continue;
+ if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
+ // Do not allow compound prediction if the segment level reference frame
+ // feature is in use as in this case there can only be one reference.
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
+
+ if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
+ best_mbmode.ref_frame[0] == INTRA_FRAME)
+ continue;
+ }
+
+ // TODO(jingning, jkoleszar): scaling reference frame not supported for
+ // sub8x8 blocks.
+ if (ref_frame > INTRA_FRAME &&
+ vp10_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
+ continue;
+
+ if (second_ref_frame > INTRA_FRAME &&
+ vp10_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
+ continue;
+
+ if (comp_pred)
+ mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
+ else if (ref_frame != INTRA_FRAME)
+ mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
+
+ // If the segment reference frame feature is enabled....
+ // then do nothing if the current ref frame is not allowed..
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
+ continue;
+ // Disable this drop out case if the ref frame
+ // segment level feature is enabled for this segment. This is to
+ // prevent the possibility that we end up unable to pick any mode.
+ } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
+ // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
+ // unless ARNR filtering is enabled in which case we want
+ // an unfiltered alternative. We allow near/nearest as well
+ // because they may result in zero-zero MVs but be cheaper.
+ if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
+ continue;
+ }
+
+ mbmi->tx_size = TX_4X4;
+ mbmi->uv_mode = DC_PRED;
+ mbmi->ref_frame[0] = ref_frame;
+ mbmi->ref_frame[1] = second_ref_frame;
+// Evaluate all sub-pel filters irrespective of whether we can use
+// them for this frame.
+#if CONFIG_DUAL_FILTER
+ for (i = 0; i < 4; ++i)
+ mbmi->interp_filter[i] = cm->interp_filter == SWITCHABLE
+ ? EIGHTTAP_REGULAR
+ : cm->interp_filter;
+#else
+ mbmi->interp_filter =
+ cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR : cm->interp_filter;
+#endif
+ x->skip = 0;
+ set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
+
+ // Select prediction reference frames.
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
+ if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
+ }
+
+#if CONFIG_VAR_TX
+ mbmi->inter_tx_size[0][0] = mbmi->tx_size;
+#endif
+
+ if (ref_frame == INTRA_FRAME) {
+ int rate;
+ if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y,
+ best_rd) >= best_rd)
+ continue;
+ rate2 += rate;
+ rate2 += intra_cost_penalty;
+ distortion2 += distortion_y;
+
+ if (rate_uv_intra == INT_MAX) {
+ choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4, &rate_uv_intra,
+ &rate_uv_tokenonly, &dist_uv, &skip_uv, &mode_uv);
+ }
+ rate2 += rate_uv_intra;
+ rate_uv = rate_uv_tokenonly;
+ distortion2 += dist_uv;
+ distortion_uv = dist_uv;
+ mbmi->uv_mode = mode_uv;
+ } else {
+ int rate;
+ int64_t distortion;
+ int64_t this_rd_thresh;
+ int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
+ int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
+ int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
+ int tmp_best_skippable = 0;
+ int switchable_filter_index;
+ int_mv *second_ref =
+ comp_pred ? &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
+ b_mode_info tmp_best_bmodes[16]; // Should this be 4 ?
+ MB_MODE_INFO tmp_best_mbmode;
+#if CONFIG_DUAL_FILTER
+#if CONFIG_EXT_INTERP
+ BEST_SEG_INFO bsi[25];
+#else
+ BEST_SEG_INFO bsi[9];
+#endif
+#else
+ BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
+#endif
+ int pred_exists = 0;
+ int uv_skippable;
+#if CONFIG_EXT_INTER
+ int_mv compound_seg_newmvs[4][2];
+ for (i = 0; i < 4; i++) {
+ compound_seg_newmvs[i][0].as_int = INVALID_MV;
+ compound_seg_newmvs[i][1].as_int = INVALID_MV;
+ }
+#endif // CONFIG_EXT_INTER
+
+ this_rd_thresh = (ref_frame == LAST_FRAME)
+ ? rd_opt->threshes[segment_id][bsize][THR_LAST]
+ : rd_opt->threshes[segment_id][bsize][THR_ALTR];
+#if CONFIG_EXT_REFS
+ this_rd_thresh = (ref_frame == LAST2_FRAME)
+ ? rd_opt->threshes[segment_id][bsize][THR_LAST2]
+ : this_rd_thresh;
+ this_rd_thresh = (ref_frame == LAST3_FRAME)
+ ? rd_opt->threshes[segment_id][bsize][THR_LAST3]
+ : this_rd_thresh;
+#endif // CONFIG_EXT_REFS
+ this_rd_thresh = (ref_frame == GOLDEN_FRAME)
+ ? rd_opt->threshes[segment_id][bsize][THR_GOLD]
+ : this_rd_thresh;
+#if CONFIG_EXT_REFS
+// TODO(zoeliu): To explore whether this_rd_thresh should consider
+// BWDREF_FRAME and ALTREF_FRAME
+#endif // CONFIG_EXT_REFS
+
+ // TODO(any): Add search of the tx_type to improve rd performance at the
+ // expense of speed.
+ mbmi->tx_type = DCT_DCT;
+
+ if (cm->interp_filter != BILINEAR) {
+#if CONFIG_DUAL_FILTER
+ tmp_best_filter[0] = EIGHTTAP_REGULAR;
+ tmp_best_filter[1] = EIGHTTAP_REGULAR;
+ tmp_best_filter[2] = EIGHTTAP_REGULAR;
+ tmp_best_filter[3] = EIGHTTAP_REGULAR;
+#else
+ tmp_best_filter = EIGHTTAP_REGULAR;
+#endif
+ if (x->source_variance < sf->disable_filter_search_var_thresh) {
+#if CONFIG_DUAL_FILTER
+ tmp_best_filter[0] = EIGHTTAP_REGULAR;
+#else
+ tmp_best_filter = EIGHTTAP_REGULAR;
+#endif
+ } else if (sf->adaptive_pred_interp_filter == 1 &&
+ ctx->pred_interp_filter < SWITCHABLE) {
+#if CONFIG_DUAL_FILTER
+ tmp_best_filter[0] = ctx->pred_interp_filter;
+#else
+ tmp_best_filter = ctx->pred_interp_filter;
+#endif
+ } else if (sf->adaptive_pred_interp_filter == 2) {
+#if CONFIG_DUAL_FILTER
+ tmp_best_filter[0] = ctx->pred_interp_filter < SWITCHABLE
+ ? ctx->pred_interp_filter
+ : 0;
+#else
+ tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE
+ ? ctx->pred_interp_filter
+ : 0;
+#endif
+ } else {
+#if CONFIG_DUAL_FILTER
+ for (switchable_filter_index = 0;
+#if CONFIG_EXT_INTERP
+ switchable_filter_index < 25;
+#else
+ switchable_filter_index < 9;
+#endif
+ ++switchable_filter_index) {
+#else
+ for (switchable_filter_index = 0;
+ switchable_filter_index < SWITCHABLE_FILTERS;
+ ++switchable_filter_index) {
+#endif
+ int newbest, rs;
+ int64_t rs_rd;
+ MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
+#if CONFIG_DUAL_FILTER
+ mbmi->interp_filter[0] = filter_sets[switchable_filter_index][0];
+ mbmi->interp_filter[1] = filter_sets[switchable_filter_index][1];
+ mbmi->interp_filter[2] = filter_sets[switchable_filter_index][0];
+ mbmi->interp_filter[3] = filter_sets[switchable_filter_index][1];
+#else
+ mbmi->interp_filter = switchable_filter_index;
+#endif
+ tmp_rd = rd_pick_best_sub8x8_mode(
+ cpi, x, &mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd,
+ &rate, &rate_y, &distortion, &skippable, &total_sse,
+ (int)this_rd_thresh, seg_mvs,
+#if CONFIG_EXT_INTER
+ compound_seg_newmvs,
+#endif // CONFIG_EXT_INTER
+ bsi, switchable_filter_index, mi_row, mi_col);
+#if CONFIG_EXT_INTERP
+#if CONFIG_DUAL_FILTER
+ if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
+ (mbmi->interp_filter[0] != EIGHTTAP_REGULAR ||
+ mbmi->interp_filter[1] != EIGHTTAP_REGULAR)) // invalid config
+ continue;
+#else
+ if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
+ mbmi->interp_filter != EIGHTTAP_REGULAR) // invalid config
+ continue;
+#endif
+#endif // CONFIG_EXT_INTERP
+ if (tmp_rd == INT64_MAX) continue;
+ rs = vp10_get_switchable_rate(cpi, xd);
+ rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
+ if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd;
+
+ newbest = (tmp_rd < tmp_best_rd);
+ if (newbest) {
+#if CONFIG_DUAL_FILTER
+ tmp_best_filter[0] = mbmi->interp_filter[0];
+ tmp_best_filter[1] = mbmi->interp_filter[1];
+ tmp_best_filter[2] = mbmi->interp_filter[2];
+ tmp_best_filter[3] = mbmi->interp_filter[3];
+#else
+ tmp_best_filter = mbmi->interp_filter;
+#endif
+ tmp_best_rd = tmp_rd;
+ }
+ if ((newbest && cm->interp_filter == SWITCHABLE) ||
+ (
+#if CONFIG_DUAL_FILTER
+ mbmi->interp_filter[0] == cm->interp_filter
+#else
+ mbmi->interp_filter == cm->interp_filter
+#endif
+ && cm->interp_filter != SWITCHABLE)) {
+ tmp_best_rdu = tmp_rd;
+ tmp_best_rate = rate;
+ tmp_best_ratey = rate_y;
+ tmp_best_distortion = distortion;
+ tmp_best_sse = total_sse;
+ tmp_best_skippable = skippable;
+ tmp_best_mbmode = *mbmi;
+ for (i = 0; i < 4; i++) {
+ tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
+ }
+ pred_exists = 1;
+ }
+ } // switchable_filter_index loop
+ }
+ }
+
+ if (tmp_best_rdu == INT64_MAX && pred_exists) continue;
+
+#if CONFIG_DUAL_FILTER
+ mbmi->interp_filter[0] =
+ (cm->interp_filter == SWITCHABLE ? tmp_best_filter[0]
+ : cm->interp_filter);
+ mbmi->interp_filter[1] =
+ (cm->interp_filter == SWITCHABLE ? tmp_best_filter[1]
+ : cm->interp_filter);
+ mbmi->interp_filter[2] =
+ (cm->interp_filter == SWITCHABLE ? tmp_best_filter[2]
+ : cm->interp_filter);
+ mbmi->interp_filter[3] =
+ (cm->interp_filter == SWITCHABLE ? tmp_best_filter[3]
+ : cm->interp_filter);
+#else
+ mbmi->interp_filter =
+ (cm->interp_filter == SWITCHABLE ? tmp_best_filter
+ : cm->interp_filter);
+#endif
+
+ if (!pred_exists) {
+ // Handles the special case when a filter that is not in the
+ // switchable list (bilinear) is indicated at the frame level
+ tmp_rd = rd_pick_best_sub8x8_mode(
+ cpi, x, &x->mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd,
+ &rate, &rate_y, &distortion, &skippable, &total_sse,
+ (int)this_rd_thresh, seg_mvs,
+#if CONFIG_EXT_INTER
+ compound_seg_newmvs,
+#endif // CONFIG_EXT_INTER
+ bsi, 0, mi_row, mi_col);
+#if CONFIG_EXT_INTERP
+#if CONFIG_DUAL_FILTER
+ if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
+ (mbmi->interp_filter[0] != EIGHTTAP_REGULAR ||
+ mbmi->interp_filter[1] != EIGHTTAP_REGULAR)) {
+ mbmi->interp_filter[0] = EIGHTTAP_REGULAR;
+ mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
+ }
+#else
+ if (!vp10_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE &&
+ mbmi->interp_filter != EIGHTTAP_REGULAR)
+ mbmi->interp_filter = EIGHTTAP_REGULAR;
+#endif // CONFIG_DUAL_FILTER
+#endif // CONFIG_EXT_INTERP
+ if (tmp_rd == INT64_MAX) continue;
+ } else {
+ total_sse = tmp_best_sse;
+ rate = tmp_best_rate;
+ rate_y = tmp_best_ratey;
+ distortion = tmp_best_distortion;
+ skippable = tmp_best_skippable;
+ *mbmi = tmp_best_mbmode;
+ for (i = 0; i < 4; i++) xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
+ }
+ // Add in the cost of the transform type
+ if (!xd->lossless[mbmi->segment_id]) {
+ int rate_tx_type = 0;
+#if CONFIG_EXT_TX
+ if (get_ext_tx_types(mbmi->tx_size, bsize, 1) > 1) {
+ const int eset = get_ext_tx_set(mbmi->tx_size, bsize, 1);
+ rate_tx_type =
+ cpi->inter_tx_type_costs[eset][mbmi->tx_size][mbmi->tx_type];
+ }
+#else
+ if (mbmi->tx_size < TX_32X32) {
+ rate_tx_type = cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
+ }
+#endif
+ rate += rate_tx_type;
+ rate_y += rate_tx_type;
+ }
+
+ rate2 += rate;
+ distortion2 += distortion;
+
+ if (cm->interp_filter == SWITCHABLE)
+ rate2 += vp10_get_switchable_rate(cpi, xd);
+
+ if (!mode_excluded)
+ mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
+ : cm->reference_mode == COMPOUND_REFERENCE;
+
+ compmode_cost = vp10_cost_bit(comp_mode_p, comp_pred);
+
+ tmp_best_rdu =
+ best_rd - VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
+ RDCOST(x->rdmult, x->rddiv, 0, total_sse));
+
+ if (tmp_best_rdu > 0) {
+ // If even the 'Y' rd value of split is higher than best so far
+ // then dont bother looking at UV
+ vp10_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8);
+#if CONFIG_VAR_TX
+ if (!inter_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
+ &uv_sse, BLOCK_8X8, tmp_best_rdu))
+ continue;
+#else
+ if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
+ &uv_sse, BLOCK_8X8, tmp_best_rdu))
+ continue;
+#endif
+ rate2 += rate_uv;
+ distortion2 += distortion_uv;
+ skippable = skippable && uv_skippable;
+ total_sse += uv_sse;
+ } else {
+ continue;
+ }
+ }
+
+ if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
+
+ // Estimate the reference frame signaling cost and add it
+ // to the rolling cost variable.
+ if (second_ref_frame > INTRA_FRAME) {
+ rate2 += ref_costs_comp[ref_frame];
+#if CONFIG_EXT_REFS
+ rate2 += ref_costs_comp[second_ref_frame];
+#endif // CONFIG_EXT_REFS
+ } else {
+ rate2 += ref_costs_single[ref_frame];
+ }
+
+ if (!disable_skip) {
+ // Skip is never coded at the segment level for sub8x8 blocks and instead
+ // always coded in the bitstream at the mode info level.
+
+ if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
+ if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
+ RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
+ // Add in the cost of the no skip flag.
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
+ } else {
+ // FIXME(rbultje) make this work for splitmv also
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
+ distortion2 = total_sse;
+ assert(total_sse >= 0);
+ rate2 -= (rate_y + rate_uv);
+ rate_y = 0;
+ rate_uv = 0;
+ this_skip2 = 1;
+ }
+ } else {
+ // Add in the cost of the no skip flag.
+ rate2 += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
+ }
+
+ // Calculate the final RD estimate for this mode.
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ }
+
+ if (!disable_skip && ref_frame == INTRA_FRAME) {
+ for (i = 0; i < REFERENCE_MODES; ++i)
+ best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
+ }
+
+ // Did this mode help.. i.e. is it the new best mode
+ if (this_rd < best_rd || x->skip) {
+ if (!mode_excluded) {
+ // Note index of best mode so far
+ best_ref_index = ref_index;
+
+ if (ref_frame == INTRA_FRAME) {
+ /* required for left and above block mv */
+ mbmi->mv[0].as_int = 0;
+ }
+
+ rd_cost->rate = rate2;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = rate2 - rate_y - rate_uv;
+ if (!disable_skip)
+ *returnrate_nocoef -=
+ vp10_cost_bit(vp10_get_skip_prob(cm, xd), this_skip2);
+ *returnrate_nocoef -= vp10_cost_bit(vp10_get_intra_inter_prob(cm, xd),
+ mbmi->ref_frame[0] != INTRA_FRAME);
+ assert(*returnrate_nocoef > 0);
+#endif // CONFIG_SUPERTX
+ rd_cost->dist = distortion2;
+ rd_cost->rdcost = this_rd;
+ best_rd = this_rd;
+ best_yrd =
+ best_rd - RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
+ best_mbmode = *mbmi;
+ best_skip2 = this_skip2;
+
+#if CONFIG_VAR_TX
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ memset(ctx->blk_skip[i], 0, sizeof(uint8_t) * ctx->num_4x4_blk);
+#endif
+
+ for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i];
+
+ // TODO(debargha): enhance this test with a better distortion prediction
+ // based on qp, activity mask and history
+ if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
+ (ref_index > MIN_EARLY_TERM_INDEX)) {
+ int qstep = xd->plane[0].dequant[1];
+ // TODO(debargha): Enhance this by specializing for each mode_index
+ int scale = 4;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ qstep >>= (xd->bd - 8);
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (x->source_variance < UINT_MAX) {
+ const int var_adjust = (x->source_variance < 16);
+ scale -= var_adjust;
+ }
+ if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) {
+ early_term = 1;
+ }
+ }
+ }
+ }
+
+ /* keep record of best compound/single-only prediction */
+ if (!disable_skip && ref_frame != INTRA_FRAME) {
+ int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
+
+ if (cm->reference_mode == REFERENCE_MODE_SELECT) {
+ single_rate = rate2 - compmode_cost;
+ hybrid_rate = rate2;
+ } else {
+ single_rate = rate2;
+ hybrid_rate = rate2 + compmode_cost;
+ }
+
+ single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
+ hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
+
+ if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
+ best_pred_rd[SINGLE_REFERENCE] = single_rd;
+ else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
+ best_pred_rd[COMPOUND_REFERENCE] = single_rd;
+
+ if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
+ best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
+ }
+
+ if (early_term) break;
+
+ if (x->skip && !comp_pred) break;
+ }
+
+ if (best_rd >= best_rd_so_far) {
+ rd_cost->rate = INT_MAX;
+ rd_cost->rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = INT_MAX;
+#endif // CONFIG_SUPERTX
+ return;
+ }
+
+ // If we used an estimate for the uv intra rd in the loop above...
+ if (sf->use_uv_intra_rd_estimate) {
+ // Do Intra UV best rd mode selection if best mode choice above was intra.
+ if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
+ *mbmi = best_mbmode;
+ rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra, &rate_uv_tokenonly,
+ &dist_uv, &skip_uv, BLOCK_8X8, TX_4X4);
+ }
+ }
+
+ if (best_rd == INT64_MAX) {
+ rd_cost->rate = INT_MAX;
+ rd_cost->dist = INT64_MAX;
+ rd_cost->rdcost = INT64_MAX;
+#if CONFIG_SUPERTX
+ *returnrate_nocoef = INT_MAX;
+#endif // CONFIG_SUPERTX
+ return;
+ }
+
+#if CONFIG_DUAL_FILTER
+ assert((cm->interp_filter == SWITCHABLE) ||
+ (cm->interp_filter == best_mbmode.interp_filter[0]) ||
+ !is_inter_block(&best_mbmode));
+#else
+ assert((cm->interp_filter == SWITCHABLE) ||
+ (cm->interp_filter == best_mbmode.interp_filter) ||
+ !is_inter_block(&best_mbmode));
+#endif
+
+ vp10_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
+ sf->adaptive_rd_thresh, bsize, best_ref_index);
+
+ // macroblock modes
+ *mbmi = best_mbmode;
+#if CONFIG_VAR_TX && CONFIG_EXT_TX && CONFIG_RECT_TX
+ mbmi->inter_tx_size[0][0] = mbmi->tx_size;
+#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
+
+ x->skip |= best_skip2;
+ if (!is_inter_block(&best_mbmode)) {
+ for (i = 0; i < 4; i++) xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
+ } else {
+ for (i = 0; i < 4; ++i)
+ memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
+
+ mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
+ mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
+#if CONFIG_REF_MV
+ mbmi->pred_mv[0].as_int = xd->mi[0]->bmi[3].pred_mv_s8[0].as_int;
+ mbmi->pred_mv[1].as_int = xd->mi[0]->bmi[3].pred_mv_s8[1].as_int;
+#endif
+ }
+
+ for (i = 0; i < REFERENCE_MODES; ++i) {
+ if (best_pred_rd[i] == INT64_MAX)
+ best_pred_diff[i] = INT_MIN;
+ else
+ best_pred_diff[i] = best_rd - best_pred_rd[i];
+ }
+
+ store_coding_context(x, ctx, best_ref_index, best_pred_diff, 0);
+}
+
+#if CONFIG_OBMC
+// This function has a structure similar to vp10_build_obmc_inter_prediction
+//
+// The OBMC predictor is computed as:
+//
+// PObmc(x,y) =
+// VPX_BLEND_A64(Mh(x),
+// VPX_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
+// PLeft(x, y))
+//
+// Scaling up by VPX_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
+// rounding, this can be written as:
+//
+// VPX_BLEND_A64_MAX_ALPHA * VPX_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
+// Mh(x) * Mv(y) * P(x,y) +
+// Mh(x) * Cv(y) * Pabove(x,y) +
+// VPX_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
+//
+// Where :
+//
+// Cv(y) = VPX_BLEND_A64_MAX_ALPHA - Mv(y)
+// Ch(x) = VPX_BLEND_A64_MAX_ALPHA - Mh(x)
+//
+// This function computes 'wsrc' and 'mask' as:
+//
+// wsrc(x, y) =
+// VPX_BLEND_A64_MAX_ALPHA * VPX_BLEND_A64_MAX_ALPHA * src(x, y) -
+// (Mh(x) * Cv(y) * Pabove(x,y) +
+// VPX_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y))
+//
+// mask(x, y) = Mh(x) * Mv(y)
+//
+// These can then be used to efficiently approximate the error for any
+// predictor P in the context of the provided neighbouring predictors by
+// computing:
+//
+// error(x, y) =
+// (wsrc(x, y) - mask(x, y) * P(x, y)) / (VPX_BLEND_A64_MAX_ALPHA ** 2)
+//
+static void calc_target_weighted_pred(const VP10_COMMON *cm,
+ const MACROBLOCK *x,
+ const MACROBLOCKD *xd, int mi_row,
+ int mi_col, const uint8_t *above,
+ int above_stride, const uint8_t *left,
+ int left_stride, int32_t *mask_buf,
+ int32_t *wsrc_buf) {
+ const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ int row, col, i;
+ const int bw = 8 * xd->n8_w;
+ const int bh = 8 * xd->n8_h;
+ const int wsrc_stride = bw;  // wsrc_buf/mask_buf are packed bw x bh arrays
+ const int mask_stride = bw;
+ const int src_scale = VPX_BLEND_A64_MAX_ALPHA * VPX_BLEND_A64_MAX_ALPHA;
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
+#else
+ const int is_hbd = 0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ // Only plane 0 is processed here, and it must not be subsampled.
+ assert(xd->plane[0].subsampling_x == 0);
+ assert(xd->plane[0].subsampling_y == 0);
+
+ vp10_zero_array(wsrc_buf, bw * bh);
+ for (i = 0; i < bw * bh; ++i) mask_buf[i] = VPX_BLEND_A64_MAX_ALPHA;
+
+ // Above row: within the overlap, wsrc = Cv(y) * PAbove and mask = Mv(y).
+ if (xd->up_available) {
+ const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;  // rows blended
+ const int miw = VPXMIN(xd->n8_w, cm->mi_cols - mi_col);
+ const int mi_row_offset = -1;
+ const uint8_t *const mask1d = vp10_get_obmc_mask(overlap);
+
+ assert(miw > 0);
+
+ i = 0;
+ do { // for each mi in the above row
+ const int mi_col_offset = i;
+ const MB_MODE_INFO *const above_mbmi =
+ &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
+ const int mi_step =
+ VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
+ const int neighbor_bw = mi_step * MI_SIZE;
+
+ if (is_neighbor_overlappable(above_mbmi)) {
+ const int tmp_stride = above_stride;
+ int32_t *wsrc = wsrc_buf + (i * MI_SIZE);
+ int32_t *mask = mask_buf + (i * MI_SIZE);
+
+ if (!is_hbd) {
+ const uint8_t *tmp = above;
+
+ for (row = 0; row < overlap; ++row) {
+ const uint8_t m0 = mask1d[row];
+ const uint8_t m1 = VPX_BLEND_A64_MAX_ALPHA - m0;
+ for (col = 0; col < neighbor_bw; ++col) {
+ wsrc[col] = m1 * tmp[col];
+ mask[col] = m0;
+ }
+ wsrc += wsrc_stride;
+ mask += mask_stride;
+ tmp += tmp_stride;
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ } else {
+ const uint16_t *tmp = CONVERT_TO_SHORTPTR(above);
+
+ for (row = 0; row < overlap; ++row) {
+ const uint8_t m0 = mask1d[row];
+ const uint8_t m1 = VPX_BLEND_A64_MAX_ALPHA - m0;
+ for (col = 0; col < neighbor_bw; ++col) {
+ wsrc[col] = m1 * tmp[col];
+ mask[col] = m0;
+ }
+ wsrc += wsrc_stride;
+ mask += mask_stride;
+ tmp += tmp_stride;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ }
+
+ above += neighbor_bw;  // step to the next neighbour's columns
+ i += mi_step;
+ } while (i < miw);
+ }
+
+ for (i = 0; i < bw * bh; ++i) {  // i.e. Mh(x) == MAX_ALPHA so far
+ wsrc_buf[i] *= VPX_BLEND_A64_MAX_ALPHA;
+ mask_buf[i] *= VPX_BLEND_A64_MAX_ALPHA;
+ }
+
+ // Left column: fold Mh(x) into mask and Ch(x) * PLeft into wsrc.
+ if (xd->left_available) {
+ const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;  // cols blended
+ const int mih = VPXMIN(xd->n8_h, cm->mi_rows - mi_row);
+ const int mi_col_offset = -1;
+ const uint8_t *const mask1d = vp10_get_obmc_mask(overlap);
+
+ assert(mih > 0);
+
+ i = 0;
+ do { // for each mi in the left column
+ const int mi_row_offset = i;
+ const MB_MODE_INFO *const left_mbmi =
+ &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
+ const int mi_step =
+ VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[left_mbmi->sb_type]);
+ const int neighbor_bh = mi_step * MI_SIZE;
+
+ if (is_neighbor_overlappable(left_mbmi)) {
+ const int tmp_stride = left_stride;
+ int32_t *wsrc = wsrc_buf + (i * MI_SIZE * wsrc_stride);
+ int32_t *mask = mask_buf + (i * MI_SIZE * mask_stride);
+
+ if (!is_hbd) {
+ const uint8_t *tmp = left;
+
+ for (row = 0; row < neighbor_bh; ++row) {
+ for (col = 0; col < overlap; ++col) {
+ const uint8_t m0 = mask1d[col];
+ const uint8_t m1 = VPX_BLEND_A64_MAX_ALPHA - m0;
+ wsrc[col] = (wsrc[col] >> VPX_BLEND_A64_ROUND_BITS) * m0 +
+ (tmp[col] << VPX_BLEND_A64_ROUND_BITS) * m1;
+ mask[col] = (mask[col] >> VPX_BLEND_A64_ROUND_BITS) * m0;
+ }
+ wsrc += wsrc_stride;
+ mask += mask_stride;
+ tmp += tmp_stride;
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ } else {
+ const uint16_t *tmp = CONVERT_TO_SHORTPTR(left);
+
+ for (row = 0; row < neighbor_bh; ++row) {
+ for (col = 0; col < overlap; ++col) {
+ const uint8_t m0 = mask1d[col];
+ const uint8_t m1 = VPX_BLEND_A64_MAX_ALPHA - m0;
+ wsrc[col] = (wsrc[col] >> VPX_BLEND_A64_ROUND_BITS) * m0 +
+ (tmp[col] << VPX_BLEND_A64_ROUND_BITS) * m1;
+ mask[col] = (mask[col] >> VPX_BLEND_A64_ROUND_BITS) * m0;
+ }
+ wsrc += wsrc_stride;
+ mask += mask_stride;
+ tmp += tmp_stride;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ }
+
+ left += neighbor_bh * left_stride;  // step to the next neighbour's rows
+ i += mi_step;
+ } while (i < mih);
+ }
+
+ if (!is_hbd) {  // finally wsrc = src * MAX_ALPHA ** 2 - wsrc
+ const uint8_t *src = x->plane[0].src.buf;
+
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col) {
+ wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
+ }
+ wsrc_buf += wsrc_stride;
+ src += x->plane[0].src.stride;
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ } else {
+ const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
+
+ for (row = 0; row < bh; ++row) {
+ for (col = 0; col < bw; ++col) {
+ wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
+ }
+ wsrc_buf += wsrc_stride;
+ src += x->plane[0].src.stride;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+}
+#endif // CONFIG_OBMC
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
new file mode 100644
index 0000000..4ce2879
--- /dev/null
+++ b/av1/encoder/rdopt.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_RDOPT_H_
+#define VP10_ENCODER_RDOPT_H_
+
+#include "av1/common/blockd.h"
+
+#include "av1/encoder/block.h"
+#include "av1/encoder/context_tree.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct TileInfo;
+struct VP10_COMP;
+struct macroblock;
+struct RD_COST;
+
+void vp10_rd_pick_intra_mode_sb(struct VP10_COMP *cpi, struct macroblock *x,
+ struct RD_COST *rd_cost, BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx, int64_t best_rd);
+
+unsigned int vp10_get_sby_perpixel_variance(VP10_COMP *cpi,
+ const struct buf_2d *ref,
+ BLOCK_SIZE bs);
+#if CONFIG_VP9_HIGHBITDEPTH
+unsigned int vp10_high_get_sby_perpixel_variance(VP10_COMP *cpi,
+ const struct buf_2d *ref,
+ BLOCK_SIZE bs, int bd);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+void vp10_rd_pick_inter_mode_sb(struct VP10_COMP *cpi,
+ struct TileDataEnc *tile_data,
+ struct macroblock *x, int mi_row, int mi_col,
+ struct RD_COST *rd_cost,
+#if CONFIG_SUPERTX
+ int *returnrate_nocoef,
+#endif // CONFIG_SUPERTX
+ BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far);
+
+void vp10_rd_pick_inter_mode_sb_seg_skip(
+ struct VP10_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x,
+ struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far);
+
+int vp10_internal_image_edge(struct VP10_COMP *cpi);
+int vp10_active_h_edge(struct VP10_COMP *cpi, int mi_row, int mi_step);
+int vp10_active_v_edge(struct VP10_COMP *cpi, int mi_col, int mi_step);
+int vp10_active_edge_sb(struct VP10_COMP *cpi, int mi_row, int mi_col);
+
+void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
+ struct TileDataEnc *tile_data,
+ struct macroblock *x, int mi_row,
+ int mi_col, struct RD_COST *rd_cost,
+#if CONFIG_SUPERTX
+ int *returnrate_nocoef,
+#endif // CONFIG_SUPERTX
+ BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far);
+
+#if CONFIG_SUPERTX
+#if CONFIG_VAR_TX
+void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
+ int blk_row, int blk_col, int plane, int block,
+ int plane_bsize, int coeff_ctx, int *rate,
+ int64_t *dist, int64_t *bsse, int *skip);
+#endif  // CONFIG_VAR_TX
+
+void vp10_txfm_rd_in_plane_supertx(MACROBLOCK *x, const VP10_COMP *cpi,
+ int *rate, int64_t *distortion,
+ int *skippable, int64_t *sse,
+ int64_t ref_best_rd, int plane,
+ BLOCK_SIZE bsize, TX_SIZE tx_size,
+ int use_fast_coef_casting);
+#endif // CONFIG_SUPERTX
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_RDOPT_H_
diff --git a/av1/encoder/resize.c b/av1/encoder/resize.c
new file mode 100644
index 0000000..e209b21
--- /dev/null
+++ b/av1/encoder/resize.c
@@ -0,0 +1,819 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#include "aom_dsp/vpx_dsp_common.h"
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#include "aom_ports/mem.h"
+#include "av1/common/common.h"
+#include "av1/encoder/resize.h"
+
+#define FILTER_BITS 7
+
+#define INTERP_TAPS 8
+#define SUBPEL_BITS 5
+#define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1)
+#define INTERP_PRECISION_BITS 32
+
+typedef int16_t interp_kernel[INTERP_TAPS];
+
+// Filters for interpolation (0.5-band) - note this also filters integer pels.
+static const interp_kernel filteredinterp_filters500[(1 << SUBPEL_BITS)] = {
+ { -3, 0, 35, 64, 35, 0, -3, 0 }, { -3, -1, 34, 64, 36, 1, -3, 0 },
+ { -3, -1, 32, 64, 38, 1, -3, 0 }, { -2, -2, 31, 63, 39, 2, -3, 0 },
+ { -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 28, 63, 42, 3, -4, 0 },
+ { -2, -3, 27, 63, 43, 4, -4, 0 }, { -2, -3, 25, 62, 45, 5, -4, 0 },
+ { -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 23, 61, 47, 6, -4, 0 },
+ { -2, -3, 21, 60, 49, 7, -4, 0 }, { -1, -4, 20, 60, 50, 8, -4, -1 },
+ { -1, -4, 19, 59, 51, 9, -4, -1 }, { -1, -4, 17, 58, 52, 10, -4, 0 },
+ { -1, -4, 16, 57, 53, 12, -4, -1 }, { -1, -4, 15, 56, 54, 13, -4, -1 },
+ { -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 13, 54, 56, 15, -4, -1 },
+ { -1, -4, 12, 53, 57, 16, -4, -1 }, { 0, -4, 10, 52, 58, 17, -4, -1 },
+ { -1, -4, 9, 51, 59, 19, -4, -1 }, { -1, -4, 8, 50, 60, 20, -4, -1 },
+ { 0, -4, 7, 49, 60, 21, -3, -2 }, { 0, -4, 6, 47, 61, 23, -3, -2 },
+ { 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 5, 45, 62, 25, -3, -2 },
+ { 0, -4, 4, 43, 63, 27, -3, -2 }, { 0, -4, 3, 42, 63, 28, -2, -2 },
+ { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 2, 39, 63, 31, -2, -2 },
+ { 0, -3, 1, 38, 64, 32, -1, -3 }, { 0, -3, 1, 36, 64, 34, -1, -3 }
+};
+
+// Filters for interpolation (0.625-band) - note this also filters integer pels.
+static const interp_kernel filteredinterp_filters625[(1 << SUBPEL_BITS)] = {
+ { -1, -8, 33, 80, 33, -8, -1, 0 }, { -1, -8, 30, 80, 35, -8, -1, 1 },
+ { -1, -8, 28, 80, 37, -7, -2, 1 }, { 0, -8, 26, 79, 39, -7, -2, 1 },
+ { 0, -8, 24, 79, 41, -7, -2, 1 }, { 0, -8, 22, 78, 43, -6, -2, 1 },
+ { 0, -8, 20, 78, 45, -5, -3, 1 }, { 0, -8, 18, 77, 48, -5, -3, 1 },
+ { 0, -8, 16, 76, 50, -4, -3, 1 }, { 0, -8, 15, 75, 52, -3, -4, 1 },
+ { 0, -7, 13, 74, 54, -3, -4, 1 }, { 0, -7, 11, 73, 56, -2, -4, 1 },
+ { 0, -7, 10, 71, 58, -1, -4, 1 }, { 1, -7, 8, 70, 60, 0, -5, 1 },
+ { 1, -6, 6, 68, 62, 1, -5, 1 }, { 1, -6, 5, 67, 63, 2, -5, 1 },
+ { 1, -6, 4, 65, 65, 4, -6, 1 }, { 1, -5, 2, 63, 67, 5, -6, 1 },
+ { 1, -5, 1, 62, 68, 6, -6, 1 }, { 1, -5, 0, 60, 70, 8, -7, 1 },
+ { 1, -4, -1, 58, 71, 10, -7, 0 }, { 1, -4, -2, 56, 73, 11, -7, 0 },
+ { 1, -4, -3, 54, 74, 13, -7, 0 }, { 1, -4, -3, 52, 75, 15, -8, 0 },
+ { 1, -3, -4, 50, 76, 16, -8, 0 }, { 1, -3, -5, 48, 77, 18, -8, 0 },
+ { 1, -3, -5, 45, 78, 20, -8, 0 }, { 1, -2, -6, 43, 78, 22, -8, 0 },
+ { 1, -2, -7, 41, 79, 24, -8, 0 }, { 1, -2, -7, 39, 79, 26, -8, 0 },
+ { 1, -2, -7, 37, 80, 28, -8, -1 }, { 1, -1, -8, 35, 80, 30, -8, -1 },
+};
+
+// Filters for interpolation (0.75-band) - note this also filters integer pels.
+static const interp_kernel filteredinterp_filters750[(1 << SUBPEL_BITS)] = {
+ { 2, -11, 25, 96, 25, -11, 2, 0 }, { 2, -11, 22, 96, 28, -11, 2, 0 },
+ { 2, -10, 19, 95, 31, -11, 2, 0 }, { 2, -10, 17, 95, 34, -12, 2, 0 },
+ { 2, -9, 14, 94, 37, -12, 2, 0 }, { 2, -8, 12, 93, 40, -12, 1, 0 },
+ { 2, -8, 9, 92, 43, -12, 1, 1 }, { 2, -7, 7, 91, 46, -12, 1, 0 },
+ { 2, -7, 5, 90, 49, -12, 1, 0 }, { 2, -6, 3, 88, 52, -12, 0, 1 },
+ { 2, -5, 1, 86, 55, -12, 0, 1 }, { 2, -5, -1, 84, 58, -11, 0, 1 },
+ { 2, -4, -2, 82, 61, -11, -1, 1 }, { 2, -4, -4, 80, 64, -10, -1, 1 },
+ { 1, -3, -5, 77, 67, -9, -1, 1 }, { 1, -3, -6, 75, 70, -8, -2, 1 },
+ { 1, -2, -7, 72, 72, -7, -2, 1 }, { 1, -2, -8, 70, 75, -6, -3, 1 },
+ { 1, -1, -9, 67, 77, -5, -3, 1 }, { 1, -1, -10, 64, 80, -4, -4, 2 },
+ { 1, -1, -11, 61, 82, -2, -4, 2 }, { 1, 0, -11, 58, 84, -1, -5, 2 },
+ { 1, 0, -12, 55, 86, 1, -5, 2 }, { 1, 0, -12, 52, 88, 3, -6, 2 },
+ { 0, 1, -12, 49, 90, 5, -7, 2 }, { 0, 1, -12, 46, 91, 7, -7, 2 },
+ { 1, 1, -12, 43, 92, 9, -8, 2 }, { 0, 1, -12, 40, 93, 12, -8, 2 },
+ { 0, 2, -12, 37, 94, 14, -9, 2 }, { 0, 2, -12, 34, 95, 17, -10, 2 },
+ { 0, 2, -11, 31, 95, 19, -10, 2 }, { 0, 2, -11, 28, 96, 22, -11, 2 }
+};
+
+// Filters for interpolation (0.875-band) - note this also filters integer pels.
+static const interp_kernel filteredinterp_filters875[(1 << SUBPEL_BITS)] = {
+ { 3, -8, 13, 112, 13, -8, 3, 0 }, { 3, -7, 10, 112, 17, -9, 3, -1 },
+ { 2, -6, 7, 111, 21, -9, 3, -1 }, { 2, -5, 4, 111, 24, -10, 3, -1 },
+ { 2, -4, 1, 110, 28, -11, 3, -1 }, { 1, -3, -1, 108, 32, -12, 4, -1 },
+ { 1, -2, -3, 106, 36, -13, 4, -1 }, { 1, -1, -6, 105, 40, -14, 4, -1 },
+ { 1, -1, -7, 102, 44, -14, 4, -1 }, { 1, 0, -9, 100, 48, -15, 4, -1 },
+ { 1, 1, -11, 97, 53, -16, 4, -1 }, { 0, 1, -12, 95, 57, -16, 4, -1 },
+ { 0, 2, -13, 91, 61, -16, 4, -1 }, { 0, 2, -14, 88, 65, -16, 4, -1 },
+ { 0, 3, -15, 84, 69, -17, 4, 0 }, { 0, 3, -16, 81, 73, -16, 3, 0 },
+ { 0, 3, -16, 77, 77, -16, 3, 0 }, { 0, 3, -16, 73, 81, -16, 3, 0 },
+ { 0, 4, -17, 69, 84, -15, 3, 0 }, { -1, 4, -16, 65, 88, -14, 2, 0 },
+ { -1, 4, -16, 61, 91, -13, 2, 0 }, { -1, 4, -16, 57, 95, -12, 1, 0 },
+ { -1, 4, -16, 53, 97, -11, 1, 1 }, { -1, 4, -15, 48, 100, -9, 0, 1 },
+ { -1, 4, -14, 44, 102, -7, -1, 1 }, { -1, 4, -14, 40, 105, -6, -1, 1 },
+ { -1, 4, -13, 36, 106, -3, -2, 1 }, { -1, 4, -12, 32, 108, -1, -3, 1 },
+ { -1, 3, -11, 28, 110, 1, -4, 2 }, { -1, 3, -10, 24, 111, 4, -5, 2 },
+ { -1, 3, -9, 21, 111, 7, -6, 2 }, { -1, 3, -9, 17, 112, 10, -7, 3 }
+};
+
+// Filters for interpolation (full-band) - no filtering for integer pixels
+static const interp_kernel filteredinterp_filters1000[(1 << SUBPEL_BITS)] = {
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -3, 128, 3, -1, 0, 0 },
+ { -1, 2, -6, 127, 7, -2, 1, 0 }, { -1, 3, -9, 126, 12, -4, 1, 0 },
+ { -1, 4, -12, 125, 16, -5, 1, 0 }, { -1, 4, -14, 123, 20, -6, 2, 0 },
+ { -1, 5, -15, 120, 25, -8, 2, 0 }, { -1, 5, -17, 118, 30, -9, 3, -1 },
+ { -1, 6, -18, 114, 35, -10, 3, -1 }, { -1, 6, -19, 111, 41, -12, 3, -1 },
+ { -1, 6, -20, 107, 46, -13, 4, -1 }, { -1, 6, -21, 103, 52, -14, 4, -1 },
+ { -1, 6, -21, 99, 57, -16, 5, -1 }, { -1, 6, -21, 94, 63, -17, 5, -1 },
+ { -1, 6, -20, 89, 68, -18, 5, -1 }, { -1, 6, -20, 84, 73, -19, 6, -1 },
+ { -1, 6, -20, 79, 79, -20, 6, -1 }, { -1, 6, -19, 73, 84, -20, 6, -1 },
+ { -1, 5, -18, 68, 89, -20, 6, -1 }, { -1, 5, -17, 63, 94, -21, 6, -1 },
+ { -1, 5, -16, 57, 99, -21, 6, -1 }, { -1, 4, -14, 52, 103, -21, 6, -1 },
+ { -1, 4, -13, 46, 107, -20, 6, -1 }, { -1, 3, -12, 41, 111, -19, 6, -1 },
+ { -1, 3, -10, 35, 114, -18, 6, -1 }, { -1, 3, -9, 30, 118, -17, 5, -1 },
+ { 0, 2, -8, 25, 120, -15, 5, -1 }, { 0, 2, -6, 20, 123, -14, 4, -1 },
+ { 0, 1, -5, 16, 125, -12, 4, -1 }, { 0, 1, -4, 12, 126, -9, 3, -1 },
+ { 0, 1, -2, 7, 127, -6, 2, -1 }, { 0, 0, -1, 3, 128, -3, 1, 0 }
+};
+
+// Filters for factor of 2 downsampling.
+static const int16_t vp10_down2_symeven_half_filter[] = { 56, 12, -3, -1 };
+static const int16_t vp10_down2_symodd_half_filter[] = { 64, 35, 0, -3 };
+
+static const interp_kernel *choose_interp_filter(int inlength, int outlength) {
+  // Pick the low-pass band from the ratio outlength / inlength, compared in
+  // sixteenths so the whole test stays in integer arithmetic:
+  //   >= 16/16 full-band, >= 13/16 0.875, >= 11/16 0.75, >= 9/16 0.625,
+  //   anything smaller falls back to the 0.5-band kernels.
+  const int scaled_out = outlength * 16;
+  if (scaled_out >= inlength * 16) return filteredinterp_filters1000;
+  if (scaled_out >= inlength * 13) return filteredinterp_filters875;
+  if (scaled_out >= inlength * 11) return filteredinterp_filters750;
+  if (scaled_out >= inlength * 9) return filteredinterp_filters625;
+
+  return filteredinterp_filters500;
+}
+
+static void interpolate(const uint8_t *const input, int inlength,
+ uint8_t *output, int outlength) {
+ const int64_t delta =  // input step per output sample, 32 fractional bits
+ (((uint64_t)inlength << 32) + outlength / 2) / outlength;
+ const int64_t offset =  // input position of output sample 0, same format
+ inlength > outlength
+ ? (((int64_t)(inlength - outlength) << 31) + outlength / 2) /
+ outlength
+ : -(((int64_t)(outlength - inlength) << 31) + outlength / 2) /
+ outlength;
+ uint8_t *optr = output;
+ int x, x1, x2, sum, k, int_pel, sub_pel;
+ int64_t y;
+
+ const interp_kernel *interp_filters =
+ choose_interp_filter(inlength, outlength);
+
+ x = 0;  // find x1: first output whose lowest tap index is >= 0
+ y = offset;
+ while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) {
+ x++;
+ y += delta;
+ }
+ x1 = x;
+ x = outlength - 1;  // find x2: last output whose highest tap is < inlength
+ y = delta * x + offset;
+ while ((y >> INTERP_PRECISION_BITS) + (int64_t)(INTERP_TAPS / 2) >=
+ inlength) {
+ x--;
+ y -= delta;
+ }
+ x2 = x;
+ if (x1 > x2) {  // empty middle span: clamp taps at both ends
+ for (x = 0, y = offset; x < outlength; ++x, y += delta) {
+ const int16_t *filter;
+ int_pel = y >> INTERP_PRECISION_BITS;
+ sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+ filter = interp_filters[sub_pel];
+ sum = 0;
+ for (k = 0; k < INTERP_TAPS; ++k) {
+ const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;
+ sum += filter[k] *
+ input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))];
+ }
+ *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+ }
+ } else {
+ // Initial part: taps may fall left of input[0]; clamp those to index 0.
+ for (x = 0, y = offset; x < x1; ++x, y += delta) {
+ const int16_t *filter;
+ int_pel = y >> INTERP_PRECISION_BITS;
+ sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+ filter = interp_filters[sub_pel];
+ sum = 0;
+ for (k = 0; k < INTERP_TAPS; ++k)
+ sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0
+ ? 0
+ : int_pel - INTERP_TAPS / 2 + 1 + k)];
+ *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+ }
+ // Middle part: every tap is in range, so no clamping is needed.
+ for (; x <= x2; ++x, y += delta) {
+ const int16_t *filter;
+ int_pel = y >> INTERP_PRECISION_BITS;
+ sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+ filter = interp_filters[sub_pel];
+ sum = 0;
+ for (k = 0; k < INTERP_TAPS; ++k)
+ sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k];
+ *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+ }
+ // End part: taps may run past the end; clamp those to inlength - 1.
+ for (; x < outlength; ++x, y += delta) {
+ const int16_t *filter;
+ int_pel = y >> INTERP_PRECISION_BITS;
+ sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+ filter = interp_filters[sub_pel];
+ sum = 0;
+ for (k = 0; k < INTERP_TAPS; ++k)
+ sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= inlength
+ ? inlength - 1
+ : int_pel - INTERP_TAPS / 2 + 1 + k)];
+ *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
+ }
+ }
+}
+
+static void down2_symeven(const uint8_t *const input, int length,
+ uint8_t *output) {
+ // 2:1 decimation of input; actual filter len = 2 * filter_len_half.
+ const int16_t *filter = vp10_down2_symeven_half_filter;
+ const int filter_len_half = sizeof(vp10_down2_symeven_half_filter) / 2;
+ int i, j;
+ uint8_t *optr = output;
+ int l1 = filter_len_half;  // for i in [l1, l2) no tap needs clamping
+ int l2 = (length - filter_len_half);
+ l1 += (l1 & 1);  // round up to even: i advances in steps of 2
+ l2 += (l2 & 1);
+ if (l1 > l2) {
+ // Short input length: clamp the taps at both ends for every output.
+ for (i = 0; i < length; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1));  // rounding term for the shift below
+ for (j = 0; j < filter_len_half; ++j) {
+ sum += (input[(i - j < 0 ? 0 : i - j)] +
+ input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
+ filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ } else {
+ // Initial part: only the left taps (i - j) can leave the buffer.
+ for (i = 0; i < l1; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1));
+ for (j = 0; j < filter_len_half; ++j) {
+ sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ // Middle part: all taps are inside the buffer.
+ for (; i < l2; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1));
+ for (j = 0; j < filter_len_half; ++j) {
+ sum += (input[i - j] + input[i + 1 + j]) * filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ // End part: only the right taps (i + 1 + j) can leave the buffer.
+ for (; i < length; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1));
+ for (j = 0; j < filter_len_half; ++j) {
+ sum += (input[i - j] +
+ input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
+ filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ }
+}
+
+static void down2_symodd(const uint8_t *const input, int length,
+ uint8_t *output) {
+ // 2:1 decimation of input; actual filter len = 2 * filter_len_half - 1.
+ const int16_t *filter = vp10_down2_symodd_half_filter;
+ const int filter_len_half = sizeof(vp10_down2_symodd_half_filter) / 2;
+ int i, j;
+ uint8_t *optr = output;
+ int l1 = filter_len_half - 1;  // for i in [l1, l2) no tap needs clamping
+ int l2 = (length - filter_len_half + 1);
+ l1 += (l1 & 1);  // round up to even: i advances in steps of 2
+ l2 += (l2 & 1);
+ if (l1 > l2) {
+ // Short input length: clamp the taps at both ends for every output.
+ for (i = 0; i < length; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+ for (j = 1; j < filter_len_half; ++j) {
+ sum += (input[(i - j < 0 ? 0 : i - j)] +
+ input[(i + j >= length ? length - 1 : i + j)]) *
+ filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ } else {
+ // Initial part: only the left taps (i - j) can leave the buffer.
+ for (i = 0; i < l1; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+ for (j = 1; j < filter_len_half; ++j) {
+ sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ // Middle part: all taps are inside the buffer.
+ for (; i < l2; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+ for (j = 1; j < filter_len_half; ++j) {
+ sum += (input[i - j] + input[i + j]) * filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ // End part: only the right taps (i + j) can leave the buffer.
+ for (; i < length; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+ for (j = 1; j < filter_len_half; ++j) {
+ sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) *
+ filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel(sum);
+ }
+ }
+}
+
+// Length of a signal after `steps` successive 2:1 decimations (rounding up).
+static int get_down2_length(int length, int steps) {
+  for (; steps > 0; --steps) length = (length + 1) >> 1;
+  return length;
+}
+
+// Number of 2:1 decimations that keep the length >= out_length.  A length of 1
+// decimates to itself, so counting stops there; without that guard the loop
+// would never terminate for out_length <= 1.
+static int get_down2_steps(int in_length, int out_length) {
+  int steps = 0;
+  for (; in_length > 1 && get_down2_length(in_length, 1) >= out_length; ++steps)
+    in_length = get_down2_length(in_length, 1);
+  return steps;
+}
+
+static void resize_multistep(const uint8_t *const input, int length,
+                             uint8_t *output, int olength, uint8_t *otmp) {
+  // Resample input[0..length) into output[0..olength): repeated 2:1
+  // decimation through the scratch buffer otmp, then interpolate() if needed.
+  const uint8_t *stage_in = input;
+  uint8_t *stage_out = NULL;
+  uint8_t *otmp2;
+  int cur_len = length;
+  int steps, s;
+
+  if (length == olength) {
+    memcpy(output, input, sizeof(output[0]) * length);
+    return;
+  }
+  steps = get_down2_steps(length, olength);
+  if (steps <= 0) {
+    interpolate(input, length, output, olength);
+    return;
+  }
+  assert(otmp != NULL);
+  otmp2 = otmp + get_down2_length(length, 1);  // second ping-pong buffer
+  for (s = 0; s < steps; ++s) {
+    const int next_len = get_down2_length(cur_len, 1);
+    // The final stage writes straight to output when its length matches.
+    if (s == steps - 1 && next_len == olength)
+      stage_out = output;
+    else
+      stage_out = (s & 1) ? otmp2 : otmp;
+    if (cur_len & 1)
+      down2_symodd(stage_in, cur_len, stage_out);
+    else
+      down2_symeven(stage_in, cur_len, stage_out);
+    stage_in = stage_out;
+    cur_len = next_len;
+  }
+  if (cur_len != olength) interpolate(stage_out, cur_len, output, olength);
+}
+
+static void fill_col_to_arr(uint8_t *img, int stride, int len, uint8_t *arr) {
+  // Gather a column: copy len samples spaced `stride` apart from img into arr.
+  const uint8_t *col = img;
+  int remaining;
+  for (remaining = len; remaining > 0; --remaining, col += stride) {
+    *arr++ = *col;
+  }
+}
+
+static void fill_arr_to_col(uint8_t *img, int stride, int len, uint8_t *arr) {
+  // Scatter a column: write arr[0..len) into img at intervals of `stride`.
+  uint8_t *col = img;
+  int remaining;
+  for (remaining = len; remaining > 0; --remaining, col += stride) {
+    *col = *arr++;
+  }
+}
+
+void vp10_resize_plane(const uint8_t *const input, int height, int width,
+                       int in_stride, uint8_t *output, int height2, int width2,
+                       int out_stride) {
+  // Separable resize: rows first (into intbuf), then columns (into output).
+  uint8_t *intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height);
+  uint8_t *tmpbuf =
+      (uint8_t *)malloc(sizeof(uint8_t) * (width < height ? height : width));
+  uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * height);
+  uint8_t *arrbuf2 = (uint8_t *)malloc(sizeof(uint8_t) * height2);
+  int row, col;
+  if (!intbuf || !tmpbuf || !arrbuf || !arrbuf2) goto Error;  // output untouched
+  assert(width > 0);
+  assert(height > 0);
+  assert(width2 > 0);
+  assert(height2 > 0);
+  for (row = 0; row < height; ++row)
+    resize_multistep(input + in_stride * row, width, intbuf + width2 * row,
+                     width2, tmpbuf);
+  for (col = 0; col < width2; ++col) {
+    fill_col_to_arr(intbuf + col, width2, height, arrbuf);
+    resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf);
+    fill_arr_to_col(output + col, out_stride, height2, arrbuf2);
+  }
+
+Error:
+  free(intbuf);
+  free(tmpbuf);
+  free(arrbuf);
+  free(arrbuf2);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_interpolate(const uint16_t *const input, int inlength,
+ uint16_t *output, int outlength, int bd) {  // Resamples input[0..inlength) to outlength samples.
+ const int64_t delta =  // input step per output sample, scaled by 2^32
+ (((uint64_t)inlength << 32) + outlength / 2) / outlength;
+ const int64_t offset =  // starting position: about (inlength - outlength) / (2 * outlength), scaled by 2^32
+ inlength > outlength
+ ? (((int64_t)(inlength - outlength) << 31) + outlength / 2) /
+ outlength
+ : -(((int64_t)(outlength - inlength) << 31) + outlength / 2) /
+ outlength;
+ uint16_t *optr = output;
+ int x, x1, x2, sum, k, int_pel, sub_pel;
+ int64_t y;  // current fixed-point input position
+
+ const interp_kernel *interp_filters =  // kernel bank picked from the length ratio (helper not visible here)
+ choose_interp_filter(inlength, outlength);
+
+ x = 0;
+ y = offset;
+ while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) {  // skip outputs whose first tap index is negative
+ x++;
+ y += delta;
+ }
+ x1 = x;  // first output index that needs no clamping on the left
+ x = outlength - 1;
+ y = delta * x + offset;
+ while ((y >> INTERP_PRECISION_BITS) + (int64_t)(INTERP_TAPS / 2) >=
+ inlength) {  // skip outputs whose last tap index reaches inlength
+ x--;
+ y -= delta;
+ }
+ x2 = x;  // last output index that needs no clamping on the right
+ if (x1 > x2) {  // No clamp-free range: clamp both sides for every output.
+ for (x = 0, y = offset; x < outlength; ++x, y += delta) {
+ const int16_t *filter;
+ int_pel = y >> INTERP_PRECISION_BITS;  // integer input position
+ sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;  // fractional phase selects the kernel
+ filter = interp_filters[sub_pel];
+ sum = 0;
+ for (k = 0; k < INTERP_TAPS; ++k) {
+ const int pk = int_pel - INTERP_TAPS / 2 + 1 + k;  // tap index before clamping
+ sum += filter[k] *
+ input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))];
+ }
+ *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+ }
+ } else {
+ // Initial part: taps may fall before index 0, so clamp on the left only.
+ for (x = 0, y = offset; x < x1; ++x, y += delta) {
+ const int16_t *filter;
+ int_pel = y >> INTERP_PRECISION_BITS;
+ sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+ filter = interp_filters[sub_pel];
+ sum = 0;
+ for (k = 0; k < INTERP_TAPS; ++k)
+ sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0
+ ? 0
+ : int_pel - INTERP_TAPS / 2 + 1 + k)];
+ *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+ }
+ // Middle part: every tap is in range, so no clamping is needed.
+ for (; x <= x2; ++x, y += delta) {
+ const int16_t *filter;
+ int_pel = y >> INTERP_PRECISION_BITS;
+ sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+ filter = interp_filters[sub_pel];
+ sum = 0;
+ for (k = 0; k < INTERP_TAPS; ++k)
+ sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k];
+ *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+ }
+ // End part: taps may run past the last sample, so clamp on the right only.
+ for (; x < outlength; ++x, y += delta) {
+ const int16_t *filter;
+ int_pel = y >> INTERP_PRECISION_BITS;
+ sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK;
+ filter = interp_filters[sub_pel];
+ sum = 0;
+ for (k = 0; k < INTERP_TAPS; ++k)
+ sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= inlength
+ ? inlength - 1
+ : int_pel - INTERP_TAPS / 2 + 1 + k)];
+ *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
+ }
+ }
+}
+
+static void highbd_down2_symeven(const uint16_t *const input, int length,
+ uint16_t *output, int bd) {  // Writes one filtered sample per input pair (i, i + 1).
+ // Actual filter len = 2 * filter_len_half.
+ static const int16_t *filter = vp10_down2_symeven_half_filter;
+ const int filter_len_half = sizeof(vp10_down2_symeven_half_filter) / 2;  // byte size / 2: tap count for 2-byte taps
+ int i, j;
+ uint16_t *optr = output;
+ int l1 = filter_len_half;  // first i whose left taps need no clamping
+ int l2 = (length - filter_len_half);  // first i whose right taps need clamping
+ l1 += (l1 & 1);  // round both bounds up to even, since i advances by 2
+ l2 += (l2 & 1);
+ if (l1 > l2) {
+ // Short input length: clamp tap indices on both sides.
+ for (i = 0; i < length; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1));  // rounding offset for the shift below
+ for (j = 0; j < filter_len_half; ++j) {
+ sum += (input[(i - j < 0 ? 0 : i - j)] +
+ input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
+ filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel_highbd(sum, bd);
+ }
+ } else {
+ // Initial part: clamp the left taps at index 0.
+ for (i = 0; i < l1; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1));
+ for (j = 0; j < filter_len_half; ++j) {
+ sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel_highbd(sum, bd);
+ }
+ // Middle part: all taps are in range.
+ for (; i < l2; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1));
+ for (j = 0; j < filter_len_half; ++j) {
+ sum += (input[i - j] + input[i + 1 + j]) * filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel_highbd(sum, bd);
+ }
+ // End part: clamp the right taps at index length - 1.
+ for (; i < length; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1));
+ for (j = 0; j < filter_len_half; ++j) {
+ sum += (input[i - j] +
+ input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) *
+ filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel_highbd(sum, bd);
+ }
+ }
+}
+
+static void highbd_down2_symodd(const uint16_t *const input, int length,
+ uint16_t *output, int bd) {  // Writes one filtered sample per even input index i.
+ // Actual filter len = 2 * filter_len_half - 1.
+ static const int16_t *filter = vp10_down2_symodd_half_filter;
+ const int filter_len_half = sizeof(vp10_down2_symodd_half_filter) / 2;  // byte size / 2: tap count for 2-byte taps
+ int i, j;
+ uint16_t *optr = output;
+ int l1 = filter_len_half - 1;  // first i whose left taps need no clamping
+ int l2 = (length - filter_len_half + 1);  // first i whose right taps need clamping
+ l1 += (l1 & 1);  // round both bounds up to even, since i advances by 2
+ l2 += (l2 & 1);
+ if (l1 > l2) {
+ // Short input length: clamp tap indices on both sides.
+ for (i = 0; i < length; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];  // rounding offset plus centre tap
+ for (j = 1; j < filter_len_half; ++j) {
+ sum += (input[(i - j < 0 ? 0 : i - j)] +
+ input[(i + j >= length ? length - 1 : i + j)]) *
+ filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel_highbd(sum, bd);
+ }
+ } else {
+ // Initial part: clamp the left taps at index 0.
+ for (i = 0; i < l1; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+ for (j = 1; j < filter_len_half; ++j) {
+ sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel_highbd(sum, bd);
+ }
+ // Middle part: all taps are in range.
+ for (; i < l2; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+ for (j = 1; j < filter_len_half; ++j) {
+ sum += (input[i - j] + input[i + j]) * filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel_highbd(sum, bd);
+ }
+ // End part: clamp the right taps at index length - 1.
+ for (; i < length; i += 2) {
+ int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0];
+ for (j = 1; j < filter_len_half; ++j) {
+ sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) *
+ filter[j];
+ }
+ sum >>= FILTER_BITS;
+ *optr++ = clip_pixel_highbd(sum, bd);
+ }
+ }
+}
+
+static void highbd_resize_multistep(const uint16_t *const input, int length,
+ uint16_t *output, int olength,
+ uint16_t *otmp, int bd) {  // Resizes one line from length to olength samples.
+ int steps;
+ if (length == olength) {  // Same size: plain copy, no filtering.
+ memcpy(output, input, sizeof(output[0]) * length);
+ return;
+ }
+ steps = get_down2_steps(length, olength);  // number of halving passes (helper not visible here)
+
+ if (steps > 0) {
+ int s;
+ uint16_t *out = NULL;
+ uint16_t *otmp2;
+ int filteredlength = length;
+
+ assert(otmp != NULL);
+ otmp2 = otmp + get_down2_length(length, 1);  // second scratch area, placed after the first pass's output
+ for (s = 0; s < steps; ++s) {
+ const int proj_filteredlength = get_down2_length(filteredlength, 1);  // length after this pass
+ const uint16_t *const in = (s == 0 ? input : out);  // later passes read the previous pass's output
+ if (s == steps - 1 && proj_filteredlength == olength)
+ out = output;  // last pass lands exactly on olength: write straight to output
+ else
+ out = (s & 1 ? otmp2 : otmp);  // alternate between the two scratch areas
+ if (filteredlength & 1)
+ highbd_down2_symodd(in, filteredlength, out, bd);
+ else
+ highbd_down2_symeven(in, filteredlength, out, bd);
+ filteredlength = proj_filteredlength;
+ }
+ if (filteredlength != olength) {  // halving did not land on olength: interpolate the rest
+ highbd_interpolate(out, filteredlength, output, olength, bd);
+ }
+ } else {  // no halving pass applies: interpolate directly
+ highbd_interpolate(input, length, output, olength, bd);
+ }
+}
+
+static void highbd_fill_col_to_arr(uint16_t *img, int stride, int len,
+ uint16_t *arr) {  // Gathers len samples, stride elements apart in img, into arr.
+ int i;
+ uint16_t *iptr = img;  // strided read cursor
+ uint16_t *aptr = arr;  // contiguous write cursor
+ for (i = 0; i < len; ++i, iptr += stride) {
+ *aptr++ = *iptr;
+ }
+}
+
+static void highbd_fill_arr_to_col(uint16_t *img, int stride, int len,
+ uint16_t *arr) {  // Scatters arr[0..len) into img, one sample every stride elements.
+ int i;
+ uint16_t *iptr = img;  // strided write cursor
+ uint16_t *aptr = arr;  // contiguous read cursor
+ for (i = 0; i < len; ++i, iptr += stride) {
+ *iptr = *aptr++;
+ }
+}
+
+void vp10_highbd_resize_plane(const uint8_t *const input, int height, int width,
+ int in_stride, uint8_t *output, int height2,
+ int width2, int out_stride, int bd) {  // Resizes a width x height plane of 16-bit samples to width2 x height2.
+ int i;
+ uint16_t *intbuf = (uint16_t *)malloc(sizeof(uint16_t) * width2 * height);  // row-resized image, width2 x height
+ uint16_t *tmpbuf =  // scratch handed to highbd_resize_multistep
+ (uint16_t *)malloc(sizeof(uint16_t) * (width < height ? height : width));
+ uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * height);  // one column before vertical resize
+ uint16_t *arrbuf2 = (uint16_t *)malloc(sizeof(uint16_t) * height2);  // one column after vertical resize
+ if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
+ goto Error;  // Allocation failure: output is left unwritten and nothing is reported.
+ for (i = 0; i < height; ++i) {  // Pass 1: resize each row. NOTE: no dimension asserts here, unlike vp10_resize_plane.
+ highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,  // presumably yields the uint16_t view of the row; confirm macro
+ intbuf + width2 * i, width2, tmpbuf, bd);
+ }
+ for (i = 0; i < width2; ++i) {  // Pass 2: resize each column from height to height2.
+ highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf);
+ highbd_resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf, bd);
+ highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2,
+ arrbuf2);
+ }
+
+Error:  // Shared exit for success and failure; free(NULL) is a no-op.
+ free(intbuf);
+ free(tmpbuf);
+ free(arrbuf);
+ free(arrbuf2);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+void vp10_resize_frame420(const uint8_t *const y, int y_stride,
+ const uint8_t *const u, const uint8_t *const v,
+ int uv_stride, int height, int width, uint8_t *oy,
+ int oy_stride, uint8_t *ou, uint8_t *ov,
+ int ouv_stride, int oheight, int owidth) {  // Resizes Y, U and V; chroma uses half of both dimensions.
+ vp10_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
+ vp10_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2,  // integer division: odd sizes round down
+ owidth / 2, ouv_stride);
+ vp10_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2,
+ owidth / 2, ouv_stride);
+}
+
+void vp10_resize_frame422(const uint8_t *const y, int y_stride,
+ const uint8_t *const u, const uint8_t *const v,
+ int uv_stride, int height, int width, uint8_t *oy,
+ int oy_stride, uint8_t *ou, uint8_t *ov,
+ int ouv_stride, int oheight, int owidth) {  // Resizes Y, U and V; chroma uses half width, full height.
+ vp10_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
+ vp10_resize_plane(u, height, width / 2, uv_stride, ou, oheight, owidth / 2,  // integer division: odd widths round down
+ ouv_stride);
+ vp10_resize_plane(v, height, width / 2, uv_stride, ov, oheight, owidth / 2,
+ ouv_stride);
+}
+
+void vp10_resize_frame444(const uint8_t *const y, int y_stride,
+ const uint8_t *const u, const uint8_t *const v,
+ int uv_stride, int height, int width, uint8_t *oy,
+ int oy_stride, uint8_t *ou, uint8_t *ov,
+ int ouv_stride, int oheight, int owidth) {  // Resizes Y, U and V; all three planes use the full dimensions.
+ vp10_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride);
+ vp10_resize_plane(u, height, width, uv_stride, ou, oheight, owidth,  // chroma differs from luma only in stride
+ ouv_stride);
+ vp10_resize_plane(v, height, width, uv_stride, ov, oheight, owidth,
+ ouv_stride);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_resize_frame420(const uint8_t *const y, int y_stride,
+ const uint8_t *const u, const uint8_t *const v,
+ int uv_stride, int height, int width,
+ uint8_t *oy, int oy_stride, uint8_t *ou,
+ uint8_t *ov, int ouv_stride, int oheight,
+ int owidth, int bd) {  // High-bitdepth variant: chroma uses half of both dimensions.
+ vp10_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
+ oy_stride, bd);
+ vp10_highbd_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2,  // integer division: odd sizes round down
+ owidth / 2, ouv_stride, bd);
+ vp10_highbd_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2,
+ owidth / 2, ouv_stride, bd);
+}
+
+void vp10_highbd_resize_frame422(const uint8_t *const y, int y_stride,
+ const uint8_t *const u, const uint8_t *const v,
+ int uv_stride, int height, int width,
+ uint8_t *oy, int oy_stride, uint8_t *ou,
+ uint8_t *ov, int ouv_stride, int oheight,
+ int owidth, int bd) {  // High-bitdepth variant: chroma uses half width, full height.
+ vp10_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
+ oy_stride, bd);
+ vp10_highbd_resize_plane(u, height, width / 2, uv_stride, ou, oheight,  // integer division: odd widths round down
+ owidth / 2, ouv_stride, bd);
+ vp10_highbd_resize_plane(v, height, width / 2, uv_stride, ov, oheight,
+ owidth / 2, ouv_stride, bd);
+}
+
+void vp10_highbd_resize_frame444(const uint8_t *const y, int y_stride,
+ const uint8_t *const u, const uint8_t *const v,
+ int uv_stride, int height, int width,
+ uint8_t *oy, int oy_stride, uint8_t *ou,
+ uint8_t *ov, int ouv_stride, int oheight,
+ int owidth, int bd) {  // High-bitdepth variant: all three planes use the full dimensions.
+ vp10_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth,
+ oy_stride, bd);
+ vp10_highbd_resize_plane(u, height, width, uv_stride, ou, oheight, owidth,  // chroma differs from luma only in stride
+ ouv_stride, bd);
+ vp10_highbd_resize_plane(v, height, width, uv_stride, ov, oheight, owidth,
+ ouv_stride, bd);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/av1/encoder/resize.h b/av1/encoder/resize.h
new file mode 100644
index 0000000..8fe1d1b
--- /dev/null
+++ b/av1/encoder/resize.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_RESIZE_H_
+#define VP10_ENCODER_RESIZE_H_
+
+#include <stdio.h>
+#include "aom/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_resize_plane(const uint8_t *const input, int height, int width,  // one plane: width x height -> width2 x height2
+ int in_stride, uint8_t *output, int height2, int width2,
+ int out_stride);
+void vp10_resize_frame420(const uint8_t *const y, int y_stride,  // three planes; chroma at half width and half height
+ const uint8_t *const u, const uint8_t *const v,
+ int uv_stride, int height, int width, uint8_t *oy,
+ int oy_stride, uint8_t *ou, uint8_t *ov,
+ int ouv_stride, int oheight, int owidth);
+void vp10_resize_frame422(const uint8_t *const y, int y_stride,  // three planes; chroma at half width, full height
+ const uint8_t *const u, const uint8_t *const v,
+ int uv_stride, int height, int width, uint8_t *oy,
+ int oy_stride, uint8_t *ou, uint8_t *ov,
+ int ouv_stride, int oheight, int owidth);
+void vp10_resize_frame444(const uint8_t *const y, int y_stride,  // three planes; chroma at full size
+ const uint8_t *const u, const uint8_t *const v,
+ int uv_stride, int height, int width, uint8_t *oy,
+ int oy_stride, uint8_t *ou, uint8_t *ov,
+ int ouv_stride, int oheight, int owidth);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_resize_plane(const uint8_t *const input, int height, int width,  // one plane; bd is forwarded to pixel clipping
+ int in_stride, uint8_t *output, int height2,
+ int width2, int out_stride, int bd);
+void vp10_highbd_resize_frame420(const uint8_t *const y, int y_stride,  // three planes; chroma at half width and half height
+ const uint8_t *const u, const uint8_t *const v,
+ int uv_stride, int height, int width,
+ uint8_t *oy, int oy_stride, uint8_t *ou,
+ uint8_t *ov, int ouv_stride, int oheight,
+ int owidth, int bd);
+void vp10_highbd_resize_frame422(const uint8_t *const y, int y_stride,  // three planes; chroma at half width, full height
+ const uint8_t *const u, const uint8_t *const v,
+ int uv_stride, int height, int width,
+ uint8_t *oy, int oy_stride, uint8_t *ou,
+ uint8_t *ov, int ouv_stride, int oheight,
+ int owidth, int bd);
+void vp10_highbd_resize_frame444(const uint8_t *const y, int y_stride,  // three planes; chroma at full size
+ const uint8_t *const u, const uint8_t *const v,
+ int uv_stride, int height, int width,
+ uint8_t *oy, int oy_stride, uint8_t *ou,
+ uint8_t *ov, int ouv_stride, int oheight,
+ int owidth, int bd);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_RESIZE_H_
diff --git a/av1/encoder/segmentation.c b/av1/encoder/segmentation.c
new file mode 100644
index 0000000..5ac1283
--- /dev/null
+++ b/av1/encoder/segmentation.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+
+#include "aom_mem/vpx_mem.h"
+
+#include "av1/common/pred_common.h"
+#include "av1/common/tile_common.h"
+
+#include "av1/encoder/cost.h"
+#include "av1/encoder/segmentation.h"
+#include "av1/encoder/subexp.h"
+
+void vp10_enable_segmentation(struct segmentation *seg) {  // Sets all three flags below to 1.
+ seg->enabled = 1;
+ seg->update_map = 1;
+ seg->update_data = 1;
+}
+
+void vp10_disable_segmentation(struct segmentation *seg) {  // Clears all three flags below to 0.
+ seg->enabled = 0;
+ seg->update_map = 0;
+ seg->update_data = 0;
+}
+
+void vp10_set_segment_data(struct segmentation *seg, signed char *feature_data,
+ unsigned char abs_delta) {  // Stores abs_delta and overwrites the whole feature_data table.
+ seg->abs_delta = abs_delta;
+
+ memcpy(seg->feature_data, feature_data, sizeof(seg->feature_data));  // NOTE(review): reads sizeof(seg->feature_data) bytes from a signed char buffer; confirm element types and caller buffer size match
+}
+void vp10_disable_segfeature(struct segmentation *seg, int segment_id,
+ SEG_LVL_FEATURES feature_id) {  // Clears bit feature_id in the segment's feature mask.
+ seg->feature_mask[segment_id] &= ~(1 << feature_id);
+}
+
+void vp10_clear_segdata(struct segmentation *seg, int segment_id,
+ SEG_LVL_FEATURES feature_id) {  // Zeroes one feature value; the feature mask is not touched.
+ seg->feature_data[segment_id][feature_id] = 0;
+}
+
+// Derives the 7 node probabilities of the segment tree from 8 segment counts.
+static void calc_segtree_probs(unsigned *segcounts,
+ vpx_prob *segment_tree_probs,
+ const vpx_prob *cur_tree_probs) {
+ // Pair up the leaf counts (cc), then pair those sums again (ccc).
+ const unsigned cc[4] = { segcounts[0] + segcounts[1],
+ segcounts[2] + segcounts[3],
+ segcounts[4] + segcounts[5],
+ segcounts[6] + segcounts[7] };
+ const unsigned ccc[2] = { cc[0] + cc[1], cc[2] + cc[3] };
+ int i;
+
+ segment_tree_probs[0] = get_binary_prob(ccc[0], ccc[1]);  // root: segments 0-3 vs 4-7
+ segment_tree_probs[1] = get_binary_prob(cc[0], cc[1]);  // 0-1 vs 2-3
+ segment_tree_probs[2] = get_binary_prob(cc[2], cc[3]);  // 4-5 vs 6-7
+ segment_tree_probs[3] = get_binary_prob(segcounts[0], segcounts[1]);  // leaves: 0 vs 1
+ segment_tree_probs[4] = get_binary_prob(segcounts[2], segcounts[3]);  // 2 vs 3
+ segment_tree_probs[5] = get_binary_prob(segcounts[4], segcounts[5]);  // 4 vs 5
+ segment_tree_probs[6] = get_binary_prob(segcounts[6], segcounts[7]);  // 6 vs 7
+
+ for (i = 0; i < 7; i++) {  // Pass each node's count pair and current prob to the savings search.
+ const unsigned *ct =  // the same count pair that produced segment_tree_probs[i] above
+ i == 0 ? ccc : i < 3 ? cc + (i & 2) : segcounts + (i - 3) * 2;
+ vp10_prob_diff_update_savings_search(
+ ct, cur_tree_probs[i], &segment_tree_probs[i], DIFF_UPDATE_PROB);
+ }
+}
+
+// Estimates the cost of coding the segment map from the counts and node probs.
+static int cost_segmap(unsigned *segcounts, vpx_prob *probs) {
+ const int c01 = segcounts[0] + segcounts[1];  // totals for each subtree
+ const int c23 = segcounts[2] + segcounts[3];
+ const int c45 = segcounts[4] + segcounts[5];
+ const int c67 = segcounts[6] + segcounts[7];
+ const int c0123 = c01 + c23;
+ const int c4567 = c45 + c67;
+
+ // Cost the top node of the tree
+ int cost = c0123 * vp10_cost_zero(probs[0]) + c4567 * vp10_cost_one(probs[0]);
+
+ // Cost subsequent levels; a subtree with a zero total is skipped.
+ if (c0123 > 0) {
+ cost += c01 * vp10_cost_zero(probs[1]) + c23 * vp10_cost_one(probs[1]);
+
+ if (c01 > 0)
+ cost += segcounts[0] * vp10_cost_zero(probs[3]) +
+ segcounts[1] * vp10_cost_one(probs[3]);
+ if (c23 > 0)
+ cost += segcounts[2] * vp10_cost_zero(probs[4]) +
+ segcounts[3] * vp10_cost_one(probs[4]);
+ }
+
+ if (c4567 > 0) {
+ cost += c45 * vp10_cost_zero(probs[2]) + c67 * vp10_cost_one(probs[2]);
+
+ if (c45 > 0)
+ cost += segcounts[4] * vp10_cost_zero(probs[5]) +
+ segcounts[5] * vp10_cost_one(probs[5]);
+ if (c67 > 0)
+ cost += segcounts[6] * vp10_cost_zero(probs[6]) +
+ segcounts[7] * vp10_cost_one(probs[6]);
+ }
+
+ return cost;
+}
+
+static void count_segs(const VP10_COMMON *cm, MACROBLOCKD *xd,
+ const TileInfo *tile, MODE_INFO **mi,
+ unsigned *no_pred_segcounts,
+ unsigned (*temporal_predictor_count)[2],
+ unsigned *t_unpred_seg_counts, int bw, int bh,
+ int mi_row, int mi_col) {  // Accumulates the segment statistics for one block.
+ int segment_id;
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;  // block starts outside the frame
+
+ xd->mi = mi;  // side effect: xd now points at this block
+ segment_id = xd->mi[0]->mbmi.segment_id;
+
+ set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
+
+ // Count the number of hits on each segment with no prediction
+ no_pred_segcounts[segment_id]++;
+
+ // Temporal prediction not allowed on key frames
+ if (cm->frame_type != KEY_FRAME) {
+ const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+ // Test to see if the segment id matches the predicted value.
+ const int pred_segment_id =
+ get_segment_id(cm, cm->last_frame_seg_map, bsize, mi_row, mi_col);
+ const int pred_flag = pred_segment_id == segment_id;  // 1 when the prediction is correct
+ const int pred_context = vp10_get_pred_context_seg_id(xd);
+
+ // Store the prediction status for this mb and update counts
+ // as appropriate
+ xd->mi[0]->mbmi.seg_id_predicted = pred_flag;  // writes into the mode info
+ temporal_predictor_count[pred_context][pred_flag]++;
+
+ // Update the "unpredicted" segment count
+ if (!pred_flag) t_unpred_seg_counts[segment_id]++;
+ }
+}
+
+static void count_segs_sb(const VP10_COMMON *cm, MACROBLOCKD *xd,
+ const TileInfo *tile, MODE_INFO **mi,
+ unsigned *no_pred_segcounts,
+ unsigned (*temporal_predictor_count)[2],
+ unsigned *t_unpred_seg_counts, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {  // Walks the partitioning of one bsize block, calling count_segs per sub-block.
+ const int mis = cm->mi_stride;
+ const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2;  // block size and half size, in units of the lookup table
+#if CONFIG_EXT_PARTITION_TYPES
+ PARTITION_TYPE partition;
+#else
+ int bw, bh;
+#endif // CONFIG_EXT_PARTITION_TYPES
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;  // block starts outside the frame
+
+#if CONFIG_EXT_PARTITION_TYPES
+ if (bsize == BLOCK_8X8)
+ partition = PARTITION_NONE;  // 8x8 is always counted as a single block here
+ else
+ partition = get_partition(cm, mi_row, mi_col, bsize);
+ switch (partition) {  // count_segs takes the sub-block size as (bw, bh)
+ case PARTITION_NONE:
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, bs, bs, mi_row, mi_col);
+ break;
+ case PARTITION_HORZ:  // two bs x hbs blocks, top and bottom
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, bs, hbs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, bs, hbs,
+ mi_row + hbs, mi_col);
+ break;
+ case PARTITION_VERT:  // two hbs x bs blocks, left and right
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row,
+ mi_col + hbs);
+ break;
+ case PARTITION_HORZ_A:  // top-left and top-right quarters, then the bottom half
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, hbs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
+ mi_row, mi_col + hbs);
+ count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, bs, hbs,
+ mi_row + hbs, mi_col);
+ break;
+ case PARTITION_HORZ_B:  // top half, then bottom-left and bottom-right quarters
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, bs, hbs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
+ mi_row + hbs, mi_col);
+ count_segs(cm, xd, tile, mi + hbs + hbs * mis, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
+ mi_row + hbs, mi_col + hbs);
+ break;
+ case PARTITION_VERT_A:  // top-left and bottom-left quarters, then the right half
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, hbs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
+ mi_row + hbs, mi_col);
+ count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row,
+ mi_col + hbs);
+ break;
+ case PARTITION_VERT_B:  // left half, then top-right and bottom-right quarters
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
+ mi_row, mi_col + hbs);
+ count_segs(cm, xd, tile, mi + hbs + hbs * mis, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, hbs,
+ mi_row + hbs, mi_col + hbs);
+ break;
+ case PARTITION_SPLIT: {  // recurse into the four quadrants at the split sub-size
+ const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
+ int n;
+
+ assert(num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type] < bs &&
+ num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type] < bs);
+
+ for (n = 0; n < 4; n++) {
+ const int mi_dc = hbs * (n & 1);  // column offset: 0 or hbs
+ const int mi_dr = hbs * (n >> 1);  // row offset: 0 or hbs
+
+ count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc], no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts,
+ mi_row + mi_dr, mi_col + mi_dc, subsize);
+ }
+ } break;
+ default: assert(0);  // any other partition value is treated as a programming error
+ }
+#else
+ bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type];  // size of the block actually coded at this position
+ bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type];
+
+ if (bw == bs && bh == bs) {  // no split: one bs x bs block
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, bs, bs, mi_row, mi_col);
+ } else if (bw == bs && bh < bs) {  // horizontal split: top and bottom halves
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, bs, hbs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, bs, hbs,
+ mi_row + hbs, mi_col);
+ } else if (bw < bs && bh == bs) {  // vertical split: left and right halves
+ count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
+ t_unpred_seg_counts, hbs, bs, mi_row, mi_col);
+ count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row,
+ mi_col + hbs);
+ } else {  // smaller in both dimensions: recurse into the four quadrants
+ const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize];
+ int n;
+
+ assert(bw < bs && bh < bs);
+
+ for (n = 0; n < 4; n++) {
+ const int mi_dc = hbs * (n & 1);  // column offset: 0 or hbs
+ const int mi_dr = hbs * (n >> 1);  // row offset: 0 or hbs
+
+ count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc], no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts,
+ mi_row + mi_dr, mi_col + mi_dc, subsize);
+ }
+ }
+#endif // CONFIG_EXT_PARTITION_TYPES
+}
+
+void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd) {  // Sets seg->temporal_update to the cheaper map coding method.
+ struct segmentation *seg = &cm->seg;
+ struct segmentation_probs *segp = &cm->fc->seg;
+
+ int no_pred_cost;
+ int t_pred_cost = INT_MAX;  // stays INT_MAX when temporal prediction is not evaluated
+
+ int i, tile_col, tile_row, mi_row, mi_col;
+
+ unsigned(*temporal_predictor_count)[2] = cm->counts.seg.pred;  // the three count arrays alias cm->counts.seg
+ unsigned *no_pred_segcounts = cm->counts.seg.tree_total;
+ unsigned *t_unpred_seg_counts = cm->counts.seg.tree_mispred;
+
+ vpx_prob no_pred_tree[SEG_TREE_PROBS];
+ vpx_prob t_pred_tree[SEG_TREE_PROBS];
+ vpx_prob t_nopred_prob[PREDICTION_PROBS];
+
+ (void)xd;  // redundant: xd is passed to count_segs_sb below
+
+ // We are about to recompute all the segment counts, so zero the accumulators.
+ vp10_zero(cm->counts.seg);
+
+ // First of all generate stats regarding how well the last segment map
+ // predicts this one
+ for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) {
+ TileInfo tile_info;
+ vp10_tile_set_row(&tile_info, cm, tile_row);
+ for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) {
+ MODE_INFO **mi_ptr;
+ vp10_tile_set_col(&tile_info, cm, tile_col);
+ mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride +  // top-left mode info of this tile
+ tile_info.mi_col_start;
+ for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;  // step through the tile in cm->mib_size units
+ mi_row += cm->mib_size, mi_ptr += cm->mib_size * cm->mi_stride) {
+ MODE_INFO **mi = mi_ptr;
+ for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
+ mi_col += cm->mib_size, mi += cm->mib_size) {
+ count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts,
+ temporal_predictor_count, t_unpred_seg_counts, mi_row,
+ mi_col, cm->sb_size);
+ }
+ }
+ }
+ }
+
+ // Work out probability tree for coding segments without prediction
+ // and the cost.
+ calc_segtree_probs(no_pred_segcounts, no_pred_tree, segp->tree_probs);
+ no_pred_cost = cost_segmap(no_pred_segcounts, no_pred_tree);
+
+ // Temporal prediction is skipped on intra-only frames and in error-resilient mode.
+ if (!frame_is_intra_only(cm) && !cm->error_resilient_mode) {
+ // Work out probability tree for coding those segments not
+ // predicted using the temporal method and the cost.
+ calc_segtree_probs(t_unpred_seg_counts, t_pred_tree, segp->tree_probs);
+ t_pred_cost = cost_segmap(t_unpred_seg_counts, t_pred_tree);
+
+ // Add in the cost of the signaling for each prediction context.
+ for (i = 0; i < PREDICTION_PROBS; i++) {
+ const int count0 = temporal_predictor_count[i][0];  // mispredicted blocks in context i
+ const int count1 = temporal_predictor_count[i][1];  // correctly predicted blocks in context i
+
+ t_nopred_prob[i] = get_binary_prob(count0, count1);
+ vp10_prob_diff_update_savings_search(temporal_predictor_count[i],
+ segp->pred_probs[i],
+ &t_nopred_prob[i], DIFF_UPDATE_PROB);
+
+ // Add in the predictor signaling cost
+ t_pred_cost += count0 * vp10_cost_zero(t_nopred_prob[i]) +
+ count1 * vp10_cost_one(t_nopred_prob[i]);
+ }
+ }
+
+ // Now choose which coding method to use; a tie keeps temporal_update at 0.
+ if (t_pred_cost < no_pred_cost) {
+ assert(!cm->error_resilient_mode);
+ seg->temporal_update = 1;
+ } else {
+ seg->temporal_update = 0;
+ }
+}
+
+void vp10_reset_segment_features(VP10_COMMON *cm) {  // Clears the three flags below, then calls vp10_clearall_segfeatures.
+ struct segmentation *seg = &cm->seg;
+
+ // Set up default state for MB feature flags
+ seg->enabled = 0;
+ seg->update_map = 0;
+ seg->update_data = 0;
+ vp10_clearall_segfeatures(seg);  // defined elsewhere; not visible in this file
+}
diff --git a/av1/encoder/segmentation.h b/av1/encoder/segmentation.h
new file mode 100644
index 0000000..3c79bd1
--- /dev/null
+++ b/av1/encoder/segmentation.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_SEGMENTATION_H_
+#define VP10_ENCODER_SEGMENTATION_H_
+
+#include "av1/common/blockd.h"
+#include "av1/encoder/encoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_enable_segmentation(struct segmentation *seg);  // sets enabled, update_map, update_data to 1
+void vp10_disable_segmentation(struct segmentation *seg);  // clears the same three flags
+
+void vp10_disable_segfeature(struct segmentation *seg, int segment_id,  // clears one bit of feature_mask[segment_id]
+ SEG_LVL_FEATURES feature_id);
+void vp10_clear_segdata(struct segmentation *seg, int segment_id,  // zeroes feature_data[segment_id][feature_id]
+ SEG_LVL_FEATURES feature_id);
+
+// The values given for each segment can be either deltas (from the default
+// value chosen for the frame) or absolute values.
+//
+// Valid range for abs values is (0-127 for MB_LVL_ALT_Q), (0-63 for
+// SEGMENT_ALT_LF)
+// Valid range for delta values are (+/-127 for MB_LVL_ALT_Q), (+/-63 for
+// SEGMENT_ALT_LF)
+//
+// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use
+// the absolute values given).
+void vp10_set_segment_data(struct segmentation *seg, signed char *feature_data,
+ unsigned char abs_delta);
+
+void vp10_choose_segmap_coding_method(VP10_COMMON *cm, MACROBLOCKD *xd);  // sets cm->seg.temporal_update
+
+void vp10_reset_segment_features(VP10_COMMON *cm);  // clears the flags and all segment features
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_SEGMENTATION_H_
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
new file mode 100644
index 0000000..ab66250
--- /dev/null
+++ b/av1/encoder/speed_features.c
@@ -0,0 +1,635 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/speed_features.h"
+#include "av1/encoder/rdopt.h"
+
+#include "aom_dsp/vpx_dsp_common.h"
+
+// Mesh search patterns for various speed settings: { range, interval } steps.
+static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] = {
+ { 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 }
+};
+
+#define MAX_MESH_SPEED 5 // Last mesh table row; higher speeds clamp to it
+static MESH_PATTERN
+ good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
+ { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
+ { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
+ { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
+ { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
+ { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
+ { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
+ };
+static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
+ 50, 25, 15, 5, 1, 1 // indexed by the clamped speed, like the rows above
+};
+
+// Wraps frame_is_kf_gf_arf(): intra only frames, golden frames (except alt
+// ref overlays) and alt ref frames tend to be coded above ambient quality.
+static int frame_is_boosted(const VP10_COMP *cpi) {
+ return frame_is_kf_gf_arf(cpi);
+}
+
+// Returns the partition size down to which the auto partition code will
+// always search (it can go lower), chosen from the image dimensions.
+// The reasoning: how offensive ringing artefacts look depends partly on the
+// screen area over which they propagate. Propagation is bounded by the
+// transform block size, but a given block size covers more of the screen
+// when a small image format is stretched to full screen.
+static BLOCK_SIZE set_partition_min_limit(VP10_COMMON *const cm) {
+  // Frame area in pixels; it is compared against the 720P and 1080P areas.
+  const unsigned int frame_area = (cm->width * cm->height);
+
+  if (frame_area >= 1920 * 1080) {
+    // Formats 1080P and up
+    return BLOCK_16X16;
+  }
+  if (frame_area >= 1280 * 720) {
+    // Format >= 720P and < 1080P
+    return BLOCK_8X8;
+  }
+  // Formats smaller in area than 720P
+  return BLOCK_4X4;
+}
+// GOOD mode settings that depend on frame size; later tiers override earlier.
+static void set_good_speed_feature_framesize_dependent(VP10_COMP *cpi,
+ SPEED_FEATURES *sf,
+ int speed) {
+ VP10_COMMON *const cm = &cpi->common;
+
+ if (speed >= 1) {
+ if (VPXMIN(cm->width, cm->height) >= 720) {
+ sf->disable_split_mask =
+ cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
+ sf->partition_search_breakout_dist_thr = (1 << 23);
+ } else {
+ sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
+ sf->partition_search_breakout_dist_thr = (1 << 21);
+ }
+ }
+
+ if (speed >= 2) {
+ if (VPXMIN(cm->width, cm->height) >= 720) {
+ sf->disable_split_mask =
+ cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
+ sf->adaptive_pred_interp_filter = 0;
+ sf->partition_search_breakout_dist_thr = (1 << 24);
+ sf->partition_search_breakout_rate_thr = 120;
+ } else {
+ sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
+ sf->partition_search_breakout_dist_thr = (1 << 22);
+ sf->partition_search_breakout_rate_thr = 100;
+ }
+ sf->rd_auto_partition_min_limit = set_partition_min_limit(cm);
+ }
+
+ if (speed >= 3) {
+ if (VPXMIN(cm->width, cm->height) >= 720) {
+ sf->disable_split_mask = DISABLE_ALL_SPLIT;
+ sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0;
+ sf->partition_search_breakout_dist_thr = (1 << 25);
+ sf->partition_search_breakout_rate_thr = 200;
+ } else {
+ sf->max_intra_bsize = BLOCK_32X32;
+ sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
+ sf->schedule_mode_search = cm->base_qindex < 175 ? 1 : 0;
+ sf->partition_search_breakout_dist_thr = (1 << 23);
+ sf->partition_search_breakout_rate_thr = 120;
+ }
+ }
+
+ // If this is a two pass clip that fits the criteria for animated or graphics
+ // content, or the image edge is internal to the coded area, reset
+ // disable_split_mask. The speed >= 4 tier below overwrites the mask again.
+ if ((speed >= 1) && (cpi->oxcf.pass == 2) &&
+ ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
+ (vp10_internal_image_edge(cpi)))) {
+ sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
+ }
+
+ if (speed >= 4) {
+ if (VPXMIN(cm->width, cm->height) >= 720) {
+ sf->partition_search_breakout_dist_thr = (1 << 26);
+ } else {
+ sf->partition_search_breakout_dist_thr = (1 << 24);
+ }
+ sf->disable_split_mask = DISABLE_ALL_SPLIT;
+ }
+}
+// GOOD mode settings that do not depend on frame size, applied tier by tier.
+static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
+ SPEED_FEATURES *sf, int speed) {
+ const int boosted = frame_is_boosted(cpi);
+
+ if (speed >= 1) {
+ if ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
+ vp10_internal_image_edge(cpi)) {
+ sf->use_square_partition_only = !frame_is_boosted(cpi);
+ } else {
+ sf->use_square_partition_only = !frame_is_intra_only(cm);
+ }
+
+ sf->less_rectangular_check = 1;
+
+ sf->use_rd_breakout = 1;
+ sf->adaptive_motion_search = 1;
+ sf->mv.auto_mv_step_size = 1;
+ sf->adaptive_rd_thresh = 1;
+ sf->mv.subpel_iters_per_step = 1;
+ sf->mode_skip_start = 10;
+ sf->adaptive_pred_interp_filter = 1;
+
+ sf->recode_loop = ALLOW_RECODE_KFARFGF;
+ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
+
+ sf->tx_size_search_breakout = 1;
+ sf->partition_search_breakout_rate_thr = 80;
+ sf->tx_type_search.prune_mode = PRUNE_ONE;
+ sf->tx_type_search.fast_intra_tx_type_search = 1;
+ sf->tx_type_search.fast_inter_tx_type_search = 1;
+ // Use transform domain distortion.
+ // Note var-tx expt always uses pixel domain distortion.
+ sf->use_transform_domain_distortion = 1;
+#if CONFIG_EXT_INTER
+ sf->disable_wedge_search_var_thresh = 100;
+ sf->fast_wedge_sign_estimate = 1;
+#endif // CONFIG_EXT_INTER
+ }
+
+ if (speed >= 2) {
+ sf->tx_size_search_method =
+ frame_is_boosted(cpi) ? USE_FULL_RD : USE_LARGESTALL;
+ sf->mode_search_skip_flags =
+ (cm->frame_type == KEY_FRAME) ? 0 : FLAG_SKIP_INTRA_DIRMISMATCH |
+ FLAG_SKIP_INTRA_BESTINTER |
+ FLAG_SKIP_COMP_BESTINTRA |
+ FLAG_SKIP_INTRA_LOWVAR;
+ sf->disable_filter_search_var_thresh = 100;
+ sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
+ sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
+ sf->allow_partition_search_skip = 1;
+ sf->use_upsampled_references = 0;
+ sf->adaptive_rd_thresh = 2;
+#if CONFIG_EXT_TX
+ sf->tx_type_search.prune_mode = PRUNE_TWO;
+#endif
+ }
+
+ if (speed >= 3) {
+ sf->use_square_partition_only = !frame_is_intra_only(cm);
+ sf->tx_size_search_method =
+ frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
+ sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
+ sf->adaptive_pred_interp_filter = 0;
+ sf->adaptive_mode_search = 1;
+ sf->cb_partition_search = !boosted;
+ sf->cb_pred_filter_search = 1;
+ sf->alt_ref_search_fp = 1;
+ sf->recode_loop = ALLOW_RECODE_KFMAXBW;
+ sf->adaptive_rd_thresh = 3;
+ sf->mode_skip_start = 6;
+ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
+ sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
+ sf->adaptive_interp_filter_search = 1;
+ }
+
+ if (speed >= 4) {
+ sf->use_square_partition_only = 1;
+ sf->tx_size_search_method = USE_LARGESTALL;
+ sf->mv.search_method = BIGDIA;
+ sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
+ sf->adaptive_rd_thresh = 4;
+ if (cm->frame_type != KEY_FRAME)
+ sf->mode_search_skip_flags |= FLAG_EARLY_TERMINATE;
+ sf->disable_filter_search_var_thresh = 200;
+ sf->use_lp32x32fdct = 1;
+ sf->use_fast_coef_updates = ONE_LOOP_REDUCED;
+ sf->use_fast_coef_costing = 1;
+ sf->partition_search_breakout_rate_thr = 300;
+ }
+
+ if (speed >= 5) {
+ int i;
+ sf->optimize_coefficients = 0;
+ sf->mv.search_method = HEX;
+ sf->disable_filter_search_var_thresh = 500;
+ for (i = 0; i < TX_SIZES; ++i) {
+ sf->intra_y_mode_mask[i] = INTRA_DC;
+ sf->intra_uv_mode_mask[i] = INTRA_DC;
+ }
+ sf->partition_search_breakout_rate_thr = 500;
+ sf->mv.reduce_first_step_size = 1;
+ sf->simple_model_rd_from_var = 1;
+ }
+}
+
+static void set_rt_speed_feature_framesize_dependent(VP10_COMP *cpi,
+                                                     SPEED_FEATURES *sf,
+                                                     int speed) {
+  // Real-time mode settings that depend on the frame dimensions. A frame
+  // counts as "large" here when its shorter side is at least 720 pixels.
+  VP10_COMMON *const cm = &cpi->common;
+  const int is_large = VPXMIN(cm->width, cm->height) >= 720;
+
+  // Sub-8x8 split masking. Speeds 1 and 2 share the large-frame mask and
+  // differ only for smaller frames, where the speed 2 value replaces the
+  // speed 1 one.
+  if (speed >= 1) {
+    if (is_large) {
+      sf->disable_split_mask =
+          cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
+    } else if (speed >= 2) {
+      sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
+    } else {
+      sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
+    }
+  }
+
+  // Partition search breakout distortion threshold; large frames get the
+  // bigger value.
+  if (speed >= 5) {
+    sf->partition_search_breakout_dist_thr =
+        is_large ? (1 << 25) : (1 << 23);
+  }
+
+  // Encode breakout threshold; the public framesize dependent entry point
+  // compares it against cpi->encode_breakout in REALTIME mode.
+  if (speed >= 7) {
+    sf->encode_breakout_thresh = is_large ? 800 : 300;
+  }
+}
+// REALTIME mode settings that do not depend on frame size, applied per tier.
+static void set_rt_speed_feature(VP10_COMP *cpi, SPEED_FEATURES *sf, int speed,
+ vpx_tune_content content) {
+ VP10_COMMON *const cm = &cpi->common;
+ const int is_keyframe = cm->frame_type == KEY_FRAME;
+ const int frames_since_key = is_keyframe ? 0 : cpi->rc.frames_since_key;
+ sf->static_segmentation = 0;
+ sf->adaptive_rd_thresh = 1;
+ sf->use_fast_coef_costing = 1;
+ sf->allow_exhaustive_searches = 0;
+ sf->exhaustive_searches_thresh = INT_MAX;
+ sf->use_upsampled_references = 0;
+#if CONFIG_EXT_INTER
+ sf->disable_wedge_search_var_thresh = 100;
+ sf->fast_wedge_sign_estimate = 1;
+#endif // CONFIG_EXT_INTER
+
+ // Use transform domain distortion computation
+ // Note var-tx expt always uses pixel domain distortion.
+ sf->use_transform_domain_distortion = 1;
+
+ if (speed >= 1) {
+ sf->use_square_partition_only = !frame_is_intra_only(cm);
+ sf->less_rectangular_check = 1;
+ sf->tx_size_search_method =
+ frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
+
+ sf->use_rd_breakout = 1;
+
+ sf->adaptive_motion_search = 1;
+ sf->adaptive_pred_interp_filter = 1;
+ sf->mv.auto_mv_step_size = 1;
+ sf->adaptive_rd_thresh = 2;
+ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
+ }
+
+ if (speed >= 2) {
+ sf->mode_search_skip_flags =
+ (cm->frame_type == KEY_FRAME) ? 0 : FLAG_SKIP_INTRA_DIRMISMATCH |
+ FLAG_SKIP_INTRA_BESTINTER |
+ FLAG_SKIP_COMP_BESTINTRA |
+ FLAG_SKIP_INTRA_LOWVAR;
+ sf->adaptive_pred_interp_filter = 2;
+ sf->disable_filter_search_var_thresh = 50;
+ sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
+ sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
+ sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
+ sf->adjust_partitioning_from_last_frame = 1;
+ sf->last_partitioning_redo_frequency = 3;
+ sf->use_lp32x32fdct = 1;
+ sf->mode_skip_start = 11;
+ sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
+ }
+
+ if (speed >= 3) {
+ sf->use_square_partition_only = 1;
+ sf->disable_filter_search_var_thresh = 100;
+ sf->use_uv_intra_rd_estimate = 1;
+ sf->mv.subpel_iters_per_step = 1;
+ sf->adaptive_rd_thresh = 4;
+ sf->mode_skip_start = 6;
+ sf->optimize_coefficients = 0;
+ sf->disable_split_mask = DISABLE_ALL_SPLIT;
+ sf->lpf_pick = LPF_PICK_FROM_Q;
+ }
+
+ if (speed >= 4) {
+ int i;
+ sf->last_partitioning_redo_frequency = 4;
+ sf->adaptive_rd_thresh = 5;
+ sf->use_fast_coef_costing = 0;
+ sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX;
+ sf->adjust_partitioning_from_last_frame =
+ cm->last_frame_type != cm->frame_type ||
+ (0 == (frames_since_key + 1) % sf->last_partitioning_redo_frequency);
+ sf->mv.subpel_force_stop = 1;
+ for (i = 0; i < TX_SIZES; i++) {
+ sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
+ sf->intra_uv_mode_mask[i] = INTRA_DC;
+ }
+ sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
+ sf->frame_parameter_update = 0;
+ sf->mv.search_method = FAST_HEX;
+
+ sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW;
+ sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
+ sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
+ sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST;
+#if CONFIG_EXT_PARTITION
+ sf->inter_mode_mask[BLOCK_64X128] = INTER_NEAREST;
+ sf->inter_mode_mask[BLOCK_128X64] = INTER_NEAREST;
+ sf->inter_mode_mask[BLOCK_128X128] = INTER_NEAREST;
+#endif // CONFIG_EXT_PARTITION
+ sf->max_intra_bsize = BLOCK_32X32;
+ }
+
+ if (speed >= 5) {
+ sf->auto_min_max_partition_size =
+ is_keyframe ? RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
+ sf->default_max_partition_size = BLOCK_32X32;
+ sf->default_min_partition_size = BLOCK_8X8;
+ sf->force_frame_boost =
+ is_keyframe ||
+ (frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1);
+ sf->max_delta_qindex = is_keyframe ? 20 : 15;
+ sf->partition_search_type = REFERENCE_PARTITION;
+ sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
+ sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
+ sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
+ sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
+#if CONFIG_EXT_PARTITION
+ sf->inter_mode_mask[BLOCK_64X128] = INTER_NEAREST_NEW_ZERO;
+ sf->inter_mode_mask[BLOCK_128X64] = INTER_NEAREST_NEW_ZERO;
+ sf->inter_mode_mask[BLOCK_128X128] = INTER_NEAREST_NEW_ZERO;
+#endif // CONFIG_EXT_PARTITION
+ sf->adaptive_rd_thresh = 2;
+ // This feature is only enabled when partition search is disabled.
+ sf->reuse_inter_pred_sby = 1;
+ sf->partition_search_breakout_rate_thr = 200;
+ sf->coeff_prob_appx_step = 4;
+ sf->use_fast_coef_updates = is_keyframe ? TWO_LOOP : ONE_LOOP_REDUCED;
+ sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH;
+ sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
+ sf->simple_model_rd_from_var = 1;
+
+ if (!is_keyframe) {
+ int i;
+ if (content == VPX_CONTENT_SCREEN) {
+ for (i = 0; i < BLOCK_SIZES; ++i)
+ sf->intra_y_mode_bsize_mask[i] = INTRA_DC_TM_H_V;
+ } else {
+ for (i = 0; i < BLOCK_SIZES; ++i)
+ if (i >= BLOCK_16X16)
+ sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
+ else
+ // H and V are added only for i < BLOCK_16X16 (this note used to say <=).
+ sf->intra_y_mode_bsize_mask[i] = INTRA_DC_H_V;
+ }
+ }
+ }
+
+ if (speed >= 6) {
+ // Uses VAR_BASED_PARTITION (the old note here named other partition types).
+ sf->partition_search_type = VAR_BASED_PARTITION;
+ // NOTE(review): old note mentioned non-RD key frame coding; no flag set here.
+ sf->mv.search_method = NSTEP;
+ sf->mv.reduce_first_step_size = 1;
+ }
+
+ if (speed >= 7) {
+ sf->adaptive_rd_thresh = 3;
+ sf->mv.search_method = FAST_DIAMOND;
+ sf->mv.fullpel_search_step_param = 10;
+ }
+ if (speed >= 8) {
+ sf->adaptive_rd_thresh = 4;
+ sf->mv.subpel_force_stop = 2;
+ sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
+ }
+}
+
+void vp10_set_speed_features_framesize_dependent(VP10_COMP *cpi) {
+  // Applies the speed settings that depend on frame dimensions for the
+  // configured mode, then updates the encoder state derived from them.
+  const VP10EncoderConfig *const cfg = &cpi->oxcf;
+  SPEED_FEATURES *const sf = &cpi->sf;
+  const int is_realtime = (cfg->mode == REALTIME);
+  int ref;
+
+  if (is_realtime)
+    set_rt_speed_feature_framesize_dependent(cpi, sf, cfg->speed);
+  else if (cfg->mode == GOOD)
+    set_good_speed_feature_framesize_dependent(cpi, sf, cfg->speed);
+
+  // With every split masked out, adaptive_pred_interp_filter is switched off.
+  if (sf->disable_split_mask == DISABLE_ALL_SPLIT)
+    sf->adaptive_pred_interp_filter = 0;
+
+  // Real-time only: raise a non-zero encode breakout up to the threshold.
+  if (is_realtime && cpi->encode_breakout &&
+      sf->encode_breakout_thresh > cpi->encode_breakout)
+    cpi->encode_breakout = sf->encode_breakout_thresh;
+
+  // Check for masked out split cases.
+  for (ref = 0; ref < MAX_REFS; ++ref) {
+    if (sf->disable_split_mask & (1 << ref))
+      cpi->rd.thresh_mult_sub8x8[ref] = INT_MAX;
+  }
+}
+// Resets sf to best quality defaults, then applies the mode/speed overrides.
+void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
+ SPEED_FEATURES *const sf = &cpi->sf;
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->td.mb;
+ const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+ int i;
+
+ // best quality defaults
+ sf->frame_parameter_update = 1;
+ sf->mv.search_method = NSTEP;
+ sf->recode_loop = ALLOW_RECODE;
+ sf->mv.subpel_search_method = SUBPEL_TREE;
+ sf->mv.subpel_iters_per_step = 2;
+ sf->mv.subpel_force_stop = 0;
+ sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf);
+ sf->mv.reduce_first_step_size = 0;
+ sf->coeff_prob_appx_step = 1;
+ sf->mv.auto_mv_step_size = 0;
+ sf->mv.fullpel_search_step_param = 6;
+ sf->comp_inter_joint_search_thresh = BLOCK_4X4;
+ sf->adaptive_rd_thresh = 0;
+ sf->tx_size_search_method = USE_FULL_RD;
+ sf->use_lp32x32fdct = 0;
+ sf->adaptive_motion_search = 0;
+ sf->adaptive_pred_interp_filter = 0;
+ sf->adaptive_mode_search = 0;
+ sf->cb_pred_filter_search = 0;
+ sf->cb_partition_search = 0;
+ sf->alt_ref_search_fp = 0;
+ sf->partition_search_type = SEARCH_PARTITION;
+ sf->tx_type_search.prune_mode = NO_PRUNE;
+ sf->tx_type_search.fast_intra_tx_type_search = 0;
+ sf->tx_type_search.fast_inter_tx_type_search = 0;
+ sf->less_rectangular_check = 0;
+ sf->use_square_partition_only = 0;
+ sf->auto_min_max_partition_size = NOT_IN_USE;
+ sf->rd_auto_partition_min_limit = BLOCK_4X4;
+ sf->default_max_partition_size = BLOCK_LARGEST;
+ sf->default_min_partition_size = BLOCK_4X4;
+ sf->adjust_partitioning_from_last_frame = 0;
+ sf->last_partitioning_redo_frequency = 4;
+ sf->disable_split_mask = 0;
+ sf->mode_search_skip_flags = 0;
+ sf->force_frame_boost = 0;
+ sf->max_delta_qindex = 0;
+ sf->disable_filter_search_var_thresh = 0;
+ sf->adaptive_interp_filter_search = 0;
+ sf->allow_partition_search_skip = 0;
+#if CONFIG_EXT_TILE
+ sf->use_upsampled_references = 0;
+#else
+ sf->use_upsampled_references = 1;
+#endif // CONFIG_EXT_TILE
+#if CONFIG_EXT_INTER
+ sf->disable_wedge_search_var_thresh = 0;
+ sf->fast_wedge_sign_estimate = 0;
+#endif // CONFIG_EXT_INTER
+
+ for (i = 0; i < TX_SIZES; i++) {
+ sf->intra_y_mode_mask[i] = INTRA_ALL;
+ sf->intra_uv_mode_mask[i] = INTRA_ALL;
+ }
+ sf->use_rd_breakout = 0;
+ sf->use_uv_intra_rd_estimate = 0;
+ sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
+ sf->use_fast_coef_updates = TWO_LOOP;
+ sf->use_fast_coef_costing = 0;
+ sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
+ sf->schedule_mode_search = 0;
+ for (i = 0; i < BLOCK_SIZES; ++i) sf->inter_mode_mask[i] = INTER_ALL;
+ sf->max_intra_bsize = BLOCK_LARGEST;
+ sf->reuse_inter_pred_sby = 0;
+ // This setting only takes effect when partition_search_type is set
+ // to FIXED_PARTITION.
+ sf->always_this_block_size = BLOCK_16X16;
+ sf->search_type_check_frequency = 50;
+ sf->encode_breakout_thresh = 0;
+ // Recode loop tolerance %.
+ sf->recode_tolerance = 25;
+ sf->default_interp_filter = SWITCHABLE;
+ sf->tx_size_search_breakout = 0;
+ sf->partition_search_breakout_dist_thr = 0;
+ sf->partition_search_breakout_rate_thr = 0;
+ sf->simple_model_rd_from_var = 0;
+
+// Set this at the appropriate speed levels
+#if CONFIG_EXT_TILE
+ sf->use_transform_domain_distortion = 1;
+#else
+ sf->use_transform_domain_distortion = 0;
+#endif // CONFIG_EXT_TILE
+
+ if (oxcf->mode == REALTIME)
+ set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content);
+ else if (oxcf->mode == GOOD)
+ set_good_speed_feature(cpi, cm, sf, oxcf->speed);
+
+ // sf->partition_search_breakout_dist_thr is set assuming max 64x64
+ // blocks. Normalise this if the blocks are bigger.
+ if (MAX_SB_SIZE_LOG2 > 6) {
+ sf->partition_search_breakout_dist_thr <<= 2 * (MAX_SB_SIZE_LOG2 - 6);
+ }
+
+ cpi->full_search_sad = vp10_full_search_sad;
+ cpi->diamond_search_sad = vp10_diamond_search_sad;
+ // NOTE(review): this and the thresholds below overwrite the real-time values.
+ sf->allow_exhaustive_searches = 1;
+ if (oxcf->mode == BEST) {
+ if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
+ sf->exhaustive_searches_thresh = (1 << 20);
+ else
+ sf->exhaustive_searches_thresh = (1 << 21);
+ sf->max_exaustive_pct = 100;
+ for (i = 0; i < MAX_MESH_STEP; ++i) {
+ sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
+ sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
+ }
+ } else {
+ int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
+ if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
+ sf->exhaustive_searches_thresh = (1 << 22);
+ else
+ sf->exhaustive_searches_thresh = (1 << 23);
+ sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
+ if (speed > 0)
+ sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
+
+ for (i = 0; i < MAX_MESH_STEP; ++i) {
+ sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range;
+ sf->mesh_patterns[i].interval =
+ good_quality_mesh_patterns[speed][i].interval;
+ }
+ }
+
+ // Slow quant, dct and trellis not worthwhile for first pass
+ // so make sure they are always turned off.
+ if (oxcf->pass == 1) sf->optimize_coefficients = 0;
+
+ // No recode for 1 pass.
+ if (oxcf->pass == 0) {
+ sf->recode_loop = DISALLOW_RECODE;
+ sf->optimize_coefficients = 0;
+ }
+
+ if (sf->mv.subpel_search_method == SUBPEL_TREE) {
+ cpi->find_fractional_mv_step = vp10_find_best_sub_pixel_tree;
+ } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) {
+ cpi->find_fractional_mv_step = vp10_find_best_sub_pixel_tree_pruned;
+ } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
+ cpi->find_fractional_mv_step = vp10_find_best_sub_pixel_tree_pruned_more;
+ } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_EVENMORE) {
+ cpi->find_fractional_mv_step =
+ vp10_find_best_sub_pixel_tree_pruned_evenmore;
+ }
+
+#if !CONFIG_AOM_QM
+ x->optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;
+#else
+ // FIXME: trellis not very efficient for quantisation matrices
+ x->optimize = 0;
+#endif
+
+ x->min_partition_size = sf->default_min_partition_size;
+ x->max_partition_size = sf->default_max_partition_size;
+
+ if (!cpi->oxcf.frame_periodic_boost) {
+ sf->max_delta_qindex = 0;
+ }
+}
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
new file mode 100644
index 0000000..2457c5b
--- /dev/null
+++ b/av1/encoder/speed_features.h
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_SPEED_FEATURES_H_
+#define VP10_ENCODER_SPEED_FEATURES_H_
+
+#include "av1/common/enums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// Intra prediction mode bit masks: bit (1 << mode) set means mode is listed.
+enum {
+ INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) |
+ (1 << D135_PRED) | (1 << D117_PRED) | (1 << D153_PRED) |
+ (1 << D207_PRED) | (1 << D63_PRED) | (1 << TM_PRED),
+ INTRA_DC = (1 << DC_PRED),
+ INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED),
+ INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
+ INTRA_DC_TM_H_V =
+ (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) | (1 << H_PRED)
+};
+// Inter mode bit masks (1 << mode); the CONFIG_EXT_INTER set lists more modes.
+#if CONFIG_EXT_INTER
+enum {
+ INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV) |
+ (1 << NEWFROMNEARMV) | (1 << NEAREST_NEARESTMV) |
+ (1 << NEAR_NEARMV) | (1 << NEAREST_NEARMV) |
+ (1 << NEAR_NEARESTMV) | (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) |
+ (1 << NEAR_NEWMV) | (1 << NEW_NEARMV) | (1 << NEW_NEARESTMV) |
+ (1 << ZERO_ZEROMV),
+ INTER_NEAREST = (1 << NEARESTMV) | (1 << NEAREST_NEARESTMV) |
+ (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
+ (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV),
+ INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV) | (1 << NEWFROMNEARMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) |
+ (1 << NEAR_NEARESTMV) | (1 << NEAREST_NEARMV) |
+ (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
+ (1 << NEW_NEARMV) | (1 << NEAR_NEWMV),
+ INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) |
+ (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) |
+ (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV),
+ INTER_NEAREST_NEW_ZERO =
+ (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV) | (1 << NEWFROMNEARMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | (1 << NEW_NEWMV) |
+ (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) | (1 << NEW_NEARESTMV) |
+ (1 << NEAREST_NEWMV) | (1 << NEW_NEARMV) | (1 << NEAR_NEWMV),
+ INTER_NEAREST_NEAR_NEW =
+ (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV) | (1 << NEWFROMNEARMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) | (1 << NEAREST_NEARMV) |
+ (1 << NEAR_NEARESTMV) | (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
+ (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | (1 << NEAR_NEARMV),
+ INTER_NEAREST_NEAR_ZERO =
+ (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | (1 << NEAREST_NEARMV) |
+ (1 << NEAR_NEARESTMV) | (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) |
+ (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | (1 << NEAR_NEARMV),
+};
+#else
+enum {
+ INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV),
+ INTER_NEAREST = (1 << NEARESTMV),
+ INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
+ INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV),
+ INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV),
+ INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
+ INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV),
+};
+#endif // CONFIG_EXT_INTER
+// Values for SPEED_FEATURES::disable_split_mask: one bit per THR_* entry.
+enum {
+ DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
+ (1 << THR_ALTR) | (1 << THR_GOLD) | (1 << THR_LAST),
+
+ DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,
+
+ DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),
+
+ LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) |
+ (1 << THR_ALTR) | (1 << THR_GOLD)
+};
+// Motion search methods, stored in MV_SPEED_FEATURES::search_method.
+typedef enum {
+ DIAMOND = 0,
+ NSTEP = 1,
+ HEX = 2,
+ BIGDIA = 3,
+ SQUARE = 4,
+ FAST_HEX = 5,
+ FAST_DIAMOND = 6
+} SEARCH_METHODS;
+// Recode policies, stored in SPEED_FEATURES::recode_loop.
+typedef enum {
+ // No recode.
+ DISALLOW_RECODE = 0,
+ // Allow recode for KF and exceeding maximum frame bandwidth.
+ ALLOW_RECODE_KFMAXBW = 1,
+ // Allow recode only for KF/ARF/GF frames.
+ ALLOW_RECODE_KFARFGF = 2,
+ // Allow recode for all frames based on bitrate constraints.
+ ALLOW_RECODE = 3,
+} RECODE_LOOP_TYPE;
+// Sub-pel search variants for MV_SPEED_FEATURES::subpel_search_method.
+typedef enum {
+ SUBPEL_TREE = 0,
+ SUBPEL_TREE_PRUNED = 1, // Prunes 1/2-pel searches
+ SUBPEL_TREE_PRUNED_MORE = 2, // Prunes 1/2-pel searches more aggressively
+ SUBPEL_TREE_PRUNED_EVENMORE = 3, // Prunes 1/2- and 1/4-pel searches
+ // Other methods to come
+} SUBPEL_SEARCH_METHODS;
+// Values for SPEED_FEATURES::lf_motion_threshold.
+typedef enum {
+ NO_MOTION_THRESHOLD = 0,
+ LOW_MOTION_THRESHOLD = 7
+} MOTION_THRESHOLD;
+// Values for SPEED_FEATURES::tx_size_search_method.
+typedef enum {
+ USE_FULL_RD = 0,
+ USE_LARGESTALL,
+ USE_TX_8X8
+} TX_SIZE_SEARCH_METHOD;
+// Values for SPEED_FEATURES::auto_min_max_partition_size.
+typedef enum {
+ NOT_IN_USE = 0,
+ RELAXED_NEIGHBORING_MIN_MAX = 1,
+ STRICT_NEIGHBORING_MIN_MAX = 2
+} AUTO_MIN_MAX_MODE;
+// Values for the lpf_pick speed feature.
+typedef enum {
+ // Try the full image with different values.
+ LPF_PICK_FROM_FULL_IMAGE,
+ // Try a small portion of the image with different values.
+ LPF_PICK_FROM_SUBIMAGE,
+ // Estimate the level based on quantizer and frame type
+ LPF_PICK_FROM_Q,
+ // Pick 0 to disable LPF if LPF was enabled last frame
+ LPF_PICK_MINIMAL_LPF
+} LPF_PICK_METHOD;
+// Flag bits OR-ed into mode_search_skip_flags (bit 2 is not assigned here).
+typedef enum {
+ // Terminate search early based on distortion so far compared to
+ // qp step, distortion in the neighborhood of the frame, etc.
+ FLAG_EARLY_TERMINATE = 1 << 0,
+
+ // Skips comp inter modes if the best so far is an intra mode.
+ FLAG_SKIP_COMP_BESTINTRA = 1 << 1,
+
+ // Skips oblique intra modes if the best so far is an inter mode.
+ FLAG_SKIP_INTRA_BESTINTER = 1 << 3,
+
+ // Skips oblique intra modes at angles 27, 63, 117, 153 if the best
+ // intra so far is not one of the neighboring directions.
+ FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4,
+
+ // Skips intra modes other than DC_PRED if the source variance is small
+ FLAG_SKIP_INTRA_LOWVAR = 1 << 5,
+} MODE_SEARCH_SKIP_LOGIC;
+// One skip bit per interpolation filter, at position (1 << filter).
+typedef enum {
+ FLAG_SKIP_EIGHTTAP_REGULAR = 1 << EIGHTTAP_REGULAR,
+ FLAG_SKIP_EIGHTTAP_SMOOTH = 1 << EIGHTTAP_SMOOTH,
+ FLAG_SKIP_MULTITAP_SHARP = 1 << MULTITAP_SHARP,
+} INTERP_FILTER_MASK;
+// Values for TX_TYPE_SEARCH::prune_mode.
+typedef enum {
+ NO_PRUNE = 0,
+ // eliminates one tx type in vertical and horizontal direction
+ PRUNE_ONE = 1,
+#if CONFIG_EXT_TX
+ // eliminates two tx types in each direction
+ PRUNE_TWO = 2,
+#endif
+} TX_TYPE_PRUNE_MODE;
+// Transform type search settings, stored in SPEED_FEATURES::tx_type_search.
+typedef struct {
+ TX_TYPE_PRUNE_MODE prune_mode;
+ int fast_intra_tx_type_search;
+ int fast_inter_tx_type_search;
+} TX_TYPE_SEARCH;
+// Values for SPEED_FEATURES::partition_search_type.
+typedef enum {
+ // Search partitions using RD criterion
+ SEARCH_PARTITION,
+
+ // Always use a fixed size partition
+ FIXED_PARTITION,
+
+ REFERENCE_PARTITION,
+
+ // Use an arbitrary partitioning scheme based on source variance within
+ // a 64X64 SB
+ VAR_BASED_PARTITION,
+
+ // Use non-fixed partitions based on source variance
+ SOURCE_VAR_BASED_PARTITION
+} PARTITION_SEARCH_TYPE;
+// Values for the use_fast_coef_updates speed feature.
+typedef enum {
+ // Does a dry run to see if any of the contexts need to be updated or not,
+ // before the final run.
+ TWO_LOOP = 0,
+
+ // No dry run, also only half the coef contexts and bands are updated.
+ // The rest are not updated at all.
+ ONE_LOOP_REDUCED = 1
+} FAST_COEFF_UPDATE;
+// Motion vector search settings, stored in SPEED_FEATURES::mv.
+typedef struct MV_SPEED_FEATURES {
+ // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
+ SEARCH_METHODS search_method;
+
+ // This parameter controls which step in the n-step process we start at.
+ // It's changed adaptively based on circumstances.
+ int reduce_first_step_size;
+
+ // If this is set to 1, we limit the motion search range to 2 times the
+ // largest motion vector found in the last frame.
+ int auto_mv_step_size;
+
+ // Sub-pixel search method. SUBPEL_TREE does a subpixel logarithmic search
+ // that keeps stepping at 1/2 pixel units until you stop getting a gain, then
+ // goes on to 1/4 and repeats the process, skipping many diagonals along the
+ // way. SUBPEL_SEARCH_METHODS lists the pruned variants.
+ SUBPEL_SEARCH_METHODS subpel_search_method;
+
+ // Maximum number of steps in logarithmic subpel search before giving up.
+ int subpel_iters_per_step;
+
+ // Control when to stop subpel search
+ int subpel_force_stop;
+
+ // This variable sets the step_param used in full pel motion search.
+ int fullpel_search_step_param;
+} MV_SPEED_FEATURES;
+
+#define MAX_MESH_STEP 4
+// One step of a mesh search pattern; MAX_MESH_STEP steps form a pattern.
+typedef struct MESH_PATTERN {
+ int range;
+ int interval;
+} MESH_PATTERN;
+
+typedef struct SPEED_FEATURES {
+ MV_SPEED_FEATURES mv;
+
+ // Frame level coding parameter update
+ int frame_parameter_update;
+
+ RECODE_LOOP_TYPE recode_loop;
+
+ // Trellis (dynamic programming) optimization of quantized values (+1, 0).
+ int optimize_coefficients;
+
+ // Always set to 0. If on it enables 0 cost background transmission
+ // (except for the initial transmission of the segmentation). The feature is
+ // disabled because the addition of very large block sizes makes the
+ // backgrounds very cheap to encode, and the segmentation we have
+ // adds overhead.
+ int static_segmentation;
+
+ // If 1 we iterate finding a best reference for 2 ref frames together - via
+ // a log search that iterates 4 times (check around mv for last for best
+ // error of combined predictor then check around mv for alt). If 0 we
+ // just use the best motion vector found for each frame by itself.
+ BLOCK_SIZE comp_inter_joint_search_thresh;
+
+ // This variable is used to cap the maximum number of times we skip testing a
+ // mode to be evaluated. A high value means we will be faster.
+ int adaptive_rd_thresh;
+
+ // Coefficient probability model approximation step size
+ int coeff_prob_appx_step;
+
+ // The threshold is to determine how slow the motion is, it is used when
+ // use_lastframe_partitioning is set to LAST_FRAME_PARTITION_LOW_MOTION
+ MOTION_THRESHOLD lf_motion_threshold;
+
+ // Determine which method we use to determine transform size. We can choose
+ // between options like full rd, largest for prediction size, largest
+ // for intra and model coefs for the rest.
+ TX_SIZE_SEARCH_METHOD tx_size_search_method;
+
+ // Low precision 32x32 fdct keeps everything in 16 bits and thus is less
+ // precise but significantly faster than the non lp version.
+ int use_lp32x32fdct;
+
+ // After looking at the first set of modes (set by index here), skip
+ // checking modes for reference frames that don't match the reference frame
+ // of the best so far.
+ int mode_skip_start;
+
+ PARTITION_SEARCH_TYPE partition_search_type;
+
+ TX_TYPE_SEARCH tx_type_search;
+
+ // Used if partition_search_type = FIXED_SIZE_PARTITION
+ BLOCK_SIZE always_this_block_size;
+
+ // Skip rectangular partition test when partition type none gives better
+ // rd than partition type split.
+ int less_rectangular_check;
+
+ // Disable testing non square partitions. (eg 16x32)
+ int use_square_partition_only;
+
+ // Sets min and max partition sizes for this superblock based on the
+ // same superblock in last encoded frame, and the left and above neighbor.
+ AUTO_MIN_MAX_MODE auto_min_max_partition_size;
+ // Ensures the rd based auto partition search will always
+ // go down at least to the specified level.
+ BLOCK_SIZE rd_auto_partition_min_limit;
+
+ // Min and max partition size we enable (block_size) as per auto
+ // min max, but also used by adjust partitioning, and pick_partitioning.
+ BLOCK_SIZE default_min_partition_size;
+ BLOCK_SIZE default_max_partition_size;
+
+ // Whether or not we allow partitions one smaller or one greater than the last
+ // frame's partitioning. Only used if use_lastframe_partitioning is set.
+ int adjust_partitioning_from_last_frame;
+
+ // How frequently we re do the partitioning from scratch. Only used if
+ // use_lastframe_partitioning is set.
+ int last_partitioning_redo_frequency;
+
+ // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable
+ // it always, to allow it for only Last frame and Intra, disable it for all
+ // inter modes or to enable it always.
+ int disable_split_mask;
+
+ // TODO(jingning): combine the related motion search speed features
+ // This allows us to use motion search at other sizes as a starting
+ // point for this motion search and limits the search range around it.
+ int adaptive_motion_search;
+
+ // Flag for allowing some use of exhaustive searches;
+ int allow_exhaustive_searches;
+
+ // Threshold for allowing exhaistive motion search.
+ int exhaustive_searches_thresh;
+
+ // Maximum number of exhaustive searches for a frame.
+ int max_exaustive_pct;
+
+ // Pattern to be used for any exhaustive mesh searches.
+ MESH_PATTERN mesh_patterns[MAX_MESH_STEP];
+
+ int schedule_mode_search;
+
+ // Allows sub 8x8 modes to use the prediction filter that was determined
+ // best for 8x8 mode. If set to 0 we always re check all the filters for
+ // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter
+ // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected.
+ int adaptive_pred_interp_filter;
+
+ // Adaptive prediction mode search
+ int adaptive_mode_search;
+
+ // Chessboard pattern prediction filter type search
+ int cb_pred_filter_search;
+
+ int cb_partition_search;
+
+ int alt_ref_search_fp;
+
+ // Use finer quantizer in every other few frames that run variable block
+ // partition type search.
+ int force_frame_boost;
+
+ // Maximally allowed base quantization index fluctuation.
+ int max_delta_qindex;
+
+ // Implements various heuristics to skip searching modes
+ // The heuristics selected are based on flags
+ // defined in the MODE_SEARCH_SKIP_HEURISTICS enum
+ unsigned int mode_search_skip_flags;
+
+ // A source variance threshold below which filter search is disabled
+ // Choose a very large value (UINT_MAX) to use 8-tap always
+ unsigned int disable_filter_search_var_thresh;
+
+#if CONFIG_EXT_INTER
+ // A source variance threshold below which wedge search is disabled
+ unsigned int disable_wedge_search_var_thresh;
+
+ // Whether fast wedge sign estimate is used
+ int fast_wedge_sign_estimate;
+#endif // CONFIG_EXT_INTER
+
+ // These bit masks allow you to enable or disable intra modes for each
+ // transform size separately.
+ int intra_y_mode_mask[TX_SIZES];
+ int intra_uv_mode_mask[TX_SIZES];
+
+ // These bit masks allow you to enable or disable intra modes for each
+ // prediction block size separately.
+ int intra_y_mode_bsize_mask[BLOCK_SIZES];
+
+ // This variable enables an early break out of mode testing if the model for
+ // rd built from the prediction signal indicates a value that's much
+ // higher than the best rd we've seen so far.
+ int use_rd_breakout;
+
+ // This enables us to use an estimate for intra rd based on dc mode rather
+ // than choosing an actual uv mode in the stage of encoding before the actual
+ // final encode.
+ int use_uv_intra_rd_estimate;
+
+ // This feature controls how the loop filter level is determined.
+ LPF_PICK_METHOD lpf_pick;
+
+ // This feature limits the number of coefficients updates we actually do
+ // by only looking at counts from 1/2 the bands.
+ FAST_COEFF_UPDATE use_fast_coef_updates;
+
+ // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV
+ // modes are used in order from LSB to MSB for each BLOCK_SIZE.
+ int inter_mode_mask[BLOCK_SIZES];
+
+ // This feature controls whether we do the expensive context update and
+ // calculation in the rd coefficient costing loop.
+ int use_fast_coef_costing;
+
+ // This feature controls the tolerence vs target used in deciding whether to
+ // recode a frame. It has no meaning if recode is disabled.
+ int recode_tolerance;
+
+ // This variable controls the maximum block size where intra blocks can be
+ // used in inter frames.
+ // TODO(aconverse): Fold this into one of the other many mode skips
+ BLOCK_SIZE max_intra_bsize;
+
+ // The frequency that we check if SOURCE_VAR_BASED_PARTITION or
+ // FIXED_PARTITION search type should be used.
+ int search_type_check_frequency;
+
+ // When partition is pre-set, the inter prediction result from pick_inter_mode
+ // can be reused in final block encoding process. It is enabled only for real-
+ // time mode speed 6.
+ int reuse_inter_pred_sby;
+
+ // This variable sets the encode_breakout threshold. Currently, it is only
+ // enabled in real time mode.
+ int encode_breakout_thresh;
+
+ // default interp filter choice
+ INTERP_FILTER default_interp_filter;
+
+ // Early termination in transform size search, which only applies while
+ // tx_size_search_method is USE_FULL_RD.
+ int tx_size_search_breakout;
+
+ // adaptive interp_filter search to allow skip of certain filter types.
+ int adaptive_interp_filter_search;
+
+ // mask for skip evaluation of certain interp_filter type.
+ INTERP_FILTER_MASK interp_filter_search_mask;
+
+ // Partition search early breakout thresholds.
+ int64_t partition_search_breakout_dist_thr;
+ int partition_search_breakout_rate_thr;
+
+ // Allow skipping partition search for still image frame
+ int allow_partition_search_skip;
+
+ // Fast approximation of vp10_model_rd_from_var_lapndz
+ int simple_model_rd_from_var;
+
+ // Do sub-pixel search in up-sampled reference frames
+ int use_upsampled_references;
+
+ // Whether to compute distortion in the image domain (slower but
+ // more accurate), or in the transform domain (faster but less acurate).
+ int use_transform_domain_distortion;
+} SPEED_FEATURES;
+
+struct VP10_COMP;
+
+void vp10_set_speed_features_framesize_independent(struct VP10_COMP *cpi);
+void vp10_set_speed_features_framesize_dependent(struct VP10_COMP *cpi);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_SPEED_FEATURES_H_
diff --git a/av1/encoder/subexp.c b/av1/encoder/subexp.c
new file mode 100644
index 0000000..d722654
--- /dev/null
+++ b/av1/encoder/subexp.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "av1/encoder/bitwriter.h"
+
+#include "av1/common/common.h"
+#include "av1/common/entropy.h"
+#include "av1/encoder/cost.h"
+#include "av1/encoder/subexp.h"
+
+#define vp10_cost_upd256 ((int)(vp10_cost_one(upd) - vp10_cost_zero(upd)))
+
+static const uint8_t update_bits[255] = {  // Bit count per remapped delta index; values match what encode_term_subexp() emits (5, 6, 8, 10, 11).
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 11, 11, 11, 11, 11, 0,  // NOTE(review): the last entry is 0, not 11 -- confirm this is intentional
+};
+
+static int recenter_nonneg(int v, int m) {  // Folds v around the centre m into a non-negative index.
+ if (v > (m << 1))  // v beyond 2*m: returned unchanged
+ return v;
+ else if (v >= m)  // m <= v <= 2*m: even result 2*(v - m)
+ return ((v - m) << 1);
+ else  // v < m: odd result 2*(m - v) - 1
+ return ((m - v) << 1) - 1;
+}
+
+static int remap_prob(int v, int m) {  // Maps new value v relative to old value m to an index looked up in map_table.
+ int i;
+ static const uint8_t map_table[MAX_PROB - 1] = {
+ // generated by:
+ // map_table[j] = split_index(j, MAX_PROB - 1, MODULUS_PARAM);
+ 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33,
+ 34, 35, 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 2, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
+ 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 4, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88,
+ 89, 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102,
+ 103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129, 130,
+ 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
+ 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11,
+ 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171,
+ 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 13, 182, 183, 184, 185,
+ 186, 187, 188, 189, 190, 191, 192, 193, 14, 194, 195, 196, 197, 198, 199,
+ 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212, 213,
+ 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227,
+ 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
+ 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19,
+ };
+ v--;  // both values are shifted down by one before recentering
+ m--;
+ if ((m << 1) <= MAX_PROB)  // centre in the lower part: recenter v around m directly
+ i = recenter_nonneg(v, m) - 1;
+ else  // otherwise recenter the values mirrored about MAX_PROB - 1
+ i = recenter_nonneg(MAX_PROB - 1 - v, MAX_PROB - 1 - m) - 1;
+
+ i = map_table[i];  // NOTE(review): v == m gives i == -1 here (out of bounds); callers in this file only pass newp != oldp
+ return i;
+}
+
+static int prob_diff_update_cost(vpx_prob newp, vpx_prob oldp) {  // Cost of coding the update oldp -> newp.
+ int delp = remap_prob(newp, oldp);  // remapped delta index
+ return update_bits[delp] << VP10_PROB_COST_SHIFT;  // table bit count scaled by VP10_PROB_COST_SHIFT
+}
+
+static void encode_uniform(vp10_writer *w, int v) {  // Writes v using l - 1 bits when v < m, otherwise l bits.
+ const int l = 8;
+ const int m = (1 << l) - 190;  // = 66: values below m take the short (7-bit) form
+ if (v < m) {
+ vp10_write_literal(w, v, l - 1);
+ } else {
+ vp10_write_literal(w, m + ((v - m) >> 1), l - 1);  // upper part of (v - m), offset by m, in 7 bits
+ vp10_write_literal(w, (v - m) & 1, 1);  // low bit of (v - m)
+ }
+}
+
+static INLINE int write_bit_gte(vp10_writer *w, int word, int test) {  // Writes the 1-bit flag (word >= test) and returns it.
+ vp10_write_literal(w, word >= test, 1);
+ return word >= test;
+}
+
+static void encode_term_subexp(vp10_writer *w, int word) {  // Writes word as escape flags followed by a range-dependent payload.
+ if (!write_bit_gte(w, word, 16)) {  // word < 16: flag 0, then 4 bits
+ vp10_write_literal(w, word, 4);
+ } else if (!write_bit_gte(w, word, 32)) {  // 16 <= word < 32: flags 1,0, then 4 bits of (word - 16)
+ vp10_write_literal(w, word - 16, 4);
+ } else if (!write_bit_gte(w, word, 64)) {  // 32 <= word < 64: flags 1,1,0, then 5 bits of (word - 32)
+ vp10_write_literal(w, word - 32, 5);
+ } else {  // word >= 64: flags 1,1,1, then encode_uniform(word - 64)
+ encode_uniform(w, word - 64);
+ }
+}
+
+void vp10_write_prob_diff_update(vp10_writer *w, vpx_prob newp, vpx_prob oldp) {  // Writes the update oldp -> newp to w.
+ const int delp = remap_prob(newp, oldp);  // remapped delta index (remap_prob assumes newp != oldp)
+ encode_term_subexp(w, delp);
+}
+
+int vp10_prob_diff_update_savings_search(const unsigned int *ct, vpx_prob oldp,  // Searches from *bestp towards oldp for the most cost-saving probability.
+ vpx_prob *bestp, vpx_prob upd) {  // upd is read through the vp10_cost_upd256 macro
+ const int old_b = cost_branch256(ct, oldp);  // cost of the counts under the current probability
+ int bestsavings = 0;
+ vpx_prob newp, bestnewp = oldp;
+ const int step = *bestp > oldp ? -1 : 1;  // always steps towards oldp
+
+ for (newp = *bestp; newp != oldp; newp += step) {  // oldp itself is never evaluated
+ const int new_b = cost_branch256(ct, newp);
+ const int update_b = prob_diff_update_cost(newp, oldp) + vp10_cost_upd256;  // cost of signalling the update
+ const int savings = old_b - new_b - update_b;
+ if (savings > bestsavings) {
+ bestsavings = savings;
+ bestnewp = newp;
+ }
+ }
+ *bestp = bestnewp;  // stays oldp when no candidate yields positive savings
+ return bestsavings;  // 0 when no candidate yields positive savings
+}
+
+int vp10_prob_diff_update_savings_search_model(const unsigned int *ct,  // Model variant: searches the PIVOT_NODE probability, costing the derived nodes too.
+ const vpx_prob *oldp,
+ vpx_prob *bestp, vpx_prob upd,  // upd is read through the vp10_cost_upd256 macro
+ int stepsize) {  // must be > 0 (asserted below)
+ int i, old_b, new_b, update_b, savings, bestsavings;
+ int newp;
+ const int step_sign = *bestp > oldp[PIVOT_NODE] ? -1 : 1;  // direction from *bestp towards the old pivot value
+ const int step = stepsize * step_sign;
+ vpx_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES];
+ vp10_model_to_full_probs(oldp, oldplist);  // expand the old model into per-node probabilities
+ memcpy(newplist, oldp, sizeof(vpx_prob) * UNCONSTRAINED_NODES);
+ for (i = UNCONSTRAINED_NODES, old_b = 0; i < ENTROPY_NODES; ++i)  // cost of nodes UNCONSTRAINED_NODES..ENTROPY_NODES-1
+ old_b += cost_branch256(ct + 2 * i, oldplist[i]);
+ old_b += cost_branch256(ct + 2 * PIVOT_NODE, oldplist[PIVOT_NODE]);  // plus the pivot node itself
+
+ bestsavings = 0;
+ bestnewp = oldp[PIVOT_NODE];
+
+ assert(stepsize > 0);
+
+ for (newp = *bestp; (newp - oldp[PIVOT_NODE]) * step_sign < 0; newp += step) {  // stops on reaching or passing the old pivot
+ if (newp < 1 || newp > 255) continue;  // skip candidates outside [1, 255]
+ newplist[PIVOT_NODE] = newp;
+ vp10_model_to_full_probs(newplist, newplist);  // expanded in place
+ for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i)
+ new_b += cost_branch256(ct + 2 * i, newplist[i]);
+ new_b += cost_branch256(ct + 2 * PIVOT_NODE, newplist[PIVOT_NODE]);
+ update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) + vp10_cost_upd256;  // cost of signalling the update
+ savings = old_b - new_b - update_b;
+ if (savings > bestsavings) {
+ bestsavings = savings;
+ bestnewp = newp;
+ }
+ }
+
+ *bestp = bestnewp;  // stays oldp[PIVOT_NODE] when nothing saves bits
+ return bestsavings;
+}
+
+#if CONFIG_ENTROPY
+static int get_cost(unsigned int ct[][2], vpx_prob p, int n) {  // Sums the branch cost of ct[0..n], re-deriving p after each entry.
+ int i, p0 = p;  // p0 keeps the starting probability for every merge
+ unsigned int total_ct[2] = { 0, 0 };  // running sum of the counts seen so far
+ int cost = 0;
+
+ for (i = 0; i <= n; ++i) {  // note: inclusive upper bound, n + 1 entries
+ cost += cost_branch256(ct[i], p);
+ total_ct[0] += ct[i][0];
+ total_ct[1] += ct[i][1];
+ if (i < n)  // no merge needed after the last entry
+ p = vp10_merge_probs(p0, total_ct, COEF_COUNT_SAT_BITS,
+ COEF_MAX_UPDATE_FACTOR_BITS);
+ }
+ return cost;
+}
+
+int vp10_prob_update_search_subframe(unsigned int ct[][2], vpx_prob oldp,  // Same search as vp10_prob_diff_update_savings_search, costed with get_cost() over ct[0..n].
+ vpx_prob *bestp, vpx_prob upd, int n) {  // upd is read through the vp10_cost_upd256 macro
+ const int old_b = get_cost(ct, oldp, n);
+ int bestsavings = 0;
+ vpx_prob newp, bestnewp = oldp;
+ const int step = *bestp > oldp ? -1 : 1;  // always steps towards oldp
+
+ for (newp = *bestp; newp != oldp; newp += step) {  // oldp itself is never evaluated
+ const int new_b = get_cost(ct, newp, n);
+ const int update_b = prob_diff_update_cost(newp, oldp) + vp10_cost_upd256;  // cost of signalling the update
+ const int savings = old_b - new_b - update_b;
+ if (savings > bestsavings) {
+ bestsavings = savings;
+ bestnewp = newp;
+ }
+ }
+ *bestp = bestnewp;  // stays oldp when no candidate yields positive savings
+ return bestsavings;
+}
+
+int vp10_prob_update_search_model_subframe(  // Model pivot search, costed with get_cost() over ct[node][0..n].
+ unsigned int ct[ENTROPY_NODES][COEF_PROBS_BUFS][2], const vpx_prob *oldp,
+ vpx_prob *bestp, vpx_prob upd, int stepsize, int n) {  // upd is read through vp10_cost_upd256; stepsize must be > 0
+ int i, old_b, new_b, update_b, savings, bestsavings;
+ int newp;
+ const int step_sign = *bestp > oldp[PIVOT_NODE] ? -1 : 1;  // direction from *bestp towards the old pivot value
+ const int step = stepsize * step_sign;
+ vpx_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES];
+ vp10_model_to_full_probs(oldp, oldplist);  // expand the old model into per-node probabilities
+ memcpy(newplist, oldp, sizeof(vpx_prob) * UNCONSTRAINED_NODES);
+ for (i = UNCONSTRAINED_NODES, old_b = 0; i < ENTROPY_NODES; ++i)  // cost of nodes UNCONSTRAINED_NODES..ENTROPY_NODES-1
+ old_b += get_cost(ct[i], oldplist[i], n);
+ old_b += get_cost(ct[PIVOT_NODE], oldplist[PIVOT_NODE], n);  // plus the pivot node itself
+
+ bestsavings = 0;
+ bestnewp = oldp[PIVOT_NODE];
+
+ assert(stepsize > 0);
+
+ for (newp = *bestp; (newp - oldp[PIVOT_NODE]) * step_sign < 0; newp += step) {  // stops on reaching or passing the old pivot
+ if (newp < 1 || newp > 255) continue;  // skip candidates outside [1, 255]
+ newplist[PIVOT_NODE] = newp;
+ vp10_model_to_full_probs(newplist, newplist);  // expanded in place
+ for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i)
+ new_b += get_cost(ct[i], newplist[i], n);
+ new_b += get_cost(ct[PIVOT_NODE], newplist[PIVOT_NODE], n);
+ update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) + vp10_cost_upd256;  // cost of signalling the update
+ savings = old_b - new_b - update_b;
+ if (savings > bestsavings) {
+ bestsavings = savings;
+ bestnewp = newp;
+ }
+ }
+
+ *bestp = bestnewp;  // stays oldp[PIVOT_NODE] when nothing saves bits
+ return bestsavings;
+}
+#endif // CONFIG_ENTROPY
+
+void vp10_cond_prob_diff_update(vp10_writer *w, vpx_prob *oldp,  // Writes an update flag and, when it saves bits, the new probability; updates *oldp.
+ const unsigned int ct[2]) {
+ const vpx_prob upd = DIFF_UPDATE_PROB;  // probability used to code the update flag
+ vpx_prob newp = get_binary_prob(ct[0], ct[1]);  // starting candidate derived from the counts
+ const int savings =
+ vp10_prob_diff_update_savings_search(ct, *oldp, &newp, upd);  // may replace newp with a better candidate (or *oldp)
+ assert(newp >= 1);
+ if (savings > 0) {
+ vp10_write(w, 1, upd);  // flag: update follows
+ vp10_write_prob_diff_update(w, newp, *oldp);
+ *oldp = newp;
+ } else {
+ vp10_write(w, 0, upd);  // flag: no update, *oldp left unchanged
+ }
+}
+
+int vp10_cond_prob_diff_update_savings(vpx_prob *oldp,  // Returns the savings an update would give; writes nothing and leaves *oldp unchanged.
+ const unsigned int ct[2]) {
+ const vpx_prob upd = DIFF_UPDATE_PROB;
+ vpx_prob newp = get_binary_prob(ct[0], ct[1]);  // starting candidate derived from the counts
+ const int savings =
+ vp10_prob_diff_update_savings_search(ct, *oldp, &newp, upd);  // the chosen newp is discarded
+ return savings;
+}
+
+void vp10_write_primitive_symmetric(vp10_writer *w, int word,  // Writes a signed value: zero flag, then sign bit and magnitude when non-zero.
+ unsigned int abs_bits) {  // abs_bits: number of bits used for (|word| - 1)
+ if (word == 0) {
+ vp10_write_bit(w, 0);  // single 0 bit means word == 0
+ } else {
+ const int x = abs(word);
+ const int s = word < 0;  // sign bit: 1 for negative
+ vp10_write_bit(w, 1);
+ vp10_write_bit(w, s);
+ vp10_write_literal(w, x - 1, abs_bits);  // magnitude is stored minus one
+ }
+}
diff --git a/av1/encoder/subexp.h b/av1/encoder/subexp.h
new file mode 100644
index 0000000..82ce2e0
--- /dev/null
+++ b/av1/encoder/subexp.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_SUBEXP_H_
+#define VP10_ENCODER_SUBEXP_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "aom_dsp/prob.h"
+
+struct vp10_writer;
+
+void vp10_write_prob_diff_update(struct vp10_writer *w, vpx_prob newp,
+ vpx_prob oldp);
+
+void vp10_cond_prob_diff_update(struct vp10_writer *w, vpx_prob *oldp,
+ const unsigned int ct[2]);
+
+int vp10_prob_diff_update_savings_search(const unsigned int *ct, vpx_prob oldp,
+ vpx_prob *bestp, vpx_prob upd);
+
+int vp10_prob_diff_update_savings_search_model(const unsigned int *ct,
+ const vpx_prob *oldp,
+ vpx_prob *bestp, vpx_prob upd,
+ int stepsize);
+int vp10_cond_prob_diff_update_savings(vpx_prob *oldp,
+ const unsigned int ct[2]);
+
+#if CONFIG_ENTROPY
+int vp10_prob_update_search_subframe(unsigned int ct[][2], vpx_prob oldp,
+ vpx_prob *bestp, vpx_prob upd, int n);
+int vp10_prob_update_search_model_subframe(
+ unsigned int ct[ENTROPY_NODES][COEF_PROBS_BUFS][2], const vpx_prob *oldp,
+ vpx_prob *bestp, vpx_prob upd, int stepsize, int n);
+#endif // CONFIG_ENTROPY
+
+//
+// mag_bits is number of bits for magnitude. The alphabet is of size
+// 2 * 2^mag_bits + 1, symmetric around 0, where one bit is used to
+// indicate 0 or non-zero, mag_bits bits are used to indicate magnitude
+// and 1 more bit for the sign if non-zero.
+void vp10_write_primitive_symmetric(vp10_writer *w, int word,
+ unsigned int mag_bits);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_SUBEXP_H_
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
new file mode 100644
index 0000000..32490cc
--- /dev/null
+++ b/av1/encoder/temporal_filter.c
@@ -0,0 +1,683 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <limits.h>
+
+#include "av1/common/alloccommon.h"
+#include "av1/common/onyxc_int.h"
+#include "av1/common/quant_common.h"
+#include "av1/common/reconinter.h"
+#include "av1/common/odintrin.h"
+#include "av1/encoder/extend.h"
+#include "av1/encoder/firstpass.h"
+#include "av1/encoder/mcomp.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/quantize.h"
+#include "av1/encoder/ratectrl.h"
+#include "av1/encoder/segmentation.h"
+#include "av1/encoder/temporal_filter.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_mem/vpx_mem.h"
+#include "aom_ports/mem.h"
+#include "aom_ports/vpx_timer.h"
+#include "aom_scale/vpx_scale.h"
+
+static void temporal_filter_predictors_mb_c(  // Builds the Y, U and V inter predictors for one block into pred (offsets 0, 256, 512).
+ MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
+ int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
+ uint8_t *pred, struct scale_factors *scale, int x, int y) {
+ const int which_mv = 0;
+ const MV mv = { mv_row, mv_col };
+ enum mv_precision mv_precision_uv;
+ int uv_stride;
+
+#if USE_TEMPORALFILTER_12TAP
+#if CONFIG_DUAL_FILTER
+ const INTERP_FILTER interp_filter[4] = { TEMPORALFILTER_12TAP,
+ TEMPORALFILTER_12TAP,
+ TEMPORALFILTER_12TAP,
+ TEMPORALFILTER_12TAP };
+#else
+ const INTERP_FILTER interp_filter = TEMPORALFILTER_12TAP;
+#endif
+ (void)xd;  // silences the unused warning; NOTE(review): xd is still read in the high-bitdepth branch below
+#else
+ const INTERP_FILTER interp_filter = xd->mi[0]->mbmi.interp_filter;
+#endif // USE_TEMPORALFILTER_12TAP
+
+ if (uv_block_width == 8) {  // 8-wide chroma block: half (rounded up) luma stride, Q4 mv precision
+ uv_stride = (stride + 1) >> 1;
+ mv_precision_uv = MV_PRECISION_Q4;
+ } else {  // otherwise chroma uses the luma stride and Q3 precision
+ uv_stride = stride;
+ mv_precision_uv = MV_PRECISION_Q3;
+ }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {  // high-bitdepth buffer: use the highbd builders, then return
+ vp10_highbd_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv,
+ scale, 16, 16, which_mv, interp_filter,
+ MV_PRECISION_Q3, x, y, xd->bd);
+
+ vp10_highbd_build_inter_predictor(
+ u_mb_ptr, uv_stride, &pred[256], uv_block_width, &mv, scale,
+ uv_block_width, uv_block_height, which_mv, interp_filter,
+ mv_precision_uv, x, y, xd->bd);
+
+ vp10_highbd_build_inter_predictor(
+ v_mb_ptr, uv_stride, &pred[512], uv_block_width, &mv, scale,
+ uv_block_width, uv_block_height, which_mv, interp_filter,
+ mv_precision_uv, x, y, xd->bd);
+ return;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vp10_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16,  // luma: 16x16 at pred[0]
+ which_mv, interp_filter, MV_PRECISION_Q3, x, y);
+
+ vp10_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width,  // U plane at pred[256]
+ &mv, scale, uv_block_width, uv_block_height,
+ which_mv, interp_filter, mv_precision_uv, x, y);
+
+ vp10_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width,  // V plane at pred[512]
+ &mv, scale, uv_block_width, uv_block_height,
+ which_mv, interp_filter, mv_precision_uv, x, y);
+}
+
+void vp10_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride,  // Adds weighted frame2 pixels into accumulator/count; weight depends on the local frame1/frame2 difference.
+ uint8_t *frame2, unsigned int block_width,  // frame2 is read with a row pitch of block_width
+ unsigned int block_height, int strength,
+ int filter_weight, unsigned int *accumulator,
+ uint16_t *count) {
+ unsigned int i, j, k;
+ int modifier;
+ int byte = 0;  // offset of the current pixel within frame1
+ const int rounding = strength > 0 ? 1 << (strength - 1) : 0;  // rounding term for the >> strength below
+
+ for (i = 0, k = 0; i < block_height; i++) {
+ for (j = 0; j < block_width; j++, k++) {  // k indexes accumulator/count in raster order
+ int pixel_value = *frame2;
+
+ // non-local mean approach
+ int diff_sse[9] = { 0 };  // unused slots stay 0, so summing all 9 below is safe
+ int idx, idy, index = 0;
+
+ for (idy = -1; idy <= 1; ++idy) {  // 3x3 neighbourhood around the current pixel
+ for (idx = -1; idx <= 1; ++idx) {
+ int row = i + idy;
+ int col = j + idx;
+
+ if (row >= 0 && row < (int)block_height && col >= 0 &&  // only neighbours inside the block contribute
+ col < (int)block_width) {
+ int diff = frame1[byte + idy * (int)stride + idx] -
+ frame2[idy * (int)block_width + idx];
+ diff_sse[index] = diff * diff;
+ ++index;
+ }
+ }
+ }
+
+ assert(index > 0);
+
+ modifier = 0;
+ for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
+
+ modifier *= 3;  // modifier = 3 * (sum of squared diffs) / (number of valid neighbours)
+ modifier /= index;
+
+ ++frame2;
+
+ modifier += rounding;
+ modifier >>= strength;
+
+ if (modifier > 16) modifier = 16;  // clamp so the inverted weight below is never negative
+
+ modifier = 16 - modifier;  // smaller difference => larger weight
+ modifier *= filter_weight;
+
+ count[k] += modifier;
+ accumulator[k] += modifier * pixel_value;
+
+ byte++;
+ }
+
+ byte += stride - block_width;  // advance frame1 offset to the start of the next row
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void vp10_highbd_temporal_filter_apply_c(  // Same computation as vp10_temporal_filter_apply_c, reading uint16_t samples.
+ uint8_t *frame1_8, unsigned int stride, uint8_t *frame2_8,
+ unsigned int block_width, unsigned int block_height, int strength,
+ int filter_weight, unsigned int *accumulator, uint16_t *count) {
+ uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);  // the 8-bit pointer arguments are converted to 16-bit sample pointers
+ uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
+ unsigned int i, j, k;
+ int modifier;
+ int byte = 0;  // offset of the current sample within frame1
+ const int rounding = strength > 0 ? 1 << (strength - 1) : 0;  // rounding term for the >> strength below
+
+ for (i = 0, k = 0; i < block_height; i++) {
+ for (j = 0; j < block_width; j++, k++) {  // k indexes accumulator/count in raster order
+ int pixel_value = *frame2;
+
+ // non-local mean approach
+ int diff_sse[9] = { 0 };  // unused slots stay 0, so summing all 9 below is safe
+ int idx, idy, index = 0;
+
+ for (idy = -1; idy <= 1; ++idy) {  // 3x3 neighbourhood around the current sample
+ for (idx = -1; idx <= 1; ++idx) {
+ int row = i + idy;
+ int col = j + idx;
+
+ if (row >= 0 && row < (int)block_height && col >= 0 &&  // only neighbours inside the block contribute
+ col < (int)block_width) {
+ int diff = frame1[byte + idy * (int)stride + idx] -
+ frame2[idy * (int)block_width + idx];
+ diff_sse[index] = diff * diff;
+ ++index;
+ }
+ }
+ }
+
+ assert(index > 0);
+
+ modifier = 0;
+ for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
+
+ modifier *= 3;  // modifier = 3 * (sum of squared diffs) / (number of valid neighbours)
+ modifier /= index;
+
+ ++frame2;
+
+ modifier += rounding;
+ modifier >>= strength;
+
+ if (modifier > 16) modifier = 16;  // clamp so the inverted weight below is never negative
+
+ modifier = 16 - modifier;  // smaller difference => larger weight
+ modifier *= filter_weight;
+
+ count[k] += modifier;
+ accumulator[k] += modifier * pixel_value;
+
+ byte++;
+ }
+
+ byte += stride - block_width;  // advance frame1 offset to the start of the next row
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static int temporal_filter_find_matching_mb_c(VP10_COMP *cpi,  // Motion-searches a 16x16 block of frame_ptr_buf against arf_frame_buf; returns the sub-pel search result.
+ uint8_t *arf_frame_buf,
+ uint8_t *frame_ptr_buf,
+ int stride) {
+ MACROBLOCK *const x = &cpi->td.mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
+ int step_param;
+ int sadpb = x->sadperbit16;
+ int bestsme = INT_MAX;
+ int distortion;  // filled by the sub-pel search; not used afterwards
+ unsigned int sse;  // filled by the sub-pel search; not used afterwards
+ int cost_list[5];
+
+ MV best_ref_mv1 = { 0, 0 };
+ MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
+
+ // Save input state
+ struct buf_2d src = x->plane[0].src;
+ struct buf_2d pre = xd->plane[0].pre[0];
+
+ best_ref_mv1_full.col = best_ref_mv1.col >> 3;
+ best_ref_mv1_full.row = best_ref_mv1.row >> 3;
+
+ // Setup frame pointers
+ x->plane[0].src.buf = arf_frame_buf;
+ x->plane[0].src.stride = stride;
+ xd->plane[0].pre[0].buf = frame_ptr_buf;
+ xd->plane[0].pre[0].stride = stride;
+
+ step_param = mv_sf->reduce_first_step_size;
+ step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);  // cap the step parameter at MAX_MVSEARCH_STEPS - 2
+
+#if CONFIG_REF_MV
+ x->mvcost = x->mv_cost_stack[0];
+ x->nmvjointcost = x->nmv_vec_cost[0];
+ x->mvsadcost = x->mvcost;
+ x->nmvjointsadcost = x->nmvjointcost;
+#endif
+
+ // Ignore mv costing by sending NULL pointer instead of cost arrays
+ vp10_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,  // NOTE(review): no NULL is passed in this call; the comment above may be stale
+ cond_cost_list(cpi, cost_list), &cpi->fn_ptr[BLOCK_16X16], 0,
+ &best_ref_mv1);
+
+ // Ignore mv costing by sending NULL pointer instead of cost array
+ bestsme = cpi->find_fractional_mv_step(
+ x, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
+ &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step,
+ cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
+ 0);
+
+ x->e_mbd.mi[0]->bmi[0].as_mv[0] = x->best_mv;  // the chosen vector is handed back through the block's mode info
+
+ // Restore input state
+ x->plane[0].src = src;
+ xd->plane[0].pre[0] = pre;
+
+ return bestsme;
+}
+
+static void temporal_filter_iterate_c(VP10_COMP *cpi,
+ YV12_BUFFER_CONFIG **frames,
+ int frame_count, int alt_ref_index,
+ int strength,
+ struct scale_factors *scale) {
+ int byte;
+ int frame;
+ int mb_col, mb_row;
+ unsigned int filter_weight;
+ int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4;
+ int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4;
+ int mb_y_offset = 0;
+ int mb_uv_offset = 0;
+ DECLARE_ALIGNED(16, unsigned int, accumulator[16 * 16 * 3]);
+ DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]);
+ MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
+ YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
+ uint8_t *dst1, *dst2;
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]);
+ DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]);
+ uint8_t *predictor;
+#else
+ DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]);
+#endif
+ const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
+ const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x;
+
+ // Save input state
+ uint8_t *input_buffer[MAX_MB_PLANE];
+ int i;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ predictor = CONVERT_TO_BYTEPTR(predictor16);
+ } else {
+ predictor = predictor8;
+ }
+#endif
+
+ for (i = 0; i < MAX_MB_PLANE; i++) input_buffer[i] = mbd->plane[i].pre[0].buf;
+
+ for (mb_row = 0; mb_row < mb_rows; mb_row++) {
+ // Source frames are extended to 16 pixels. This is different than
+ // L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS)
+ // A 6/8 tap filter is used for motion search. This requires 2 pixels
+ // before and 3 pixels after. So the largest Y mv on a border would
+ // then be 16 - VPX_INTERP_EXTEND. The UV blocks are half the size of the
+ // Y and therefore only extended by 8. The largest mv that a UV block
+ // can support is 8 - VPX_INTERP_EXTEND. A UV mv is half of a Y mv.
+ // (16 - VPX_INTERP_EXTEND) >> 1 which is greater than
+ // 8 - VPX_INTERP_EXTEND.
+ // To keep the mv in play for both Y and UV planes the max that it
+ // can be on a border is therefore 16 - (2*VPX_INTERP_EXTEND+1).
+ cpi->td.mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VPX_INTERP_EXTEND));
+ cpi->td.mb.mv_row_max =
+ ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * VPX_INTERP_EXTEND);
+
+ for (mb_col = 0; mb_col < mb_cols; mb_col++) {
+ int i, j, k;
+ int stride;
+
+ memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
+ memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));
+
+ cpi->td.mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VPX_INTERP_EXTEND));
+ cpi->td.mb.mv_col_max =
+ ((mb_cols - 1 - mb_col) * 16) + (17 - 2 * VPX_INTERP_EXTEND);
+
+ for (frame = 0; frame < frame_count; frame++) {
+ const int thresh_low = 10000;
+ const int thresh_high = 20000;
+
+ if (frames[frame] == NULL) continue;
+
+ mbd->mi[0]->bmi[0].as_mv[0].as_mv.row = 0;
+ mbd->mi[0]->bmi[0].as_mv[0].as_mv.col = 0;
+
+ if (frame == alt_ref_index) {
+ filter_weight = 2;
+ } else {
+ // Find best match in this frame by MC
+ int err = temporal_filter_find_matching_mb_c(
+ cpi, frames[alt_ref_index]->y_buffer + mb_y_offset,
+ frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride);
+
+ // Assign higher weight to matching MB if its error
+ // score is lower. If not applying MC default behavior
+ // is to weight all MBs equally.
+ filter_weight = err < thresh_low ? 2 : err < thresh_high ? 1 : 0;
+ }
+
+ if (filter_weight != 0) {
+ // Construct the predictors
+ temporal_filter_predictors_mb_c(
+ mbd, frames[frame]->y_buffer + mb_y_offset,
+ frames[frame]->u_buffer + mb_uv_offset,
+ frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride,
+ mb_uv_width, mb_uv_height, mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
+ mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, predictor, scale,
+ mb_col * 16, mb_row * 16);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ int adj_strength = strength + 2 * (mbd->bd - 8);
+ // Apply the filter (YUV)
+ vp10_highbd_temporal_filter_apply(
+ f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16,
+ adj_strength, filter_weight, accumulator, count);
+ vp10_highbd_temporal_filter_apply(
+ f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
+ mb_uv_width, mb_uv_height, adj_strength, filter_weight,
+ accumulator + 256, count + 256);
+ vp10_highbd_temporal_filter_apply(
+ f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
+ mb_uv_width, mb_uv_height, adj_strength, filter_weight,
+ accumulator + 512, count + 512);
+ } else {
+ // Apply the filter (YUV)
+ vp10_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
+ predictor, 16, 16, strength,
+ filter_weight, accumulator, count);
+ vp10_temporal_filter_apply_c(
+ f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
+ mb_uv_width, mb_uv_height, strength, filter_weight,
+ accumulator + 256, count + 256);
+ vp10_temporal_filter_apply_c(
+ f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
+ mb_uv_width, mb_uv_height, strength, filter_weight,
+ accumulator + 512, count + 512);
+ }
+#else
+ // Apply the filter (YUV)
+ vp10_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride,
+ predictor, 16, 16, strength,
+ filter_weight, accumulator, count);
+ vp10_temporal_filter_apply_c(f->u_buffer + mb_uv_offset, f->uv_stride,
+ predictor + 256, mb_uv_width,
+ mb_uv_height, strength, filter_weight,
+ accumulator + 256, count + 256);
+ vp10_temporal_filter_apply_c(f->v_buffer + mb_uv_offset, f->uv_stride,
+ predictor + 512, mb_uv_width,
+ mb_uv_height, strength, filter_weight,
+ accumulator + 512, count + 512);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ }
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ uint16_t *dst1_16;
+ uint16_t *dst2_16;
+ // Normalize filter output to produce AltRef frame
+ dst1 = cpi->alt_ref_buffer.y_buffer;
+ dst1_16 = CONVERT_TO_SHORTPTR(dst1);
+ stride = cpi->alt_ref_buffer.y_stride;
+ byte = mb_y_offset;
+ for (i = 0, k = 0; i < 16; i++) {
+ for (j = 0; j < 16; j++, k++) {
+ dst1_16[byte] =
+ (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
+
+ // move to next pixel
+ byte++;
+ }
+
+ byte += stride - 16;
+ }
+
+ dst1 = cpi->alt_ref_buffer.u_buffer;
+ dst2 = cpi->alt_ref_buffer.v_buffer;
+ dst1_16 = CONVERT_TO_SHORTPTR(dst1);
+ dst2_16 = CONVERT_TO_SHORTPTR(dst2);
+ stride = cpi->alt_ref_buffer.uv_stride;
+ byte = mb_uv_offset;
+ for (i = 0, k = 256; i < mb_uv_height; i++) {
+ for (j = 0; j < mb_uv_width; j++, k++) {
+ int m = k + 256;
+
+ // U
+ dst1_16[byte] =
+ (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
+
+ // V
+ dst2_16[byte] =
+ (uint16_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
+
+ // move to next pixel
+ byte++;
+ }
+
+ byte += stride - mb_uv_width;
+ }
+ } else {
+ // Normalize filter output to produce AltRef frame
+ dst1 = cpi->alt_ref_buffer.y_buffer;
+ stride = cpi->alt_ref_buffer.y_stride;
+ byte = mb_y_offset;
+ for (i = 0, k = 0; i < 16; i++) {
+ for (j = 0; j < 16; j++, k++) {
+ dst1[byte] =
+ (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
+
+ // move to next pixel
+ byte++;
+ }
+ byte += stride - 16;
+ }
+
+ dst1 = cpi->alt_ref_buffer.u_buffer;
+ dst2 = cpi->alt_ref_buffer.v_buffer;
+ stride = cpi->alt_ref_buffer.uv_stride;
+ byte = mb_uv_offset;
+ for (i = 0, k = 256; i < mb_uv_height; i++) {
+ for (j = 0; j < mb_uv_width; j++, k++) {
+ int m = k + 256;
+
+ // U
+ dst1[byte] =
+ (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
+
+ // V
+ dst2[byte] =
+ (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
+
+ // move to next pixel
+ byte++;
+ }
+ byte += stride - mb_uv_width;
+ }
+ }
+#else
+ // Normalize filter output to produce AltRef frame
+ dst1 = cpi->alt_ref_buffer.y_buffer;
+ stride = cpi->alt_ref_buffer.y_stride;
+ byte = mb_y_offset;
+ for (i = 0, k = 0; i < 16; i++) {
+ for (j = 0; j < 16; j++, k++) {
+ dst1[byte] =
+ (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
+
+ // move to next pixel
+ byte++;
+ }
+ byte += stride - 16;
+ }
+
+ dst1 = cpi->alt_ref_buffer.u_buffer;
+ dst2 = cpi->alt_ref_buffer.v_buffer;
+ stride = cpi->alt_ref_buffer.uv_stride;
+ byte = mb_uv_offset;
+ for (i = 0, k = 256; i < mb_uv_height; i++) {
+ for (j = 0; j < mb_uv_width; j++, k++) {
+ int m = k + 256;
+
+ // U
+ dst1[byte] =
+ (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
+
+ // V
+ dst2[byte] =
+ (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
+
+ // move to next pixel
+ byte++;
+ }
+ byte += stride - mb_uv_width;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ mb_y_offset += 16;
+ mb_uv_offset += mb_uv_width;
+ }
+ mb_y_offset += 16 * (f->y_stride - mb_cols);
+ mb_uv_offset += mb_uv_height * f->uv_stride - mb_uv_width * mb_cols;
+ }
+
+ // Restore input state
+ for (i = 0; i < MAX_MB_PLANE; i++) mbd->plane[i].pre[0].buf = input_buffer[i];
+}
+
+// Apply buffer limits and context specific adjustments to arnr filter: picks the frame count and strength.
+static void adjust_arnr_filter(VP10_COMP *cpi, int distance, int group_boost,
+ int *arnr_frames, int *arnr_strength) {  // results are written to *arnr_frames and *arnr_strength
+ const VP10EncoderConfig *const oxcf = &cpi->oxcf;
+ const int frames_after_arf =
+ vp10_lookahead_depth(cpi->lookahead) - distance - 1;  // used below to cap frames_fwd
+ int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;  // starting value from the configured maximum
+ int frames_bwd;
+ int q, frames, strength;
+
+ // Define the forward and backwards filter limits for this arnr group.
+ if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf;
+ if (frames_fwd > distance) frames_fwd = distance;
+
+ frames_bwd = frames_fwd;
+
+ // For even length filter there is one more frame backward
+ // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
+ if (frames_bwd < distance) frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1;  // +1 only when arnr_max_frames is even
+
+ // Set the baseline active filter size.
+ frames = frames_bwd + 1 + frames_fwd;
+
+ // Adjust the strength based on the average frame q (inter-frame average once current_video_frame > 1, else key-frame).
+ if (cpi->common.current_video_frame > 1)
+ q = ((int)vp10_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
+ cpi->common.bit_depth));
+ else
+ q = ((int)vp10_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME],
+ cpi->common.bit_depth));
+ if (q > 16) {
+ strength = oxcf->arnr_strength;
+ } else {
+ strength = oxcf->arnr_strength - ((16 - q) / 2);  // reduced by half the shortfall of q below 16
+ if (strength < 0) strength = 0;
+ }
+
+ // Adjust number of frames in filter and strength based on gf boost level.
+ if (frames > group_boost / 150) {
+ frames = group_boost / 150;
+ frames += !(frames & 1);  // bump an even count to the next odd value
+ }
+
+ if (strength > group_boost / 300) {
+ strength = group_boost / 300;
+ }
+
+ // Adjustments for second level arf in multi arf case.
+ if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) {
+ strength >>= 1;  // halved whenever the current level is not GF_ARF_STD
+ }
+ }
+
+ *arnr_frames = frames;
+ *arnr_strength = strength;
+}
+
+void vp10_temporal_filter(VP10_COMP *cpi, int distance) {  // collects the lookahead frames around `distance` and filters them
+ RATE_CONTROL *const rc = &cpi->rc;
+ int frame;
+ int frames_to_blur;
+ int start_frame;
+ int strength;
+ int frames_to_blur_backward;
+ int frames_to_blur_forward;
+ struct scale_factors sf;
+ YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
+#if CONFIG_EXT_REFS
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+#endif
+
+ // Apply context specific adjustments to the arnr filter parameters.
+ adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength);
+// TODO(weitinglin): Currently, we enforce the filtering strength on
+// extra ARFs' to be zeros. We should investigate in which
+// case it is more beneficial to use non-zero strength
+// filtering.
+#if CONFIG_EXT_REFS
+ if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW) {
+ strength = 0;
+ frames_to_blur = 1;
+ }
+#endif
+
+#if CONFIG_EXT_REFS
+ if (strength == 0 && frames_to_blur == 1) {  // record per-ARF whether filtering is effectively disabled
+ cpi->is_arf_filter_off[gf_group->arf_update_idx[gf_group->index]] = 1;
+ } else {
+ cpi->is_arf_filter_off[gf_group->arf_update_idx[gf_group->index]] = 0;
+ }
+#endif
+
+ frames_to_blur_backward = (frames_to_blur / 2);  // for an even count, backward gets one more frame than forward
+ frames_to_blur_forward = ((frames_to_blur - 1) / 2);
+ start_frame = distance + frames_to_blur_forward;
+
+ // Setup frame pointers, NULL indicates frame not included in filter.
+ for (frame = 0; frame < frames_to_blur; ++frame) {
+ const int which_buffer = start_frame - frame;
+ struct lookahead_entry *buf =
+ vp10_lookahead_peek(cpi->lookahead, which_buffer);  // NOTE(review): result is used without a NULL check; confirm peek cannot fail here
+ frames[frames_to_blur - 1 - frame] = &buf->img;  // filled back to front: entry `distance` lands at index frames_to_blur_backward
+ }
+
+ if (frames_to_blur > 0) {  // NOTE(review): when this is false, sf stays uninitialized but is still passed below
+// Setup scaling factors. Scaling on each of the arnr frames is not
+// supported.
+// ARF is produced at the native frame size and resized when coded.
+#if CONFIG_VP9_HIGHBITDEPTH
+ vp10_setup_scale_factors_for_frame(
+ &sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
+ frames[0]->y_crop_width, frames[0]->y_crop_height,
+ cpi->common.use_highbitdepth);
+#else
+ vp10_setup_scale_factors_for_frame(
+ &sf, frames[0]->y_crop_width, frames[0]->y_crop_height,
+ frames[0]->y_crop_width, frames[0]->y_crop_height);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+
+ temporal_filter_iterate_c(cpi, frames, frames_to_blur,
+ frames_to_blur_backward, strength, &sf);
+}
diff --git a/av1/encoder/temporal_filter.h b/av1/encoder/temporal_filter.h
new file mode 100644
index 0000000..ce5291a
--- /dev/null
+++ b/av1/encoder/temporal_filter.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_TEMPORAL_FILTER_H_
+#define VP10_ENCODER_TEMPORAL_FILTER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_temporal_filter(VP10_COMP *cpi, int distance);  // arnr filter over lookahead frames around `distance`
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_TEMPORAL_FILTER_H_
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
new file mode 100644
index 0000000..c841fa6
--- /dev/null
+++ b/av1/encoder/tokenize.c
@@ -0,0 +1,737 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "aom_mem/vpx_mem.h"
+
+#include "av1/common/entropy.h"
+#include "av1/common/pred_common.h"
+#include "av1/common/scan.h"
+#include "av1/common/seg_common.h"
+
+#include "av1/encoder/cost.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/tokenize.h"
+
+static const TOKENVALUE dct_cat_lt_10_value_tokens[] = {
+ { 9, 63 }, { 9, 61 }, { 9, 59 }, { 9, 57 }, { 9, 55 }, { 9, 53 }, { 9, 51 },
+ { 9, 49 }, { 9, 47 }, { 9, 45 }, { 9, 43 }, { 9, 41 }, { 9, 39 }, { 9, 37 },
+ { 9, 35 }, { 9, 33 }, { 9, 31 }, { 9, 29 }, { 9, 27 }, { 9, 25 }, { 9, 23 },
+ { 9, 21 }, { 9, 19 }, { 9, 17 }, { 9, 15 }, { 9, 13 }, { 9, 11 }, { 9, 9 },
+ { 9, 7 }, { 9, 5 }, { 9, 3 }, { 9, 1 }, { 8, 31 }, { 8, 29 }, { 8, 27 },
+ { 8, 25 }, { 8, 23 }, { 8, 21 }, { 8, 19 }, { 8, 17 }, { 8, 15 }, { 8, 13 },
+ { 8, 11 }, { 8, 9 }, { 8, 7 }, { 8, 5 }, { 8, 3 }, { 8, 1 }, { 7, 15 },
+ { 7, 13 }, { 7, 11 }, { 7, 9 }, { 7, 7 }, { 7, 5 }, { 7, 3 }, { 7, 1 },
+ { 6, 7 }, { 6, 5 }, { 6, 3 }, { 6, 1 }, { 5, 3 }, { 5, 1 }, { 4, 1 },
+ { 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },
+ { 4, 0 }, { 5, 0 }, { 5, 2 }, { 6, 0 }, { 6, 2 }, { 6, 4 }, { 6, 6 },
+ { 7, 0 }, { 7, 2 }, { 7, 4 }, { 7, 6 }, { 7, 8 }, { 7, 10 }, { 7, 12 },
+ { 7, 14 }, { 8, 0 }, { 8, 2 }, { 8, 4 }, { 8, 6 }, { 8, 8 }, { 8, 10 },
+ { 8, 12 }, { 8, 14 }, { 8, 16 }, { 8, 18 }, { 8, 20 }, { 8, 22 }, { 8, 24 },
+ { 8, 26 }, { 8, 28 }, { 8, 30 }, { 9, 0 }, { 9, 2 }, { 9, 4 }, { 9, 6 },
+ { 9, 8 }, { 9, 10 }, { 9, 12 }, { 9, 14 }, { 9, 16 }, { 9, 18 }, { 9, 20 },
+ { 9, 22 }, { 9, 24 }, { 9, 26 }, { 9, 28 }, { 9, 30 }, { 9, 32 }, { 9, 34 },
+ { 9, 36 }, { 9, 38 }, { 9, 40 }, { 9, 42 }, { 9, 44 }, { 9, 46 }, { 9, 48 },
+ { 9, 50 }, { 9, 52 }, { 9, 54 }, { 9, 56 }, { 9, 58 }, { 9, 60 }, { 9, 62 }
+};
+const TOKENVALUE *vp10_dct_cat_lt_10_value_tokens =
+ dct_cat_lt_10_value_tokens +
+ (sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens)) /
+ 2;  // element count / 2: points at the central { 0, 0 } entry of the table
+// The corresponding costs of the extrabits for the tokens in the above table
+// are stored in the table below. The values are obtained from looking up the
+// entry for the specified extrabits in the table corresponding to the token
+// (as defined in cost element vp10_extra_bits)
+// e.g. {9, 63} maps to cat5_cost[63 >> 1], {1, 1} maps to sign_cost[1 >> 1]
+static const int dct_cat_lt_10_value_cost[] = {
+ 3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531, 3432, 3409, 3363, 3340, 3282,
+ 3259, 3213, 3190, 3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894, 2795, 2772,
+ 2726, 2703, 2645, 2622, 2576, 2553, 3197, 3116, 3058, 2977, 2881, 2800, 2742,
+ 2661, 2615, 2534, 2476, 2395, 2299, 2218, 2160, 2079, 2566, 2427, 2334, 2195,
+ 2023, 1884, 1791, 1652, 1893, 1696, 1453, 1256, 1229, 864, 512, 512, 512,
+ 512, 0, 512, 512, 512, 512, 864, 1229, 1256, 1453, 1696, 1893, 1652,
+ 1791, 1884, 2023, 2195, 2334, 2427, 2566, 2079, 2160, 2218, 2299, 2395, 2476,
+ 2534, 2615, 2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197, 2553, 2576, 2622,
+ 2645, 2703, 2726, 2772, 2795, 2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136,
+ 3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432, 3531, 3554, 3600, 3623, 3681,
+ 3704, 3750, 3773,
+};
+const int *vp10_dct_cat_lt_10_value_cost =
+ dct_cat_lt_10_value_cost +
+ (sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost)) / 2;  // points at the central 0 entry
+
+// Array indices are identical to previously-existing CONTEXT_NODE indices
+/* clang-format off */
+const vpx_tree_index vp10_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
+ -EOB_TOKEN, 2, // 0 = EOB
+ -ZERO_TOKEN, 4, // 1 = ZERO
+ -ONE_TOKEN, 6, // 2 = ONE
+ 8, 12, // 3 = LOW_VAL
+ -TWO_TOKEN, 10, // 4 = TWO
+ -THREE_TOKEN, -FOUR_TOKEN, // 5 = THREE
+ 14, 16, // 6 = HIGH_LOW
+ -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 7 = CAT_ONE
+ 18, 20, // 8 = CAT_THREEFOUR
+ -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 9 = CAT_THREE
+ -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 10 = CAT_FIVE
+};
+/* clang-format on */
+
+static const vpx_tree_index cat1[2] = { 0, 0 };
+static const vpx_tree_index cat2[4] = { 2, 2, 0, 0 };
+static const vpx_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 };
+static const vpx_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
+static const vpx_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
+static const vpx_tree_index cat6[28] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
+ 12, 12, 14, 14, 16, 16, 18, 18, 20, 20,
+ 22, 22, 24, 24, 26, 26, 0, 0 };
+
+static const int16_t zero_cost[] = { 0 };
+static const int16_t sign_cost[1] = { 512 };
+static const int16_t cat1_cost[1 << 1] = { 864, 1229 };
+static const int16_t cat2_cost[1 << 2] = { 1256, 1453, 1696, 1893 };
+static const int16_t cat3_cost[1 << 3] = { 1652, 1791, 1884, 2023,
+ 2195, 2334, 2427, 2566 };
+static const int16_t cat4_cost[1 << 4] = { 2079, 2160, 2218, 2299, 2395, 2476,
+ 2534, 2615, 2661, 2742, 2800, 2881,
+ 2977, 3058, 3116, 3197 };
+static const int16_t cat5_cost[1 << 5] = {
+ 2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795, 2894, 2917, 2963,
+ 2986, 3044, 3067, 3113, 3136, 3190, 3213, 3259, 3282, 3340, 3363,
+ 3409, 3432, 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773
+};
+const int16_t vp10_cat6_low_cost[256] = {
+ 3378, 3390, 3401, 3413, 3435, 3447, 3458, 3470, 3517, 3529, 3540, 3552, 3574,
+ 3586, 3597, 3609, 3671, 3683, 3694, 3706, 3728, 3740, 3751, 3763, 3810, 3822,
+ 3833, 3845, 3867, 3879, 3890, 3902, 3973, 3985, 3996, 4008, 4030, 4042, 4053,
+ 4065, 4112, 4124, 4135, 4147, 4169, 4181, 4192, 4204, 4266, 4278, 4289, 4301,
+ 4323, 4335, 4346, 4358, 4405, 4417, 4428, 4440, 4462, 4474, 4485, 4497, 4253,
+ 4265, 4276, 4288, 4310, 4322, 4333, 4345, 4392, 4404, 4415, 4427, 4449, 4461,
+ 4472, 4484, 4546, 4558, 4569, 4581, 4603, 4615, 4626, 4638, 4685, 4697, 4708,
+ 4720, 4742, 4754, 4765, 4777, 4848, 4860, 4871, 4883, 4905, 4917, 4928, 4940,
+ 4987, 4999, 5010, 5022, 5044, 5056, 5067, 5079, 5141, 5153, 5164, 5176, 5198,
+ 5210, 5221, 5233, 5280, 5292, 5303, 5315, 5337, 5349, 5360, 5372, 4988, 5000,
+ 5011, 5023, 5045, 5057, 5068, 5080, 5127, 5139, 5150, 5162, 5184, 5196, 5207,
+ 5219, 5281, 5293, 5304, 5316, 5338, 5350, 5361, 5373, 5420, 5432, 5443, 5455,
+ 5477, 5489, 5500, 5512, 5583, 5595, 5606, 5618, 5640, 5652, 5663, 5675, 5722,
+ 5734, 5745, 5757, 5779, 5791, 5802, 5814, 5876, 5888, 5899, 5911, 5933, 5945,
+ 5956, 5968, 6015, 6027, 6038, 6050, 6072, 6084, 6095, 6107, 5863, 5875, 5886,
+ 5898, 5920, 5932, 5943, 5955, 6002, 6014, 6025, 6037, 6059, 6071, 6082, 6094,
+ 6156, 6168, 6179, 6191, 6213, 6225, 6236, 6248, 6295, 6307, 6318, 6330, 6352,
+ 6364, 6375, 6387, 6458, 6470, 6481, 6493, 6515, 6527, 6538, 6550, 6597, 6609,
+ 6620, 6632, 6654, 6666, 6677, 6689, 6751, 6763, 6774, 6786, 6808, 6820, 6831,
+ 6843, 6890, 6902, 6913, 6925, 6947, 6959, 6970, 6982
+};
+const int vp10_cat6_high_cost[64] = {
+ 88, 2251, 2727, 4890, 3148, 5311, 5787, 7950, 3666, 5829, 6305,
+ 8468, 6726, 8889, 9365, 11528, 3666, 5829, 6305, 8468, 6726, 8889,
+ 9365, 11528, 7244, 9407, 9883, 12046, 10304, 12467, 12943, 15106, 3666,
+ 5829, 6305, 8468, 6726, 8889, 9365, 11528, 7244, 9407, 9883, 12046,
+ 10304, 12467, 12943, 15106, 7244, 9407, 9883, 12046, 10304, 12467, 12943,
+ 15106, 10822, 12985, 13461, 15624, 13882, 16045, 16521, 18684
+};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+const int vp10_cat6_high10_high_cost[256] = {
+ 94, 2257, 2733, 4896, 3154, 5317, 5793, 7956, 3672, 5835, 6311,
+ 8474, 6732, 8895, 9371, 11534, 3672, 5835, 6311, 8474, 6732, 8895,
+ 9371, 11534, 7250, 9413, 9889, 12052, 10310, 12473, 12949, 15112, 3672,
+ 5835, 6311, 8474, 6732, 8895, 9371, 11534, 7250, 9413, 9889, 12052,
+ 10310, 12473, 12949, 15112, 7250, 9413, 9889, 12052, 10310, 12473, 12949,
+ 15112, 10828, 12991, 13467, 15630, 13888, 16051, 16527, 18690, 4187, 6350,
+ 6826, 8989, 7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825,
+ 12988, 13464, 15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627,
+ 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404,
+ 12567, 10825, 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566,
+ 17042, 19205, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921,
+ 17084, 17560, 19723, 17981, 20144, 20620, 22783, 4187, 6350, 6826, 8989,
+ 7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825, 12988, 13464,
+ 15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 11343, 13506,
+ 13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404, 12567, 10825,
+ 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205,
+ 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921, 17084, 17560,
+ 19723, 17981, 20144, 20620, 22783, 8280, 10443, 10919, 13082, 11340, 13503,
+ 13979, 16142, 11858, 14021, 14497, 16660, 14918, 17081, 17557, 19720, 11858,
+ 14021, 14497, 16660, 14918, 17081, 17557, 19720, 15436, 17599, 18075, 20238,
+ 18496, 20659, 21135, 23298, 11858, 14021, 14497, 16660, 14918, 17081, 17557,
+ 19720, 15436, 17599, 18075, 20238, 18496, 20659, 21135, 23298, 15436, 17599,
+ 18075, 20238, 18496, 20659, 21135, 23298, 19014, 21177, 21653, 23816, 22074,
+ 24237, 24713, 26876
+};
+const int vp10_cat6_high12_high_cost[1024] = {
+ 100, 2263, 2739, 4902, 3160, 5323, 5799, 7962, 3678, 5841, 6317,
+ 8480, 6738, 8901, 9377, 11540, 3678, 5841, 6317, 8480, 6738, 8901,
+ 9377, 11540, 7256, 9419, 9895, 12058, 10316, 12479, 12955, 15118, 3678,
+ 5841, 6317, 8480, 6738, 8901, 9377, 11540, 7256, 9419, 9895, 12058,
+ 10316, 12479, 12955, 15118, 7256, 9419, 9895, 12058, 10316, 12479, 12955,
+ 15118, 10834, 12997, 13473, 15636, 13894, 16057, 16533, 18696, 4193, 6356,
+ 6832, 8995, 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831,
+ 12994, 13470, 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633,
+ 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410,
+ 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572,
+ 17048, 19211, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927,
+ 17090, 17566, 19729, 17987, 20150, 20626, 22789, 4193, 6356, 6832, 8995,
+ 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470,
+ 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512,
+ 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831,
+ 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211,
+ 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566,
+ 19729, 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509,
+ 13985, 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864,
+ 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244,
+ 18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563,
+ 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605,
+ 18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080,
+ 24243, 24719, 26882, 4193, 6356, 6832, 8995, 7253, 9416, 9892, 12055,
+ 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 7771, 9934, 10410,
+ 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572,
+ 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349,
+ 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349, 13512, 13988, 16151,
+ 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729, 17987, 20150, 20626,
+ 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027,
+ 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924,
+ 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304,
+ 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081,
+ 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665,
+ 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 8286,
+ 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666,
+ 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563,
+ 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027,
+ 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502,
+ 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304,
+ 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018,
+ 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180,
+ 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535,
+ 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759,
+ 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234,
+ 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276,
+ 25752, 27915, 26173, 28336, 28812, 30975, 4193, 6356, 6832, 8995, 7253,
+ 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633,
+ 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988,
+ 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994,
+ 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349,
+ 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729,
+ 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985,
+ 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027,
+ 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502,
+ 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726,
+ 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081,
+ 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243,
+ 24719, 26882, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864,
+ 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666,
+ 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141,
+ 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605,
+ 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502,
+ 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882,
+ 12379, 14542, 15018, 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596,
+ 20759, 19017, 21180, 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180,
+ 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957,
+ 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337,
+ 22595, 24758, 25234, 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234,
+ 27397, 23113, 25276, 25752, 27915, 26173, 28336, 28812, 30975, 8286, 10449,
+ 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 14924,
+ 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726,
+ 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503,
+ 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665,
+ 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020,
+ 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018, 17181,
+ 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656,
+ 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698,
+ 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017,
+ 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397,
+ 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752,
+ 27915, 26173, 28336, 28812, 30975, 12379, 14542, 15018, 17181, 15439, 17602,
+ 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 15957,
+ 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337,
+ 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017, 21180, 21656,
+ 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 19535, 21698,
+ 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752, 27915, 26173,
+ 28336, 28812, 30975, 16472, 18635, 19111, 21274, 19532, 21695, 22171, 24334,
+ 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 20050, 22213, 22689,
+ 24852, 23110, 25273, 25749, 27912, 23628, 25791, 26267, 28430, 26688, 28851,
+ 29327, 31490, 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 23628,
+ 25791, 26267, 28430, 26688, 28851, 29327, 31490, 23628, 25791, 26267, 28430,
+ 26688, 28851, 29327, 31490, 27206, 29369, 29845, 32008, 30266, 32429, 32905,
+ 35068
+};
+#endif
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static const vpx_tree_index cat1_high10[2] = { 0, 0 };
+static const vpx_tree_index cat2_high10[4] = { 2, 2, 0, 0 };
+static const vpx_tree_index cat3_high10[6] = { 2, 2, 4, 4, 0, 0 };
+static const vpx_tree_index cat4_high10[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
+static const vpx_tree_index cat5_high10[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
+static const vpx_tree_index cat6_high10[32] = { 2, 2, 4, 4, 6, 6, 8, 8,
+ 10, 10, 12, 12, 14, 14, 16, 16,
+ 18, 18, 20, 20, 22, 22, 24, 24,
+ 26, 26, 28, 28, 30, 30, 0, 0 };
+static const vpx_tree_index cat1_high12[2] = { 0, 0 };
+static const vpx_tree_index cat2_high12[4] = { 2, 2, 0, 0 };
+static const vpx_tree_index cat3_high12[6] = { 2, 2, 4, 4, 0, 0 };
+static const vpx_tree_index cat4_high12[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
+static const vpx_tree_index cat5_high12[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
+static const vpx_tree_index cat6_high12[36] = {
+ 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, 16, 16, 18, 18,
+ 20, 20, 22, 22, 24, 24, 26, 26, 28, 28, 30, 30, 32, 32, 34, 34, 0, 0
+};
+#endif
+
+const vp10_extra_bit vp10_extra_bits[ENTROPY_TOKENS] = {
+ { 0, 0, 0, 0, zero_cost }, // ZERO_TOKEN
+ { 0, 0, 0, 1, sign_cost }, // ONE_TOKEN
+ { 0, 0, 0, 2, sign_cost }, // TWO_TOKEN
+ { 0, 0, 0, 3, sign_cost }, // THREE_TOKEN
+ { 0, 0, 0, 4, sign_cost }, // FOUR_TOKEN
+ { cat1, vp10_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost }, // CATEGORY1_TOKEN
+ { cat2, vp10_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost }, // CATEGORY2_TOKEN
+ { cat3, vp10_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost }, // CATEGORY3_TOKEN
+ { cat4, vp10_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost }, // CATEGORY4_TOKEN
+ { cat5, vp10_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost }, // CATEGORY5_TOKEN
+ { cat6, vp10_cat6_prob, 14, CAT6_MIN_VAL, 0 }, // CATEGORY6_TOKEN
+ { 0, 0, 0, 0, zero_cost } // EOB_TOKEN
+};
+
+#if CONFIG_VP9_HIGHBITDEPTH
+const vp10_extra_bit vp10_extra_bits_high10[ENTROPY_TOKENS] = {
+ { 0, 0, 0, 0, zero_cost }, // ZERO
+ { 0, 0, 0, 1, sign_cost }, // ONE
+ { 0, 0, 0, 2, sign_cost }, // TWO
+ { 0, 0, 0, 3, sign_cost }, // THREE
+ { 0, 0, 0, 4, sign_cost }, // FOUR
+ { cat1_high10, vp10_cat1_prob_high10, 1, CAT1_MIN_VAL, cat1_cost }, // CAT1
+ { cat2_high10, vp10_cat2_prob_high10, 2, CAT2_MIN_VAL, cat2_cost }, // CAT2
+ { cat3_high10, vp10_cat3_prob_high10, 3, CAT3_MIN_VAL, cat3_cost }, // CAT3
+ { cat4_high10, vp10_cat4_prob_high10, 4, CAT4_MIN_VAL, cat4_cost }, // CAT4
+ { cat5_high10, vp10_cat5_prob_high10, 5, CAT5_MIN_VAL, cat5_cost }, // CAT5
+ { cat6_high10, vp10_cat6_prob_high10, 16, CAT6_MIN_VAL, 0 }, // CAT6
+ { 0, 0, 0, 0, zero_cost } // EOB
+};
+const vp10_extra_bit vp10_extra_bits_high12[ENTROPY_TOKENS] = {
+ { 0, 0, 0, 0, zero_cost }, // ZERO
+ { 0, 0, 0, 1, sign_cost }, // ONE
+ { 0, 0, 0, 2, sign_cost }, // TWO
+ { 0, 0, 0, 3, sign_cost }, // THREE
+ { 0, 0, 0, 4, sign_cost }, // FOUR
+ { cat1_high12, vp10_cat1_prob_high12, 1, CAT1_MIN_VAL, cat1_cost }, // CAT1
+ { cat2_high12, vp10_cat2_prob_high12, 2, CAT2_MIN_VAL, cat2_cost }, // CAT2
+ { cat3_high12, vp10_cat3_prob_high12, 3, CAT3_MIN_VAL, cat3_cost }, // CAT3
+ { cat4_high12, vp10_cat4_prob_high12, 4, CAT4_MIN_VAL, cat4_cost }, // CAT4
+ { cat5_high12, vp10_cat5_prob_high12, 5, CAT5_MIN_VAL, cat5_cost }, // CAT5
+ { cat6_high12, vp10_cat6_prob_high12, 18, CAT6_MIN_VAL, 0 }, // CAT6
+ { 0, 0, 0, 0, zero_cost } // EOB
+};
+#endif
+
+#if !CONFIG_ANS
+const struct vp10_token vp10_coef_encodings[ENTROPY_TOKENS] = {
+ { 2, 2 }, { 6, 3 }, { 28, 5 }, { 58, 6 }, { 59, 6 }, { 60, 6 },
+ { 61, 6 }, { 124, 7 }, { 125, 7 }, { 126, 7 }, { 127, 7 }, { 0, 1 }
+};
+#endif // !CONFIG_ANS
+
+struct tokenize_b_args {  // handed to the per-block callbacks through their `void *arg` parameter
+ VP10_COMP *cpi;
+ ThreadData *td;
+ TOKENEXTRA **tp;  // tokenize_b starts storing tokens at *tp
+};
+
+static void set_entropy_context_b(int plane, int block, int blk_row,
+                                  int blk_col, BLOCK_SIZE plane_bsize,
+                                  TX_SIZE tx_size, void *arg) {
+  // Per-block callback: `arg` is a struct tokenize_b_args. Forwards to
+  // vp10_set_contexts with a flag saying whether eobs[block] is positive.
+  const struct tokenize_b_args *const ctx = arg;
+  MACROBLOCK *const mb = &ctx->td->mb;
+  MACROBLOCKD *const mbd = &mb->e_mbd;
+  const int has_coeffs = mb->plane[plane].eobs[block] > 0;
+  vp10_set_contexts(mbd, &mbd->plane[plane], plane_bsize, tx_size, has_coeffs,
+                    blk_col, blk_row);
+}
+
+static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree,
+#if CONFIG_ANS
+                             const rans_dec_lut *token_cdf,
+#endif  // CONFIG_ANS
+                             int32_t extra, uint8_t token,
+                             uint8_t skip_eob_node, unsigned int *counts) {
+  TOKENEXTRA *const slot = (*t)++;  // take the current slot and advance *t
+  slot->token = token;
+  slot->extra = extra;
+  slot->context_tree = context_tree;
+#if CONFIG_ANS
+  slot->token_cdf = token_cdf;
+#endif  // CONFIG_ANS
+  slot->skip_eob_node = skip_eob_node;
+  counts[token] += 1;
+}
+
+static INLINE void add_token_no_extra(TOKENEXTRA **t,
+                                      const vpx_prob *context_tree,
+                                      uint8_t token, uint8_t skip_eob_node,
+                                      unsigned int *counts) {
+  TOKENEXTRA *const slot = (*t)++;  // take the slot, advance *t; `extra` untouched
+  slot->token = token;
+  slot->context_tree = context_tree;
+  slot->skip_eob_node = skip_eob_node;
+  counts[token] += 1;
+}
+
+static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
+                             TX_SIZE tx_size) {
+  if (segfeature_active(seg, segment_id, SEG_LVL_SKIP)) return 0;
+  return num_4x4_blocks_txsize_lookup[tx_size] << 4;  // 16 per 4x4 unit
+}
+
+// Emits one token per palette color index, skipping the first pixel (0, 0).
+void vp10_tokenize_palette_sb(struct ThreadData *const td, BLOCK_SIZE bsize,
+                              int plane, TOKENEXTRA **t) {
+  MACROBLOCK *const x = &td->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  uint8_t *color_map = xd->plane[plane != 0].color_index_map;
+  PALETTE_MODE_INFO *pmi = &xd->mi[0]->mbmi.palette_mode_info;
+  int n = pmi->palette_size[plane != 0];
+  int i, j, k, color_order[PALETTE_MAX_SIZE];
+  const int rows = (4 * num_4x4_blocks_high_lookup[bsize]) >>
+                   (xd->plane[plane != 0].subsampling_y);
+  const int cols = (4 * num_4x4_blocks_wide_lookup[bsize]) >>
+                   (xd->plane[plane != 0].subsampling_x);
+  const vpx_prob (*const probs)[PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] =
+      plane == 0 ? vp10_default_palette_y_color_prob
+                 : vp10_default_palette_uv_color_prob;
+
+  for (i = 0; i < rows; ++i) {
+    for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
+      int color_new_idx = -1;  // reset per pixel so the assert is meaningful
+      const int color_ctx =
+          vp10_get_palette_color_context(color_map, cols, i, j, n, color_order);
+      for (k = 0; k < n; ++k)
+        if (color_map[i * cols + j] == color_order[k]) {
+          color_new_idx = k;
+          break;
+        }
+      assert(color_new_idx >= 0 && color_new_idx < n);
+      (*t)->token = color_new_idx;
+      (*t)->context_tree = probs[n - 2][color_ctx];
+      (*t)->skip_eob_node = 0;
+      ++(*t);
+    }
+  }
+}
+// Emits the tokens for one transform block and updates counts and contexts.
+static void tokenize_b(int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
+ struct tokenize_b_args *const args = arg;
+ VP10_COMP *cpi = args->cpi;
+ ThreadData *const td = args->td;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ TOKENEXTRA **tp = args->tp;
+ uint8_t token_cache[MAX_TX_SQUARE];
+ struct macroblock_plane *p = &x->plane[plane];
+ struct macroblockd_plane *pd = &xd->plane[plane];
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ int pt; /* near block/prev token context index */
+ int c;
+ TOKENEXTRA *t = *tp; /* store tokens starting here */
+ int eob = p->eobs[block];
+ const PLANE_TYPE type = pd->plane_type;
+ const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+#if CONFIG_SUPERTX
+ const int segment_id = VPXMIN(mbmi->segment_id, mbmi->segment_id_supertx);
+#else
+ const int segment_id = mbmi->segment_id;
+#endif // CONFIG_SUPERTX
+ const int16_t *scan, *nb;
+ const TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
+ const scan_order *const so = get_scan(tx_size, tx_type, is_inter_block(mbmi));
+ const int ref = is_inter_block(mbmi);
+ unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
+ td->rd_counts.coef_counts[txsize_sqr_map[tx_size]][type][ref];
+#if CONFIG_ENTROPY
+ vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
+ cpi->subframe_stats.coef_probs_buf[cpi->common.coef_probs_update_idx]
+ [txsize_sqr_map[tx_size]][type][ref];
+#else
+ vpx_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
+ cpi->common.fc->coef_probs[txsize_sqr_map[tx_size]][type][ref];
+#endif // CONFIG_ENTROPY
+#if CONFIG_ANS
+ rans_dec_lut (*const coef_cdfs)[COEFF_CONTEXTS] =
+ cpi->common.fc->coef_cdfs[txsize_sqr_map[tx_size]][type][ref];
+#endif // CONFIG_ANS
+ unsigned int (*const eob_branch)[COEFF_CONTEXTS] =
+ td->counts->eob_branch[txsize_sqr_map[tx_size]][type][ref];
+ const uint8_t *const band = get_band_translate(tx_size);
+ const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size);
+ int skip_eob = 0;
+ int16_t token;
+ EXTRABIT extra;
+ pt = get_entropy_context(tx_size, pd->above_context + blk_col,
+ pd->left_context + blk_row);
+ scan = so->scan;
+ nb = so->neighbors;
+ c = 0;
+ // One token per coefficient, visited in scan order, up to the block's eob.
+ while (c < eob) {
+ const int v = qcoeff[scan[c]];
+ eob_branch[band[c]][pt] += !skip_eob; // not counted right after a ZERO_TOKEN
+
+ vp10_get_token_extra(v, &token, &extra);
+
+ add_token(&t, coef_probs[band[c]][pt],
+#if CONFIG_ANS
+ (const rans_dec_lut *)&coef_cdfs[band[c]][pt],
+#endif // CONFIG_ANS
+ extra, (uint8_t)token, (uint8_t)skip_eob, counts[band[c]][pt]);
+ // Cache this token's energy class; it feeds the next position's context.
+ token_cache[scan[c]] = vp10_pt_energy_class[token];
+ ++c;
+ pt = get_coef_context(nb, token_cache, c);
+ skip_eob = (token == ZERO_TOKEN);
+ }
+ if (c < seg_eob) { // explicit EOB_TOKEN unless seg_eob coefficients were coded
+ add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN, 0,
+ counts[band[c]][pt]);
+ ++eob_branch[band[c]][pt];
+ }
+
+ *tp = t;
+
+ vp10_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, blk_col, blk_row);
+}
+
+struct is_skippable_args {
+ uint16_t *eobs; // per-block eob values, indexed by the callback's 'block'
+ int *skippable; // result flag updated in place by the callbacks
+};
+static void is_skippable(int plane, int block, int blk_row, int blk_col,
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) {
+  struct is_skippable_args *const args = argv;
+  (void)plane;
+  (void)blk_row;
+  (void)blk_col;
+  (void)plane_bsize;
+  (void)tx_size;
+  *args->skippable &= !args->eobs[block];  // any non-zero eob clears the flag
+}
+
+// TODO(yaowu): rewrite and optimize this function to remove the usage of
+// vp10_foreach_transformed_block_in_plane() and simplify is_skippable().
+int vp10_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
+ int result = 1; // stays 1 only if every visited block has eob == 0
+ struct is_skippable_args args = { x->plane[plane].eobs, &result };
+ vp10_foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane, is_skippable,
+ &args);
+ return result;
+}
+
+static void has_high_freq_coeff(int plane, int block, int blk_row, int blk_col,
+                                BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+                                void *argv) {
+  struct is_skippable_args *const args = argv;
+  // An eob above this limit sets the flag: 3 for TX_4X4, 10 otherwise.
+  const int eob_limit = (tx_size == TX_4X4) ? 3 : 10;
+  (void)plane;
+  (void)plane_bsize;
+  (void)blk_row;
+  (void)blk_col;
+  *args->skippable |= args->eobs[block] > eob_limit;
+}
+
+int vp10_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
+ int result = 0; // set to 1 by has_high_freq_coeff() once a block exceeds its eob limit
+ struct is_skippable_args args = { x->plane[plane].eobs, &result };
+ vp10_foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane,
+ has_high_freq_coeff, &args);
+ return result;
+}
+
+#if CONFIG_VAR_TX
+// Recursively walks the transform partition of one plane; each leaf is
+// tokenized, or in a dry run only has its entropy context updated.
+void tokenize_tx(ThreadData *td, TOKENEXTRA **t, int dry_run, TX_SIZE tx_size,
+                 BLOCK_SIZE plane_bsize, int blk_row, int blk_col, int block,
+                 int plane, void *arg) {
+  MACROBLOCK *const x = &td->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+  const int tx_row = blk_row >> (1 - pd->subsampling_y);
+  const int tx_col = blk_col >> (1 - pd->subsampling_x);
+  TX_SIZE plane_tx_size;
+  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+  assert(tx_size < TX_SIZES);
+
+  // Negative mb_to_*_edge values reduce the number of 4x4 units to visit.
+  if (xd->mb_to_bottom_edge < 0)
+    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+  if (xd->mb_to_right_edge < 0)
+    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
+
+  plane_tx_size = plane ? get_uv_tx_size_impl(
+                              mbmi->inter_tx_size[tx_row][tx_col], bsize, 0, 0)
+                        : mbmi->inter_tx_size[tx_row][tx_col];
+
+  if (tx_size == plane_tx_size) {
+    // Leaf: own name for the sb_type-derived size, so it no longer shadows.
+    const BLOCK_SIZE leaf_bsize = get_plane_block_size(mbmi->sb_type, pd);
+    if (!dry_run)
+      tokenize_b(plane, block, blk_row, blk_col, leaf_bsize, tx_size, arg);
+    else
+      set_entropy_context_b(plane, block, blk_row, blk_col, leaf_bsize,
+                            tx_size, arg);
+  } else {
+    // Split into four quadrants, each one transform size smaller.
+    const int bsl = b_width_log2_lookup[bsize] - 1;
+    int i;
+
+    assert(bsl >= 0);
+    for (i = 0; i < 4; ++i) {
+      const int offsetr = blk_row + ((i >> 1) << bsl);
+      const int offsetc = blk_col + ((i & 0x01) << bsl);
+      const int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
+
+      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
+      tokenize_tx(td, t, dry_run, tx_size - 1, plane_bsize, offsetr, offsetc,
+                  block + i * step, plane, arg);
+    }
+  }
+}
+// Tokenizes (or dry-runs) every plane of a block through tokenize_tx().
+void vp10_tokenize_sb_inter(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+ int dry_run, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ VP10_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ TOKENEXTRA *t_backup = *t;
+ const int ctx = vp10_get_skip_context(xd);
+ const int skip_inc =
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
+ struct tokenize_b_args arg = { cpi, td, t };
+ int plane;
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+ // Skipped block: count it (real runs only) and reset the skip context.
+ if (mbmi->skip) {
+ if (!dry_run) td->counts->skip[ctx][1] += skip_inc;
+ reset_skip_context(xd, bsize);
+ if (dry_run) *t = t_backup;
+ return;
+ }
+
+ if (!dry_run)
+ td->counts->skip[ctx][0] += skip_inc;
+ else
+ *t = t_backup;
+ // Visit each plane in steps of its largest transform size.
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+ const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+ int bh = num_4x4_blocks_wide_lookup[txb_size];
+ int idx, idy;
+ int block = 0;
+ int step = num_4x4_blocks_txsize_lookup[max_tx_size];
+ for (idy = 0; idy < mi_height; idy += bh) {
+ for (idx = 0; idx < mi_width; idx += bh) {
+ tokenize_tx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx, block,
+ plane, &arg);
+ block += step;
+ }
+ }
+ // Only real runs terminate the plane's tokens with EOSB_TOKEN.
+ if (!dry_run) {
+ (*t)->token = EOSB_TOKEN;
+ (*t)++;
+ }
+ }
+}
+#endif // CONFIG_VAR_TX
+
+// Tokenizes all planes of the current block, or only refreshes the entropy
+// contexts when dry_run is set. Skipped blocks reset the skip context instead.
+void vp10_tokenize_sb(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+                      int dry_run, BLOCK_SIZE bsize) {
+  VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &td->mb.e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const int ctx = vp10_get_skip_context(xd);
+  const int skip_inc =
+      !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
+  struct tokenize_b_args arg = { cpi, td, t };
+  int plane;
+
+  if (mbmi->skip) {
+    if (!dry_run) td->counts->skip[ctx][1] += skip_inc;
+    reset_skip_context(xd, bsize);
+    return;
+  }
+  if (dry_run) {
+    vp10_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
+    return;
+  }
+
+  td->counts->skip[ctx][0] += skip_inc;
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    vp10_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_b,
+                                            &arg);
+    (*t)->token = EOSB_TOKEN;  // terminates this plane's run of tokens
+    (*t)++;
+  }
+}
+
+#if CONFIG_SUPERTX
+// SUPERTX flavour of vp10_tokenize_sb(); dry runs restore *t before returning.
+void vp10_tokenize_sb_supertx(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+                              int dry_run, BLOCK_SIZE bsize) {
+  VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &td->mb.e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  TOKENEXTRA *const t_backup = *t;
+  const int ctx = vp10_get_skip_context(xd);
+  const int skip_inc =
+      !segfeature_active(&cm->seg, mbmi->segment_id_supertx, SEG_LVL_SKIP);
+  struct tokenize_b_args arg = { cpi, td, t };
+  int plane;
+
+  if (mbmi->skip) {
+    if (!dry_run) td->counts->skip[ctx][1] += skip_inc;
+    reset_skip_context(xd, bsize);
+    if (dry_run) *t = t_backup;
+    return;
+  }
+  if (dry_run) {
+    vp10_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
+    *t = t_backup;
+    return;
+  }
+  td->counts->skip[ctx][0] += skip_inc;
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    vp10_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_b,
+                                            &arg);
+    (*t)->token = EOSB_TOKEN;
+    (*t)++;
+  }
+}
+#endif // CONFIG_SUPERTX
diff --git a/av1/encoder/tokenize.h b/av1/encoder/tokenize.h
new file mode 100644
index 0000000..7ae8676
--- /dev/null
+++ b/av1/encoder/tokenize.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_TOKENIZE_H_
+#define VP10_ENCODER_TOKENIZE_H_
+
+#include "av1/common/entropy.h"
+
+#include "av1/encoder/block.h"
+#include "av1/encoder/treewriter.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define EOSB_TOKEN 127 // Not signalled, encoder only
+
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef int32_t EXTRABIT; // 32-bit extra-bits value in high-bit-depth builds
+#else
+typedef int16_t EXTRABIT; // 16-bit extra-bits value otherwise
+#endif
+
+typedef struct {
+ int16_t token; // token id, as returned by vp10_get_token()
+ EXTRABIT extra; // extra bits paired with the token in vp10_get_token_extra()
+} TOKENVALUE;
+
+typedef struct {
+ const vpx_prob *context_tree; // probabilities selected for this token
+#if CONFIG_ANS
+ const rans_dec_lut *token_cdf;
+#endif // CONFIG_ANS
+ EXTRABIT extra; // extra bits; not written by add_token_no_extra()
+ uint8_t token;
+ uint8_t skip_eob_node; // tokenize_b(): 1 when the previous token was ZERO_TOKEN
+} TOKENEXTRA;
+
+extern const vpx_tree_index vp10_coef_tree[];
+extern const vpx_tree_index vp10_coef_con_tree[];
+#if !CONFIG_ANS
+extern const struct vp10_token vp10_coef_encodings[];
+#endif // !CONFIG_ANS
+
+int vp10_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
+int vp10_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
+
+struct VP10_COMP;
+struct ThreadData;
+
+#if CONFIG_VAR_TX
+void vp10_tokenize_sb_inter(struct VP10_COMP *cpi, struct ThreadData *td,
+ TOKENEXTRA **t, int dry_run, int mi_row, int mi_col,
+ BLOCK_SIZE bsize);
+#endif
+
+void vp10_tokenize_palette_sb(struct ThreadData *const td, BLOCK_SIZE bsize,
+ int plane, TOKENEXTRA **t);
+void vp10_tokenize_sb(struct VP10_COMP *cpi, struct ThreadData *td,
+ TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
+#if CONFIG_SUPERTX
+void vp10_tokenize_sb_supertx(struct VP10_COMP *cpi, struct ThreadData *td,
+ TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
+#endif
+
+extern const int16_t *vp10_dct_value_cost_ptr;
+/* TODO: The Token field should be broken out into a separate char array to
+ * improve cache locality, since it's needed for costing when the rest of the
+ * fields are not.
+ */
+extern const TOKENVALUE *vp10_dct_value_tokens_ptr;
+extern const TOKENVALUE *vp10_dct_cat_lt_10_value_tokens;
+extern const int *vp10_dct_cat_lt_10_value_cost;
+extern const int16_t vp10_cat6_low_cost[256];
+extern const int vp10_cat6_high_cost[64];
+extern const int vp10_cat6_high10_high_cost[256];
+extern const int vp10_cat6_high12_high_cost[1024];
+static INLINE int vp10_get_cost(int16_t token, EXTRABIT extrabits,
+                                const int *cat6_high_table) {
+  const int mag = extrabits >> 1;  // drop bit 0 before indexing the tables
+  if (token != CATEGORY6_TOKEN) return vp10_extra_bits[token].cost[mag];
+  // CAT6 cost = low-8-bit table + caller-supplied table for the bits above.
+  return vp10_cat6_low_cost[mag & 0xff] + cat6_high_table[mag >> 8];
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static INLINE const int *vp10_get_high_cost_table(int bit_depth) {
+ return bit_depth == 8 ? vp10_cat6_high_cost // 8 -> base table
+ : (bit_depth == 10 ? vp10_cat6_high10_high_cost // 10 -> high10 table
+ : vp10_cat6_high12_high_cost); // anything else -> high12 table
+}
+#else
+static INLINE const int *vp10_get_high_cost_table(int bit_depth) {
+ (void)bit_depth; // ignored: this configuration always returns the base table
+ return vp10_cat6_high_cost;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static INLINE void vp10_get_token_extra(int v, int16_t *token,
+                                        EXTRABIT *extra) {
+  // Below CAT6 the token/extra pair is a table lookup indexed directly by v.
+  if (v > -CAT6_MIN_VAL && v < CAT6_MIN_VAL) {
+    *token = vp10_dct_cat_lt_10_value_tokens[v].token;
+    *extra = vp10_dct_cat_lt_10_value_tokens[v].extra;
+    return;
+  }
+  // CAT6: extra = 2 * (|v| - CAT6_MIN_VAL), plus 1 when v is negative.
+  *token = CATEGORY6_TOKEN;
+  *extra = (v >= CAT6_MIN_VAL) ? 2 * v - 2 * CAT6_MIN_VAL
+                               : -2 * v - 2 * CAT6_MIN_VAL + 1;
+}
+static INLINE int16_t vp10_get_token(int v) {  // token only, no extra bits
+  if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) return CATEGORY6_TOKEN;
+  return vp10_dct_cat_lt_10_value_tokens[v].token;
+}
+
+static INLINE int vp10_get_token_cost(int v, int16_t *token,
+                                      const int *cat6_high_table) {
+  EXTRABIT extrabits;
+  if (v > -CAT6_MIN_VAL && v < CAT6_MIN_VAL) {
+    *token = vp10_dct_cat_lt_10_value_tokens[v].token;
+    return vp10_dct_cat_lt_10_value_cost[v];
+  }
+  // CAT6: low 8 bits and the bits above them are costed from separate tables.
+  *token = CATEGORY6_TOKEN;
+  extrabits = abs(v) - CAT6_MIN_VAL;
+  return vp10_cat6_low_cost[extrabits & 0xff] + cat6_high_table[extrabits >> 8];
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_TOKENIZE_H_
diff --git a/av1/encoder/treewriter.c b/av1/encoder/treewriter.c
new file mode 100644
index 0000000..d3fcd45
--- /dev/null
+++ b/av1/encoder/treewriter.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/encoder/treewriter.h"
+// Recursively assigns a (value, len) code to every leaf reachable from tree[i].
+static void tree2tok(struct vp10_token *tokens, const vpx_tree_index *tree,
+ int i, int v, int l) {
+ v += v; // append a 0 bit to the code prefix
+ ++l;
+ // The loop body runs twice: once with the low bit 0, once with it set to 1.
+ do {
+ const vpx_tree_index j = tree[i++];
+ if (j <= 0) { // leaf: -j is the token index
+ tokens[-j].value = v;
+ tokens[-j].len = l;
+ } else { // internal node: recurse with j as the new tree index
+ tree2tok(tokens, tree, j, v, l);
+ }
+ } while (++v & 1);
+}
+// Fills 'tokens' for a whole tree by starting tree2tok() at index 0.
+void vp10_tokens_from_tree(struct vp10_token *tokens,
+ const vpx_tree_index *tree) {
+ tree2tok(tokens, tree, 0, 0, 0);
+}
+// Returns the event total under node i and records its left/right split.
+static unsigned int convert_distribution(unsigned int i, vpx_tree tree,
+ unsigned int branch_ct[][2],
+ const unsigned int num_events[]) {
+ unsigned int left, right;
+ // A non-positive entry is a leaf whose event index is the negated value.
+ if (tree[i] <= 0)
+ left = num_events[-tree[i]];
+ else
+ left = convert_distribution(tree[i], tree, branch_ct, num_events);
+
+ if (tree[i + 1] <= 0)
+ right = num_events[-tree[i + 1]];
+ else
+ right = convert_distribution(tree[i + 1], tree, branch_ct, num_events);
+
+ branch_ct[i >> 1][0] = left; // one branch_ct row per pair of tree entries
+ branch_ct[i >> 1][1] = right;
+ return left + right;
+}
+// Fills branch_ct with per-node left/right event counts; no probabilities.
+void vp10_tree_probs_from_distribution(vpx_tree tree,
+ unsigned int branch_ct[/* n-1 */][2],
+ const unsigned int num_events[/* n */]) {
+ convert_distribution(0, tree, branch_ct, num_events);
+}
diff --git a/av1/encoder/treewriter.h b/av1/encoder/treewriter.h
new file mode 100644
index 0000000..43c615f
--- /dev/null
+++ b/av1/encoder/treewriter.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_TREEWRITER_H_
+#define VP10_ENCODER_TREEWRITER_H_
+
+#ifdef VP10_FORCE_VPXBOOL_TREEWRITER
+#include "aom_dsp/bitwriter.h"
+#define tree_writer vpx_writer
+#define tree_bit_write vpx_write
+#else
+#include "av1/encoder/bitwriter.h"
+#define tree_writer vp10_writer
+#define tree_bit_write vp10_write
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp10_tree_probs_from_distribution(vpx_tree tree,
+ unsigned int branch_ct[/* n - 1 */][2],
+ const unsigned int num_events[/* n */]);
+
+struct vp10_token {
+ int value; // code bits, passed as 'bits' to vp10_write_tree()
+ int len; // number of bits in 'value', passed as 'len'
+};
+
+void vp10_tokens_from_tree(struct vp10_token *, const vpx_tree_index *);
+// Writes the low 'len' bits of 'bits', MSB first, walking 'tree' from index i.
+static INLINE void vp10_write_tree(tree_writer *w, const vpx_tree_index *tree,
+ const vpx_prob *probs, int bits, int len,
+ vpx_tree_index i) {
+ do {
+ const int bit = (bits >> --len) & 1;
+ tree_bit_write(w, bit, probs[i >> 1]); // one probability per entry pair
+ i = tree[i + bit];
+ } while (len); // NOTE(review): assumes len >= 1 on entry - confirm callers
+}
+// Writes one token's (value, len) code from the root of 'tree' (index 0).
+static INLINE void vp10_write_token(tree_writer *w, const vpx_tree_index *tree,
+ const vpx_prob *probs,
+ const struct vp10_token *token) {
+ vp10_write_tree(w, tree, probs, token->value, token->len, 0);
+}
+
+#undef tree_writer
+#undef tree_bit_write
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP10_ENCODER_TREEWRITER_H_
diff --git a/av1/encoder/variance_tree.c b/av1/encoder/variance_tree.c
new file mode 100644
index 0000000..219d39a
--- /dev/null
+++ b/av1/encoder/variance_tree.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "av1/encoder/variance_tree.h"
+#include "av1/encoder/encoder.h"
+// (Re)allocates td->var_tree as one array, links it and sets td->var_root[].
+void vp10_setup_var_tree(struct VP10Common *cm, ThreadData *td) {
+ int i, j;
+#if CONFIG_EXT_PARTITION
+ const int leaf_nodes = 1024;
+ const int tree_nodes = 1024 + 256 + 64 + 16 + 4 + 1;
+#else
+ const int leaf_nodes = 256;
+ const int tree_nodes = 256 + 64 + 16 + 4 + 1;
+#endif // CONFIG_EXT_PARTITION
+ int index = 0;
+ VAR_TREE *this_var;
+ int nodes;
+
+ vpx_free(td->var_tree);
+ CHECK_MEM_ERROR(cm, td->var_tree,
+ vpx_calloc(tree_nodes, sizeof(*td->var_tree)));
+
+ this_var = &td->var_tree[0];
+
+ // Leaves occupy the first leaf_nodes entries; their split[0] is set to NULL.
+ for (index = 0; index < leaf_nodes; ++index) {
+ VAR_TREE *const leaf = &td->var_tree[index];
+ leaf->split[0] = NULL;
+ }
+
+ // Each internal node has 4 children; fill in the child pointers
+ // level by level, from the leaves up to the root.
+ for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) {
+ for (i = 0; i < nodes; ++i, ++index) {
+ VAR_TREE *const node = &td->var_tree[index];
+ for (j = 0; j < 4; j++) node->split[j] = this_var++;
+ }
+ }
+
+ // Set up the root node for the largest superblock size
+ i = MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2;
+ td->var_root[i] = &td->var_tree[tree_nodes - 1];
+ // Set up the root nodes for the rest of the possible superblock sizes
+ while (--i >= 0) {
+ td->var_root[i] = td->var_root[i + 1]->split[0];
+ }
+}
+// Frees td->var_tree and clears the pointer so a later free is harmless.
+void vp10_free_var_tree(ThreadData *td) {
+ vpx_free(td->var_tree);
+ td->var_tree = NULL;
+}
diff --git a/av1/encoder/variance_tree.h b/av1/encoder/variance_tree.h
new file mode 100644
index 0000000..08c40d3
--- /dev/null
+++ b/av1/encoder/variance_tree.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP10_ENCODER_VARIANCE_TREE_H_
+#define VP10_ENCODER_VARIANCE_TREE_H_
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+
+#include "aom/vpx_integer.h"
+
+#include "av1/common/enums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP10Common;
+struct ThreadData;
+
+typedef struct {
+ int64_t sum_square_error; // 's2' argument of fill_variance()
+ int64_t sum_error; // 's' argument of fill_variance()
+ int log2_count; // 'c' argument; used as a right-shift amount
+ int variance; // derived by fill_variance(), scaled by 256
+} var;
+
+typedef struct {
+ var none; // whole node: vert[0] + vert[1] in fill_variance_node()
+ var horz[2]; // [0] = children 0 + 1, [1] = children 2 + 3
+ var vert[2]; // [0] = children 0 + 2, [1] = children 1 + 3
+} partition_variance;
+
+typedef struct VAR_TREE {
+ int force_split;
+ partition_variance variances;
+ struct VAR_TREE *split[4]; // children; vp10_setup_var_tree() NULLs split[0] on leaves
+ BLOCK_SIZE bsize;
+ const uint8_t *src;
+ const uint8_t *ref;
+ int src_stride;
+ int ref_stride;
+ int width;
+ int height;
+#if CONFIG_VP9_HIGHBITDEPTH
+ int highbd; // only present in high-bit-depth builds
+#endif // CONFIG_VP9_HIGHBITDEPTH
+} VAR_TREE;
+
+void vp10_setup_var_tree(struct VP10Common *cm, struct ThreadData *td);
+void vp10_free_var_tree(struct ThreadData *td);
+
+// Stores s2, s and c (log2 of the sample count) and derives 256 * variance.
+static INLINE void fill_variance(int64_t s2, int64_t s, int c, var *v) {
+ v->sum_square_error = s2;
+ v->sum_error = s;
+ v->log2_count = c;
+ v->variance = // 256 * (s2 - s * s / 2^c) / 2^c, computed with shifts
+ (int)(256 * (v->sum_square_error -
+ ((v->sum_error * v->sum_error) >> v->log2_count)) >>
+ v->log2_count);
+}
+// Combines two equal-count entries into *r: sums add, log2_count grows by 1.
+static INLINE void sum_2_variances(const var *a, const var *b, var *r) {
+ assert(a->log2_count == b->log2_count);
+ fill_variance(a->sum_square_error + b->sum_square_error,
+ a->sum_error + b->sum_error, a->log2_count + 1, r);
+}
+// Derives a node's horz/vert/none entries from its four children's 'none'.
+static INLINE void fill_variance_node(VAR_TREE *vt) {
+ sum_2_variances(&vt->split[0]->variances.none, &vt->split[1]->variances.none,
+ &vt->variances.horz[0]);
+ sum_2_variances(&vt->split[2]->variances.none, &vt->split[3]->variances.none,
+ &vt->variances.horz[1]);
+ sum_2_variances(&vt->split[0]->variances.none, &vt->split[2]->variances.none,
+ &vt->variances.vert[0]);
+ sum_2_variances(&vt->split[1]->variances.none, &vt->split[3]->variances.none,
+ &vt->variances.vert[1]);
+ sum_2_variances(&vt->variances.vert[0], &vt->variances.vert[1],
+ &vt->variances.none); // whole node = the two vertical halves combined
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* VP10_ENCODER_VARIANCE_TREE_H_ */
diff --git a/av1/encoder/wedge_utils.c b/av1/encoder/wedge_utils.c
new file mode 100644
index 0000000..548bc48
--- /dev/null
+++ b/av1/encoder/wedge_utils.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "aom/vpx_integer.h"
+
+#include "aom_ports/mem.h"
+
+#include "aom_dsp/vpx_dsp_common.h"
+
+#include "av1/common/reconinter.h"
+
+#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
+
+/**
+ * Computes SSE of a compound predictor constructed from 2 fundamental
+ * predictors p0 and p1 using blending with mask.
+ *
+ * r1: Residuals of p1.
+ * (source - p1)
+ * d: Difference of p1 and p0.
+ * (p1 - p0)
+ * m: The blending mask
+ * N: Number of pixels
+ *
+ * 'r1', 'd', and 'm' are contiguous.
+ *
+ * Computes:
+ * Sum((MAX_MASK_VALUE*r1 + mask*d)**2), which is equivalent to:
+ * Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2),
+ * where r0 is (source - p0), and r1 is (source - p1), which in turn
+ * is equivalent to:
+ * Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2),
+ * which is the SSE of the residuals of the compound predictor scaled up by
+ * MAX_MASK_VALUE**2.
+ *
+ * Note that we clamp the partial term in the loop to 16 bits signed. This is
+ * to facilitate equivalent SIMD implementation. It should have no effect if
+ * residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always
+ * holds for 8 bit input, and on real input, it should hold practically always,
+ * as residuals are expected to be small.
+ */
+uint64_t vp10_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
+                                         const uint8_t *m, int N) {
+  uint64_t sse_acc = 0;
+  int px;
+  assert(N % 64 == 0);
+  for (px = 0; px < N; ++px) {
+    const int32_t term = clamp(MAX_MASK_VALUE * r1[px] + m[px] * d[px],
+                               INT16_MIN, INT16_MAX);
+    sse_acc += term * term;
+  }
+  return ROUND_POWER_OF_TWO(sse_acc, 2 * WEDGE_WEIGHT_BITS);
+}
+
+/**
+ * Choose the mask sign for a compound predictor.
+ *
+ * ds: Difference of the squares of the residuals.
+ * r0**2 - r1**2
+ * m: The blending mask
+ * N: Number of pixels
+ * limit: Pre-computed threshold value.
+ * MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
+ *
+ * 'ds' and 'm' are contiguous.
+ *
+ * Returns true if the negated mask has lower SSE compared to the positive
+ * mask. Computation is based on:
+ * Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2)
+ * >
+ * Sum(((MAX_MASK_VALUE-mask)*r0 + mask*r1)**2)
+ *
+ * which can be simplified to:
+ *
+ * Sum(mask*(r0**2 - r1**2)) > MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
+ *
+ * The right hand side does not depend on the mask, and needs to be passed as
+ * the 'limit' parameter.
+ *
+ * After pre-computing (r0**2 - r1**2), which is passed in as 'ds', the left
+ * hand side is simply a scalar product between an int16_t and uint8_t vector.
+ *
+ * Note that for efficiency, ds is stored on 16 bits. Real input residuals
+ * being small, this should not cause a noticeable issue.
+ */
+int vp10_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m, int N,
+                                     int64_t limit) {
+  int64_t acc = 0;
+  int i;
+
+  assert(N % 64 == 0);
+  // A counted loop (instead of do/while) keeps N == 0, which the assert
+  // accepts, from running past the end of both arrays.
+  for (i = 0; i < N; i++) acc += ds[i] * m[i];
+
+  return acc > limit;
+}
+
+/**
+ * Compute the element-wise difference of the squares of 2 arrays.
+ *
+ * d: Difference of the squares of the inputs: a**2 - b**2
+ * a: First input array
+ * b: Second input array
+ * N: Number of elements
+ *
+ * 'd', 'a', and 'b' are contiguous.
+ *
+ * The result is saturated to signed 16 bits.
+ */
+void vp10_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a,
+                                        const int16_t *b, int N) {
+  int i;
+  assert(N % 64 == 0);
+  for (i = 0; i < N; i++) {
+    const int delta = a[i] * a[i] - b[i] * b[i];  // int16 squares fit in int
+    d[i] = clamp(delta, INT16_MIN, INT16_MAX);
+  }
+}
diff --git a/av1/encoder/x86/dct_intrin_sse2.c b/av1/encoder/x86/dct_intrin_sse2.c
new file mode 100644
index 0000000..6fe3ada
--- /dev/null
+++ b/av1/encoder/x86/dct_intrin_sse2.c
@@ -0,0 +1,2583 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <emmintrin.h> // SSE2
+
+#include "./vp10_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_dsp/x86/fwd_txfm_sse2.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+#include "aom_ports/mem.h"
+// Loads a 4x4 int16_t block (row i -> low 4 lanes of in[i]), optionally flipped, scaled by 16.
+static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in,
+ int stride, int flipud, int fliplr) {
+ const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1);
+ const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
+ __m128i mask;
+
+ if (!flipud) {  // natural row order; 'stride' is counted in int16_t elements
+ in[0] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
+ in[1] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
+ in[2] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
+ in[3] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
+ } else {  // flipud: rows are loaded bottom-to-top
+ in[0] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride));
+ in[1] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride));
+ in[2] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
+ in[3] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
+ }
+
+ if (fliplr) {  // 0x1b reverses the four low 16-bit lanes of each row
+ in[0] = _mm_shufflelo_epi16(in[0], 0x1b);
+ in[1] = _mm_shufflelo_epi16(in[1], 0x1b);
+ in[2] = _mm_shufflelo_epi16(in[2], 0x1b);
+ in[3] = _mm_shufflelo_epi16(in[3], 0x1b);
+ }
+
+ in[0] = _mm_slli_epi16(in[0], 4);  // scale by 16
+ in[1] = _mm_slli_epi16(in[1], 4);
+ in[2] = _mm_slli_epi16(in[2], 4);
+ in[3] = _mm_slli_epi16(in[3], 4);
+ // Bias: only lane 0 of in[0] can change; lanes 1-7 are multiples of 16 and never equal 1.
+ mask = _mm_cmpeq_epi16(in[0], k__nonzero_bias_a);  // 0xffff where lane == bias_a lane
+ in[0] = _mm_add_epi16(in[0], mask);  // lane 0 becomes -1 only if it was zero
+ in[0] = _mm_add_epi16(in[0], k__nonzero_bias_b);  // lane 0 += 1: zero stays 0, non-zero gains 1
+}
+// Rounds the 4x4 result as (x + 1) >> 2 and stores its 16 values through store_output().
+static INLINE void write_buffer_4x4(tran_low_t *output, __m128i *res) {
+ const __m128i kOne = _mm_set1_epi16(1);
+ __m128i in01 = _mm_unpacklo_epi64(res[0], res[1]);  // low half of res[0] | low half of res[1]
+ __m128i in23 = _mm_unpacklo_epi64(res[2], res[3]);  // low half of res[2] | low half of res[3]
+ __m128i out01 = _mm_add_epi16(in01, kOne);
+ __m128i out23 = _mm_add_epi16(in23, kOne);
+ out01 = _mm_srai_epi16(out01, 2);  // arithmetic shift keeps the sign
+ out23 = _mm_srai_epi16(out23, 2);
+ store_output(&out01, (output + 0 * 8));  // store_output() is defined outside this view
+ store_output(&out23, (output + 1 * 8));  // second group of 8 coefficients
+}
+// Transposes a 4x4 int16 matrix packed in res[0] and res[1]; writes one row to each of res[0..3].
+static INLINE void transpose_4x4(__m128i *res) {
+ // Input layout (only res[0] and res[1] are read):
+ // 00 01 02 03 20 21 22 23
+ // 10 11 12 13 30 31 32 33
+ const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]);
+ const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]);
+
+ // 00 10 01 11 02 12 03 13
+ // 20 30 21 31 22 32 23 33
+ res[0] = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ res[2] = _mm_unpackhi_epi32(tr0_0, tr0_1);
+
+ // 00 10 20 30 01 11 21 31
+ // 02 12 22 32 03 13 23 33
+ // only use the first 4 16-bit integers
+ res[1] = _mm_unpackhi_epi64(res[0], res[0]);  // upper half of res[0] copied to both halves
+ res[3] = _mm_unpackhi_epi64(res[2], res[2]);  // upper half of res[2] copied to both halves
+}
+// 1-D 4-point forward DCT across in[0..3] (low four lanes each), then transposes the 4x4 result.
+static void fdct4_sse2(__m128i *in) {
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+
+ __m128i u[4], v[4];
+ u[0] = _mm_unpacklo_epi16(in[0], in[1]);  // interleaved pairs (in0, in1)
+ u[1] = _mm_unpacklo_epi16(in[3], in[2]);  // interleaved pairs (in3, in2)
+
+ v[0] = _mm_add_epi16(u[0], u[1]);  // pairs (in0 + in3, in1 + in2)
+ v[1] = _mm_sub_epi16(u[0], u[1]);  // pairs (in0 - in3, in1 - in2)
+
+ u[0] = _mm_madd_epi16(v[0], k__cospi_p16_p16); // output 0
+ u[1] = _mm_madd_epi16(v[0], k__cospi_p16_m16); // output 2
+ u[2] = _mm_madd_epi16(v[1], k__cospi_p08_p24); // output 1
+ u[3] = _mm_madd_epi16(v[1], k__cospi_p24_m08); // output 3
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);  // add rounding term ...
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);  // ... then arithmetic shift
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+
+ in[0] = _mm_packs_epi32(u[0], u[1]);  // outputs 0 | 2, saturated to int16
+ in[1] = _mm_packs_epi32(u[2], u[3]);  // outputs 1 | 3, saturated to int16
+ transpose_4x4(in);  // reads in[0..1], rewrites in[0..3]
+}
+// 1-D 4-point forward ADST across in[0..3] (low four lanes each), then transposes the 4x4 result.
+static void fadst4_sse2(__m128i *in) {
+ const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9);
+ const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9);
+ const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9);
+ const __m128i k__sinpi_m03_p02 = pair_set_epi16(-sinpi_3_9, sinpi_2_9);
+ const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9);
+ const __m128i kZero = _mm_set1_epi16(0);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ __m128i u[8], v[8];
+ __m128i in7 = _mm_add_epi16(in[0], in[1]);  // in0 + in1 (16-bit add)
+
+ u[0] = _mm_unpacklo_epi16(in[0], in[1]);  // pairs (in0, in1)
+ u[1] = _mm_unpacklo_epi16(in[2], in[3]);  // pairs (in2, in3)
+ u[2] = _mm_unpacklo_epi16(in7, kZero);  // pairs (in0 + in1, 0)
+ u[3] = _mm_unpacklo_epi16(in[2], kZero);  // pairs (in2, 0)
+ u[4] = _mm_unpacklo_epi16(in[3], kZero);  // pairs (in3, 0)
+
+ v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p02); // s0 + s2
+ v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p04); // s4 + s5
+ v[2] = _mm_madd_epi16(u[2], k__sinpi_p03_p03); // sinpi_3_9 * (in0 + in1)
+ v[3] = _mm_madd_epi16(u[0], k__sinpi_p04_m01); // s1 - s3
+ v[4] = _mm_madd_epi16(u[1], k__sinpi_m03_p02); // -s4 + s6
+ v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03); // s4 = sinpi_3_9 * in2
+ v[6] = _mm_madd_epi16(u[4], k__sinpi_p03_p03);  // sinpi_3_9 * in3
+
+ u[0] = _mm_add_epi32(v[0], v[1]);  // output 0 before rounding
+ u[1] = _mm_sub_epi32(v[2], v[6]);  // output 1: sinpi_3_9 * (in0 + in1 - in3)
+ u[2] = _mm_add_epi32(v[3], v[4]);  // output 2 before rounding
+ u[3] = _mm_sub_epi32(u[2], u[0]);
+ u[4] = _mm_slli_epi32(v[5], 2);  // 4 * v[5]
+ u[5] = _mm_sub_epi32(u[4], v[5]);  // 3 * v[5]
+ u[6] = _mm_add_epi32(u[3], u[5]);  // output 3: u[2] - u[0] + 3 * v[5]
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);  // note: u[6], not u[3]
+
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+
+ in[0] = _mm_packs_epi32(u[0], u[2]);  // outputs 0 | 2, saturated to int16
+ in[1] = _mm_packs_epi32(u[1], u[3]);  // outputs 1 | 3, saturated to int16
+ transpose_4x4(in);  // reads in[0..1], rewrites in[0..3]
+}
+// 1-D 4-point identity transform: scales in[0..3] by Sqrt2 with DCT rounding, then transposes.
+#if CONFIG_EXT_TX
+static void fidtx4_sse2(__m128i *in) {
+ const __m128i k__zero_epi16 = _mm_set1_epi16((int16_t)0);
+ const __m128i k__sqrt2_epi16 = _mm_set1_epi16((int16_t)Sqrt2);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+
+ __m128i v0, v1, v2, v3;
+ __m128i u0, u1, u2, u3;
+
+ v0 = _mm_unpacklo_epi16(in[0], k__zero_epi16);  // pairs (x, 0) so madd yields x * Sqrt2
+ v1 = _mm_unpacklo_epi16(in[1], k__zero_epi16);
+ v2 = _mm_unpacklo_epi16(in[2], k__zero_epi16);
+ v3 = _mm_unpacklo_epi16(in[3], k__zero_epi16);
+
+ u0 = _mm_madd_epi16(v0, k__sqrt2_epi16);  // 32-bit products
+ u1 = _mm_madd_epi16(v1, k__sqrt2_epi16);
+ u2 = _mm_madd_epi16(v2, k__sqrt2_epi16);
+ u3 = _mm_madd_epi16(v3, k__sqrt2_epi16);
+
+ v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+
+ u0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ u1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ u2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ u3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+
+ in[0] = _mm_packs_epi32(u0, u2);  // rows 0 | 2, the layout transpose_4x4() reads
+ in[1] = _mm_packs_epi32(u1, u3);  // rows 1 | 3
+ transpose_4x4(in);
+}
+#endif // CONFIG_EXT_TX
+// 4x4 forward hybrid transform: picks the two 1-D passes (and input flips) from tx_type.
+void vp10_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
+ int tx_type) {
+ __m128i in[4];
+
+ switch (tx_type) {  // non-DCT_DCT cases: load, pass over in[0..3], pass again, round + store
+ case DCT_DCT: vpx_fdct4x4_sse2(input, output, stride); break;  // delegated entirely
+ case ADST_DCT:
+ load_buffer_4x4(input, in, stride, 0, 0);
+ fadst4_sse2(in);  // first pass; each 1-D helper ends with a transpose
+ fdct4_sse2(in);  // second pass, on the transposed data
+ write_buffer_4x4(output, in);
+ break;
+ case DCT_ADST:
+ load_buffer_4x4(input, in, stride, 0, 0);
+ fdct4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case ADST_ADST:
+ load_buffer_4x4(input, in, stride, 0, 0);
+ fadst4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+#if CONFIG_EXT_TX
+ case FLIPADST_DCT:
+ load_buffer_4x4(input, in, stride, 1, 0);  // flipud
+ fadst4_sse2(in);
+ fdct4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case DCT_FLIPADST:
+ load_buffer_4x4(input, in, stride, 0, 1);  // fliplr
+ fdct4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case FLIPADST_FLIPADST:
+ load_buffer_4x4(input, in, stride, 1, 1);  // flipud and fliplr
+ fadst4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case ADST_FLIPADST:
+ load_buffer_4x4(input, in, stride, 0, 1);  // fliplr
+ fadst4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case FLIPADST_ADST:
+ load_buffer_4x4(input, in, stride, 1, 0);  // flipud
+ fadst4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case V_DCT:
+ load_buffer_4x4(input, in, stride, 0, 0);
+ fdct4_sse2(in);
+ fidtx4_sse2(in);  // identity (Sqrt2 scaling) as the second pass
+ write_buffer_4x4(output, in);
+ break;
+ case H_DCT:
+ load_buffer_4x4(input, in, stride, 0, 0);
+ fidtx4_sse2(in);  // identity (Sqrt2 scaling) as the first pass
+ fdct4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case V_ADST:
+ load_buffer_4x4(input, in, stride, 0, 0);
+ fadst4_sse2(in);
+ fidtx4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case H_ADST:
+ load_buffer_4x4(input, in, stride, 0, 0);
+ fidtx4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case V_FLIPADST:
+ load_buffer_4x4(input, in, stride, 1, 0);  // flipud
+ fadst4_sse2(in);
+ fidtx4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+ case H_FLIPADST:
+ load_buffer_4x4(input, in, stride, 0, 1);  // fliplr
+ fidtx4_sse2(in);
+ fadst4_sse2(in);
+ write_buffer_4x4(output, in);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0);  // unknown tx_type: nothing is written to 'output'
+ }
+}
+// Fused 8x8 forward DCT + quantize/dequantize + eob search; transform output stays in registers.
+void vp10_fdct8x8_quant_sse2(
+ const int16_t *input, int stride, int16_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+ __m128i zero;
+ int pass;
+ // Constants
+ // When we use them, in one case, they are all the same. In all others
+ // it's a pair of them that we need to repeat four times. This is done
+ // by constructing the 32 bit constant corresponding to that pair.
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
+ const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ // Load input (aligned loads: every row address must be 16-byte aligned)
+ __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+ __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
+ __m128i *in[8];  // pointers to in0..in7 so the quantizer below can index rows
+ int index = 0;
+
+ (void)scan_ptr;  // the following four parameters are not used by this function
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ (void)coeff_ptr;
+
+ // Pre-condition input (shift by two)
+ in0 = _mm_slli_epi16(in0, 2);
+ in1 = _mm_slli_epi16(in1, 2);
+ in2 = _mm_slli_epi16(in2, 2);
+ in3 = _mm_slli_epi16(in3, 2);
+ in4 = _mm_slli_epi16(in4, 2);
+ in5 = _mm_slli_epi16(in5, 2);
+ in6 = _mm_slli_epi16(in6, 2);
+ in7 = _mm_slli_epi16(in7, 2);
+
+ in[0] = &in0;
+ in[1] = &in1;
+ in[2] = &in2;
+ in[3] = &in3;
+ in[4] = &in4;
+ in[5] = &in5;
+ in[6] = &in6;
+ in[7] = &in7;
+
+ // We do two passes, first the columns, then the rows. The results of the
+ // first pass are transposed so that the same column code can be reused. The
+ // results of the second pass are also transposed so that the rows (processed
+ // as columns) are put back in row positions.
+ for (pass = 0; pass < 2; pass++) {
+ // To store results of each pass before the transpose.
+ __m128i res0, res1, res2, res3, res4, res5, res6, res7;
+ // Add/subtract
+ const __m128i q0 = _mm_add_epi16(in0, in7);
+ const __m128i q1 = _mm_add_epi16(in1, in6);
+ const __m128i q2 = _mm_add_epi16(in2, in5);
+ const __m128i q3 = _mm_add_epi16(in3, in4);
+ const __m128i q4 = _mm_sub_epi16(in3, in4);
+ const __m128i q5 = _mm_sub_epi16(in2, in5);
+ const __m128i q6 = _mm_sub_epi16(in1, in6);
+ const __m128i q7 = _mm_sub_epi16(in0, in7);
+ // Work on first four results
+ {
+ // Add/subtract
+ const __m128i r0 = _mm_add_epi16(q0, q3);
+ const __m128i r1 = _mm_add_epi16(q1, q2);
+ const __m128i r2 = _mm_sub_epi16(q1, q2);
+ const __m128i r3 = _mm_sub_epi16(q0, q3);
+ // Interleave to do the multiply by constants which gets us into 32bits
+ const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
+ const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
+ const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
+ const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16);
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08);
+ const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24);
+ const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ // Combine (even-indexed outputs 0, 4, 2, 6; packs saturates to int16)
+ res0 = _mm_packs_epi32(w0, w1);
+ res4 = _mm_packs_epi32(w2, w3);
+ res2 = _mm_packs_epi32(w4, w5);
+ res6 = _mm_packs_epi32(w6, w7);
+ }
+ // Work on next four results
+ {
+ // Interleave to do the multiply by constants which gets us into 32bits
+ const __m128i d0 = _mm_unpacklo_epi16(q6, q5);
+ const __m128i d1 = _mm_unpackhi_epi16(q6, q5);
+ const __m128i e0 = _mm_madd_epi16(d0, k__cospi_p16_m16);
+ const __m128i e1 = _mm_madd_epi16(d1, k__cospi_p16_m16);
+ const __m128i e2 = _mm_madd_epi16(d0, k__cospi_p16_p16);
+ const __m128i e3 = _mm_madd_epi16(d1, k__cospi_p16_p16);
+ // dct_const_round_shift
+ const __m128i f0 = _mm_add_epi32(e0, k__DCT_CONST_ROUNDING);
+ const __m128i f1 = _mm_add_epi32(e1, k__DCT_CONST_ROUNDING);
+ const __m128i f2 = _mm_add_epi32(e2, k__DCT_CONST_ROUNDING);
+ const __m128i f3 = _mm_add_epi32(e3, k__DCT_CONST_ROUNDING);
+ const __m128i s0 = _mm_srai_epi32(f0, DCT_CONST_BITS);
+ const __m128i s1 = _mm_srai_epi32(f1, DCT_CONST_BITS);
+ const __m128i s2 = _mm_srai_epi32(f2, DCT_CONST_BITS);
+ const __m128i s3 = _mm_srai_epi32(f3, DCT_CONST_BITS);
+ // Combine
+ const __m128i r0 = _mm_packs_epi32(s0, s1);
+ const __m128i r1 = _mm_packs_epi32(s2, s3);
+ // Add/subtract
+ const __m128i x0 = _mm_add_epi16(q4, r0);
+ const __m128i x1 = _mm_sub_epi16(q4, r0);
+ const __m128i x2 = _mm_sub_epi16(q7, r1);
+ const __m128i x3 = _mm_add_epi16(q7, r1);
+ // Interleave to do the multiply by constants which gets us into 32bits
+ const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
+ const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
+ const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
+ const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28);
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20);
+ const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12);
+ const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ // Combine (odd-indexed outputs 1, 7, 5, 3; packs saturates to int16)
+ res1 = _mm_packs_epi32(w0, w1);
+ res7 = _mm_packs_epi32(w2, w3);
+ res5 = _mm_packs_epi32(w4, w5);
+ res3 = _mm_packs_epi32(w6, w7);
+ }
+ // Transpose the 8x8.
+ {
+ // 00 01 02 03 04 05 06 07
+ // 10 11 12 13 14 15 16 17
+ // 20 21 22 23 24 25 26 27
+ // 30 31 32 33 34 35 36 37
+ // 40 41 42 43 44 45 46 47
+ // 50 51 52 53 54 55 56 57
+ // 60 61 62 63 64 65 66 67
+ // 70 71 72 73 74 75 76 77
+ const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);
+ const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3);
+ const __m128i tr0_2 = _mm_unpackhi_epi16(res0, res1);
+ const __m128i tr0_3 = _mm_unpackhi_epi16(res2, res3);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7);
+ const __m128i tr0_6 = _mm_unpackhi_epi16(res4, res5);
+ const __m128i tr0_7 = _mm_unpackhi_epi16(res6, res7);
+ // 00 10 01 11 02 12 03 13
+ // 20 30 21 31 22 32 23 33
+ // 04 14 05 15 06 16 07 17
+ // 24 34 25 35 26 36 27 37
+ // 40 50 41 51 42 52 43 53
+ // 60 70 61 71 62 72 63 73
+ // 44 54 45 55 46 56 47 57
+ // 64 74 65 75 66 76 67 77
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+ // 00 10 20 30 01 11 21 31
+ // 40 50 60 70 41 51 61 71
+ // 02 12 22 32 03 13 23 33
+ // 42 52 62 72 43 53 63 73
+ // 04 14 24 34 05 15 25 35
+ // 44 54 64 74 45 55 65 75
+ // 06 16 26 36 07 17 27 37
+ // 46 56 66 76 47 57 67 77
+ in0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
+ in1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
+ in2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
+ in3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
+ in4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
+ in5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
+ in6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
+ in7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
+ // 00 10 20 30 40 50 60 70
+ // 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72
+ // 03 13 23 33 43 53 63 73
+ // 04 14 24 34 44 54 64 74
+ // 05 15 25 35 45 55 65 75
+ // 06 16 26 36 46 56 66 76
+ // 07 17 27 37 47 57 67 77
+ }
+ }
+ // Post-condition output and store it
+ {
+ // Post-condition (division by two)
+ // division of two 16 bits signed numbers using shifts
+ // n / 2 = (n - (n >> 15)) >> 1
+ const __m128i sign_in0 = _mm_srai_epi16(in0, 15);
+ const __m128i sign_in1 = _mm_srai_epi16(in1, 15);
+ const __m128i sign_in2 = _mm_srai_epi16(in2, 15);
+ const __m128i sign_in3 = _mm_srai_epi16(in3, 15);
+ const __m128i sign_in4 = _mm_srai_epi16(in4, 15);
+ const __m128i sign_in5 = _mm_srai_epi16(in5, 15);
+ const __m128i sign_in6 = _mm_srai_epi16(in6, 15);
+ const __m128i sign_in7 = _mm_srai_epi16(in7, 15);
+ in0 = _mm_sub_epi16(in0, sign_in0);
+ in1 = _mm_sub_epi16(in1, sign_in1);
+ in2 = _mm_sub_epi16(in2, sign_in2);
+ in3 = _mm_sub_epi16(in3, sign_in3);
+ in4 = _mm_sub_epi16(in4, sign_in4);
+ in5 = _mm_sub_epi16(in5, sign_in5);
+ in6 = _mm_sub_epi16(in6, sign_in6);
+ in7 = _mm_sub_epi16(in7, sign_in7);
+ in0 = _mm_srai_epi16(in0, 1);
+ in1 = _mm_srai_epi16(in1, 1);
+ in2 = _mm_srai_epi16(in2, 1);
+ in3 = _mm_srai_epi16(in3, 1);
+ in4 = _mm_srai_epi16(in4, 1);
+ in5 = _mm_srai_epi16(in5, 1);
+ in6 = _mm_srai_epi16(in6, 1);
+ in7 = _mm_srai_epi16(in7, 1);
+ }
+ // Point the buffers past their end and walk a negative offset up to zero.
+ iscan_ptr += n_coeffs;
+ qcoeff_ptr += n_coeffs;
+ dqcoeff_ptr += n_coeffs;
+ n_coeffs = -n_coeffs;
+ zero = _mm_setzero_si128();
+
+ if (!skip_block) {
+ __m128i eob;
+ __m128i round, quant, dequant;
+ {
+ __m128i coeff0, coeff1;
+
+ // Setup global values (aligned loads of 8 int16 values each)
+ {
+ round = _mm_load_si128((const __m128i *)round_ptr);
+ quant = _mm_load_si128((const __m128i *)quant_ptr);
+ dequant = _mm_load_si128((const __m128i *)dequant_ptr);
+ }
+
+ {
+ __m128i coeff0_sign, coeff1_sign;
+ __m128i qcoeff0, qcoeff1;
+ __m128i qtmp0, qtmp1;
+ // Do DC and first 15 AC
+ coeff0 = *in[0];
+ coeff1 = *in[1];
+
+ // Poor man's sign extract
+ coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);  // absolute value
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);  // saturating add
+ round = _mm_unpackhi_epi64(round, round);  // upper half replicated; lane-0 (DC) entry dropped
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);  // high 16 bits of the 32-bit product
+ quant = _mm_unpackhi_epi64(quant, quant);  // upper half replicated; lane-0 (DC) entry dropped
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);  // coeff0/coeff1 now hold dequantized values
+ dequant = _mm_unpackhi_epi64(dequant, dequant);  // upper half replicated, as above
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ }
+
+ {
+ // Scan for eob (the non-zero test is made on the dequantized values)
+ __m128i zero_coeff0, zero_coeff1;
+ __m128i nzero_coeff0, nzero_coeff1;
+ __m128i iscan0, iscan1;
+ __m128i eob1;
+ zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);  // 0xffff where non-zero
+ nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ // Add one to convert from indices to counts
+ iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+ iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+ eob = _mm_and_si128(iscan0, nzero_coeff0);
+ eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+ eob = _mm_max_epi16(eob, eob1);
+ }
+ n_coeffs += 8 * 2;  // two registers of 8 coefficients consumed
+ }
+
+ // AC only loop
+ index = 2;
+ while (n_coeffs < 0) {
+ __m128i coeff0, coeff1;
+ {
+ __m128i coeff0_sign, coeff1_sign;
+ __m128i qcoeff0, qcoeff1;
+ __m128i qtmp0, qtmp1;
+
+ assert(index < (int)(sizeof(in) / sizeof(in[0])) - 1);  // in[index + 1] must exist
+ coeff0 = *in[index];
+ coeff1 = *in[index + 1];
+
+ // Poor man's sign extract
+ coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ }
+
+ {
+ // Scan for eob (the non-zero test is made on the dequantized values)
+ __m128i zero_coeff0, zero_coeff1;
+ __m128i nzero_coeff0, nzero_coeff1;
+ __m128i iscan0, iscan1;
+ __m128i eob0, eob1;
+ zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+ nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ // Add one to convert from indices to counts
+ iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+ iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+ eob0 = _mm_and_si128(iscan0, nzero_coeff0);
+ eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+ eob0 = _mm_max_epi16(eob0, eob1);
+ eob = _mm_max_epi16(eob, eob0);
+ }
+ n_coeffs += 8 * 2;
+ index += 2;
+ }
+
+ // Accumulate EOB (horizontal max of the eight 16-bit lanes)
+ {
+ __m128i eob_shuffled;
+ eob_shuffled = _mm_shuffle_epi32(eob, 0xe);  // upper 64 bits moved to the lower half
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);  // lanes 2, 3 moved to lanes 0, 1
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);  // lanes 0 and 1 swapped
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ *eob_ptr = _mm_extract_epi16(eob, 1);  // lane 1 holds the overall maximum
+ }
+ } else {
+ do {  // skip_block: zero both output buffers, 16 coefficients per iteration
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
+ n_coeffs += 8 * 2;
+ } while (n_coeffs < 0);
+ *eob_ptr = 0;
+ }
+}
+
+// Loads an 8x8 int16_t block (row i -> in[i], aligned loads), optionally flipped, scaled by 4.
+static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in,
+ int stride, int flipud, int fliplr) {
+ if (!flipud) {  // natural row order; 'stride' is counted in int16_t elements
+ in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride));
+ in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride));
+ } else {  // flipud: rows are loaded bottom-to-top
+ in[0] = _mm_load_si128((const __m128i *)(input + 7 * stride));
+ in[1] = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ in[2] = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ in[3] = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ in[4] = _mm_load_si128((const __m128i *)(input + 3 * stride));
+ in[5] = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ in[6] = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ in[7] = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ }
+
+ if (fliplr) {  // mm_reverse_epi16() is defined elsewhere; presumably reverses the 8 lanes
+ in[0] = mm_reverse_epi16(in[0]);
+ in[1] = mm_reverse_epi16(in[1]);
+ in[2] = mm_reverse_epi16(in[2]);
+ in[3] = mm_reverse_epi16(in[3]);
+ in[4] = mm_reverse_epi16(in[4]);
+ in[5] = mm_reverse_epi16(in[5]);
+ in[6] = mm_reverse_epi16(in[6]);
+ in[7] = mm_reverse_epi16(in[7]);
+ }
+
+ in[0] = _mm_slli_epi16(in[0], 2);  // scale by 4
+ in[1] = _mm_slli_epi16(in[1], 2);
+ in[2] = _mm_slli_epi16(in[2], 2);
+ in[3] = _mm_slli_epi16(in[3], 2);
+ in[4] = _mm_slli_epi16(in[4], 2);
+ in[5] = _mm_slli_epi16(in[5], 2);
+ in[6] = _mm_slli_epi16(in[6], 2);
+ in[7] = _mm_slli_epi16(in[7], 2);
+}
+
+// Rounding right shift of res[0..7]: bit 1 -> (x + (x < 0)) >> 1, bit 2 -> (x + 1 + (x < 0)) >> 2.
+static INLINE void right_shift_8x8(__m128i *res, const int bit) {
+ __m128i sign0 = _mm_srai_epi16(res[0], 15);  // 0 or -1 per lane, taken before any rounding add
+ __m128i sign1 = _mm_srai_epi16(res[1], 15);
+ __m128i sign2 = _mm_srai_epi16(res[2], 15);
+ __m128i sign3 = _mm_srai_epi16(res[3], 15);
+ __m128i sign4 = _mm_srai_epi16(res[4], 15);
+ __m128i sign5 = _mm_srai_epi16(res[5], 15);
+ __m128i sign6 = _mm_srai_epi16(res[6], 15);
+ __m128i sign7 = _mm_srai_epi16(res[7], 15);
+
+ if (bit == 2) {  // extra +1 is applied only for the two-bit shift
+ const __m128i const_rounding = _mm_set1_epi16(1);
+ res[0] = _mm_add_epi16(res[0], const_rounding);
+ res[1] = _mm_add_epi16(res[1], const_rounding);
+ res[2] = _mm_add_epi16(res[2], const_rounding);
+ res[3] = _mm_add_epi16(res[3], const_rounding);
+ res[4] = _mm_add_epi16(res[4], const_rounding);
+ res[5] = _mm_add_epi16(res[5], const_rounding);
+ res[6] = _mm_add_epi16(res[6], const_rounding);
+ res[7] = _mm_add_epi16(res[7], const_rounding);
+ }
+
+ res[0] = _mm_sub_epi16(res[0], sign0);  // subtracting -1 adds 1 to negative lanes
+ res[1] = _mm_sub_epi16(res[1], sign1);
+ res[2] = _mm_sub_epi16(res[2], sign2);
+ res[3] = _mm_sub_epi16(res[3], sign3);
+ res[4] = _mm_sub_epi16(res[4], sign4);
+ res[5] = _mm_sub_epi16(res[5], sign5);
+ res[6] = _mm_sub_epi16(res[6], sign6);
+ res[7] = _mm_sub_epi16(res[7], sign7);
+
+ if (bit == 1) {
+ res[0] = _mm_srai_epi16(res[0], 1);
+ res[1] = _mm_srai_epi16(res[1], 1);
+ res[2] = _mm_srai_epi16(res[2], 1);
+ res[3] = _mm_srai_epi16(res[3], 1);
+ res[4] = _mm_srai_epi16(res[4], 1);
+ res[5] = _mm_srai_epi16(res[5], 1);
+ res[6] = _mm_srai_epi16(res[6], 1);
+ res[7] = _mm_srai_epi16(res[7], 1);
+ } else {  // every value of 'bit' other than 1 takes the >> 2 path
+ res[0] = _mm_srai_epi16(res[0], 2);
+ res[1] = _mm_srai_epi16(res[1], 2);
+ res[2] = _mm_srai_epi16(res[2], 2);
+ res[3] = _mm_srai_epi16(res[3], 2);
+ res[4] = _mm_srai_epi16(res[4], 2);
+ res[5] = _mm_srai_epi16(res[5], 2);
+ res[6] = _mm_srai_epi16(res[6], 2);
+ res[7] = _mm_srai_epi16(res[7], 2);
+ }
+}
+
+// Stores res[0..7] as eight rows of 'output' ('stride' in tran_low_t elements) via store_output().
+static INLINE void write_buffer_8x8(tran_low_t *output, __m128i *res,
+ int stride) {
+ store_output(&res[0], (output + 0 * stride));  // store_output() is defined outside this view
+ store_output(&res[1], (output + 1 * stride));
+ store_output(&res[2], (output + 2 * stride));
+ store_output(&res[3], (output + 3 * stride));
+ store_output(&res[4], (output + 4 * stride));
+ store_output(&res[5], (output + 5 * stride));
+ store_output(&res[6], (output + 6 * stride));
+ store_output(&res[7], (output + 7 * stride));
+}
+
+// Transposes the 8x8 int16 matrix in[0..7] into res[0..7]; in == res works (reads precede writes).
+static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
+ const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
+ const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
+ const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]);
+ const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
+ const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]);
+ const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]);
+ // Layout of tr0_0 .. tr0_7 (row/column of the source element):
+ // 20 30 21 31 22 32 23 33
+ // 04 14 05 15 06 16 07 17
+ // 24 34 25 35 26 36 27 37
+ // 40 50 41 51 42 52 43 53
+ // 60 70 61 71 62 72 63 73
+ // 44 54 45 55 46 56 47 57
+ // 64 74 65 75 66 76 67 77
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+ // 00 10 20 30 01 11 21 31
+ // 40 50 60 70 41 51 61 71
+ // 02 12 22 32 03 13 23 33
+ // 42 52 62 72 43 53 63 73
+ // 04 14 24 34 05 15 25 35
+ // 44 54 64 74 45 55 65 75
+ // 06 16 26 36 07 17 27 37
+ // 46 56 66 76 47 57 67 77
+ res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1);
+ res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1);
+ res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3);
+ res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3);
+ res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5);
+ res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5);
+ res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7);
+ res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
+ // 00 10 20 30 40 50 60 70
+ // 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72
+ // 03 13 23 33 43 53 63 73
+ // 04 14 24 34 44 54 64 74
+ // 05 15 25 35 45 55 65 75
+ // 06 16 26 36 46 56 66 76
+ // 07 17 27 37 47 57 67 77
+}
+
+// One 8-point forward DCT pass over an 8x8 block of 16-bit values, in place.
+//
+// in[0..7] are the eight inputs of the 1-D transform; each of the eight 16-bit
+// lanes of the registers is transformed independently. Multiplications are
+// done as 32-bit multiply-adds (_mm_madd_epi16), rounded by adding
+// DCT_CONST_ROUNDING, shifted right by DCT_CONST_BITS and packed back to
+// 16 bits with signed saturation (_mm_packs_epi32). The eight results are
+// written back to in[0..7] and the block is transposed before returning.
+//
+// NOTE(review): the per-output formulas in the comments below assume that
+// pair_set_epi16(a, b) produces the lane pattern a, b, a, b, ... so that a
+// madd against an interleaved (x, y) vector yields x * a + y * b. That helper
+// is defined outside this view -- confirm.
+static void fdct8_sse2(__m128i *in) {
+ // constants
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
+ const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ __m128i u0, u1, u2, u3, u4, u5, u6, u7;
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+ __m128i s0, s1, s2, s3, s4, s5, s6, s7;
+
+ // stage 1
+ // s0..s3: sums of mirrored input pairs (feed the even outputs).
+ // s4..s7: differences of mirrored input pairs (feed the odd outputs).
+ s0 = _mm_add_epi16(in[0], in[7]);
+ s1 = _mm_add_epi16(in[1], in[6]);
+ s2 = _mm_add_epi16(in[2], in[5]);
+ s3 = _mm_add_epi16(in[3], in[4]);
+ s4 = _mm_sub_epi16(in[3], in[4]);
+ s5 = _mm_sub_epi16(in[2], in[5]);
+ s6 = _mm_sub_epi16(in[1], in[6]);
+ s7 = _mm_sub_epi16(in[0], in[7]);
+
+ // Second butterfly on the sums only.
+ u0 = _mm_add_epi16(s0, s3);
+ u1 = _mm_add_epi16(s1, s2);
+ u2 = _mm_sub_epi16(s1, s2);
+ u3 = _mm_sub_epi16(s0, s3);
+ // interleave and perform butterfly multiplication/addition
+ // v0/v1 hold the low/high halves of the (u0, u1) pairs, v2/v3 those of the
+ // (u2, u3) pairs, ready for _mm_madd_epi16.
+ v0 = _mm_unpacklo_epi16(u0, u1);
+ v1 = _mm_unpackhi_epi16(u0, u1);
+ v2 = _mm_unpacklo_epi16(u2, u3);
+ v3 = _mm_unpackhi_epi16(u2, u3);
+
+ // u0/u1: (u0, u1) pairs with cospi_16 / cospi_16 -> in[0]
+ // u2/u3: (u0, u1) pairs with cospi_16 / -cospi_16 -> in[4]
+ // u4/u5: (u2, u3) pairs with cospi_24 / cospi_8 -> in[2]
+ // u6/u7: (u2, u3) pairs with -cospi_8 / cospi_24 -> in[6]
+ u0 = _mm_madd_epi16(v0, k__cospi_p16_p16);
+ u1 = _mm_madd_epi16(v1, k__cospi_p16_p16);
+ u2 = _mm_madd_epi16(v0, k__cospi_p16_m16);
+ u3 = _mm_madd_epi16(v1, k__cospi_p16_m16);
+ u4 = _mm_madd_epi16(v2, k__cospi_p24_p08);
+ u5 = _mm_madd_epi16(v3, k__cospi_p24_p08);
+ u6 = _mm_madd_epi16(v2, k__cospi_m08_p24);
+ u7 = _mm_madd_epi16(v3, k__cospi_m08_p24);
+
+ // shift and rounding
+ v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+
+ u0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ u1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ u2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ u3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ u4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ u5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ u6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ u7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+
+ // Even outputs. Overwriting in[] here is safe: the odd half below only uses
+ // s4..s7, which were already computed from the original inputs.
+ in[0] = _mm_packs_epi32(u0, u1);
+ in[2] = _mm_packs_epi32(u4, u5);
+ in[4] = _mm_packs_epi32(u2, u3);
+ in[6] = _mm_packs_epi32(u6, u7);
+
+ // stage 2
+ // interleave and perform butterfly multiplication/addition
+ // Rotate the (s6, s5) pair by cospi_16: v0/v1 use (+c16, -c16),
+ // v2/v3 use (+c16, +c16).
+ u0 = _mm_unpacklo_epi16(s6, s5);
+ u1 = _mm_unpackhi_epi16(s6, s5);
+ v0 = _mm_madd_epi16(u0, k__cospi_p16_m16);
+ v1 = _mm_madd_epi16(u1, k__cospi_p16_m16);
+ v2 = _mm_madd_epi16(u0, k__cospi_p16_p16);
+ v3 = _mm_madd_epi16(u1, k__cospi_p16_p16);
+
+ // shift and rounding
+ u0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING);
+ u1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING);
+ u2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING);
+ u3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING);
+
+ v0 = _mm_srai_epi32(u0, DCT_CONST_BITS);
+ v1 = _mm_srai_epi32(u1, DCT_CONST_BITS);
+ v2 = _mm_srai_epi32(u2, DCT_CONST_BITS);
+ v3 = _mm_srai_epi32(u3, DCT_CONST_BITS);
+
+ u0 = _mm_packs_epi32(v0, v1);
+ u1 = _mm_packs_epi32(v2, v3);
+
+ // stage 3
+ // Combine the untouched differences s4 and s7 with the rotated pair.
+ s0 = _mm_add_epi16(s4, u0);
+ s1 = _mm_sub_epi16(s4, u0);
+ s2 = _mm_sub_epi16(s7, u1);
+ s3 = _mm_add_epi16(s7, u1);
+
+ // stage 4
+ // Final rotations: the (s0, s3) pairs produce in[1] and in[7], the (s1, s2)
+ // pairs produce in[5] and in[3].
+ u0 = _mm_unpacklo_epi16(s0, s3);
+ u1 = _mm_unpackhi_epi16(s0, s3);
+ u2 = _mm_unpacklo_epi16(s1, s2);
+ u3 = _mm_unpackhi_epi16(s1, s2);
+
+ v0 = _mm_madd_epi16(u0, k__cospi_p28_p04);
+ v1 = _mm_madd_epi16(u1, k__cospi_p28_p04);
+ v2 = _mm_madd_epi16(u2, k__cospi_p12_p20);
+ v3 = _mm_madd_epi16(u3, k__cospi_p12_p20);
+ v4 = _mm_madd_epi16(u2, k__cospi_m20_p12);
+ v5 = _mm_madd_epi16(u3, k__cospi_m20_p12);
+ v6 = _mm_madd_epi16(u0, k__cospi_m04_p28);
+ v7 = _mm_madd_epi16(u1, k__cospi_m04_p28);
+
+ // shift and rounding
+ u0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING);
+ u1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING);
+ u2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING);
+ u3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING);
+ u4 = _mm_add_epi32(v4, k__DCT_CONST_ROUNDING);
+ u5 = _mm_add_epi32(v5, k__DCT_CONST_ROUNDING);
+ u6 = _mm_add_epi32(v6, k__DCT_CONST_ROUNDING);
+ u7 = _mm_add_epi32(v7, k__DCT_CONST_ROUNDING);
+
+ v0 = _mm_srai_epi32(u0, DCT_CONST_BITS);
+ v1 = _mm_srai_epi32(u1, DCT_CONST_BITS);
+ v2 = _mm_srai_epi32(u2, DCT_CONST_BITS);
+ v3 = _mm_srai_epi32(u3, DCT_CONST_BITS);
+ v4 = _mm_srai_epi32(u4, DCT_CONST_BITS);
+ v5 = _mm_srai_epi32(u5, DCT_CONST_BITS);
+ v6 = _mm_srai_epi32(u6, DCT_CONST_BITS);
+ v7 = _mm_srai_epi32(u7, DCT_CONST_BITS);
+
+ // Odd outputs.
+ in[1] = _mm_packs_epi32(v0, v1);
+ in[3] = _mm_packs_epi32(v4, v5);
+ in[5] = _mm_packs_epi32(v2, v3);
+ in[7] = _mm_packs_epi32(v6, v7);
+
+ // transpose
+ array_transpose_8x8(in, in);
+}
+
+// One 8-point forward ADST pass over an 8x8 block of 16-bit values, in place.
+//
+// in[0..7] are the eight inputs of the 1-D transform; each of the eight 16-bit
+// lanes is transformed independently. Products are accumulated in 32 bits,
+// rounded with DCT_CONST_ROUNDING, shifted right by DCT_CONST_BITS and packed
+// back to 16 bits with signed saturation (_mm_packs_epi32). The results are
+// written to in[0..7] (four of them negated) and the block is transposed
+// before returning.
+//
+// NOTE(review): the constant names suggest pair_set_epi16(a, b) yields the
+// lane pattern a, b, a, b, ... so that madd on an interleaved (x, y) vector
+// gives x * a + y * b; the helper is defined outside this view -- confirm.
+static void fadst8_sse2(__m128i *in) {
+ // Constants
+ const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
+ const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
+ const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
+ const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64);
+ const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64);
+ const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
+ const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64);
+ const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64);
+ const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__const_0 = _mm_set1_epi16(0);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+
+ __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15;
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15;
+ __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
+ __m128i s0, s1, s2, s3, s4, s5, s6, s7;
+ __m128i in0, in1, in2, in3, in4, in5, in6, in7;
+
+ // properly aligned for butterfly input
+ // The inputs are permuted into the pairing used by stage 1:
+ // (in[7], in[0]), (in[5], in[2]), (in[3], in[4]), (in[1], in[6]).
+ in0 = in[7];
+ in1 = in[0];
+ in2 = in[5];
+ in3 = in[2];
+ in4 = in[3];
+ in5 = in[4];
+ in6 = in[1];
+ in7 = in[6];
+
+ // column transformation
+ // stage 1
+ // interleave and multiply/add into 32-bit integer
+ s0 = _mm_unpacklo_epi16(in0, in1);
+ s1 = _mm_unpackhi_epi16(in0, in1);
+ s2 = _mm_unpacklo_epi16(in2, in3);
+ s3 = _mm_unpackhi_epi16(in2, in3);
+ s4 = _mm_unpacklo_epi16(in4, in5);
+ s5 = _mm_unpackhi_epi16(in4, in5);
+ s6 = _mm_unpacklo_epi16(in6, in7);
+ s7 = _mm_unpackhi_epi16(in6, in7);
+
+ // Each input pair is multiplied by two constant pairs; even/odd u indices
+ // within a group hold the low/high four lanes.
+ u0 = _mm_madd_epi16(s0, k__cospi_p02_p30);
+ u1 = _mm_madd_epi16(s1, k__cospi_p02_p30);
+ u2 = _mm_madd_epi16(s0, k__cospi_p30_m02);
+ u3 = _mm_madd_epi16(s1, k__cospi_p30_m02);
+ u4 = _mm_madd_epi16(s2, k__cospi_p10_p22);
+ u5 = _mm_madd_epi16(s3, k__cospi_p10_p22);
+ u6 = _mm_madd_epi16(s2, k__cospi_p22_m10);
+ u7 = _mm_madd_epi16(s3, k__cospi_p22_m10);
+ u8 = _mm_madd_epi16(s4, k__cospi_p18_p14);
+ u9 = _mm_madd_epi16(s5, k__cospi_p18_p14);
+ u10 = _mm_madd_epi16(s4, k__cospi_p14_m18);
+ u11 = _mm_madd_epi16(s5, k__cospi_p14_m18);
+ u12 = _mm_madd_epi16(s6, k__cospi_p26_p06);
+ u13 = _mm_madd_epi16(s7, k__cospi_p26_p06);
+ u14 = _mm_madd_epi16(s6, k__cospi_p06_m26);
+ u15 = _mm_madd_epi16(s7, k__cospi_p06_m26);
+
+ // addition
+ // The butterfly add/sub is done on the 32-bit products, before rounding.
+ w0 = _mm_add_epi32(u0, u8);
+ w1 = _mm_add_epi32(u1, u9);
+ w2 = _mm_add_epi32(u2, u10);
+ w3 = _mm_add_epi32(u3, u11);
+ w4 = _mm_add_epi32(u4, u12);
+ w5 = _mm_add_epi32(u5, u13);
+ w6 = _mm_add_epi32(u6, u14);
+ w7 = _mm_add_epi32(u7, u15);
+ w8 = _mm_sub_epi32(u0, u8);
+ w9 = _mm_sub_epi32(u1, u9);
+ w10 = _mm_sub_epi32(u2, u10);
+ w11 = _mm_sub_epi32(u3, u11);
+ w12 = _mm_sub_epi32(u4, u12);
+ w13 = _mm_sub_epi32(u5, u13);
+ w14 = _mm_sub_epi32(u6, u14);
+ w15 = _mm_sub_epi32(u7, u15);
+
+ // shift and rounding
+ v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING);
+ v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING);
+ v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING);
+ v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING);
+ v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING);
+ v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING);
+ v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING);
+ v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING);
+ v8 = _mm_add_epi32(w8, k__DCT_CONST_ROUNDING);
+ v9 = _mm_add_epi32(w9, k__DCT_CONST_ROUNDING);
+ v10 = _mm_add_epi32(w10, k__DCT_CONST_ROUNDING);
+ v11 = _mm_add_epi32(w11, k__DCT_CONST_ROUNDING);
+ v12 = _mm_add_epi32(w12, k__DCT_CONST_ROUNDING);
+ v13 = _mm_add_epi32(w13, k__DCT_CONST_ROUNDING);
+ v14 = _mm_add_epi32(w14, k__DCT_CONST_ROUNDING);
+ v15 = _mm_add_epi32(w15, k__DCT_CONST_ROUNDING);
+
+ u0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ u1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ u2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ u3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ u4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ u5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ u6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ u7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ u8 = _mm_srai_epi32(v8, DCT_CONST_BITS);
+ u9 = _mm_srai_epi32(v9, DCT_CONST_BITS);
+ u10 = _mm_srai_epi32(v10, DCT_CONST_BITS);
+ u11 = _mm_srai_epi32(v11, DCT_CONST_BITS);
+ u12 = _mm_srai_epi32(v12, DCT_CONST_BITS);
+ u13 = _mm_srai_epi32(v13, DCT_CONST_BITS);
+ u14 = _mm_srai_epi32(v14, DCT_CONST_BITS);
+ u15 = _mm_srai_epi32(v15, DCT_CONST_BITS);
+
+ // back to 16-bit and pack 8 integers into __m128i
+ // From here on in[] is reused as scratch for the stage-1 results; the
+ // original inputs were already captured in in0..in7.
+ in[0] = _mm_packs_epi32(u0, u1);
+ in[1] = _mm_packs_epi32(u2, u3);
+ in[2] = _mm_packs_epi32(u4, u5);
+ in[3] = _mm_packs_epi32(u6, u7);
+ in[4] = _mm_packs_epi32(u8, u9);
+ in[5] = _mm_packs_epi32(u10, u11);
+ in[6] = _mm_packs_epi32(u12, u13);
+ in[7] = _mm_packs_epi32(u14, u15);
+
+ // stage 2
+ // in[0..3] only need 16-bit add/sub; in[4..7] are rotated with the
+ // cospi_8 / cospi_24 constant pairs.
+ s0 = _mm_add_epi16(in[0], in[2]);
+ s1 = _mm_add_epi16(in[1], in[3]);
+ s2 = _mm_sub_epi16(in[0], in[2]);
+ s3 = _mm_sub_epi16(in[1], in[3]);
+ u0 = _mm_unpacklo_epi16(in[4], in[5]);
+ u1 = _mm_unpackhi_epi16(in[4], in[5]);
+ u2 = _mm_unpacklo_epi16(in[6], in[7]);
+ u3 = _mm_unpackhi_epi16(in[6], in[7]);
+
+ v0 = _mm_madd_epi16(u0, k__cospi_p08_p24);
+ v1 = _mm_madd_epi16(u1, k__cospi_p08_p24);
+ v2 = _mm_madd_epi16(u0, k__cospi_p24_m08);
+ v3 = _mm_madd_epi16(u1, k__cospi_p24_m08);
+ v4 = _mm_madd_epi16(u2, k__cospi_m24_p08);
+ v5 = _mm_madd_epi16(u3, k__cospi_m24_p08);
+ v6 = _mm_madd_epi16(u2, k__cospi_p08_p24);
+ v7 = _mm_madd_epi16(u3, k__cospi_p08_p24);
+
+ // 32-bit butterfly on the rotated products, then round and shift.
+ w0 = _mm_add_epi32(v0, v4);
+ w1 = _mm_add_epi32(v1, v5);
+ w2 = _mm_add_epi32(v2, v6);
+ w3 = _mm_add_epi32(v3, v7);
+ w4 = _mm_sub_epi32(v0, v4);
+ w5 = _mm_sub_epi32(v1, v5);
+ w6 = _mm_sub_epi32(v2, v6);
+ w7 = _mm_sub_epi32(v3, v7);
+
+ v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING);
+ v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING);
+ v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING);
+ v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING);
+ v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING);
+ v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING);
+ v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING);
+ v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING);
+
+ u0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ u1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ u2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ u3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ u4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ u5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ u6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ u7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+
+ // back to 16-bit integers
+ s4 = _mm_packs_epi32(u0, u1);
+ s5 = _mm_packs_epi32(u2, u3);
+ s6 = _mm_packs_epi32(u4, u5);
+ s7 = _mm_packs_epi32(u6, u7);
+
+ // stage 3
+ // Rotate the (s2, s3) and (s6, s7) pairs with cospi_16; s0, s1, s4 and s5
+ // pass through to the output unchanged apart from sign.
+ u0 = _mm_unpacklo_epi16(s2, s3);
+ u1 = _mm_unpackhi_epi16(s2, s3);
+ u2 = _mm_unpacklo_epi16(s6, s7);
+ u3 = _mm_unpackhi_epi16(s6, s7);
+
+ v0 = _mm_madd_epi16(u0, k__cospi_p16_p16);
+ v1 = _mm_madd_epi16(u1, k__cospi_p16_p16);
+ v2 = _mm_madd_epi16(u0, k__cospi_p16_m16);
+ v3 = _mm_madd_epi16(u1, k__cospi_p16_m16);
+ v4 = _mm_madd_epi16(u2, k__cospi_p16_p16);
+ v5 = _mm_madd_epi16(u3, k__cospi_p16_p16);
+ v6 = _mm_madd_epi16(u2, k__cospi_p16_m16);
+ v7 = _mm_madd_epi16(u3, k__cospi_p16_m16);
+
+ u0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING);
+ u1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING);
+ u2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING);
+ u3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING);
+ u4 = _mm_add_epi32(v4, k__DCT_CONST_ROUNDING);
+ u5 = _mm_add_epi32(v5, k__DCT_CONST_ROUNDING);
+ u6 = _mm_add_epi32(v6, k__DCT_CONST_ROUNDING);
+ u7 = _mm_add_epi32(v7, k__DCT_CONST_ROUNDING);
+
+ v0 = _mm_srai_epi32(u0, DCT_CONST_BITS);
+ v1 = _mm_srai_epi32(u1, DCT_CONST_BITS);
+ v2 = _mm_srai_epi32(u2, DCT_CONST_BITS);
+ v3 = _mm_srai_epi32(u3, DCT_CONST_BITS);
+ v4 = _mm_srai_epi32(u4, DCT_CONST_BITS);
+ v5 = _mm_srai_epi32(u5, DCT_CONST_BITS);
+ v6 = _mm_srai_epi32(u6, DCT_CONST_BITS);
+ v7 = _mm_srai_epi32(u7, DCT_CONST_BITS);
+
+ s2 = _mm_packs_epi32(v0, v1);
+ s3 = _mm_packs_epi32(v2, v3);
+ s6 = _mm_packs_epi32(v4, v5);
+ s7 = _mm_packs_epi32(v6, v7);
+
+ // FIXME(jingning): do subtract using bit inversion?
+ // Final output permutation; outputs 1, 3, 5 and 7 are negated (0 - x).
+ in[0] = s0;
+ in[1] = _mm_sub_epi16(k__const_0, s4);
+ in[2] = s6;
+ in[3] = _mm_sub_epi16(k__const_0, s2);
+ in[4] = s3;
+ in[5] = _mm_sub_epi16(k__const_0, s7);
+ in[6] = s5;
+ in[7] = _mm_sub_epi16(k__const_0, s1);
+
+ // transpose
+ array_transpose_8x8(in, in);
+}
+
+#if CONFIG_EXT_TX
+// Identity-style 8-point pass: shifts every 16-bit lane of in[0..7] left by
+// one bit (i.e. doubles it), then transposes the 8x8 block in place so the
+// pass composes with the other 1-D passes in this file.
+static void fidtx8_sse2(__m128i *in) {
+  int k;
+
+  for (k = 0; k < 8; ++k) {
+    in[k] = _mm_slli_epi16(in[k], 1);
+  }
+
+  array_transpose_8x8(in, in);
+}
+#endif  // CONFIG_EXT_TX
+
+// Shared body of every non-DCT_DCT 8x8 hybrid transform: loads the block with
+// the requested flip flags, applies pass0 followed by pass1 (each of the 1-D
+// passes in this file ends with an 8x8 transpose), then calls
+// right_shift_8x8(in, 1) and stores the result with an output stride of 8.
+static void fht8x8_two_pass_sse2(const int16_t *input, tran_low_t *output,
+                                 int stride, int flipud, int fliplr,
+                                 void (*pass0)(__m128i *),
+                                 void (*pass1)(__m128i *)) {
+  __m128i in[8];
+
+  load_buffer_8x8(input, in, stride, flipud, fliplr);
+  pass0(in);
+  pass1(in);
+  right_shift_8x8(in, 1);
+  write_buffer_8x8(output, in, 8);
+}
+
+// Forward 8x8 hybrid transform (SSE2).
+//
+// input   - source samples, `stride` elements between rows.
+// output  - destination coefficients, written with a stride of 8 for every
+//           type handled here; DCT_DCT is delegated to vpx_fdct8x8_sse2().
+// tx_type - selects the pair of 1-D passes and the flip flags handed to
+//           load_buffer_8x8(). Unknown values trip assert(0) and write nothing.
+void vp10_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
+                      int tx_type) {
+  switch (tx_type) {
+    case DCT_DCT: vpx_fdct8x8_sse2(input, output, stride); break;
+    case ADST_DCT:
+      fht8x8_two_pass_sse2(input, output, stride, 0, 0, fadst8_sse2,
+                           fdct8_sse2);
+      break;
+    case DCT_ADST:
+      fht8x8_two_pass_sse2(input, output, stride, 0, 0, fdct8_sse2,
+                           fadst8_sse2);
+      break;
+    case ADST_ADST:
+      fht8x8_two_pass_sse2(input, output, stride, 0, 0, fadst8_sse2,
+                           fadst8_sse2);
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+      fht8x8_two_pass_sse2(input, output, stride, 1, 0, fadst8_sse2,
+                           fdct8_sse2);
+      break;
+    case DCT_FLIPADST:
+      fht8x8_two_pass_sse2(input, output, stride, 0, 1, fdct8_sse2,
+                           fadst8_sse2);
+      break;
+    case FLIPADST_FLIPADST:
+      fht8x8_two_pass_sse2(input, output, stride, 1, 1, fadst8_sse2,
+                           fadst8_sse2);
+      break;
+    case ADST_FLIPADST:
+      fht8x8_two_pass_sse2(input, output, stride, 0, 1, fadst8_sse2,
+                           fadst8_sse2);
+      break;
+    case FLIPADST_ADST:
+      fht8x8_two_pass_sse2(input, output, stride, 1, 0, fadst8_sse2,
+                           fadst8_sse2);
+      break;
+    case V_DCT:
+      fht8x8_two_pass_sse2(input, output, stride, 0, 0, fdct8_sse2,
+                           fidtx8_sse2);
+      break;
+    case H_DCT:
+      fht8x8_two_pass_sse2(input, output, stride, 0, 0, fidtx8_sse2,
+                           fdct8_sse2);
+      break;
+    case V_ADST:
+      fht8x8_two_pass_sse2(input, output, stride, 0, 0, fadst8_sse2,
+                           fidtx8_sse2);
+      break;
+    case H_ADST:
+      fht8x8_two_pass_sse2(input, output, stride, 0, 0, fidtx8_sse2,
+                           fadst8_sse2);
+      break;
+    case V_FLIPADST:
+      fht8x8_two_pass_sse2(input, output, stride, 1, 0, fadst8_sse2,
+                           fidtx8_sse2);
+      break;
+    case H_FLIPADST:
+      fht8x8_two_pass_sse2(input, output, stride, 0, 1, fidtx8_sse2,
+                           fadst8_sse2);
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0);
+  }
+}
+
+// Loads a 16x16 block as four 8x8 quadrants via load_buffer_8x8().
+//
+// in0[0..7] / in0[8..15] receive the quadrants selected for the upper-left /
+// lower-left positions, in1[0..7] / in1[8..15] those for the upper-right /
+// lower-right positions. A non-zero flipud exchanges the upper and lower
+// source quadrants, a non-zero fliplr exchanges the left and right ones; the
+// same two flags are also forwarded unchanged to every load_buffer_8x8() call.
+static INLINE void load_buffer_16x16(const int16_t *input, __m128i *in0,
+                                     __m128i *in1, int stride, int flipud,
+                                     int fliplr) {
+  // Element offsets of the source quadrant that lands in each position.
+  const int upper_rows = flipud ? 8 * stride : 0;
+  const int lower_rows = flipud ? 0 : 8 * stride;
+  const int left_cols = fliplr ? 8 : 0;
+  const int right_cols = fliplr ? 0 : 8;
+
+  // first 8 columns of the result
+  load_buffer_8x8(input + upper_rows + left_cols, in0, stride, flipud, fliplr);
+  load_buffer_8x8(input + lower_rows + left_cols, in0 + 8, stride, flipud,
+                  fliplr);
+
+  // second 8 columns of the result
+  load_buffer_8x8(input + upper_rows + right_cols, in1, stride, flipud,
+                  fliplr);
+  load_buffer_8x8(input + lower_rows + right_cols, in1 + 8, stride, flipud,
+                  fliplr);
+}
+
+// Stores a 16x16 result as four 8x8 tiles with write_buffer_8x8():
+// in0[0..7] and in0[8..15] go to the first 8 columns (rows 0..7, then rows
+// 8..15), in1[0..7] and in1[8..15] to the second 8 columns.
+static INLINE void write_buffer_16x16(tran_low_t *output, __m128i *in0,
+                                      __m128i *in1, int stride) {
+  tran_low_t *const left = output;
+  tran_low_t *const right = output + 8;
+  const int lower = 8 * stride;  // element offset of row 8
+
+  // first 8 columns
+  write_buffer_8x8(left, in0, stride);
+  write_buffer_8x8(left + lower, in0 + 8, stride);
+
+  // second 8 columns
+  write_buffer_8x8(right, in1, stride);
+  write_buffer_8x8(right + lower, in1 + 8, stride);
+}
+
+// Transposes a 16x16 block stored as two arrays of sixteen registers
+// (res0[0..15] and res1[0..15]), built from four 8x8 transposes. The tiles
+// res0[0..7] and res1[8..15] are transposed in place; res1[0..7] and
+// res0[8..15] are transposed and exchanged, using a temporary so that neither
+// is overwritten before it has been read.
+static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
+  __m128i swapped[8];
+  int k;
+
+  array_transpose_8x8(res0, res0);
+  array_transpose_8x8(res1, swapped);       // park res1[0..7]^T
+  array_transpose_8x8(res0 + 8, res1);      // res0[8..15]^T -> res1[0..7]
+  array_transpose_8x8(res1 + 8, res1 + 8);
+
+  // Move the parked tile into res0[8..15].
+  for (k = 0; k < 8; ++k) {
+    res0[8 + k] = swapped[k];
+  }
+}
+
+// Applies right_shift_8x8(.., 2) to each of the four 8-register groups that
+// make up the 16x16 block: res0[0..7], res0[8..15], res1[0..7], res1[8..15].
+static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) {
+  __m128i *const halves[2] = { res0, res1 };
+  int h;
+
+  // perform rounding operations
+  for (h = 0; h < 2; ++h) {
+    right_shift_8x8(halves[h], 2);
+    right_shift_8x8(halves[h] + 8, 2);
+  }
+}
+
+// 16-point forward DCT over eight columns, in place.
+//
+// in[0..15] are the sixteen inputs of the 1-D transform; each of the eight
+// 16-bit lanes is transformed independently. Products are formed in 32 bits
+// with _mm_madd_epi16, rounded with DCT_CONST_ROUNDING, shifted right by
+// DCT_CONST_BITS and packed back to 16 bits with signed saturation. All
+// sixteen results are written back to in[0..15]. Unlike the 8-point passes in
+// this file, this function does not transpose its output.
+//
+// Scratch arrays: i[] = stage-1 sums, s[] = stage-1 differences (later reused
+// for stage 5), p[] and t[] = intermediate butterflies, u[]/v[] = 32-bit
+// multiply/round/shift temporaries that are reused throughout.
+//
+// NOTE(review): constant names suggest pair_set_epi16(a, b) yields the lane
+// pattern a, b, a, b, ...; the helper is defined outside this view -- confirm.
+static void fdct16_8col(__m128i *in) {
+ // perform 16x16 1-D DCT for 8 columns
+ __m128i i[8], s[8], p[8], t[8], u[16], v[16];
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
+ const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i k__cospi_p30_p02 = pair_set_epi16(cospi_30_64, cospi_2_64);
+ const __m128i k__cospi_p14_p18 = pair_set_epi16(cospi_14_64, cospi_18_64);
+ const __m128i k__cospi_m02_p30 = pair_set_epi16(-cospi_2_64, cospi_30_64);
+ const __m128i k__cospi_m18_p14 = pair_set_epi16(-cospi_18_64, cospi_14_64);
+ const __m128i k__cospi_p22_p10 = pair_set_epi16(cospi_22_64, cospi_10_64);
+ const __m128i k__cospi_p06_p26 = pair_set_epi16(cospi_6_64, cospi_26_64);
+ const __m128i k__cospi_m10_p22 = pair_set_epi16(-cospi_10_64, cospi_22_64);
+ const __m128i k__cospi_m26_p06 = pair_set_epi16(-cospi_26_64, cospi_6_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+
+ // stage 1
+ // i[k] = in[k] + in[15 - k]: sums of mirrored inputs (even-indexed outputs).
+ i[0] = _mm_add_epi16(in[0], in[15]);
+ i[1] = _mm_add_epi16(in[1], in[14]);
+ i[2] = _mm_add_epi16(in[2], in[13]);
+ i[3] = _mm_add_epi16(in[3], in[12]);
+ i[4] = _mm_add_epi16(in[4], in[11]);
+ i[5] = _mm_add_epi16(in[5], in[10]);
+ i[6] = _mm_add_epi16(in[6], in[9]);
+ i[7] = _mm_add_epi16(in[7], in[8]);
+
+ // s[k] = in[7 - k] - in[8 + k]: differences of mirrored inputs
+ // (odd-indexed outputs, processed from "stage 2" onwards).
+ s[0] = _mm_sub_epi16(in[7], in[8]);
+ s[1] = _mm_sub_epi16(in[6], in[9]);
+ s[2] = _mm_sub_epi16(in[5], in[10]);
+ s[3] = _mm_sub_epi16(in[4], in[11]);
+ s[4] = _mm_sub_epi16(in[3], in[12]);
+ s[5] = _mm_sub_epi16(in[2], in[13]);
+ s[6] = _mm_sub_epi16(in[1], in[14]);
+ s[7] = _mm_sub_epi16(in[0], in[15]);
+
+ // The sums i[0..7] now go through the same butterfly structure as the
+ // 8-point DCT in this file; its results land in the even slots of in[].
+ p[0] = _mm_add_epi16(i[0], i[7]);
+ p[1] = _mm_add_epi16(i[1], i[6]);
+ p[2] = _mm_add_epi16(i[2], i[5]);
+ p[3] = _mm_add_epi16(i[3], i[4]);
+ p[4] = _mm_sub_epi16(i[3], i[4]);
+ p[5] = _mm_sub_epi16(i[2], i[5]);
+ p[6] = _mm_sub_epi16(i[1], i[6]);
+ p[7] = _mm_sub_epi16(i[0], i[7]);
+
+ u[0] = _mm_add_epi16(p[0], p[3]);
+ u[1] = _mm_add_epi16(p[1], p[2]);
+ u[2] = _mm_sub_epi16(p[1], p[2]);
+ u[3] = _mm_sub_epi16(p[0], p[3]);
+
+ v[0] = _mm_unpacklo_epi16(u[0], u[1]);
+ v[1] = _mm_unpackhi_epi16(u[0], u[1]);
+ v[2] = _mm_unpacklo_epi16(u[2], u[3]);
+ v[3] = _mm_unpackhi_epi16(u[2], u[3]);
+
+ u[0] = _mm_madd_epi16(v[0], k__cospi_p16_p16);
+ u[1] = _mm_madd_epi16(v[1], k__cospi_p16_p16);
+ u[2] = _mm_madd_epi16(v[0], k__cospi_p16_m16);
+ u[3] = _mm_madd_epi16(v[1], k__cospi_p16_m16);
+ u[4] = _mm_madd_epi16(v[2], k__cospi_p24_p08);
+ u[5] = _mm_madd_epi16(v[3], k__cospi_p24_p08);
+ u[6] = _mm_madd_epi16(v[2], k__cospi_m08_p24);
+ u[7] = _mm_madd_epi16(v[3], k__cospi_m08_p24);
+
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+
+ // Outputs 0, 4, 8 and 12. Writing to in[] is safe from here on because
+ // every original input has already been folded into i[] and s[].
+ in[0] = _mm_packs_epi32(u[0], u[1]);
+ in[4] = _mm_packs_epi32(u[4], u[5]);
+ in[8] = _mm_packs_epi32(u[2], u[3]);
+ in[12] = _mm_packs_epi32(u[6], u[7]);
+
+ // Rotate the (p[5], p[6]) pair with cospi_16.
+ u[0] = _mm_unpacklo_epi16(p[5], p[6]);
+ u[1] = _mm_unpackhi_epi16(p[5], p[6]);
+ v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+
+ v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+
+ u[0] = _mm_packs_epi32(v[0], v[1]);
+ u[1] = _mm_packs_epi32(v[2], v[3]);
+
+ t[0] = _mm_add_epi16(p[4], u[0]);
+ t[1] = _mm_sub_epi16(p[4], u[0]);
+ t[2] = _mm_sub_epi16(p[7], u[1]);
+ t[3] = _mm_add_epi16(p[7], u[1]);
+
+ u[0] = _mm_unpacklo_epi16(t[0], t[3]);
+ u[1] = _mm_unpackhi_epi16(t[0], t[3]);
+ u[2] = _mm_unpacklo_epi16(t[1], t[2]);
+ u[3] = _mm_unpackhi_epi16(t[1], t[2]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p28_p04);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p28_p04);
+ v[2] = _mm_madd_epi16(u[2], k__cospi_p12_p20);
+ v[3] = _mm_madd_epi16(u[3], k__cospi_p12_p20);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_m20_p12);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_m20_p12);
+ v[6] = _mm_madd_epi16(u[0], k__cospi_m04_p28);
+ v[7] = _mm_madd_epi16(u[1], k__cospi_m04_p28);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+
+ v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+
+ // Outputs 2, 6, 10 and 14.
+ in[2] = _mm_packs_epi32(v[0], v[1]);
+ in[6] = _mm_packs_epi32(v[4], v[5]);
+ in[10] = _mm_packs_epi32(v[2], v[3]);
+ in[14] = _mm_packs_epi32(v[6], v[7]);
+
+ // stage 2
+ // Odd half: rotate the inner difference pairs (s[2], s[5]) and
+ // (s[3], s[4]) with cospi_16; s[0], s[1], s[6], s[7] are used as-is below.
+ u[0] = _mm_unpacklo_epi16(s[2], s[5]);
+ u[1] = _mm_unpackhi_epi16(s[2], s[5]);
+ u[2] = _mm_unpacklo_epi16(s[3], s[4]);
+ u[3] = _mm_unpackhi_epi16(s[3], s[4]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16);
+ v[2] = _mm_madd_epi16(u[2], k__cospi_m16_p16);
+ v[3] = _mm_madd_epi16(u[3], k__cospi_m16_p16);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p16_p16);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p16_p16);
+ v[6] = _mm_madd_epi16(u[0], k__cospi_p16_p16);
+ v[7] = _mm_madd_epi16(u[1], k__cospi_p16_p16);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+
+ v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+
+ t[2] = _mm_packs_epi32(v[0], v[1]);
+ t[3] = _mm_packs_epi32(v[2], v[3]);
+ t[4] = _mm_packs_epi32(v[4], v[5]);
+ t[5] = _mm_packs_epi32(v[6], v[7]);
+
+ // stage 3
+ p[0] = _mm_add_epi16(s[0], t[3]);
+ p[1] = _mm_add_epi16(s[1], t[2]);
+ p[2] = _mm_sub_epi16(s[1], t[2]);
+ p[3] = _mm_sub_epi16(s[0], t[3]);
+ p[4] = _mm_sub_epi16(s[7], t[4]);
+ p[5] = _mm_sub_epi16(s[6], t[5]);
+ p[6] = _mm_add_epi16(s[6], t[5]);
+ p[7] = _mm_add_epi16(s[7], t[4]);
+
+ // stage 4
+ // Rotate (p[1], p[6]) and (p[2], p[5]) with the cospi_8 / cospi_24 pairs;
+ // p[0], p[3], p[4], p[7] pass straight to stage 5.
+ u[0] = _mm_unpacklo_epi16(p[1], p[6]);
+ u[1] = _mm_unpackhi_epi16(p[1], p[6]);
+ u[2] = _mm_unpacklo_epi16(p[2], p[5]);
+ u[3] = _mm_unpackhi_epi16(p[2], p[5]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_m08_p24);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_m08_p24);
+ v[2] = _mm_madd_epi16(u[2], k__cospi_m24_m08);
+ v[3] = _mm_madd_epi16(u[3], k__cospi_m24_m08);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_m08_p24);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_m08_p24);
+ v[6] = _mm_madd_epi16(u[0], k__cospi_p24_p08);
+ v[7] = _mm_madd_epi16(u[1], k__cospi_p24_p08);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+
+ v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+
+ t[1] = _mm_packs_epi32(v[0], v[1]);
+ t[2] = _mm_packs_epi32(v[2], v[3]);
+ t[5] = _mm_packs_epi32(v[4], v[5]);
+ t[6] = _mm_packs_epi32(v[6], v[7]);
+
+ // stage 5
+ // s[] is reused here; the stage-1 differences are no longer needed.
+ s[0] = _mm_add_epi16(p[0], t[1]);
+ s[1] = _mm_sub_epi16(p[0], t[1]);
+ s[2] = _mm_sub_epi16(p[3], t[2]);
+ s[3] = _mm_add_epi16(p[3], t[2]);
+ s[4] = _mm_add_epi16(p[4], t[5]);
+ s[5] = _mm_sub_epi16(p[4], t[5]);
+ s[6] = _mm_sub_epi16(p[7], t[6]);
+ s[7] = _mm_add_epi16(p[7], t[6]);
+
+ // stage 6
+ // Final rotations of the mirrored pairs (s[k], s[7 - k]); each pair yields
+ // two odd-indexed outputs.
+ u[0] = _mm_unpacklo_epi16(s[0], s[7]);
+ u[1] = _mm_unpackhi_epi16(s[0], s[7]);
+ u[2] = _mm_unpacklo_epi16(s[1], s[6]);
+ u[3] = _mm_unpackhi_epi16(s[1], s[6]);
+ u[4] = _mm_unpacklo_epi16(s[2], s[5]);
+ u[5] = _mm_unpackhi_epi16(s[2], s[5]);
+ u[6] = _mm_unpacklo_epi16(s[3], s[4]);
+ u[7] = _mm_unpackhi_epi16(s[3], s[4]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p30_p02);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p30_p02);
+ v[2] = _mm_madd_epi16(u[2], k__cospi_p14_p18);
+ v[3] = _mm_madd_epi16(u[3], k__cospi_p14_p18);
+ v[4] = _mm_madd_epi16(u[4], k__cospi_p22_p10);
+ v[5] = _mm_madd_epi16(u[5], k__cospi_p22_p10);
+ v[6] = _mm_madd_epi16(u[6], k__cospi_p06_p26);
+ v[7] = _mm_madd_epi16(u[7], k__cospi_p06_p26);
+ v[8] = _mm_madd_epi16(u[6], k__cospi_m26_p06);
+ v[9] = _mm_madd_epi16(u[7], k__cospi_m26_p06);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_m10_p22);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_m10_p22);
+ v[12] = _mm_madd_epi16(u[2], k__cospi_m18_p14);
+ v[13] = _mm_madd_epi16(u[3], k__cospi_m18_p14);
+ v[14] = _mm_madd_epi16(u[0], k__cospi_m02_p30);
+ v[15] = _mm_madd_epi16(u[1], k__cospi_m02_p30);
+
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+ u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING);
+ u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING);
+ u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING);
+ u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING);
+ u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING);
+ u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING);
+ u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING);
+ u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING);
+
+ v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+ v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+ v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+ v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+ v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+ v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+ v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+ v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+ v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+
+ // Odd-indexed outputs; note the store order is not the index order.
+ in[1] = _mm_packs_epi32(v[0], v[1]);
+ in[9] = _mm_packs_epi32(v[2], v[3]);
+ in[5] = _mm_packs_epi32(v[4], v[5]);
+ in[13] = _mm_packs_epi32(v[6], v[7]);
+ in[3] = _mm_packs_epi32(v[8], v[9]);
+ in[11] = _mm_packs_epi32(v[10], v[11]);
+ in[7] = _mm_packs_epi32(v[12], v[13]);
+ in[15] = _mm_packs_epi32(v[14], v[15]);
+}
+
+static void fadst16_8col(__m128i *in) {
+ // In-place 16-point 1-D ADST on 8 columns: in[0..15], eight int16 lanes each.
+ __m128i s[16], x[16], u[32], v[32];
+ const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
+ const __m128i k__cospi_p31_m01 = pair_set_epi16(cospi_31_64, -cospi_1_64);
+ const __m128i k__cospi_p05_p27 = pair_set_epi16(cospi_5_64, cospi_27_64);
+ const __m128i k__cospi_p27_m05 = pair_set_epi16(cospi_27_64, -cospi_5_64);
+ const __m128i k__cospi_p09_p23 = pair_set_epi16(cospi_9_64, cospi_23_64);
+ const __m128i k__cospi_p23_m09 = pair_set_epi16(cospi_23_64, -cospi_9_64);
+ const __m128i k__cospi_p13_p19 = pair_set_epi16(cospi_13_64, cospi_19_64);
+ const __m128i k__cospi_p19_m13 = pair_set_epi16(cospi_19_64, -cospi_13_64);
+ const __m128i k__cospi_p17_p15 = pair_set_epi16(cospi_17_64, cospi_15_64);
+ const __m128i k__cospi_p15_m17 = pair_set_epi16(cospi_15_64, -cospi_17_64);
+ const __m128i k__cospi_p21_p11 = pair_set_epi16(cospi_21_64, cospi_11_64);
+ const __m128i k__cospi_p11_m21 = pair_set_epi16(cospi_11_64, -cospi_21_64);
+ const __m128i k__cospi_p25_p07 = pair_set_epi16(cospi_25_64, cospi_7_64);
+ const __m128i k__cospi_p07_m25 = pair_set_epi16(cospi_7_64, -cospi_25_64);
+ const __m128i k__cospi_p29_p03 = pair_set_epi16(cospi_29_64, cospi_3_64);
+ const __m128i k__cospi_p03_m29 = pair_set_epi16(cospi_3_64, -cospi_29_64);
+ const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64);
+ const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64);
+ const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64);
+ const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64);
+ const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64);
+ const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
+ const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m16_m16 = _mm_set1_epi16((int16_t)-cospi_16_64);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ const __m128i kZero = _mm_set1_epi16(0);
+ // stage 1: interleave mirrored input pairs (15,0), (13,2), ..., (1,14).
+ u[0] = _mm_unpacklo_epi16(in[15], in[0]);
+ u[1] = _mm_unpackhi_epi16(in[15], in[0]);
+ u[2] = _mm_unpacklo_epi16(in[13], in[2]);
+ u[3] = _mm_unpackhi_epi16(in[13], in[2]);
+ u[4] = _mm_unpacklo_epi16(in[11], in[4]);
+ u[5] = _mm_unpackhi_epi16(in[11], in[4]);
+ u[6] = _mm_unpacklo_epi16(in[9], in[6]);
+ u[7] = _mm_unpackhi_epi16(in[9], in[6]);
+ u[8] = _mm_unpacklo_epi16(in[7], in[8]);
+ u[9] = _mm_unpackhi_epi16(in[7], in[8]);
+ u[10] = _mm_unpacklo_epi16(in[5], in[10]);
+ u[11] = _mm_unpackhi_epi16(in[5], in[10]);
+ u[12] = _mm_unpacklo_epi16(in[3], in[12]);
+ u[13] = _mm_unpackhi_epi16(in[3], in[12]);
+ u[14] = _mm_unpacklo_epi16(in[1], in[14]);
+ u[15] = _mm_unpackhi_epi16(in[1], in[14]);
+ // Multiply-add every interleaved pair against two cosine constant pairs.
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p01_p31);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p01_p31);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p31_m01);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p31_m01);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p05_p27);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p05_p27);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p27_m05);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p27_m05);
+ v[8] = _mm_madd_epi16(u[4], k__cospi_p09_p23);
+ v[9] = _mm_madd_epi16(u[5], k__cospi_p09_p23);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_p23_m09);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_p23_m09);
+ v[12] = _mm_madd_epi16(u[6], k__cospi_p13_p19);
+ v[13] = _mm_madd_epi16(u[7], k__cospi_p13_p19);
+ v[14] = _mm_madd_epi16(u[6], k__cospi_p19_m13);
+ v[15] = _mm_madd_epi16(u[7], k__cospi_p19_m13);
+ v[16] = _mm_madd_epi16(u[8], k__cospi_p17_p15);
+ v[17] = _mm_madd_epi16(u[9], k__cospi_p17_p15);
+ v[18] = _mm_madd_epi16(u[8], k__cospi_p15_m17);
+ v[19] = _mm_madd_epi16(u[9], k__cospi_p15_m17);
+ v[20] = _mm_madd_epi16(u[10], k__cospi_p21_p11);
+ v[21] = _mm_madd_epi16(u[11], k__cospi_p21_p11);
+ v[22] = _mm_madd_epi16(u[10], k__cospi_p11_m21);
+ v[23] = _mm_madd_epi16(u[11], k__cospi_p11_m21);
+ v[24] = _mm_madd_epi16(u[12], k__cospi_p25_p07);
+ v[25] = _mm_madd_epi16(u[13], k__cospi_p25_p07);
+ v[26] = _mm_madd_epi16(u[12], k__cospi_p07_m25);
+ v[27] = _mm_madd_epi16(u[13], k__cospi_p07_m25);
+ v[28] = _mm_madd_epi16(u[14], k__cospi_p29_p03);
+ v[29] = _mm_madd_epi16(u[15], k__cospi_p29_p03);
+ v[30] = _mm_madd_epi16(u[14], k__cospi_p03_m29);
+ v[31] = _mm_madd_epi16(u[15], k__cospi_p03_m29);
+ // 32-bit butterflies: sums land in u[0..15], differences in u[16..31].
+ u[0] = _mm_add_epi32(v[0], v[16]);
+ u[1] = _mm_add_epi32(v[1], v[17]);
+ u[2] = _mm_add_epi32(v[2], v[18]);
+ u[3] = _mm_add_epi32(v[3], v[19]);
+ u[4] = _mm_add_epi32(v[4], v[20]);
+ u[5] = _mm_add_epi32(v[5], v[21]);
+ u[6] = _mm_add_epi32(v[6], v[22]);
+ u[7] = _mm_add_epi32(v[7], v[23]);
+ u[8] = _mm_add_epi32(v[8], v[24]);
+ u[9] = _mm_add_epi32(v[9], v[25]);
+ u[10] = _mm_add_epi32(v[10], v[26]);
+ u[11] = _mm_add_epi32(v[11], v[27]);
+ u[12] = _mm_add_epi32(v[12], v[28]);
+ u[13] = _mm_add_epi32(v[13], v[29]);
+ u[14] = _mm_add_epi32(v[14], v[30]);
+ u[15] = _mm_add_epi32(v[15], v[31]);
+ u[16] = _mm_sub_epi32(v[0], v[16]);
+ u[17] = _mm_sub_epi32(v[1], v[17]);
+ u[18] = _mm_sub_epi32(v[2], v[18]);
+ u[19] = _mm_sub_epi32(v[3], v[19]);
+ u[20] = _mm_sub_epi32(v[4], v[20]);
+ u[21] = _mm_sub_epi32(v[5], v[21]);
+ u[22] = _mm_sub_epi32(v[6], v[22]);
+ u[23] = _mm_sub_epi32(v[7], v[23]);
+ u[24] = _mm_sub_epi32(v[8], v[24]);
+ u[25] = _mm_sub_epi32(v[9], v[25]);
+ u[26] = _mm_sub_epi32(v[10], v[26]);
+ u[27] = _mm_sub_epi32(v[11], v[27]);
+ u[28] = _mm_sub_epi32(v[12], v[28]);
+ u[29] = _mm_sub_epi32(v[13], v[29]);
+ u[30] = _mm_sub_epi32(v[14], v[30]);
+ u[31] = _mm_sub_epi32(v[15], v[31]);
+ // Round: add k__DCT_CONST_ROUNDING, then shift right by DCT_CONST_BITS.
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+ v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+ v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+ v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+ v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+ v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+ v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+ v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+ v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+ v[16] = _mm_add_epi32(u[16], k__DCT_CONST_ROUNDING);
+ v[17] = _mm_add_epi32(u[17], k__DCT_CONST_ROUNDING);
+ v[18] = _mm_add_epi32(u[18], k__DCT_CONST_ROUNDING);
+ v[19] = _mm_add_epi32(u[19], k__DCT_CONST_ROUNDING);
+ v[20] = _mm_add_epi32(u[20], k__DCT_CONST_ROUNDING);
+ v[21] = _mm_add_epi32(u[21], k__DCT_CONST_ROUNDING);
+ v[22] = _mm_add_epi32(u[22], k__DCT_CONST_ROUNDING);
+ v[23] = _mm_add_epi32(u[23], k__DCT_CONST_ROUNDING);
+ v[24] = _mm_add_epi32(u[24], k__DCT_CONST_ROUNDING);
+ v[25] = _mm_add_epi32(u[25], k__DCT_CONST_ROUNDING);
+ v[26] = _mm_add_epi32(u[26], k__DCT_CONST_ROUNDING);
+ v[27] = _mm_add_epi32(u[27], k__DCT_CONST_ROUNDING);
+ v[28] = _mm_add_epi32(u[28], k__DCT_CONST_ROUNDING);
+ v[29] = _mm_add_epi32(u[29], k__DCT_CONST_ROUNDING);
+ v[30] = _mm_add_epi32(u[30], k__DCT_CONST_ROUNDING);
+ v[31] = _mm_add_epi32(u[31], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+ u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+ u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+ u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+ u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+ u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+ u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+ u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+ u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+ u[16] = _mm_srai_epi32(v[16], DCT_CONST_BITS);
+ u[17] = _mm_srai_epi32(v[17], DCT_CONST_BITS);
+ u[18] = _mm_srai_epi32(v[18], DCT_CONST_BITS);
+ u[19] = _mm_srai_epi32(v[19], DCT_CONST_BITS);
+ u[20] = _mm_srai_epi32(v[20], DCT_CONST_BITS);
+ u[21] = _mm_srai_epi32(v[21], DCT_CONST_BITS);
+ u[22] = _mm_srai_epi32(v[22], DCT_CONST_BITS);
+ u[23] = _mm_srai_epi32(v[23], DCT_CONST_BITS);
+ u[24] = _mm_srai_epi32(v[24], DCT_CONST_BITS);
+ u[25] = _mm_srai_epi32(v[25], DCT_CONST_BITS);
+ u[26] = _mm_srai_epi32(v[26], DCT_CONST_BITS);
+ u[27] = _mm_srai_epi32(v[27], DCT_CONST_BITS);
+ u[28] = _mm_srai_epi32(v[28], DCT_CONST_BITS);
+ u[29] = _mm_srai_epi32(v[29], DCT_CONST_BITS);
+ u[30] = _mm_srai_epi32(v[30], DCT_CONST_BITS);
+ u[31] = _mm_srai_epi32(v[31], DCT_CONST_BITS);
+ // Pack back to 16-bit lanes (signed saturation) to form s[0..15].
+ s[0] = _mm_packs_epi32(u[0], u[1]);
+ s[1] = _mm_packs_epi32(u[2], u[3]);
+ s[2] = _mm_packs_epi32(u[4], u[5]);
+ s[3] = _mm_packs_epi32(u[6], u[7]);
+ s[4] = _mm_packs_epi32(u[8], u[9]);
+ s[5] = _mm_packs_epi32(u[10], u[11]);
+ s[6] = _mm_packs_epi32(u[12], u[13]);
+ s[7] = _mm_packs_epi32(u[14], u[15]);
+ s[8] = _mm_packs_epi32(u[16], u[17]);
+ s[9] = _mm_packs_epi32(u[18], u[19]);
+ s[10] = _mm_packs_epi32(u[20], u[21]);
+ s[11] = _mm_packs_epi32(u[22], u[23]);
+ s[12] = _mm_packs_epi32(u[24], u[25]);
+ s[13] = _mm_packs_epi32(u[26], u[27]);
+ s[14] = _mm_packs_epi32(u[28], u[29]);
+ s[15] = _mm_packs_epi32(u[30], u[31]);
+
+ // stage 2: multiply-add pairs of s[8..15]; s[0..7] are combined further down.
+ u[0] = _mm_unpacklo_epi16(s[8], s[9]);
+ u[1] = _mm_unpackhi_epi16(s[8], s[9]);
+ u[2] = _mm_unpacklo_epi16(s[10], s[11]);
+ u[3] = _mm_unpackhi_epi16(s[10], s[11]);
+ u[4] = _mm_unpacklo_epi16(s[12], s[13]);
+ u[5] = _mm_unpackhi_epi16(s[12], s[13]);
+ u[6] = _mm_unpacklo_epi16(s[14], s[15]);
+ u[7] = _mm_unpackhi_epi16(s[14], s[15]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p04_p28);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p04_p28);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p28_m04);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p28_m04);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p20_p12);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p20_p12);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p12_m20);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p12_m20);
+ v[8] = _mm_madd_epi16(u[4], k__cospi_m28_p04);
+ v[9] = _mm_madd_epi16(u[5], k__cospi_m28_p04);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_p04_p28);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_p04_p28);
+ v[12] = _mm_madd_epi16(u[6], k__cospi_m12_p20);
+ v[13] = _mm_madd_epi16(u[7], k__cospi_m12_p20);
+ v[14] = _mm_madd_epi16(u[6], k__cospi_p20_p12);
+ v[15] = _mm_madd_epi16(u[7], k__cospi_p20_p12);
+ // 32-bit butterflies between the first and second eight products.
+ u[0] = _mm_add_epi32(v[0], v[8]);
+ u[1] = _mm_add_epi32(v[1], v[9]);
+ u[2] = _mm_add_epi32(v[2], v[10]);
+ u[3] = _mm_add_epi32(v[3], v[11]);
+ u[4] = _mm_add_epi32(v[4], v[12]);
+ u[5] = _mm_add_epi32(v[5], v[13]);
+ u[6] = _mm_add_epi32(v[6], v[14]);
+ u[7] = _mm_add_epi32(v[7], v[15]);
+ u[8] = _mm_sub_epi32(v[0], v[8]);
+ u[9] = _mm_sub_epi32(v[1], v[9]);
+ u[10] = _mm_sub_epi32(v[2], v[10]);
+ u[11] = _mm_sub_epi32(v[3], v[11]);
+ u[12] = _mm_sub_epi32(v[4], v[12]);
+ u[13] = _mm_sub_epi32(v[5], v[13]);
+ u[14] = _mm_sub_epi32(v[6], v[14]);
+ u[15] = _mm_sub_epi32(v[7], v[15]);
+ // Round and shift, same two steps as in stage 1.
+ v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+ v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+ v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+ v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+ v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+ v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+ v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+ v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+ v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+ u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
+ u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
+ u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
+ u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
+ u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
+ u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
+ u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
+ u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
+ u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
+ u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
+ u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
+ u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
+ u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
+ u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
+ u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
+ u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
+ // x[0..7]: 16-bit butterflies of s[0..7]; x[8..15]: packed results above.
+ x[0] = _mm_add_epi16(s[0], s[4]);
+ x[1] = _mm_add_epi16(s[1], s[5]);
+ x[2] = _mm_add_epi16(s[2], s[6]);
+ x[3] = _mm_add_epi16(s[3], s[7]);
+ x[4] = _mm_sub_epi16(s[0], s[4]);
+ x[5] = _mm_sub_epi16(s[1], s[5]);
+ x[6] = _mm_sub_epi16(s[2], s[6]);
+ x[7] = _mm_sub_epi16(s[3], s[7]);
+ x[8] = _mm_packs_epi32(u[0], u[1]);
+ x[9] = _mm_packs_epi32(u[2], u[3]);
+ x[10] = _mm_packs_epi32(u[4], u[5]);
+ x[11] = _mm_packs_epi32(u[6], u[7]);
+ x[12] = _mm_packs_epi32(u[8], u[9]);
+ x[13] = _mm_packs_epi32(u[10], u[11]);
+ x[14] = _mm_packs_epi32(u[12], u[13]);
+ x[15] = _mm_packs_epi32(u[14], u[15]);
+
+ // stage 3: multiply-add pairs of x[4..7] and x[12..15] by cospi_8/24 pairs.
+ u[0] = _mm_unpacklo_epi16(x[4], x[5]);
+ u[1] = _mm_unpackhi_epi16(x[4], x[5]);
+ u[2] = _mm_unpacklo_epi16(x[6], x[7]);
+ u[3] = _mm_unpackhi_epi16(x[6], x[7]);
+ u[4] = _mm_unpacklo_epi16(x[12], x[13]);
+ u[5] = _mm_unpackhi_epi16(x[12], x[13]);
+ u[6] = _mm_unpacklo_epi16(x[14], x[15]);
+ u[7] = _mm_unpackhi_epi16(x[14], x[15]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_p08_p24);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_p08_p24);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p24_m08);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p24_m08);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_m24_p08);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_m24_p08);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24);
+ v[8] = _mm_madd_epi16(u[4], k__cospi_p08_p24);
+ v[9] = _mm_madd_epi16(u[5], k__cospi_p08_p24);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_p24_m08);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_p24_m08);
+ v[12] = _mm_madd_epi16(u[6], k__cospi_m24_p08);
+ v[13] = _mm_madd_epi16(u[7], k__cospi_m24_p08);
+ v[14] = _mm_madd_epi16(u[6], k__cospi_p08_p24);
+ v[15] = _mm_madd_epi16(u[7], k__cospi_p08_p24);
+ // 32-bit butterflies within each group of eight products.
+ u[0] = _mm_add_epi32(v[0], v[4]);
+ u[1] = _mm_add_epi32(v[1], v[5]);
+ u[2] = _mm_add_epi32(v[2], v[6]);
+ u[3] = _mm_add_epi32(v[3], v[7]);
+ u[4] = _mm_sub_epi32(v[0], v[4]);
+ u[5] = _mm_sub_epi32(v[1], v[5]);
+ u[6] = _mm_sub_epi32(v[2], v[6]);
+ u[7] = _mm_sub_epi32(v[3], v[7]);
+ u[8] = _mm_add_epi32(v[8], v[12]);
+ u[9] = _mm_add_epi32(v[9], v[13]);
+ u[10] = _mm_add_epi32(v[10], v[14]);
+ u[11] = _mm_add_epi32(v[11], v[15]);
+ u[12] = _mm_sub_epi32(v[8], v[12]);
+ u[13] = _mm_sub_epi32(v[9], v[13]);
+ u[14] = _mm_sub_epi32(v[10], v[14]);
+ u[15] = _mm_sub_epi32(v[11], v[15]);
+ // Round (accumulated in place in u this time), then shift into v.
+ u[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
+ u[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
+ u[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
+ u[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
+ u[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
+ u[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
+ u[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
+ u[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
+ u[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
+
+ v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+ v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+ v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+ v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+ v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+ v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+ v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+ v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+ v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+ // s[0..3], s[8..11]: 16-bit butterflies of x; the rest: packed results.
+ s[0] = _mm_add_epi16(x[0], x[2]);
+ s[1] = _mm_add_epi16(x[1], x[3]);
+ s[2] = _mm_sub_epi16(x[0], x[2]);
+ s[3] = _mm_sub_epi16(x[1], x[3]);
+ s[4] = _mm_packs_epi32(v[0], v[1]);
+ s[5] = _mm_packs_epi32(v[2], v[3]);
+ s[6] = _mm_packs_epi32(v[4], v[5]);
+ s[7] = _mm_packs_epi32(v[6], v[7]);
+ s[8] = _mm_add_epi16(x[8], x[10]);
+ s[9] = _mm_add_epi16(x[9], x[11]);
+ s[10] = _mm_sub_epi16(x[8], x[10]);
+ s[11] = _mm_sub_epi16(x[9], x[11]);
+ s[12] = _mm_packs_epi32(v[8], v[9]);
+ s[13] = _mm_packs_epi32(v[10], v[11]);
+ s[14] = _mm_packs_epi32(v[12], v[13]);
+ s[15] = _mm_packs_epi32(v[14], v[15]);
+
+ // stage 4: combine pairs (2,3), (6,7), (10,11), (14,15) with +/-cospi_16_64.
+ u[0] = _mm_unpacklo_epi16(s[2], s[3]);
+ u[1] = _mm_unpackhi_epi16(s[2], s[3]);
+ u[2] = _mm_unpacklo_epi16(s[6], s[7]);
+ u[3] = _mm_unpackhi_epi16(s[6], s[7]);
+ u[4] = _mm_unpacklo_epi16(s[10], s[11]);
+ u[5] = _mm_unpackhi_epi16(s[10], s[11]);
+ u[6] = _mm_unpacklo_epi16(s[14], s[15]);
+ u[7] = _mm_unpackhi_epi16(s[14], s[15]);
+
+ v[0] = _mm_madd_epi16(u[0], k__cospi_m16_m16);
+ v[1] = _mm_madd_epi16(u[1], k__cospi_m16_m16);
+ v[2] = _mm_madd_epi16(u[0], k__cospi_p16_m16);
+ v[3] = _mm_madd_epi16(u[1], k__cospi_p16_m16);
+ v[4] = _mm_madd_epi16(u[2], k__cospi_p16_p16);
+ v[5] = _mm_madd_epi16(u[3], k__cospi_p16_p16);
+ v[6] = _mm_madd_epi16(u[2], k__cospi_m16_p16);
+ v[7] = _mm_madd_epi16(u[3], k__cospi_m16_p16);
+ v[8] = _mm_madd_epi16(u[4], k__cospi_p16_p16);
+ v[9] = _mm_madd_epi16(u[5], k__cospi_p16_p16);
+ v[10] = _mm_madd_epi16(u[4], k__cospi_m16_p16);
+ v[11] = _mm_madd_epi16(u[5], k__cospi_m16_p16);
+ v[12] = _mm_madd_epi16(u[6], k__cospi_m16_m16);
+ v[13] = _mm_madd_epi16(u[7], k__cospi_m16_m16);
+ v[14] = _mm_madd_epi16(u[6], k__cospi_p16_m16);
+ v[15] = _mm_madd_epi16(u[7], k__cospi_p16_m16);
+ // No butterfly in this stage: each product is only rounded and shifted.
+ u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING);
+ u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING);
+ u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING);
+ u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING);
+ u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING);
+ u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING);
+ u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING);
+ u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING);
+ u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING);
+ u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING);
+ u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING);
+ u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING);
+ u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING);
+ u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING);
+ u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING);
+ u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING);
+
+ v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
+ v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
+ v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
+ v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
+ v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
+ v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
+ v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
+ v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
+ v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS);
+ v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS);
+ v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS);
+ v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS);
+ v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS);
+ v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS);
+ v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS);
+ v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS);
+ // Store in output order; in[1], in[3], in[13], in[15] are negated (0 - s).
+ in[0] = s[0];
+ in[1] = _mm_sub_epi16(kZero, s[8]);
+ in[2] = s[12];
+ in[3] = _mm_sub_epi16(kZero, s[4]);
+ in[4] = _mm_packs_epi32(v[4], v[5]);
+ in[5] = _mm_packs_epi32(v[12], v[13]);
+ in[6] = _mm_packs_epi32(v[8], v[9]);
+ in[7] = _mm_packs_epi32(v[0], v[1]);
+ in[8] = _mm_packs_epi32(v[2], v[3]);
+ in[9] = _mm_packs_epi32(v[10], v[11]);
+ in[10] = _mm_packs_epi32(v[14], v[15]);
+ in[11] = _mm_packs_epi32(v[6], v[7]);
+ in[12] = s[5];
+ in[13] = _mm_sub_epi16(kZero, s[13]);
+ in[14] = s[9];
+ in[15] = _mm_sub_epi16(kZero, s[1]);
+}
+
+static void fdct16_sse2(__m128i *in0, __m128i *in1) {
+ fdct16_8col(in0);  // column pass on the 16 vectors of in0, in place
+ fdct16_8col(in1);  // same pass on the 16 vectors of in1
+ array_transpose_16x16(in0, in1);  // helper not shown; presumably a transpose
+}
+
+static void fadst16_sse2(__m128i *in0, __m128i *in1) {
+ fadst16_8col(in0);  // 1-D ADST on the 8 columns held in in0, in place
+ fadst16_8col(in1);  // 1-D ADST on the 8 columns held in in1, in place
+ array_transpose_16x16(in0, in1);  // helper not shown; presumably a transpose
+}
+
+#if CONFIG_EXT_TX
+static void fidtx16_8col(__m128i *in) {
+ const __m128i k__zero_epi16 = _mm_set1_epi16((int16_t)0);
+ const __m128i k__sqrt2_epi16 = _mm_set1_epi16((int16_t)Sqrt2);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ // Per lane, in place: (2*x*Sqrt2 + DCT_CONST_ROUNDING) >> DCT_CONST_BITS.
+ __m128i v0, v1, v2, v3, v4, v5, v6, v7;
+ __m128i u0, u1, u2, u3, u4, u5, u6, u7;
+ __m128i x0, x1, x2, x3, x4, x5, x6, x7;
+ __m128i y0, y1, y2, y3, y4, y5, y6, y7;
+ // Double each 16-bit lane (plain shift, so bits shifted out are lost).
+ in[0] = _mm_slli_epi16(in[0], 1);
+ in[1] = _mm_slli_epi16(in[1], 1);
+ in[2] = _mm_slli_epi16(in[2], 1);
+ in[3] = _mm_slli_epi16(in[3], 1);
+ in[4] = _mm_slli_epi16(in[4], 1);
+ in[5] = _mm_slli_epi16(in[5], 1);
+ in[6] = _mm_slli_epi16(in[6], 1);
+ in[7] = _mm_slli_epi16(in[7], 1);
+ in[8] = _mm_slli_epi16(in[8], 1);
+ in[9] = _mm_slli_epi16(in[9], 1);
+ in[10] = _mm_slli_epi16(in[10], 1);
+ in[11] = _mm_slli_epi16(in[11], 1);
+ in[12] = _mm_slli_epi16(in[12], 1);
+ in[13] = _mm_slli_epi16(in[13], 1);
+ in[14] = _mm_slli_epi16(in[14], 1);
+ in[15] = _mm_slli_epi16(in[15], 1);
+ // Interleave with zero: low halves of in[0..7] -> v*, of in[8..15] -> u*.
+ v0 = _mm_unpacklo_epi16(in[0], k__zero_epi16);
+ v1 = _mm_unpacklo_epi16(in[1], k__zero_epi16);
+ v2 = _mm_unpacklo_epi16(in[2], k__zero_epi16);
+ v3 = _mm_unpacklo_epi16(in[3], k__zero_epi16);
+ v4 = _mm_unpacklo_epi16(in[4], k__zero_epi16);
+ v5 = _mm_unpacklo_epi16(in[5], k__zero_epi16);
+ v6 = _mm_unpacklo_epi16(in[6], k__zero_epi16);
+ v7 = _mm_unpacklo_epi16(in[7], k__zero_epi16);
+
+ u0 = _mm_unpacklo_epi16(in[8], k__zero_epi16);
+ u1 = _mm_unpacklo_epi16(in[9], k__zero_epi16);
+ u2 = _mm_unpacklo_epi16(in[10], k__zero_epi16);
+ u3 = _mm_unpacklo_epi16(in[11], k__zero_epi16);
+ u4 = _mm_unpacklo_epi16(in[12], k__zero_epi16);
+ u5 = _mm_unpacklo_epi16(in[13], k__zero_epi16);
+ u6 = _mm_unpacklo_epi16(in[14], k__zero_epi16);
+ u7 = _mm_unpacklo_epi16(in[15], k__zero_epi16);
+ // Same for the high halves: in[0..7] -> x*, in[8..15] -> y*.
+ x0 = _mm_unpackhi_epi16(in[0], k__zero_epi16);
+ x1 = _mm_unpackhi_epi16(in[1], k__zero_epi16);
+ x2 = _mm_unpackhi_epi16(in[2], k__zero_epi16);
+ x3 = _mm_unpackhi_epi16(in[3], k__zero_epi16);
+ x4 = _mm_unpackhi_epi16(in[4], k__zero_epi16);
+ x5 = _mm_unpackhi_epi16(in[5], k__zero_epi16);
+ x6 = _mm_unpackhi_epi16(in[6], k__zero_epi16);
+ x7 = _mm_unpackhi_epi16(in[7], k__zero_epi16);
+
+ y0 = _mm_unpackhi_epi16(in[8], k__zero_epi16);
+ y1 = _mm_unpackhi_epi16(in[9], k__zero_epi16);
+ y2 = _mm_unpackhi_epi16(in[10], k__zero_epi16);
+ y3 = _mm_unpackhi_epi16(in[11], k__zero_epi16);
+ y4 = _mm_unpackhi_epi16(in[12], k__zero_epi16);
+ y5 = _mm_unpackhi_epi16(in[13], k__zero_epi16);
+ y6 = _mm_unpackhi_epi16(in[14], k__zero_epi16);
+ y7 = _mm_unpackhi_epi16(in[15], k__zero_epi16);
+ // madd against Sqrt2: the zero partner adds nothing, leaving lane * Sqrt2.
+ v0 = _mm_madd_epi16(v0, k__sqrt2_epi16);
+ v1 = _mm_madd_epi16(v1, k__sqrt2_epi16);
+ v2 = _mm_madd_epi16(v2, k__sqrt2_epi16);
+ v3 = _mm_madd_epi16(v3, k__sqrt2_epi16);
+ v4 = _mm_madd_epi16(v4, k__sqrt2_epi16);
+ v5 = _mm_madd_epi16(v5, k__sqrt2_epi16);
+ v6 = _mm_madd_epi16(v6, k__sqrt2_epi16);
+ v7 = _mm_madd_epi16(v7, k__sqrt2_epi16);
+
+ x0 = _mm_madd_epi16(x0, k__sqrt2_epi16);
+ x1 = _mm_madd_epi16(x1, k__sqrt2_epi16);
+ x2 = _mm_madd_epi16(x2, k__sqrt2_epi16);
+ x3 = _mm_madd_epi16(x3, k__sqrt2_epi16);
+ x4 = _mm_madd_epi16(x4, k__sqrt2_epi16);
+ x5 = _mm_madd_epi16(x5, k__sqrt2_epi16);
+ x6 = _mm_madd_epi16(x6, k__sqrt2_epi16);
+ x7 = _mm_madd_epi16(x7, k__sqrt2_epi16);
+
+ u0 = _mm_madd_epi16(u0, k__sqrt2_epi16);
+ u1 = _mm_madd_epi16(u1, k__sqrt2_epi16);
+ u2 = _mm_madd_epi16(u2, k__sqrt2_epi16);
+ u3 = _mm_madd_epi16(u3, k__sqrt2_epi16);
+ u4 = _mm_madd_epi16(u4, k__sqrt2_epi16);
+ u5 = _mm_madd_epi16(u5, k__sqrt2_epi16);
+ u6 = _mm_madd_epi16(u6, k__sqrt2_epi16);
+ u7 = _mm_madd_epi16(u7, k__sqrt2_epi16);
+
+ y0 = _mm_madd_epi16(y0, k__sqrt2_epi16);
+ y1 = _mm_madd_epi16(y1, k__sqrt2_epi16);
+ y2 = _mm_madd_epi16(y2, k__sqrt2_epi16);
+ y3 = _mm_madd_epi16(y3, k__sqrt2_epi16);
+ y4 = _mm_madd_epi16(y4, k__sqrt2_epi16);
+ y5 = _mm_madd_epi16(y5, k__sqrt2_epi16);
+ y6 = _mm_madd_epi16(y6, k__sqrt2_epi16);
+ y7 = _mm_madd_epi16(y7, k__sqrt2_epi16);
+ // Add the rounding constant to every 32-bit product.
+ v0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING);
+ v1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING);
+ v2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING);
+ v3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING);
+ v4 = _mm_add_epi32(v4, k__DCT_CONST_ROUNDING);
+ v5 = _mm_add_epi32(v5, k__DCT_CONST_ROUNDING);
+ v6 = _mm_add_epi32(v6, k__DCT_CONST_ROUNDING);
+ v7 = _mm_add_epi32(v7, k__DCT_CONST_ROUNDING);
+
+ x0 = _mm_add_epi32(x0, k__DCT_CONST_ROUNDING);
+ x1 = _mm_add_epi32(x1, k__DCT_CONST_ROUNDING);
+ x2 = _mm_add_epi32(x2, k__DCT_CONST_ROUNDING);
+ x3 = _mm_add_epi32(x3, k__DCT_CONST_ROUNDING);
+ x4 = _mm_add_epi32(x4, k__DCT_CONST_ROUNDING);
+ x5 = _mm_add_epi32(x5, k__DCT_CONST_ROUNDING);
+ x6 = _mm_add_epi32(x6, k__DCT_CONST_ROUNDING);
+ x7 = _mm_add_epi32(x7, k__DCT_CONST_ROUNDING);
+
+ u0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ u1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ u2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ u3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ u4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ u5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ u6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ u7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+
+ y0 = _mm_add_epi32(y0, k__DCT_CONST_ROUNDING);
+ y1 = _mm_add_epi32(y1, k__DCT_CONST_ROUNDING);
+ y2 = _mm_add_epi32(y2, k__DCT_CONST_ROUNDING);
+ y3 = _mm_add_epi32(y3, k__DCT_CONST_ROUNDING);
+ y4 = _mm_add_epi32(y4, k__DCT_CONST_ROUNDING);
+ y5 = _mm_add_epi32(y5, k__DCT_CONST_ROUNDING);
+ y6 = _mm_add_epi32(y6, k__DCT_CONST_ROUNDING);
+ y7 = _mm_add_epi32(y7, k__DCT_CONST_ROUNDING);
+ // Arithmetic right shift by DCT_CONST_BITS.
+ v0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ v1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ v2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ v3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ v4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ v5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ v6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ v7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+
+ x0 = _mm_srai_epi32(x0, DCT_CONST_BITS);
+ x1 = _mm_srai_epi32(x1, DCT_CONST_BITS);
+ x2 = _mm_srai_epi32(x2, DCT_CONST_BITS);
+ x3 = _mm_srai_epi32(x3, DCT_CONST_BITS);
+ x4 = _mm_srai_epi32(x4, DCT_CONST_BITS);
+ x5 = _mm_srai_epi32(x5, DCT_CONST_BITS);
+ x6 = _mm_srai_epi32(x6, DCT_CONST_BITS);
+ x7 = _mm_srai_epi32(x7, DCT_CONST_BITS);
+
+ u0 = _mm_srai_epi32(u0, DCT_CONST_BITS);
+ u1 = _mm_srai_epi32(u1, DCT_CONST_BITS);
+ u2 = _mm_srai_epi32(u2, DCT_CONST_BITS);
+ u3 = _mm_srai_epi32(u3, DCT_CONST_BITS);
+ u4 = _mm_srai_epi32(u4, DCT_CONST_BITS);
+ u5 = _mm_srai_epi32(u5, DCT_CONST_BITS);
+ u6 = _mm_srai_epi32(u6, DCT_CONST_BITS);
+ u7 = _mm_srai_epi32(u7, DCT_CONST_BITS);
+
+ y0 = _mm_srai_epi32(y0, DCT_CONST_BITS);
+ y1 = _mm_srai_epi32(y1, DCT_CONST_BITS);
+ y2 = _mm_srai_epi32(y2, DCT_CONST_BITS);
+ y3 = _mm_srai_epi32(y3, DCT_CONST_BITS);
+ y4 = _mm_srai_epi32(y4, DCT_CONST_BITS);
+ y5 = _mm_srai_epi32(y5, DCT_CONST_BITS);
+ y6 = _mm_srai_epi32(y6, DCT_CONST_BITS);
+ y7 = _mm_srai_epi32(y7, DCT_CONST_BITS);
+ // Re-join low and high halves as 16-bit lanes (pack saturates to int16).
+ in[0] = _mm_packs_epi32(v0, x0);
+ in[1] = _mm_packs_epi32(v1, x1);
+ in[2] = _mm_packs_epi32(v2, x2);
+ in[3] = _mm_packs_epi32(v3, x3);
+ in[4] = _mm_packs_epi32(v4, x4);
+ in[5] = _mm_packs_epi32(v5, x5);
+ in[6] = _mm_packs_epi32(v6, x6);
+ in[7] = _mm_packs_epi32(v7, x7);
+
+ in[8] = _mm_packs_epi32(u0, y0);
+ in[9] = _mm_packs_epi32(u1, y1);
+ in[10] = _mm_packs_epi32(u2, y2);
+ in[11] = _mm_packs_epi32(u3, y3);
+ in[12] = _mm_packs_epi32(u4, y4);
+ in[13] = _mm_packs_epi32(u5, y5);
+ in[14] = _mm_packs_epi32(u6, y6);
+ in[15] = _mm_packs_epi32(u7, y7);
+}
+
+static void fidtx16_sse2(__m128i *in0, __m128i *in1) {
+ fidtx16_8col(in0);  // scale every lane of in0[0..15] in place
+ fidtx16_8col(in1);  // scale every lane of in1[0..15] in place
+ array_transpose_16x16(in0, in1);  // helper not shown; presumably a transpose
+}
+#endif // CONFIG_EXT_TX
+
+void vp10_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
+ int tx_type) {
+ __m128i in0[16], in1[16];
+ // Dispatch on tx_type. All but DCT_DCT: load, pass 1, shift, pass 2, store.
+ switch (tx_type) {
+ case DCT_DCT: vpx_fdct16x16_sse2(input, output, stride); break;
+ case ADST_DCT:
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
+ fadst16_sse2(in0, in1);  // first 1-D pass
+ right_shift_16x16(in0, in1);  // helper not shown; runs between the passes
+ fdct16_sse2(in0, in1);  // second 1-D pass
+ write_buffer_16x16(output, in0, in1, 16);  // 16: presumably output stride
+ break;
+ case DCT_ADST:
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
+ fdct16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case ADST_ADST:
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+#if CONFIG_EXT_TX  // NOTE(review): last two load args look like flip flags
+ case FLIPADST_DCT:
+ load_buffer_16x16(input, in0, in1, stride, 1, 0);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fdct16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case DCT_FLIPADST:
+ load_buffer_16x16(input, in0, in1, stride, 0, 1);
+ fdct16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case FLIPADST_FLIPADST:
+ load_buffer_16x16(input, in0, in1, stride, 1, 1);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case ADST_FLIPADST:
+ load_buffer_16x16(input, in0, in1, stride, 0, 1);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case FLIPADST_ADST:
+ load_buffer_16x16(input, in0, in1, stride, 1, 0);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case V_DCT:  // DCT pass first, then the scaling-only fidtx pass
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
+ fdct16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fidtx16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case H_DCT:  // scaling-only fidtx pass first, then the DCT pass
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
+ fidtx16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fdct16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case V_ADST:
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fidtx16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case H_ADST:
+ load_buffer_16x16(input, in0, in1, stride, 0, 0);
+ fidtx16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case V_FLIPADST:
+ load_buffer_16x16(input, in0, in1, stride, 1, 0);
+ fadst16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fidtx16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+ case H_FLIPADST:
+ load_buffer_16x16(input, in0, in1, stride, 0, 1);
+ fidtx16_sse2(in0, in1);
+ right_shift_16x16(in0, in1);
+ fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
+ break;
+#endif // CONFIG_EXT_TX
+ default: assert(0); break;  // unhandled tx_type; output is left unwritten
+ }
+}
diff --git a/av1/encoder/x86/dct_sse2.asm b/av1/encoder/x86/dct_sse2.asm
new file mode 100644
index 0000000..c3a5fb5
--- /dev/null
+++ b/av1/encoder/x86/dct_sse2.asm
@@ -0,0 +1,86 @@
+;
+; Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%define private_prefix vp10
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+%macro TRANSFORM_COLS 0 ; in: m0..m3 = a, b, c, d; out: m0..m3 = a0, c1, d1, b1; scratch: m4, m5
+ paddw m0, m1 ; a += b
+ movq m4, m0 ; m4 = a (low 64 bits only)
+ psubw m3, m2 ; d -= c
+ psubw m4, m3 ; m4 = a - d
+ psraw m4, 1 ; e = (a - d) >> 1, arithmetic shift
+ movq m5, m4 ; m5 = e
+ psubw m5, m1 ;b1 = e - b
+ psubw m4, m2 ;c1 = e - c
+ psubw m0, m4 ; a0 = a - c1
+ paddw m3, m5 ; d1 = d + b1
+ ; m0 a0 (the SWAPs below rename registers so the results sit in m0..m3)
+ SWAP 1, 4 ; m1 c1
+ SWAP 2, 3 ; m2 d1
+ SWAP 3, 5 ; m3 b1
+%endmacro
+
+%macro TRANSPOSE_4X4 0 ; in: one 4-word row in the low half of each of m0..m3; out: m0, m1
+ ; 00 01 02 03 (m0)
+ ; 10 11 12 13 (m1)
+ ; 20 21 22 23 (m2)
+ ; 30 31 32 33 (m3)
+ punpcklwd m0, m1 ; 00 10 01 11 02 12 03 13
+ punpcklwd m2, m3 ; 20 30 21 31 22 32 23 33
+ mova m1, m0 ; copy, because the next unpack overwrites m0
+ punpckldq m0, m2 ; 00 10 20 30 01 11 21 31
+ punpckhdq m1, m2 ; 02 12 22 32 03 13 23 33
+%endmacro
+
+INIT_XMM sse2 ; fwht4x4(input, output, stride): two-pass 4x4 transform, SSE2
+cglobal fwht4x4, 3, 4, 8, input, output, stride
+ lea r3q, [inputq + strideq*4] ; r3 = address of row 2 (rows are strideq*2 bytes apart)
+ movq m0, [inputq] ;a1
+ movq m1, [inputq + strideq*2] ;b1
+ movq m2, [r3q] ;c1
+ movq m3, [r3q + strideq*2] ;d1
+
+ TRANSFORM_COLS ; pass 1 on the four input rows
+ TRANSPOSE_4X4 ; m0 = transposed rows 0-1, m1 = transposed rows 2-3
+ SWAP 1, 2 ; m2 now names the register holding rows 2-3
+ psrldq m1, m0, 8 ; m1 = row 1 (upper 8 bytes of m0 moved down)
+ psrldq m3, m2, 8 ; m3 = row 3 (upper 8 bytes of m2 moved down)
+ TRANSFORM_COLS ; pass 2: same butterfly on the transposed data
+ TRANSPOSE_4X4 ; transpose back: m0 = rows 0-1, m1 = rows 2-3
+
+ psllw m0, 2 ; multiply all 16 results by 4
+ psllw m1, 2
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; sign extension: widen the 16 results to 32 bits (64 bytes of output)
+ mova m2, m0
+ mova m3, m1
+ punpcklwd m0, m0 ; duplicate each word into both halves of a dword...
+ punpcklwd m1, m1
+ punpckhwd m2, m2
+ punpckhwd m3, m3
+ psrad m0, 16 ; ...then the arithmetic shift leaves the sign-extended value
+ psrad m1, 16
+ psrad m2, 16
+ psrad m3, 16
+ mova [outputq], m0 ; results 0-3
+ mova [outputq + 16], m2 ; results 4-7
+ mova [outputq + 32], m1 ; results 8-11
+ mova [outputq + 48], m3 ; results 12-15
+%else
+ mova [outputq], m0 ; results 0-7 as 16-bit words
+ mova [outputq + 16], m1 ; results 8-15
+%endif
+
+ RET
diff --git a/av1/encoder/x86/dct_ssse3.c b/av1/encoder/x86/dct_ssse3.c
new file mode 100644
index 0000000..aa018a1
--- /dev/null
+++ b/av1/encoder/x86/dct_ssse3.c
@@ -0,0 +1,468 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#if defined(_MSC_VER) && _MSC_VER <= 1500
+// Need to include math.h before calling tmmintrin.h/intrin.h
+// in certain versions of MSVS.
+#include <math.h>
+#endif
+#include <tmmintrin.h> // SSSE3
+
+#include "./vp10_rtcd.h"
+#include "aom_dsp/x86/inv_txfm_sse2.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+
+void vp10_fdct8x8_quant_ssse3(
+ const int16_t *input, int stride, int16_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+ __m128i zero;
+ int pass;
+ // Overview: two-pass 8x8 forward DCT of `input`, then quantization of the
+ // results into qcoeff_ptr/dqcoeff_ptr and the end-of-block count *eob_ptr.
+ // Constants: either one 16-bit value repeated, or a pair of 16-bit values
+ // repeated four times (built as the 32-bit constant for that pair).
+ const __m128i k__dual_p16_p16 = dual_set_epi16(23170, 23170);
+ const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
+ const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
+ const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
+ const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
+ const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
+ const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
+ const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
+ const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
+ const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
+ // Load input: eight rows of eight int16_t, `stride` elements apart (aligned)
+ __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+ __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));
+ __m128i *in[8];
+ int index = 0;
+ // Parameters that this implementation never reads or writes.
+ (void)scan_ptr;
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ (void)coeff_ptr;
+
+ // Pre-condition input (shift by two)
+ in0 = _mm_slli_epi16(in0, 2);
+ in1 = _mm_slli_epi16(in1, 2);
+ in2 = _mm_slli_epi16(in2, 2);
+ in3 = _mm_slli_epi16(in3, 2);
+ in4 = _mm_slli_epi16(in4, 2);
+ in5 = _mm_slli_epi16(in5, 2);
+ in6 = _mm_slli_epi16(in6, 2);
+ in7 = _mm_slli_epi16(in7, 2);
+ // in[k] points at row register ink so the quantizer below can index rows.
+ in[0] = &in0;
+ in[1] = &in1;
+ in[2] = &in2;
+ in[3] = &in3;
+ in[4] = &in4;
+ in[5] = &in5;
+ in[6] = &in6;
+ in[7] = &in7;
+
+ // We do two passes, first the columns, then the rows. The results of the
+ // first pass are transposed so that the same column code can be reused. The
+ // results of the second pass are also transposed so that the rows (processed
+ // as columns) are put back in row positions.
+ for (pass = 0; pass < 2; pass++) {
+ // To store results of each pass before the transpose.
+ __m128i res0, res1, res2, res3, res4, res5, res6, res7;
+ // Add/subtract
+ const __m128i q0 = _mm_add_epi16(in0, in7);
+ const __m128i q1 = _mm_add_epi16(in1, in6);
+ const __m128i q2 = _mm_add_epi16(in2, in5);
+ const __m128i q3 = _mm_add_epi16(in3, in4);
+ const __m128i q4 = _mm_sub_epi16(in3, in4);
+ const __m128i q5 = _mm_sub_epi16(in2, in5);
+ const __m128i q6 = _mm_sub_epi16(in1, in6);
+ const __m128i q7 = _mm_sub_epi16(in0, in7);
+ // Work on first four results
+ {
+ // Add/subtract
+ const __m128i r0 = _mm_add_epi16(q0, q3);
+ const __m128i r1 = _mm_add_epi16(q1, q2);
+ const __m128i r2 = _mm_sub_epi16(q1, q2);
+ const __m128i r3 = _mm_sub_epi16(q0, q3);
+ // Interleave to do the multiply by constants which gets us into 32bits
+ const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
+ const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
+ const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
+ const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
+
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16);
+
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08);
+ const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24);
+ const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24);
+ // dct_const_round_shift
+
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ // Combine
+
+ res0 = _mm_packs_epi32(w0, w1);
+ res4 = _mm_packs_epi32(w2, w3);
+ res2 = _mm_packs_epi32(w4, w5);
+ res6 = _mm_packs_epi32(w6, w7);
+ }
+ // Work on next four results
+ {
+ // Scale q6 -/+ q5 by k__dual_p16_p16 with a rounding high multiply (16-bit)
+ const __m128i d0 = _mm_sub_epi16(q6, q5);
+ const __m128i d1 = _mm_add_epi16(q6, q5);
+ const __m128i r0 = _mm_mulhrs_epi16(d0, k__dual_p16_p16);
+ const __m128i r1 = _mm_mulhrs_epi16(d1, k__dual_p16_p16);
+
+ // Add/subtract
+ const __m128i x0 = _mm_add_epi16(q4, r0);
+ const __m128i x1 = _mm_sub_epi16(q4, r0);
+ const __m128i x2 = _mm_sub_epi16(q7, r1);
+ const __m128i x3 = _mm_add_epi16(q7, r1);
+ // Interleave to do the multiply by constants which gets us into 32bits
+ const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
+ const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
+ const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
+ const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28);
+ const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20);
+ const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20);
+ const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12);
+ const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12);
+ // dct_const_round_shift
+ const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
+ const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
+ const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
+ const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+ const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
+ const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
+ const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
+ const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+ const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
+ const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
+ const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
+ const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+ const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
+ const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
+ const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
+ const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
+ // Combine
+ res1 = _mm_packs_epi32(w0, w1);
+ res7 = _mm_packs_epi32(w2, w3);
+ res5 = _mm_packs_epi32(w4, w5);
+ res3 = _mm_packs_epi32(w6, w7);
+ }
+ // Transpose the 8x8.
+ {
+ // 00 01 02 03 04 05 06 07
+ // 10 11 12 13 14 15 16 17
+ // 20 21 22 23 24 25 26 27
+ // 30 31 32 33 34 35 36 37
+ // 40 41 42 43 44 45 46 47
+ // 50 51 52 53 54 55 56 57
+ // 60 61 62 63 64 65 66 67
+ // 70 71 72 73 74 75 76 77
+ const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);
+ const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3);
+ const __m128i tr0_2 = _mm_unpackhi_epi16(res0, res1);
+ const __m128i tr0_3 = _mm_unpackhi_epi16(res2, res3);
+ const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5);
+ const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7);
+ const __m128i tr0_6 = _mm_unpackhi_epi16(res4, res5);
+ const __m128i tr0_7 = _mm_unpackhi_epi16(res6, res7);
+ // 00 10 01 11 02 12 03 13
+ // 20 30 21 31 22 32 23 33
+ // 04 14 05 15 06 16 07 17
+ // 24 34 25 35 26 36 27 37
+ // 40 50 41 51 42 52 43 53
+ // 60 70 61 71 62 72 63 73
+ // 44 54 45 55 46 56 47 57
+ // 64 74 65 75 66 76 67 77
+ const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
+ const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
+ const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
+ const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
+ const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
+ const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
+ const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
+ const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
+ // 00 10 20 30 01 11 21 31
+ // 40 50 60 70 41 51 61 71
+ // 02 12 22 32 03 13 23 33
+ // 42 52 62 72 43 53 63 73
+ // 04 14 24 34 05 15 25 35
+ // 44 54 64 74 45 55 65 75
+ // 06 16 26 36 07 17 27 37
+ // 46 56 66 76 47 57 67 77
+ in0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
+ in1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
+ in2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
+ in3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
+ in4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
+ in5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
+ in6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
+ in7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
+ // 00 10 20 30 40 50 60 70
+ // 01 11 21 31 41 51 61 71
+ // 02 12 22 32 42 52 62 72
+ // 03 13 23 33 43 53 63 73
+ // 04 14 24 34 44 54 64 74
+ // 05 15 25 35 45 55 65 75
+ // 06 16 26 36 46 56 66 76
+ // 07 17 27 37 47 57 67 77
+ }
+ }
+ // Post-condition output and store it
+ {
+ // Post-condition (division by two)
+ // division by two of 16-bit signed numbers using shifts, rounding toward 0:
+ // n / 2 = (n - (n >> 15)) >> 1
+ const __m128i sign_in0 = _mm_srai_epi16(in0, 15);
+ const __m128i sign_in1 = _mm_srai_epi16(in1, 15);
+ const __m128i sign_in2 = _mm_srai_epi16(in2, 15);
+ const __m128i sign_in3 = _mm_srai_epi16(in3, 15);
+ const __m128i sign_in4 = _mm_srai_epi16(in4, 15);
+ const __m128i sign_in5 = _mm_srai_epi16(in5, 15);
+ const __m128i sign_in6 = _mm_srai_epi16(in6, 15);
+ const __m128i sign_in7 = _mm_srai_epi16(in7, 15);
+ in0 = _mm_sub_epi16(in0, sign_in0);
+ in1 = _mm_sub_epi16(in1, sign_in1);
+ in2 = _mm_sub_epi16(in2, sign_in2);
+ in3 = _mm_sub_epi16(in3, sign_in3);
+ in4 = _mm_sub_epi16(in4, sign_in4);
+ in5 = _mm_sub_epi16(in5, sign_in5);
+ in6 = _mm_sub_epi16(in6, sign_in6);
+ in7 = _mm_sub_epi16(in7, sign_in7);
+ in0 = _mm_srai_epi16(in0, 1);
+ in1 = _mm_srai_epi16(in1, 1);
+ in2 = _mm_srai_epi16(in2, 1);
+ in3 = _mm_srai_epi16(in3, 1);
+ in4 = _mm_srai_epi16(in4, 1);
+ in5 = _mm_srai_epi16(in5, 1);
+ in6 = _mm_srai_epi16(in6, 1);
+ in7 = _mm_srai_epi16(in7, 1);
+ }
+ // Point past the end of each array and count n_coeffs up from -n to 0.
+ iscan_ptr += n_coeffs;
+ qcoeff_ptr += n_coeffs;
+ dqcoeff_ptr += n_coeffs;
+ n_coeffs = -n_coeffs;
+ zero = _mm_setzero_si128();
+
+ if (!skip_block) {
+ __m128i eob;
+ __m128i round, quant, dequant, thr;
+ int16_t nzflag;
+ {
+ __m128i coeff0, coeff1;
+
+ // Setup global values
+ {
+ round = _mm_load_si128((const __m128i *)round_ptr);
+ quant = _mm_load_si128((const __m128i *)quant_ptr);
+ dequant = _mm_load_si128((const __m128i *)dequant_ptr);
+ }
+
+ {
+ __m128i coeff0_sign, coeff1_sign;
+ __m128i qcoeff0, qcoeff1;
+ __m128i qtmp0, qtmp1;
+ // Do DC and first 15 AC
+ coeff0 = *in[0];
+ coeff1 = *in[1];
+
+ // Poor man's sign extract: sign = x >> 15, |x| = (x ^ sign) - sign
+ coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+ // After its first use each table keeps only its upper half in both halves.
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+ round = _mm_unpackhi_epi64(round, round);
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+ quant = _mm_unpackhi_epi64(quant, quant);
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+ dequant = _mm_unpackhi_epi64(dequant, dequant);
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ }
+
+ {
+ // Scan for eob
+ __m128i zero_coeff0, zero_coeff1;
+ __m128i nzero_coeff0, nzero_coeff1;
+ __m128i iscan0, iscan1;
+ __m128i eob1;
+ zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+ nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ // nzero_* is -1 in nonzero lanes, so this adds one there (index -> count)
+ iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+ iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+ eob = _mm_and_si128(iscan0, nzero_coeff0);
+ eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+ eob = _mm_max_epi16(eob, eob1);
+ }
+ n_coeffs += 8 * 2;
+ }
+
+ // AC only loop: a group of 16 is quantized only if some |coeff| exceeds thr
+ index = 2;
+ thr = _mm_srai_epi16(dequant, 1);
+ while (n_coeffs < 0) {
+ __m128i coeff0, coeff1;
+ {
+ __m128i coeff0_sign, coeff1_sign;
+ __m128i qcoeff0, qcoeff1;
+ __m128i qtmp0, qtmp1;
+
+ assert(index < (int)(sizeof(in) / sizeof(in[0])) - 1);
+ coeff0 = *in[index];
+ coeff1 = *in[index + 1];
+
+ // Poor man's sign extract
+ coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+ // Nonzero if any of the 16 magnitudes is greater than thr.
+ nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
+ _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
+
+ if (nzflag) {
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ } else {
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
+
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
+ }
+ }
+
+ if (nzflag) {
+ // Scan for eob
+ __m128i zero_coeff0, zero_coeff1;
+ __m128i nzero_coeff0, nzero_coeff1;
+ __m128i iscan0, iscan1;
+ __m128i eob0, eob1;
+ zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+ nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ // Add one to convert from indices to counts
+ iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+ iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+ eob0 = _mm_and_si128(iscan0, nzero_coeff0);
+ eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+ eob0 = _mm_max_epi16(eob0, eob1);
+ eob = _mm_max_epi16(eob, eob0);
+ }
+ n_coeffs += 8 * 2;
+ index += 2;
+ }
+
+ // Accumulate EOB: horizontal max over the eight lanes, read from lane 1
+ {
+ __m128i eob_shuffled;
+ eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ *eob_ptr = _mm_extract_epi16(eob, 1);
+ }
+ } else { // skip_block set: zero qcoeff/dqcoeff and report eob = 0
+ do {
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
+ n_coeffs += 8 * 2;
+ } while (n_coeffs < 0);
+ *eob_ptr = 0;
+ }
+}
diff --git a/av1/encoder/x86/error_intrin_avx2.c b/av1/encoder/x86/error_intrin_avx2.c
new file mode 100644
index 0000000..6e7c093
--- /dev/null
+++ b/av1/encoder/x86/error_intrin_avx2.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <immintrin.h> // AVX2
+
+#include "./vp10_rtcd.h"
+#include "aom/vpx_integer.h"
+
+// Block error (AVX2): sum of squared differences between two int16_t arrays.
+//
+//   coeff       first array; its squared entries are summed into *ssz
+//   dqcoeff     second array, compared against coeff
+//   block_size  number of entries; 16 are consumed per iteration, so a count
+//               that is not a multiple of 16 makes the last loads overrun
+//   ssz         out: sum of coeff[i]^2
+//
+// Returns the sum of (dqcoeff[i] - coeff[i])^2. Differences are formed in
+// 16 bits, so a difference that does not fit in int16_t wraps. All sums are
+// widened to 64 bits before they are accumulated.
+int64_t vp10_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff,
+                              intptr_t block_size, int64_t *ssz) {
+  const __m256i zero = _mm256_setzero_si256();
+  // Each accumulator holds four independent 64-bit partial sums.
+  __m256i err_acc = _mm256_setzero_si256();
+  __m256i coef_acc = _mm256_setzero_si256();
+  __m128i err_sum, coef_sum;
+  int64_t sse;
+  int idx;
+
+  for (idx = 0; idx < block_size; idx += 16) {
+    // Unaligned 32-byte loads: 16 entries from each array.
+    const __m256i c = _mm256_loadu_si256((const __m256i *)(coeff + idx));
+    const __m256i dq = _mm256_loadu_si256((const __m256i *)(dqcoeff + idx));
+
+    // 16-bit differences, then pairwise sums of squares in 32-bit lanes.
+    const __m256i diff = _mm256_sub_epi16(dq, c);
+    const __m256i diff_sq = _mm256_madd_epi16(diff, diff);
+    const __m256i c_sq = _mm256_madd_epi16(c, c);
+
+    // Interleaving with zero widens each 32-bit sum to an unsigned 64-bit
+    // lane; the low and high interleaves are added to the running totals.
+    err_acc = _mm256_add_epi64(err_acc, _mm256_unpacklo_epi32(diff_sq, zero));
+    coef_acc = _mm256_add_epi64(coef_acc, _mm256_unpacklo_epi32(c_sq, zero));
+    err_acc = _mm256_add_epi64(err_acc, _mm256_unpackhi_epi32(diff_sq, zero));
+    coef_acc = _mm256_add_epi64(coef_acc, _mm256_unpackhi_epi32(c_sq, zero));
+  }
+
+  // Fold the upper 64 bits of every 128-bit lane onto its lower 64 bits.
+  err_acc = _mm256_add_epi64(err_acc, _mm256_srli_si256(err_acc, 8));
+  coef_acc = _mm256_add_epi64(coef_acc, _mm256_srli_si256(coef_acc, 8));
+
+  // Add the two 128-bit lanes; the grand total is in the low 64 bits.
+  err_sum = _mm_add_epi64(_mm256_castsi256_si128(err_acc),
+                          _mm256_extractf128_si256(err_acc, 1));
+  coef_sum = _mm_add_epi64(_mm256_castsi256_si128(coef_acc),
+                           _mm256_extractf128_si256(coef_acc, 1));
+
+  // Write the low 64 bits of each total: the error to the local that is
+  // returned, the sum of squares through the caller's pointer.
+  _mm_storel_epi64((__m128i *)(&sse), err_sum);
+  _mm_storel_epi64((__m128i *)(ssz), coef_sum);
+
+  return sse;
+}
diff --git a/av1/encoder/x86/error_sse2.asm b/av1/encoder/x86/error_sse2.asm
new file mode 100644
index 0000000..0772da4
--- /dev/null
+++ b/av1/encoder/x86/error_sse2.asm
@@ -0,0 +1,122 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%define private_prefix vp10
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+; int64_t vp10_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size,
+; int64_t *ssz)
+
+INIT_XMM sse2
+cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz ; ssz is reached through sszq / sszm below
+ pxor m4, m4 ; sse accumulator
+ pxor m6, m6 ; ssz accumulator
+ pxor m5, m5 ; dedicated zero register
+ lea uqcq, [uqcq+sizeq*2] ; point both arrays past their last 2-byte entry...
+ lea dqcq, [dqcq+sizeq*2]
+ neg sizeq ; ...and index them with size running from -size up to 0
+.loop:
+ mova m2, [uqcq+sizeq*2] ; m2, m3 = next 16 entries of uqc
+ mova m0, [dqcq+sizeq*2] ; m0, m1 = next 16 entries of dqc
+ mova m3, [uqcq+sizeq*2+mmsize]
+ mova m1, [dqcq+sizeq*2+mmsize]
+ psubw m0, m2 ; m0, m1 = dqc - uqc
+ psubw m1, m3
+ ; individual errors are max. 15bit+sign, so squares are 30bit, and
+ ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
+ pmaddwd m0, m0 ; squared errors, summed pairwise into dwords
+ pmaddwd m1, m1
+ pmaddwd m2, m2 ; squared uqc entries, summed pairwise into dwords
+ pmaddwd m3, m3
+ ; accumulate in 64bit (interleaving with zero m5 turns dwords into qwords)
+ punpckldq m7, m0, m5
+ punpckhdq m0, m5
+ paddq m4, m7
+ punpckldq m7, m1, m5
+ paddq m4, m0
+ punpckhdq m1, m5
+ paddq m4, m7
+ punpckldq m7, m2, m5
+ paddq m4, m1
+ punpckhdq m2, m5
+ paddq m6, m7
+ punpckldq m7, m3, m5
+ paddq m6, m2
+ punpckhdq m3, m5
+ paddq m6, m7
+ paddq m6, m3
+ add sizeq, mmsize ; 2 * mmsize bytes consumed = mmsize entries
+ jl .loop ; repeat until the index reaches 0
+
+ ; accumulate horizontally and store in return value
+ movhlps m5, m4 ; high qword of each accumulator moved to a low qword
+ movhlps m7, m6
+ paddq m4, m5 ; low qword of m4 = total sse
+ paddq m6, m7 ; low qword of m6 = total ssz
+%if ARCH_X86_64
+ movq rax, m4 ; return value
+ movq [sszq], m6 ; *ssz
+%else
+ mov eax, sszm ; load the ssz pointer
+ pshufd m5, m4, 0x1 ; low dword of m5 = bits 32..63 of the sse total
+ movq [eax], m6 ; *ssz
+ movd eax, m4 ; low 32 bits of the return value
+ movd edx, m5 ; high 32 bits of the return value
+%endif
+ RET
+
+; Compute the sum of squared difference between two int16_t vectors.
+; int64_t vp10_block_error_fp(int16_t *coeff, int16_t *dqcoeff,
+; intptr_t block_size)
+
+INIT_XMM sse2
+cglobal block_error_fp, 3, 3, 6, uqc, dqc, size
+ pxor m4, m4 ; sse accumulator
+ pxor m5, m5 ; dedicated zero register
+ lea uqcq, [uqcq+sizeq*2] ; point both arrays past their last 2-byte entry...
+ lea dqcq, [dqcq+sizeq*2]
+ neg sizeq ; ...and index them with size running from -size up to 0
+.loop:
+ mova m2, [uqcq+sizeq*2] ; m2, m3 = next 16 entries of uqc
+ mova m0, [dqcq+sizeq*2] ; m0, m1 = next 16 entries of dqc
+ mova m3, [uqcq+sizeq*2+mmsize]
+ mova m1, [dqcq+sizeq*2+mmsize]
+ psubw m0, m2 ; m0, m1 = dqc - uqc
+ psubw m1, m3
+ ; individual errors are max. 15bit+sign, so squares are 30bit, and
+ ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
+ pmaddwd m0, m0 ; squared errors, summed pairwise into dwords
+ pmaddwd m1, m1
+ ; accumulate in 64bit (m3 is free here and reused as scratch)
+ punpckldq m3, m0, m5
+ punpckhdq m0, m5
+ paddq m4, m3
+ punpckldq m3, m1, m5
+ paddq m4, m0
+ punpckhdq m1, m5
+ paddq m4, m3
+ paddq m4, m1
+ add sizeq, mmsize ; 2 * mmsize bytes consumed = mmsize entries
+ jl .loop ; repeat until the index reaches 0
+
+ ; accumulate horizontally and store in return value
+ movhlps m5, m4 ; high qword of the accumulator moved to a low qword
+ paddq m4, m5 ; low qword of m4 = total sse
+%if ARCH_X86_64
+ movq rax, m4 ; return value
+%else
+ pshufd m5, m4, 0x1 ; low dword of m5 = bits 32..63 of the total
+ movd eax, m4 ; low 32 bits of the return value
+ movd edx, m5 ; high 32 bits of the return value
+%endif
+ RET
diff --git a/av1/encoder/x86/highbd_block_error_intrin_sse2.c b/av1/encoder/x86/highbd_block_error_intrin_sse2.c
new file mode 100644
index 0000000..2728880
--- /dev/null
+++ b/av1/encoder/x86/highbd_block_error_intrin_sse2.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h>
+#include <stdio.h>
+
+#include "av1/common/common.h"
+
+// Block error for 32-bit (high bitdepth) coefficients, SSE2: sums
+// (coeff[i] - dqcoeff[i])^2 and coeff[i]^2, eight entries per iteration. Groups
+// with all values in [-0x4000, 0x3fff] use packed 16-bit multiply-adds, others
+// the scalar loop. Both sums are rounded and shifted right by 2 * (bps - 8);
+// the error is returned and the sum of squares is stored in *ssz.
+int64_t vp10_highbd_block_error_sse2(tran_low_t *coeff, tran_low_t *dqcoeff,
+                                     intptr_t block_size, int64_t *ssz,
+                                     int bps) {
+  int i, j;
+  uint32_t temp[4];
+  int64_t error = 0, sqcoeff = 0;
+  const int shift = 2 * (bps - 8);
+  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
+  const __m128i max = _mm_set1_epi32(0x3fff);
+  const __m128i min = _mm_set1_epi32(-0x4000);
+
+  for (i = 0; i < block_size; i += 8) {
+    __m128i mm_coeff = _mm_load_si128((const __m128i *)(coeff + i));
+    __m128i mm_coeff2 = _mm_load_si128((const __m128i *)(coeff + i + 4));
+    __m128i mm_dqcoeff = _mm_load_si128((const __m128i *)(dqcoeff + i));
+    __m128i mm_dqcoeff2 = _mm_load_si128((const __m128i *)(dqcoeff + i + 4));
+    // Check if any values require more than 15 bit
+    const __m128i cmp0 = _mm_or_si128(_mm_cmpgt_epi32(mm_coeff, max),
+                                      _mm_cmplt_epi32(mm_coeff, min));
+    const __m128i cmp1 = _mm_or_si128(_mm_cmpgt_epi32(mm_coeff2, max),
+                                      _mm_cmplt_epi32(mm_coeff2, min));
+    const __m128i cmp2 = _mm_or_si128(_mm_cmpgt_epi32(mm_dqcoeff, max),
+                                      _mm_cmplt_epi32(mm_dqcoeff, min));
+    const __m128i cmp3 = _mm_or_si128(_mm_cmpgt_epi32(mm_dqcoeff2, max),
+                                      _mm_cmplt_epi32(mm_dqcoeff2, min));
+    const int test = _mm_movemask_epi8(
+        _mm_or_si128(_mm_or_si128(cmp0, cmp1), _mm_or_si128(cmp2, cmp3)));
+    if (!test) {
+      // 16-bit path: differences fit in int16_t, pair sums of squares in 32 bits.
+      const __m128i c16 = _mm_packs_epi32(mm_coeff, mm_coeff2);
+      const __m128i dq16 = _mm_packs_epi32(mm_dqcoeff, mm_dqcoeff2);
+      const __m128i diff16 = _mm_sub_epi16(c16, dq16);
+      _mm_storeu_si128((__m128i *)temp, _mm_madd_epi16(diff16, diff16));
+      error += (int64_t)temp[0] + temp[1] + temp[2] + temp[3];
+      _mm_storeu_si128((__m128i *)temp, _mm_madd_epi16(c16, c16));
+      sqcoeff += (int64_t)temp[0] + temp[1] + temp[2] + temp[3];
+    } else {
+      for (j = 0; j < 8; j++) {
+        // Widen before subtracting so the difference cannot overflow 32 bits.
+        const int64_t diff = (int64_t)coeff[i + j] - dqcoeff[i + j];
+        error += diff * diff;
+        sqcoeff += (int64_t)coeff[i + j] * (int64_t)coeff[i + j];
+      }
+    }
+  }
+  assert(error >= 0 && sqcoeff >= 0);
+  error = (error + rounding) >> shift;
+  sqcoeff = (sqcoeff + rounding) >> shift;
+  *ssz = sqcoeff;
+  return error;
+}
diff --git a/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/av1/encoder/x86/highbd_fwd_txfm_sse4.c
new file mode 100644
index 0000000..a6cb454
--- /dev/null
+++ b/av1/encoder/x86/highbd_fwd_txfm_sse4.c
@@ -0,0 +1,1895 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <smmintrin.h> /* SSE4.1 */
+
+#include "./vp10_rtcd.h"
+#include "./vpx_config.h"
+#include "av1/common/vp10_fwd_txfm2d_cfg.h"
+#include "av1/common/vp10_txfm.h"
+#include "av1/common/x86/highbd_txfm_utility_sse4.h"
+#include "aom_dsp/txfm_common.h"
+#include "aom_dsp/x86/txfm_common_sse2.h"
+#include "aom_ports/mem.h"
+
+// Loads a 4x4 block of 16-bit samples into four registers of 32-bit lanes.
+//   input:  first sample of the block
+//   in:     receives one row per register
+//   stride: distance between consecutive input rows, in samples
+//   flipud: when non-zero, in[0..3] are filled from input rows 3..0
+//   fliplr: when non-zero, the four samples of every row are reversed
+//   shift:  left shift applied to each sample after sign extension
+static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in,
+                                   int stride, int flipud, int fliplr,
+                                   int shift) {
+  int row;
+
+  for (row = 0; row < 4; ++row) {
+    const int src_row = flipud ? 3 - row : row;
+    // Only the low 64 bits (four int16 samples) are read from memory.
+    __m128i px = _mm_loadl_epi64((const __m128i *)(input + src_row * stride));
+
+    if (fliplr) {
+      // 0x1b picks words 3, 2, 1, 0: the four low words swap end for end.
+      px = _mm_shufflelo_epi16(px, 0x1b);
+    }
+
+    in[row] = _mm_slli_epi32(_mm_cvtepi16_epi32(px), shift);
+  }
+}
+
+// 4-point forward DCT applied to each of the four 32-bit lanes of in[0..3]
+// in parallel. The four results are transposed and written back to in[0..3].
+//   in:  four registers, transformed in place
+//   bit: cosine precision; it selects the row of cospi_arr and is also the
+//        rounding right shift applied after every multiply-add
+//
+// We only use stage-2 bit;
+// shift[0] is used in load_buffer_4x4()
+// shift[1] is used in txfm_func_col()
+// shift[2] is used in txfm_func_row()
+static void fdct4x4_sse4_1(__m128i *in, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ // Added before every ">> bit" below so the shift rounds instead of truncating.
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ __m128i s0, s1, s2, s3;
+ __m128i u0, u1, u2, u3;
+ __m128i v0, v1, v2, v3;
+
+ // Butterfly: in[0] is paired with in[3], in[1] with in[2].
+ s0 = _mm_add_epi32(in[0], in[3]);
+ s1 = _mm_add_epi32(in[1], in[2]);
+ s2 = _mm_sub_epi32(in[1], in[2]);
+ s3 = _mm_sub_epi32(in[0], in[3]);
+
+ // btf_32_sse4_1_type0(cospi32, cospi32, s[01], u[02], bit);
+ // u0 = (s0 * cospi32 + s1 * cospi32 + rnding) >> bit
+ // u2 = (s0 * cospi32 - s1 * cospi32 + rnding) >> bit
+ // (u2, u3, v0 and v1 only hold intermediate values until the two shifts.)
+ u0 = _mm_mullo_epi32(s0, cospi32);
+ u1 = _mm_mullo_epi32(s1, cospi32);
+ u2 = _mm_add_epi32(u0, u1);
+ v0 = _mm_sub_epi32(u0, u1);
+
+ u3 = _mm_add_epi32(u2, rnding);
+ v1 = _mm_add_epi32(v0, rnding);
+
+ u0 = _mm_srai_epi32(u3, bit);
+ u2 = _mm_srai_epi32(v1, bit);
+
+ // btf_32_sse4_1_type1(cospi48, cospi16, s[23], u[13], bit);
+ // u1 = (s2 * cospi48 + s3 * cospi16 + rnding) >> bit
+ v0 = _mm_mullo_epi32(s2, cospi48);
+ v1 = _mm_mullo_epi32(s3, cospi16);
+ v2 = _mm_add_epi32(v0, v1);
+
+ v3 = _mm_add_epi32(v2, rnding);
+ u1 = _mm_srai_epi32(v3, bit);
+
+ // u3 = (s3 * cospi48 - s2 * cospi16 + rnding) >> bit
+ v0 = _mm_mullo_epi32(s2, cospi16);
+ v1 = _mm_mullo_epi32(s3, cospi48);
+ v2 = _mm_sub_epi32(v1, v0);
+
+ v3 = _mm_add_epi32(v2, rnding);
+ u3 = _mm_srai_epi32(v3, bit);
+
+ // Note: shift[1] and shift[2] are zeros
+
+ // Transpose 4x4 32-bit
+ // The rows of the matrix being transposed are u0, u1, u2, u3, so lane k of
+ // each of them ends up in in[k].
+ v0 = _mm_unpacklo_epi32(u0, u1);
+ v1 = _mm_unpackhi_epi32(u0, u1);
+ v2 = _mm_unpacklo_epi32(u2, u3);
+ v3 = _mm_unpackhi_epi32(u2, u3);
+
+ in[0] = _mm_unpacklo_epi64(v0, v2);
+ in[1] = _mm_unpackhi_epi64(v0, v2);
+ in[2] = _mm_unpacklo_epi64(v1, v3);
+ in[3] = _mm_unpackhi_epi64(v1, v3);
+}
+
+// Stores res[0..3] to output, one 128-bit register every four elements.
+// The aligned store is used, so every store address must be 16-byte aligned.
+static INLINE void write_buffer_4x4(__m128i *res, tran_low_t *output) {
+  int row;
+
+  for (row = 0; row < 4; ++row) {
+    _mm_store_si128((__m128i *)(output + row * 4), res[row]);
+  }
+}
+
+// Note:
+// vp10_fwd_txfm2d_4x4() is what this file implements for 4x4 blocks. This
+// entry point is only a placeholder, kept because vp10_highbd_fht4x4_c() is
+// not removed yet: it ignores all of its arguments and asserts when reached.
+void vp10_highbd_fht4x4_sse4_1(const int16_t *input, tran_low_t *output,
+                               int stride, int tx_type) {
+  assert(0);
+
+  // Reference every parameter so that none is reported as unused.
+  (void)tx_type;
+  (void)stride;
+  (void)output;
+  (void)input;
+}
+
+// 4-point forward ADST applied to each of the four 32-bit lanes of in[0..3]
+// in parallel. The four results are transposed and written back to in[0..3].
+//   in:  four registers, transformed in place
+//   bit: cosine precision; it selects the row of cospi_arr and is also the
+//        rounding right shift applied after every multiply-add
+static void fadst4x4_sse4_1(__m128i *in, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ // Added before every ">> bit" below so the shift rounds instead of truncating.
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ const __m128i kZero = _mm_setzero_si128();
+ __m128i s0, s1, s2, s3;
+ __m128i u0, u1, u2, u3;
+ __m128i v0, v1, v2, v3;
+
+ // stage 0
+ // stage 1
+ // stage 2
+ // s0 = (in[3] * cospi8 + in[0] * cospi56 + rnding) >> bit
+ u0 = _mm_mullo_epi32(in[3], cospi8);
+ u1 = _mm_mullo_epi32(in[0], cospi56);
+ u2 = _mm_add_epi32(u0, u1);
+ s0 = _mm_add_epi32(u2, rnding);
+ s0 = _mm_srai_epi32(s0, bit);
+
+ // s1 = (in[3] * cospi56 - in[0] * cospi8 + rnding) >> bit
+ v0 = _mm_mullo_epi32(in[3], cospi56);
+ v1 = _mm_mullo_epi32(in[0], cospi8);
+ v2 = _mm_sub_epi32(v0, v1);
+ s1 = _mm_add_epi32(v2, rnding);
+ s1 = _mm_srai_epi32(s1, bit);
+
+ // s2 = (in[1] * cospi40 + in[2] * cospi24 + rnding) >> bit
+ u0 = _mm_mullo_epi32(in[1], cospi40);
+ u1 = _mm_mullo_epi32(in[2], cospi24);
+ u2 = _mm_add_epi32(u0, u1);
+ s2 = _mm_add_epi32(u2, rnding);
+ s2 = _mm_srai_epi32(s2, bit);
+
+ // s3 = (in[1] * cospi24 - in[2] * cospi40 + rnding) >> bit
+ v0 = _mm_mullo_epi32(in[1], cospi24);
+ v1 = _mm_mullo_epi32(in[2], cospi40);
+ v2 = _mm_sub_epi32(v0, v1);
+ s3 = _mm_add_epi32(v2, rnding);
+ s3 = _mm_srai_epi32(s3, bit);
+
+ // stage 3
+ u0 = _mm_add_epi32(s0, s2);
+ u2 = _mm_sub_epi32(s0, s2);
+ u1 = _mm_add_epi32(s1, s3);
+ u3 = _mm_sub_epi32(s1, s3);
+
+ // stage 4
+ // u0 and u1 pass through; u2 and u3 are replaced by
+ //   u2 = (u2 * cospi32 + u3 * cospi32 + rnding) >> bit
+ //   u3 = (u2 * cospi32 - u3 * cospi32 + rnding) >> bit
+ // with both right-hand sides using the stage-3 values (kept in v0 and v1).
+ v0 = _mm_mullo_epi32(u2, cospi32);
+ v1 = _mm_mullo_epi32(u3, cospi32);
+ v2 = _mm_add_epi32(v0, v1);
+ s2 = _mm_add_epi32(v2, rnding);
+ u2 = _mm_srai_epi32(s2, bit);
+
+ v2 = _mm_sub_epi32(v0, v1);
+ s3 = _mm_add_epi32(v2, rnding);
+ u3 = _mm_srai_epi32(s3, bit);
+
+ // u0, u1, u2, u3
+ // Negate u2 and u1 (0 - x).
+ u2 = _mm_sub_epi32(kZero, u2);
+ u1 = _mm_sub_epi32(kZero, u1);
+
+ // u0, u2, u3, u1
+ // Transpose 4x4 32-bit
+ // The rows of the matrix being transposed are u0, u2, u3, u1 (in that
+ // order), so lane k of each of them ends up in in[k].
+ v0 = _mm_unpacklo_epi32(u0, u2);
+ v1 = _mm_unpackhi_epi32(u0, u2);
+ v2 = _mm_unpacklo_epi32(u3, u1);
+ v3 = _mm_unpackhi_epi32(u3, u1);
+
+ in[0] = _mm_unpacklo_epi64(v0, v2);
+ in[1] = _mm_unpackhi_epi64(v0, v2);
+ in[2] = _mm_unpacklo_epi64(v1, v3);
+ in[3] = _mm_unpackhi_epi64(v1, v3);
+}
+
+// 2-D forward transform of a 4x4 block.
+//   input:        16-bit input samples
+//   coeff:        receives the 16 output coefficients (aligned 128-bit stores)
+//   input_stride: distance between consecutive input rows, in samples
+//   tx_type:      selects the config and which 1-D kernel (DCT or ADST) runs
+//                 in each of the two passes; the FLIPADST types, available
+//                 only with CONFIG_EXT_TX, also flip the block while loading
+//   bd:           unused
+// An unsupported tx_type asserts; when asserts are compiled out the function
+// returns without writing to coeff.
+void vp10_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *coeff,
+                                int input_stride, int tx_type, int bd) {
+  const TXFM_2D_CFG *cfg = NULL;
+  int col_adst = 0;  // first pass: 0 = DCT, 1 = ADST (uses cos_bit_col)
+  int row_adst = 0;  // second pass: 0 = DCT, 1 = ADST (uses cos_bit_row)
+  int flipud = 0;
+  int fliplr = 0;
+  __m128i in[4];
+
+  (void)bd;
+
+  // The switch only records what differs between transform types; the shared
+  // load / transform / store sequence follows it.
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg = &fwd_txfm_2d_cfg_dct_dct_4;
+      break;
+    case ADST_DCT:
+      cfg = &fwd_txfm_2d_cfg_adst_dct_4;
+      col_adst = 1;
+      break;
+    case DCT_ADST:
+      cfg = &fwd_txfm_2d_cfg_dct_adst_4;
+      row_adst = 1;
+      break;
+    case ADST_ADST:
+      cfg = &fwd_txfm_2d_cfg_adst_adst_4;
+      col_adst = 1;
+      row_adst = 1;
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+      cfg = &fwd_txfm_2d_cfg_adst_dct_4;
+      col_adst = 1;
+      flipud = 1;
+      break;
+    case DCT_FLIPADST:
+      cfg = &fwd_txfm_2d_cfg_dct_adst_4;
+      row_adst = 1;
+      fliplr = 1;
+      break;
+    case FLIPADST_FLIPADST:
+      cfg = &fwd_txfm_2d_cfg_adst_adst_4;
+      col_adst = 1;
+      row_adst = 1;
+      flipud = 1;
+      fliplr = 1;
+      break;
+    case ADST_FLIPADST:
+      cfg = &fwd_txfm_2d_cfg_adst_adst_4;
+      col_adst = 1;
+      row_adst = 1;
+      fliplr = 1;
+      break;
+    case FLIPADST_ADST:
+      cfg = &fwd_txfm_2d_cfg_adst_adst_4;
+      col_adst = 1;
+      row_adst = 1;
+      flipud = 1;
+      break;
+#endif
+    default: assert(0); return;
+  }
+
+  load_buffer_4x4(input, in, input_stride, flipud, fliplr, cfg->shift[0]);
+
+  if (col_adst) {
+    fadst4x4_sse4_1(in, cfg->cos_bit_col[2]);
+  } else {
+    fdct4x4_sse4_1(in, cfg->cos_bit_col[2]);
+  }
+
+  if (row_adst) {
+    fadst4x4_sse4_1(in, cfg->cos_bit_row[2]);
+  } else {
+    fdct4x4_sse4_1(in, cfg->cos_bit_row[2]);
+  }
+
+  write_buffer_4x4(in, coeff);
+}
+
+// Loads an 8x8 block of 16-bit samples into sixteen registers of 32-bit
+// lanes. Row r of the (optionally flipped) block is split over two registers:
+// in[2 * r] takes samples 0..3 and in[2 * r + 1] takes samples 4..7.
+//   input:  first sample of the block; each row is read with an aligned
+//           128-bit load
+//   in:     receives the sixteen registers
+//   stride: distance between consecutive input rows, in samples
+//   flipud: when non-zero, rows are taken from input row 7 down to row 0
+//   fliplr: when non-zero, every row goes through mm_reverse_epi16() first
+//           (defined elsewhere; presumably reverses the eight words)
+//   shift:  left shift applied to each sample after sign extension
+static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in,
+                                   int stride, int flipud, int fliplr,
+                                   int shift) {
+  int row;
+
+  for (row = 0; row < 8; ++row) {
+    const int src_row = flipud ? 7 - row : row;
+    __m128i lo = _mm_load_si128((const __m128i *)(input + src_row * stride));
+    __m128i hi;
+
+    if (fliplr) {
+      lo = mm_reverse_epi16(lo);
+    }
+
+    // Copy words 4..7 into the low half so they can be sign-extended as well.
+    hi = _mm_unpackhi_epi64(lo, lo);
+
+    in[2 * row] = _mm_slli_epi32(_mm_cvtepi16_epi32(lo), shift);
+    in[2 * row + 1] = _mm_slli_epi32(_mm_cvtepi16_epi32(hi), shift);
+  }
+}
+
+// Rounds and right-shifts every 32-bit lane of in[0..15] in place:
+//   lane = (lane + (1 << (shift - 1))) >> shift   (arithmetic shift)
+static INLINE void col_txfm_8x8_rounding(__m128i *in, int shift) {
+  const __m128i half = _mm_set1_epi32(1 << (shift - 1));
+  int i;
+
+  for (i = 0; i < 16; ++i) {
+    in[i] = _mm_srai_epi32(_mm_add_epi32(in[i], half), shift);
+  }
+}
+
+// Stores res[0..15] to output, one 128-bit register every four elements.
+// The aligned store is used, so every store address must be 16-byte aligned.
+static INLINE void write_buffer_8x8(const __m128i *res, tran_low_t *output) {
+  int i;
+
+  for (i = 0; i < 16; ++i) {
+    _mm_store_si128((__m128i *)(output + i * 4), res[i]);
+  }
+}
+
+// 8-point forward DCT over sixteen registers of 32-bit lanes.
+// The transform runs across the register index: in[0], in[2], ..., in[14]
+// are one set of eight inputs (each four lanes wide) and in[1], in[3], ...,
+// in[15] are a second set. Both sets go through the same sequence of steps;
+// the results of the first set land in the even-indexed out registers and
+// those of the second set in the odd-indexed ones. No transpose is done here.
+//   in:  sixteen input registers (only read)
+//   out: sixteen output registers
+//   bit: cosine precision; it selects the row of cospi_arr and is also the
+//        rounding right shift applied after every multiply-add
+static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ // Added before every ">> bit" below so the shift rounds instead of truncating.
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ __m128i u[8], v[8];
+
+ // Even 8 points 0, 2, ..., 14
+ // stage 0
+ // stage 1
+ // Sums go to u[0..3]. The differences for the outermost and innermost pairs
+ // are written straight to v[7] and v[4], the other two to u[6] and u[5].
+ u[0] = _mm_add_epi32(in[0], in[14]);
+ v[7] = _mm_sub_epi32(in[0], in[14]); // v[7]
+ u[1] = _mm_add_epi32(in[2], in[12]);
+ u[6] = _mm_sub_epi32(in[2], in[12]);
+ u[2] = _mm_add_epi32(in[4], in[10]);
+ u[5] = _mm_sub_epi32(in[4], in[10]);
+ u[3] = _mm_add_epi32(in[6], in[8]);
+ v[4] = _mm_sub_epi32(in[6], in[8]); // v[4]
+
+ // stage 2
+ v[0] = _mm_add_epi32(u[0], u[3]);
+ v[3] = _mm_sub_epi32(u[0], u[3]);
+ v[1] = _mm_add_epi32(u[1], u[2]);
+ v[2] = _mm_sub_epi32(u[1], u[2]);
+
+ // v[5] = (u[6] * cospi32 - u[5] * cospi32 + rnding) >> bit
+ v[5] = _mm_mullo_epi32(u[5], cospim32);
+ v[6] = _mm_mullo_epi32(u[6], cospi32);
+ v[5] = _mm_add_epi32(v[5], v[6]);
+ v[5] = _mm_add_epi32(v[5], rnding);
+ v[5] = _mm_srai_epi32(v[5], bit);
+
+ // v[6] = (u[5] * cospi32 + u[6] * cospi32 + rnding) >> bit, formed as
+ // u[5] * cospi32 - u[6] * (-cospi32). u[0] is free by now and is used as
+ // scratch.
+ u[0] = _mm_mullo_epi32(u[5], cospi32);
+ v[6] = _mm_mullo_epi32(u[6], cospim32);
+ v[6] = _mm_sub_epi32(u[0], v[6]);
+ v[6] = _mm_add_epi32(v[6], rnding);
+ v[6] = _mm_srai_epi32(v[6], bit);
+
+ // stage 3
+ // type 0
+ // u[0] = (v[0] * cospi32 + v[1] * cospi32 + rnding) >> bit
+ // u[1] = (v[0] * cospi32 - v[1] * cospi32 + rnding) >> bit
+ v[0] = _mm_mullo_epi32(v[0], cospi32);
+ v[1] = _mm_mullo_epi32(v[1], cospi32);
+ u[0] = _mm_add_epi32(v[0], v[1]);
+ u[0] = _mm_add_epi32(u[0], rnding);
+ u[0] = _mm_srai_epi32(u[0], bit);
+
+ u[1] = _mm_sub_epi32(v[0], v[1]);
+ u[1] = _mm_add_epi32(u[1], rnding);
+ u[1] = _mm_srai_epi32(u[1], bit);
+
+ // type 1
+ // u[2] = (v[2] * cospi48 + v[3] * cospi16 + rnding) >> bit
+ // u[3] = (v[3] * cospi48 - v[2] * cospi16 + rnding) >> bit
+ // (v[0] and v[1] are scratch from here on.)
+ v[0] = _mm_mullo_epi32(v[2], cospi48);
+ v[1] = _mm_mullo_epi32(v[3], cospi16);
+ u[2] = _mm_add_epi32(v[0], v[1]);
+ u[2] = _mm_add_epi32(u[2], rnding);
+ u[2] = _mm_srai_epi32(u[2], bit);
+
+ v[0] = _mm_mullo_epi32(v[2], cospi16);
+ v[1] = _mm_mullo_epi32(v[3], cospi48);
+ u[3] = _mm_sub_epi32(v[1], v[0]);
+ u[3] = _mm_add_epi32(u[3], rnding);
+ u[3] = _mm_srai_epi32(u[3], bit);
+
+ u[4] = _mm_add_epi32(v[4], v[5]);
+ u[5] = _mm_sub_epi32(v[4], v[5]);
+ u[6] = _mm_sub_epi32(v[7], v[6]);
+ u[7] = _mm_add_epi32(v[7], v[6]);
+
+ // stage 4
+ // stage 5
+ // Four rotations of (u[4], u[7]) and (u[5], u[6]), each rounded and shifted
+ // by bit, written directly to their output registers.
+ v[0] = _mm_mullo_epi32(u[4], cospi56);
+ v[1] = _mm_mullo_epi32(u[7], cospi8);
+ v[0] = _mm_add_epi32(v[0], v[1]);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ out[2] = _mm_srai_epi32(v[0], bit); // buf0[4]
+
+ v[0] = _mm_mullo_epi32(u[4], cospi8);
+ v[1] = _mm_mullo_epi32(u[7], cospi56);
+ v[0] = _mm_sub_epi32(v[1], v[0]);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ out[14] = _mm_srai_epi32(v[0], bit); // buf0[7]
+
+ v[0] = _mm_mullo_epi32(u[5], cospi24);
+ v[1] = _mm_mullo_epi32(u[6], cospi40);
+ v[0] = _mm_add_epi32(v[0], v[1]);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ out[10] = _mm_srai_epi32(v[0], bit); // buf0[5]
+
+ v[0] = _mm_mullo_epi32(u[5], cospi40);
+ v[1] = _mm_mullo_epi32(u[6], cospi24);
+ v[0] = _mm_sub_epi32(v[1], v[0]);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ out[6] = _mm_srai_epi32(v[0], bit); // buf0[6]
+
+ out[0] = u[0]; // buf0[0]
+ out[8] = u[1]; // buf0[1]
+ out[4] = u[2]; // buf0[2]
+ out[12] = u[3]; // buf0[3]
+
+ // Odd 8 points: 1, 3, ..., 15
+ // Same steps as above, reading in[1], in[3], ..., in[15] and writing the
+ // odd-indexed out registers.
+ // stage 0
+ // stage 1
+ u[0] = _mm_add_epi32(in[1], in[15]);
+ v[7] = _mm_sub_epi32(in[1], in[15]); // v[7]
+ u[1] = _mm_add_epi32(in[3], in[13]);
+ u[6] = _mm_sub_epi32(in[3], in[13]);
+ u[2] = _mm_add_epi32(in[5], in[11]);
+ u[5] = _mm_sub_epi32(in[5], in[11]);
+ u[3] = _mm_add_epi32(in[7], in[9]);
+ v[4] = _mm_sub_epi32(in[7], in[9]); // v[4]
+
+ // stage 2
+ v[0] = _mm_add_epi32(u[0], u[3]);
+ v[3] = _mm_sub_epi32(u[0], u[3]);
+ v[1] = _mm_add_epi32(u[1], u[2]);
+ v[2] = _mm_sub_epi32(u[1], u[2]);
+
+ v[5] = _mm_mullo_epi32(u[5], cospim32);
+ v[6] = _mm_mullo_epi32(u[6], cospi32);
+ v[5] = _mm_add_epi32(v[5], v[6]);
+ v[5] = _mm_add_epi32(v[5], rnding);
+ v[5] = _mm_srai_epi32(v[5], bit);
+
+ u[0] = _mm_mullo_epi32(u[5], cospi32);
+ v[6] = _mm_mullo_epi32(u[6], cospim32);
+ v[6] = _mm_sub_epi32(u[0], v[6]);
+ v[6] = _mm_add_epi32(v[6], rnding);
+ v[6] = _mm_srai_epi32(v[6], bit);
+
+ // stage 3
+ // type 0
+ v[0] = _mm_mullo_epi32(v[0], cospi32);
+ v[1] = _mm_mullo_epi32(v[1], cospi32);
+ u[0] = _mm_add_epi32(v[0], v[1]);
+ u[0] = _mm_add_epi32(u[0], rnding);
+ u[0] = _mm_srai_epi32(u[0], bit);
+
+ u[1] = _mm_sub_epi32(v[0], v[1]);
+ u[1] = _mm_add_epi32(u[1], rnding);
+ u[1] = _mm_srai_epi32(u[1], bit);
+
+ // type 1
+ v[0] = _mm_mullo_epi32(v[2], cospi48);
+ v[1] = _mm_mullo_epi32(v[3], cospi16);
+ u[2] = _mm_add_epi32(v[0], v[1]);
+ u[2] = _mm_add_epi32(u[2], rnding);
+ u[2] = _mm_srai_epi32(u[2], bit);
+
+ v[0] = _mm_mullo_epi32(v[2], cospi16);
+ v[1] = _mm_mullo_epi32(v[3], cospi48);
+ u[3] = _mm_sub_epi32(v[1], v[0]);
+ u[3] = _mm_add_epi32(u[3], rnding);
+ u[3] = _mm_srai_epi32(u[3], bit);
+
+ u[4] = _mm_add_epi32(v[4], v[5]);
+ u[5] = _mm_sub_epi32(v[4], v[5]);
+ u[6] = _mm_sub_epi32(v[7], v[6]);
+ u[7] = _mm_add_epi32(v[7], v[6]);
+
+ // stage 4
+ // stage 5
+ v[0] = _mm_mullo_epi32(u[4], cospi56);
+ v[1] = _mm_mullo_epi32(u[7], cospi8);
+ v[0] = _mm_add_epi32(v[0], v[1]);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ out[3] = _mm_srai_epi32(v[0], bit); // buf0[4]
+
+ v[0] = _mm_mullo_epi32(u[4], cospi8);
+ v[1] = _mm_mullo_epi32(u[7], cospi56);
+ v[0] = _mm_sub_epi32(v[1], v[0]);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ out[15] = _mm_srai_epi32(v[0], bit); // buf0[7]
+
+ v[0] = _mm_mullo_epi32(u[5], cospi24);
+ v[1] = _mm_mullo_epi32(u[6], cospi40);
+ v[0] = _mm_add_epi32(v[0], v[1]);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ out[11] = _mm_srai_epi32(v[0], bit); // buf0[5]
+
+ v[0] = _mm_mullo_epi32(u[5], cospi40);
+ v[1] = _mm_mullo_epi32(u[6], cospi24);
+ v[0] = _mm_sub_epi32(v[1], v[0]);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ out[7] = _mm_srai_epi32(v[0], bit); // buf0[6]
+
+ out[1] = u[0]; // buf0[0]
+ out[9] = u[1]; // buf0[1]
+ out[5] = u[2]; // buf0[2]
+ out[13] = u[3]; // buf0[3]
+}
+
+// 8-point forward ADST over sixteen registers of 32-bit lanes.
+// The transform runs across the register index: in[0], in[2], ..., in[14]
+// are one set of eight inputs (each four lanes wide) and in[1], in[3], ...,
+// in[15] are a second set. Both sets go through the same sequence of steps;
+// the results of the first set land in the even-indexed out registers and
+// those of the second set in the odd-indexed ones. No transpose is done here.
+//   in:  sixteen input registers (only read)
+//   out: sixteen output registers
+//   bit: cosine precision; it selects the row of cospi_arr and is also the
+//        rounding right shift applied after every multiply-add
+static void fadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];
+ const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
+ const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
+ const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
+ const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
+ const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
+ const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
+ const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
+ const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ // Added before every ">> bit" below so the shift rounds instead of truncating.
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));
+ const __m128i kZero = _mm_setzero_si128();
+ __m128i u[8], v[8], x;
+
+ // Even 8 points: 0, 2, ..., 14
+ // stage 0
+ // stage 1
+ // stage 2
+ // Four input pairs are rotated; each pair yields a sum term (even u index)
+ // and a difference term (odd u index), both rounded and shifted by bit.
+ // x holds the second product of each rotation.
+ // (1)
+ u[0] = _mm_mullo_epi32(in[14], cospi4);
+ x = _mm_mullo_epi32(in[0], cospi60);
+ u[0] = _mm_add_epi32(u[0], x);
+ u[0] = _mm_add_epi32(u[0], rnding);
+ u[0] = _mm_srai_epi32(u[0], bit);
+
+ u[1] = _mm_mullo_epi32(in[14], cospi60);
+ x = _mm_mullo_epi32(in[0], cospi4);
+ u[1] = _mm_sub_epi32(u[1], x);
+ u[1] = _mm_add_epi32(u[1], rnding);
+ u[1] = _mm_srai_epi32(u[1], bit);
+
+ // (2)
+ u[2] = _mm_mullo_epi32(in[10], cospi20);
+ x = _mm_mullo_epi32(in[4], cospi44);
+ u[2] = _mm_add_epi32(u[2], x);
+ u[2] = _mm_add_epi32(u[2], rnding);
+ u[2] = _mm_srai_epi32(u[2], bit);
+
+ u[3] = _mm_mullo_epi32(in[10], cospi44);
+ x = _mm_mullo_epi32(in[4], cospi20);
+ u[3] = _mm_sub_epi32(u[3], x);
+ u[3] = _mm_add_epi32(u[3], rnding);
+ u[3] = _mm_srai_epi32(u[3], bit);
+
+ // (3)
+ u[4] = _mm_mullo_epi32(in[6], cospi36);
+ x = _mm_mullo_epi32(in[8], cospi28);
+ u[4] = _mm_add_epi32(u[4], x);
+ u[4] = _mm_add_epi32(u[4], rnding);
+ u[4] = _mm_srai_epi32(u[4], bit);
+
+ u[5] = _mm_mullo_epi32(in[6], cospi28);
+ x = _mm_mullo_epi32(in[8], cospi36);
+ u[5] = _mm_sub_epi32(u[5], x);
+ u[5] = _mm_add_epi32(u[5], rnding);
+ u[5] = _mm_srai_epi32(u[5], bit);
+
+ // (4)
+ u[6] = _mm_mullo_epi32(in[2], cospi52);
+ x = _mm_mullo_epi32(in[12], cospi12);
+ u[6] = _mm_add_epi32(u[6], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = _mm_mullo_epi32(in[2], cospi12);
+ x = _mm_mullo_epi32(in[12], cospi52);
+ u[7] = _mm_sub_epi32(u[7], x);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
+
+ // stage 3
+ // Add/subtract u[k] and u[k + 4].
+ v[0] = _mm_add_epi32(u[0], u[4]);
+ v[4] = _mm_sub_epi32(u[0], u[4]);
+ v[1] = _mm_add_epi32(u[1], u[5]);
+ v[5] = _mm_sub_epi32(u[1], u[5]);
+ v[2] = _mm_add_epi32(u[2], u[6]);
+ v[6] = _mm_sub_epi32(u[2], u[6]);
+ v[3] = _mm_add_epi32(u[3], u[7]);
+ v[7] = _mm_sub_epi32(u[3], u[7]);
+
+ // stage 4
+ // v[0..3] pass through unchanged; (v[4], v[5]) and (v[6], v[7]) are rotated.
+ u[0] = v[0];
+ u[1] = v[1];
+ u[2] = v[2];
+ u[3] = v[3];
+
+ u[4] = _mm_mullo_epi32(v[4], cospi16);
+ x = _mm_mullo_epi32(v[5], cospi48);
+ u[4] = _mm_add_epi32(u[4], x);
+ u[4] = _mm_add_epi32(u[4], rnding);
+ u[4] = _mm_srai_epi32(u[4], bit);
+
+ u[5] = _mm_mullo_epi32(v[4], cospi48);
+ x = _mm_mullo_epi32(v[5], cospi16);
+ u[5] = _mm_sub_epi32(u[5], x);
+ u[5] = _mm_add_epi32(u[5], rnding);
+ u[5] = _mm_srai_epi32(u[5], bit);
+
+ // u[6] = (v[7] * cospi16 - v[6] * cospi48 + rnding) >> bit
+ u[6] = _mm_mullo_epi32(v[6], cospim48);
+ x = _mm_mullo_epi32(v[7], cospi16);
+ u[6] = _mm_add_epi32(u[6], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ // u[7] = (v[6] * cospi16 + v[7] * cospi48 + rnding) >> bit, formed as
+ // v[6] * cospi16 - v[7] * (-cospi48).
+ u[7] = _mm_mullo_epi32(v[6], cospi16);
+ x = _mm_mullo_epi32(v[7], cospim48);
+ u[7] = _mm_sub_epi32(u[7], x);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
+
+ // stage 5
+ // Add/subtract u[k] and u[k + 2] within each group of four.
+ v[0] = _mm_add_epi32(u[0], u[2]);
+ v[2] = _mm_sub_epi32(u[0], u[2]);
+ v[1] = _mm_add_epi32(u[1], u[3]);
+ v[3] = _mm_sub_epi32(u[1], u[3]);
+ v[4] = _mm_add_epi32(u[4], u[6]);
+ v[6] = _mm_sub_epi32(u[4], u[6]);
+ v[5] = _mm_add_epi32(u[5], u[7]);
+ v[7] = _mm_sub_epi32(u[5], u[7]);
+
+ // stage 6
+ // v[0], v[1], v[4], v[5] pass through; (v[2], v[3]) and (v[6], v[7]) are
+ // combined with cospi32. v[0] is reused as scratch after being copied.
+ u[0] = v[0];
+ u[1] = v[1];
+ u[4] = v[4];
+ u[5] = v[5];
+
+ v[0] = _mm_mullo_epi32(v[2], cospi32);
+ x = _mm_mullo_epi32(v[3], cospi32);
+ u[2] = _mm_add_epi32(v[0], x);
+ u[2] = _mm_add_epi32(u[2], rnding);
+ u[2] = _mm_srai_epi32(u[2], bit);
+
+ u[3] = _mm_sub_epi32(v[0], x);
+ u[3] = _mm_add_epi32(u[3], rnding);
+ u[3] = _mm_srai_epi32(u[3], bit);
+
+ v[0] = _mm_mullo_epi32(v[6], cospi32);
+ x = _mm_mullo_epi32(v[7], cospi32);
+ u[6] = _mm_add_epi32(v[0], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = _mm_sub_epi32(v[0], x);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
+
+ // stage 7
+ // Final permutation; every second output is negated (0 - x).
+ out[0] = u[0];
+ out[2] = _mm_sub_epi32(kZero, u[4]);
+ out[4] = u[6];
+ out[6] = _mm_sub_epi32(kZero, u[2]);
+ out[8] = u[3];
+ out[10] = _mm_sub_epi32(kZero, u[7]);
+ out[12] = u[5];
+ out[14] = _mm_sub_epi32(kZero, u[1]);
+
+ // Odd 8 points: 1, 3, ..., 15
+ // Same steps as above, reading in[1], in[3], ..., in[15] and writing the
+ // odd-indexed out registers.
+ // stage 0
+ // stage 1
+ // stage 2
+ // (1)
+ u[0] = _mm_mullo_epi32(in[15], cospi4);
+ x = _mm_mullo_epi32(in[1], cospi60);
+ u[0] = _mm_add_epi32(u[0], x);
+ u[0] = _mm_add_epi32(u[0], rnding);
+ u[0] = _mm_srai_epi32(u[0], bit);
+
+ u[1] = _mm_mullo_epi32(in[15], cospi60);
+ x = _mm_mullo_epi32(in[1], cospi4);
+ u[1] = _mm_sub_epi32(u[1], x);
+ u[1] = _mm_add_epi32(u[1], rnding);
+ u[1] = _mm_srai_epi32(u[1], bit);
+
+ // (2)
+ u[2] = _mm_mullo_epi32(in[11], cospi20);
+ x = _mm_mullo_epi32(in[5], cospi44);
+ u[2] = _mm_add_epi32(u[2], x);
+ u[2] = _mm_add_epi32(u[2], rnding);
+ u[2] = _mm_srai_epi32(u[2], bit);
+
+ u[3] = _mm_mullo_epi32(in[11], cospi44);
+ x = _mm_mullo_epi32(in[5], cospi20);
+ u[3] = _mm_sub_epi32(u[3], x);
+ u[3] = _mm_add_epi32(u[3], rnding);
+ u[3] = _mm_srai_epi32(u[3], bit);
+
+ // (3)
+ u[4] = _mm_mullo_epi32(in[7], cospi36);
+ x = _mm_mullo_epi32(in[9], cospi28);
+ u[4] = _mm_add_epi32(u[4], x);
+ u[4] = _mm_add_epi32(u[4], rnding);
+ u[4] = _mm_srai_epi32(u[4], bit);
+
+ u[5] = _mm_mullo_epi32(in[7], cospi28);
+ x = _mm_mullo_epi32(in[9], cospi36);
+ u[5] = _mm_sub_epi32(u[5], x);
+ u[5] = _mm_add_epi32(u[5], rnding);
+ u[5] = _mm_srai_epi32(u[5], bit);
+
+ // (4)
+ u[6] = _mm_mullo_epi32(in[3], cospi52);
+ x = _mm_mullo_epi32(in[13], cospi12);
+ u[6] = _mm_add_epi32(u[6], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = _mm_mullo_epi32(in[3], cospi12);
+ x = _mm_mullo_epi32(in[13], cospi52);
+ u[7] = _mm_sub_epi32(u[7], x);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
+
+ // stage 3
+ v[0] = _mm_add_epi32(u[0], u[4]);
+ v[4] = _mm_sub_epi32(u[0], u[4]);
+ v[1] = _mm_add_epi32(u[1], u[5]);
+ v[5] = _mm_sub_epi32(u[1], u[5]);
+ v[2] = _mm_add_epi32(u[2], u[6]);
+ v[6] = _mm_sub_epi32(u[2], u[6]);
+ v[3] = _mm_add_epi32(u[3], u[7]);
+ v[7] = _mm_sub_epi32(u[3], u[7]);
+
+ // stage 4
+ u[0] = v[0];
+ u[1] = v[1];
+ u[2] = v[2];
+ u[3] = v[3];
+
+ u[4] = _mm_mullo_epi32(v[4], cospi16);
+ x = _mm_mullo_epi32(v[5], cospi48);
+ u[4] = _mm_add_epi32(u[4], x);
+ u[4] = _mm_add_epi32(u[4], rnding);
+ u[4] = _mm_srai_epi32(u[4], bit);
+
+ u[5] = _mm_mullo_epi32(v[4], cospi48);
+ x = _mm_mullo_epi32(v[5], cospi16);
+ u[5] = _mm_sub_epi32(u[5], x);
+ u[5] = _mm_add_epi32(u[5], rnding);
+ u[5] = _mm_srai_epi32(u[5], bit);
+
+ u[6] = _mm_mullo_epi32(v[6], cospim48);
+ x = _mm_mullo_epi32(v[7], cospi16);
+ u[6] = _mm_add_epi32(u[6], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = _mm_mullo_epi32(v[6], cospi16);
+ x = _mm_mullo_epi32(v[7], cospim48);
+ u[7] = _mm_sub_epi32(u[7], x);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
+
+ // stage 5
+ v[0] = _mm_add_epi32(u[0], u[2]);
+ v[2] = _mm_sub_epi32(u[0], u[2]);
+ v[1] = _mm_add_epi32(u[1], u[3]);
+ v[3] = _mm_sub_epi32(u[1], u[3]);
+ v[4] = _mm_add_epi32(u[4], u[6]);
+ v[6] = _mm_sub_epi32(u[4], u[6]);
+ v[5] = _mm_add_epi32(u[5], u[7]);
+ v[7] = _mm_sub_epi32(u[5], u[7]);
+
+ // stage 6
+ u[0] = v[0];
+ u[1] = v[1];
+ u[4] = v[4];
+ u[5] = v[5];
+
+ v[0] = _mm_mullo_epi32(v[2], cospi32);
+ x = _mm_mullo_epi32(v[3], cospi32);
+ u[2] = _mm_add_epi32(v[0], x);
+ u[2] = _mm_add_epi32(u[2], rnding);
+ u[2] = _mm_srai_epi32(u[2], bit);
+
+ u[3] = _mm_sub_epi32(v[0], x);
+ u[3] = _mm_add_epi32(u[3], rnding);
+ u[3] = _mm_srai_epi32(u[3], bit);
+
+ v[0] = _mm_mullo_epi32(v[6], cospi32);
+ x = _mm_mullo_epi32(v[7], cospi32);
+ u[6] = _mm_add_epi32(v[0], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = _mm_sub_epi32(v[0], x);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
+
+ // stage 7
+ out[1] = u[0];
+ out[3] = _mm_sub_epi32(kZero, u[4]);
+ out[5] = u[6];
+ out[7] = _mm_sub_epi32(kZero, u[2]);
+ out[9] = u[3];
+ out[11] = _mm_sub_epi32(kZero, u[7]);
+ out[13] = u[5];
+ out[15] = _mm_sub_epi32(kZero, u[1]);
+}
+
+// 2-D forward transform of an 8x8 block.
+//   input:   16-bit input samples (rows are read with aligned 128-bit loads)
+//   coeff:   receives the 64 output coefficients (aligned 128-bit stores)
+//   stride:  distance between consecutive input rows, in samples
+//   tx_type: selects the config and which 1-D kernel (DCT or ADST) runs in
+//            each of the two passes; the FLIPADST types, available only with
+//            CONFIG_EXT_TX, also flip the block while loading
+//   bd:      unused
+// An unsupported tx_type asserts; when asserts are compiled out the function
+// returns without writing to coeff.
+void vp10_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *coeff,
+                                int stride, int tx_type, int bd) {
+  const TXFM_2D_CFG *cfg = NULL;
+  int col_adst = 0;  // first pass: 0 = DCT, 1 = ADST (uses cos_bit_col)
+  int row_adst = 0;  // second pass: 0 = DCT, 1 = ADST (uses cos_bit_row)
+  int flipud = 0;
+  int fliplr = 0;
+  __m128i in[16], out[16];
+
+  (void)bd;
+
+  // The switch only records what differs between transform types; the shared
+  // load / transform / store sequence follows it.
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg = &fwd_txfm_2d_cfg_dct_dct_8;
+      break;
+    case ADST_DCT:
+      cfg = &fwd_txfm_2d_cfg_adst_dct_8;
+      col_adst = 1;
+      break;
+    case DCT_ADST:
+      cfg = &fwd_txfm_2d_cfg_dct_adst_8;
+      row_adst = 1;
+      break;
+    case ADST_ADST:
+      cfg = &fwd_txfm_2d_cfg_adst_adst_8;
+      col_adst = 1;
+      row_adst = 1;
+      break;
+#if CONFIG_EXT_TX
+    case FLIPADST_DCT:
+      cfg = &fwd_txfm_2d_cfg_adst_dct_8;
+      col_adst = 1;
+      flipud = 1;
+      break;
+    case DCT_FLIPADST:
+      cfg = &fwd_txfm_2d_cfg_dct_adst_8;
+      row_adst = 1;
+      fliplr = 1;
+      break;
+    case FLIPADST_FLIPADST:
+      cfg = &fwd_txfm_2d_cfg_adst_adst_8;
+      col_adst = 1;
+      row_adst = 1;
+      flipud = 1;
+      fliplr = 1;
+      break;
+    case ADST_FLIPADST:
+      cfg = &fwd_txfm_2d_cfg_adst_adst_8;
+      col_adst = 1;
+      row_adst = 1;
+      fliplr = 1;
+      break;
+    case FLIPADST_ADST:
+      cfg = &fwd_txfm_2d_cfg_adst_adst_8;
+      col_adst = 1;
+      row_adst = 1;
+      flipud = 1;
+      break;
+#endif  // CONFIG_EXT_TX
+    default: assert(0); return;
+  }
+
+  load_buffer_8x8(input, in, stride, flipud, fliplr, cfg->shift[0]);
+
+  // First pass, then the intermediate rounding by -shift[1].
+  if (col_adst) {
+    fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
+  } else {
+    fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]);
+  }
+  col_txfm_8x8_rounding(out, -cfg->shift[1]);
+  transpose_8x8(out, in);
+
+  // Second pass on the transposed data, then transpose back.
+  if (row_adst) {
+    fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+  } else {
+    fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]);
+  }
+  transpose_8x8(out, in);
+
+  write_buffer_8x8(in, coeff);
+}
+
+// Hybrid Transform 16x16
+
+// Rearranges four 8x8 tiles into a single 16x16 block.
+//
+// |in| holds 64 vectors: four tiles of 16 vectors stored back to back, each
+// tile being 8 rows of 2 vectors. The tiles at in[0] and in[16] supply the
+// left and right halves of output rows 0..7; the tiles at in[32] and in[48]
+// supply the left and right halves of output rows 8..15.
+//
+// |out| receives 64 vectors laid out as 16 rows of 4 vectors.
+static INLINE void convert_8x8_to_16x16(const __m128i *in, __m128i *out) {
+  int half;
+
+  for (half = 0; half < 2; ++half) {
+    // half 0 covers output rows 0..7, half 1 covers output rows 8..15; the
+    // source tiles and destination rows of half h start at vector 32 * h.
+    const __m128i *left = in + 32 * half;
+    const __m128i *right = left + 16;
+    __m128i *dst = out + 32 * half;
+    int row;
+
+    for (row = 0; row < 8; ++row) {
+      // One output row: two vectors from the left tile, two from the right.
+      dst[4 * row + 0] = left[2 * row + 0];
+      dst[4 * row + 1] = left[2 * row + 1];
+      dst[4 * row + 2] = right[2 * row + 0];
+      dst[4 * row + 3] = right[2 * row + 1];
+    }
+  }
+}
+
+// Loads a 16x16 block of int16_t samples as four 8x8 tiles and assembles
+// them into |out| (64 vectors) with convert_8x8_to_16x16().
+//
+// The top-left tile starts at |input|; the other three start 8 samples to
+// the right and/or 8 rows (8 * stride samples) further on.
+//
+// input:  first sample of the block.
+// stride: offset, in samples, from one row of |input| to the next.
+// flipud: when non-zero, the upper and lower source tiles trade places.
+// fliplr: when non-zero, the left and right source tiles trade places.
+// shift:  passed through to load_buffer_8x8().
+//
+// Both flip flags are also passed to load_buffer_8x8() for every tile, so
+// any flipping inside a tile is left to that helper.
+static INLINE void load_buffer_16x16(const int16_t *input, __m128i *out,
+                                     int stride, int flipud, int fliplr,
+                                     int shift) {
+  // Scratch tiles: [0] top-left, [16] top-right, [32] bottom-left,
+  // [48] bottom-right.
+  __m128i tiles[64];
+
+  // Row/column offset (0 or 8) of the source tile feeding each position.
+  const int upper_row = flipud ? 8 : 0;
+  const int lower_row = 8 - upper_row;
+  const int left_col = fliplr ? 8 : 0;
+  const int right_col = 8 - left_col;
+
+  const int16_t *const top_left = input + upper_row * stride + left_col;
+  const int16_t *const top_right = input + upper_row * stride + right_col;
+  const int16_t *const bottom_left = input + lower_row * stride + left_col;
+  const int16_t *const bottom_right =
+      input + lower_row * stride + right_col;
+
+  // Left half of the block.
+  load_buffer_8x8(top_left, &tiles[0], stride, flipud, fliplr, shift);
+  load_buffer_8x8(bottom_left, &tiles[32], stride, flipud, fliplr, shift);
+
+  // Right half of the block.
+  load_buffer_8x8(top_right, &tiles[16], stride, flipud, fliplr, shift);
+  load_buffer_8x8(bottom_right, &tiles[48], stride, flipud, fliplr, shift);
+
+  // Interleave the four tiles into 16 rows of 4 vectors.
+  convert_8x8_to_16x16(tiles, out);
+}
+// 16-point forward DCT down each column of a 16x16 block of 32-bit lanes.
+static void fdct16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];  // table picked by |bit|
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i cospim32 = _mm_set1_epi32(-cospi[32]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+ const __m128i cospim16 = _mm_set1_epi32(-cospi[16]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospi60 = _mm_set1_epi32(cospi[60]);
+ const __m128i cospi4 = _mm_set1_epi32(cospi[4]);
+ const __m128i cospi28 = _mm_set1_epi32(cospi[28]);
+ const __m128i cospi36 = _mm_set1_epi32(cospi[36]);
+ const __m128i cospi44 = _mm_set1_epi32(cospi[44]);
+ const __m128i cospi20 = _mm_set1_epi32(cospi[20]);
+ const __m128i cospi12 = _mm_set1_epi32(cospi[12]);
+ const __m128i cospi52 = _mm_set1_epi32(cospi[52]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));  // half of (1 << bit)
+ __m128i u[16], v[16], x;  // u/v alternate as stage input/output, x is scratch
+ const int col_num = 4;  // __m128i vectors per row of |in| and |out|
+ int col;
+ // |in| and |out| are indexed as [row * col_num + col], row = 0..15.
+ // Each pass handles one vector column (4 lanes): columns 0, 1, 2, 3.
+ for (col = 0; col < col_num; ++col) {
+ // stage 0: input rows are consumed directly by stage 1
+ // stage 1: u[k] = row k + row (15 - k), u[15 - k] = row k - row (15 - k)
+ u[0] = _mm_add_epi32(in[0 * col_num + col], in[15 * col_num + col]);
+ u[15] = _mm_sub_epi32(in[0 * col_num + col], in[15 * col_num + col]);
+ u[1] = _mm_add_epi32(in[1 * col_num + col], in[14 * col_num + col]);
+ u[14] = _mm_sub_epi32(in[1 * col_num + col], in[14 * col_num + col]);
+ u[2] = _mm_add_epi32(in[2 * col_num + col], in[13 * col_num + col]);
+ u[13] = _mm_sub_epi32(in[2 * col_num + col], in[13 * col_num + col]);
+ u[3] = _mm_add_epi32(in[3 * col_num + col], in[12 * col_num + col]);
+ u[12] = _mm_sub_epi32(in[3 * col_num + col], in[12 * col_num + col]);
+ u[4] = _mm_add_epi32(in[4 * col_num + col], in[11 * col_num + col]);
+ u[11] = _mm_sub_epi32(in[4 * col_num + col], in[11 * col_num + col]);
+ u[5] = _mm_add_epi32(in[5 * col_num + col], in[10 * col_num + col]);
+ u[10] = _mm_sub_epi32(in[5 * col_num + col], in[10 * col_num + col]);
+ u[6] = _mm_add_epi32(in[6 * col_num + col], in[9 * col_num + col]);
+ u[9] = _mm_sub_epi32(in[6 * col_num + col], in[9 * col_num + col]);
+ u[7] = _mm_add_epi32(in[7 * col_num + col], in[8 * col_num + col]);
+ u[8] = _mm_sub_epi32(in[7 * col_num + col], in[8 * col_num + col]);
+
+ // stage 2: add/sub butterflies on u[0..7]; u[10..13] weighted by cospi[32]
+ v[0] = _mm_add_epi32(u[0], u[7]);
+ v[7] = _mm_sub_epi32(u[0], u[7]);
+ v[1] = _mm_add_epi32(u[1], u[6]);
+ v[6] = _mm_sub_epi32(u[1], u[6]);
+ v[2] = _mm_add_epi32(u[2], u[5]);
+ v[5] = _mm_sub_epi32(u[2], u[5]);
+ v[3] = _mm_add_epi32(u[3], u[4]);
+ v[4] = _mm_sub_epi32(u[3], u[4]);
+ v[8] = u[8];
+ v[9] = u[9];
+ // v[10] = (-cospi[32] * u[10] + cospi[32] * u[13] + rnding) >> bit
+ v[10] = _mm_mullo_epi32(u[10], cospim32);
+ x = _mm_mullo_epi32(u[13], cospi32);
+ v[10] = _mm_add_epi32(v[10], x);
+ v[10] = _mm_add_epi32(v[10], rnding);
+ v[10] = _mm_srai_epi32(v[10], bit);
+ // v[13] = (cospi[32] * u[10] + cospi[32] * u[13] + rnding) >> bit
+ v[13] = _mm_mullo_epi32(u[10], cospi32);
+ x = _mm_mullo_epi32(u[13], cospim32);
+ v[13] = _mm_sub_epi32(v[13], x);
+ v[13] = _mm_add_epi32(v[13], rnding);
+ v[13] = _mm_srai_epi32(v[13], bit);
+ // v[11] / v[12]: the same two formulas applied to the pair u[11], u[12]
+ v[11] = _mm_mullo_epi32(u[11], cospim32);
+ x = _mm_mullo_epi32(u[12], cospi32);
+ v[11] = _mm_add_epi32(v[11], x);
+ v[11] = _mm_add_epi32(v[11], rnding);
+ v[11] = _mm_srai_epi32(v[11], bit);
+
+ v[12] = _mm_mullo_epi32(u[11], cospi32);
+ x = _mm_mullo_epi32(u[12], cospim32);
+ v[12] = _mm_sub_epi32(v[12], x);
+ v[12] = _mm_add_epi32(v[12], rnding);
+ v[12] = _mm_srai_epi32(v[12], bit);
+ v[14] = u[14];
+ v[15] = u[15];
+
+ // stage 3: add/sub butterflies on v[0..3] and v[8..15]; v[5], v[6] weighted
+ u[0] = _mm_add_epi32(v[0], v[3]);
+ u[3] = _mm_sub_epi32(v[0], v[3]);
+ u[1] = _mm_add_epi32(v[1], v[2]);
+ u[2] = _mm_sub_epi32(v[1], v[2]);
+ u[4] = v[4];
+
+ u[5] = _mm_mullo_epi32(v[5], cospim32);
+ x = _mm_mullo_epi32(v[6], cospi32);
+ u[5] = _mm_add_epi32(u[5], x);
+ u[5] = _mm_add_epi32(u[5], rnding);
+ u[5] = _mm_srai_epi32(u[5], bit);
+
+ u[6] = _mm_mullo_epi32(v[5], cospi32);
+ x = _mm_mullo_epi32(v[6], cospim32);
+ u[6] = _mm_sub_epi32(u[6], x);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[7] = v[7];
+ u[8] = _mm_add_epi32(v[8], v[11]);
+ u[11] = _mm_sub_epi32(v[8], v[11]);
+ u[9] = _mm_add_epi32(v[9], v[10]);
+ u[10] = _mm_sub_epi32(v[9], v[10]);
+ u[12] = _mm_sub_epi32(v[15], v[12]);
+ u[15] = _mm_add_epi32(v[15], v[12]);
+ u[13] = _mm_sub_epi32(v[14], v[13]);
+ u[14] = _mm_add_epi32(v[14], v[13]);
+
+ // stage 4: weight pairs (0,1), (2,3), (9,14), (10,13); add/sub on u[4..7]
+ u[0] = _mm_mullo_epi32(u[0], cospi32);  // u[0], u[1] are scaled in place
+ u[1] = _mm_mullo_epi32(u[1], cospi32);
+ v[0] = _mm_add_epi32(u[0], u[1]);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ v[0] = _mm_srai_epi32(v[0], bit);
+
+ v[1] = _mm_sub_epi32(u[0], u[1]);
+ v[1] = _mm_add_epi32(v[1], rnding);
+ v[1] = _mm_srai_epi32(v[1], bit);
+
+ v[2] = _mm_mullo_epi32(u[2], cospi48);
+ x = _mm_mullo_epi32(u[3], cospi16);
+ v[2] = _mm_add_epi32(v[2], x);
+ v[2] = _mm_add_epi32(v[2], rnding);
+ v[2] = _mm_srai_epi32(v[2], bit);
+ // v[3] = (cospi[48] * u[3] - cospi[16] * u[2] + rnding) >> bit
+ v[3] = _mm_mullo_epi32(u[2], cospi16);
+ x = _mm_mullo_epi32(u[3], cospi48);
+ v[3] = _mm_sub_epi32(x, v[3]);
+ v[3] = _mm_add_epi32(v[3], rnding);
+ v[3] = _mm_srai_epi32(v[3], bit);
+
+ v[4] = _mm_add_epi32(u[4], u[5]);
+ v[5] = _mm_sub_epi32(u[4], u[5]);
+ v[6] = _mm_sub_epi32(u[7], u[6]);
+ v[7] = _mm_add_epi32(u[7], u[6]);
+ v[8] = u[8];
+
+ v[9] = _mm_mullo_epi32(u[9], cospim16);
+ x = _mm_mullo_epi32(u[14], cospi48);
+ v[9] = _mm_add_epi32(v[9], x);
+ v[9] = _mm_add_epi32(v[9], rnding);
+ v[9] = _mm_srai_epi32(v[9], bit);
+ // v[14] = (cospi[48] * u[9] + cospi[16] * u[14] + rnding) >> bit
+ v[14] = _mm_mullo_epi32(u[9], cospi48);
+ x = _mm_mullo_epi32(u[14], cospim16);
+ v[14] = _mm_sub_epi32(v[14], x);
+ v[14] = _mm_add_epi32(v[14], rnding);
+ v[14] = _mm_srai_epi32(v[14], bit);
+
+ v[10] = _mm_mullo_epi32(u[10], cospim48);
+ x = _mm_mullo_epi32(u[13], cospim16);
+ v[10] = _mm_add_epi32(v[10], x);
+ v[10] = _mm_add_epi32(v[10], rnding);
+ v[10] = _mm_srai_epi32(v[10], bit);
+ // v[13] = (-cospi[16] * u[10] + cospi[48] * u[13] + rnding) >> bit
+ v[13] = _mm_mullo_epi32(u[10], cospim16);
+ x = _mm_mullo_epi32(u[13], cospim48);
+ v[13] = _mm_sub_epi32(v[13], x);
+ v[13] = _mm_add_epi32(v[13], rnding);
+ v[13] = _mm_srai_epi32(v[13], bit);
+
+ v[11] = u[11];
+ v[12] = u[12];
+ v[15] = u[15];
+
+ // stage 5: weight pairs (4,7), (5,6); add/sub butterflies on v[8..15]
+ u[0] = v[0];
+ u[1] = v[1];
+ u[2] = v[2];
+ u[3] = v[3];
+
+ u[4] = _mm_mullo_epi32(v[4], cospi56);
+ x = _mm_mullo_epi32(v[7], cospi8);
+ u[4] = _mm_add_epi32(u[4], x);
+ u[4] = _mm_add_epi32(u[4], rnding);
+ u[4] = _mm_srai_epi32(u[4], bit);
+
+ u[7] = _mm_mullo_epi32(v[4], cospi8);
+ x = _mm_mullo_epi32(v[7], cospi56);
+ u[7] = _mm_sub_epi32(x, u[7]);
+ u[7] = _mm_add_epi32(u[7], rnding);
+ u[7] = _mm_srai_epi32(u[7], bit);
+
+ u[5] = _mm_mullo_epi32(v[5], cospi24);
+ x = _mm_mullo_epi32(v[6], cospi40);
+ u[5] = _mm_add_epi32(u[5], x);
+ u[5] = _mm_add_epi32(u[5], rnding);
+ u[5] = _mm_srai_epi32(u[5], bit);
+
+ u[6] = _mm_mullo_epi32(v[5], cospi40);
+ x = _mm_mullo_epi32(v[6], cospi24);
+ u[6] = _mm_sub_epi32(x, u[6]);
+ u[6] = _mm_add_epi32(u[6], rnding);
+ u[6] = _mm_srai_epi32(u[6], bit);
+
+ u[8] = _mm_add_epi32(v[8], v[9]);
+ u[9] = _mm_sub_epi32(v[8], v[9]);
+ u[10] = _mm_sub_epi32(v[11], v[10]);
+ u[11] = _mm_add_epi32(v[11], v[10]);
+ u[12] = _mm_add_epi32(v[12], v[13]);
+ u[13] = _mm_sub_epi32(v[12], v[13]);
+ u[14] = _mm_sub_epi32(v[15], v[14]);
+ u[15] = _mm_add_epi32(v[15], v[14]);
+
+ // stage 6: weight pairs (8,15), (9,14), (10,13), (11,12); 0..7 pass through
+ v[0] = u[0];
+ v[1] = u[1];
+ v[2] = u[2];
+ v[3] = u[3];
+ v[4] = u[4];
+ v[5] = u[5];
+ v[6] = u[6];
+ v[7] = u[7];
+
+ v[8] = _mm_mullo_epi32(u[8], cospi60);
+ x = _mm_mullo_epi32(u[15], cospi4);
+ v[8] = _mm_add_epi32(v[8], x);
+ v[8] = _mm_add_epi32(v[8], rnding);
+ v[8] = _mm_srai_epi32(v[8], bit);
+
+ v[15] = _mm_mullo_epi32(u[8], cospi4);
+ x = _mm_mullo_epi32(u[15], cospi60);
+ v[15] = _mm_sub_epi32(x, v[15]);
+ v[15] = _mm_add_epi32(v[15], rnding);
+ v[15] = _mm_srai_epi32(v[15], bit);
+
+ v[9] = _mm_mullo_epi32(u[9], cospi28);
+ x = _mm_mullo_epi32(u[14], cospi36);
+ v[9] = _mm_add_epi32(v[9], x);
+ v[9] = _mm_add_epi32(v[9], rnding);
+ v[9] = _mm_srai_epi32(v[9], bit);
+
+ v[14] = _mm_mullo_epi32(u[9], cospi36);
+ x = _mm_mullo_epi32(u[14], cospi28);
+ v[14] = _mm_sub_epi32(x, v[14]);
+ v[14] = _mm_add_epi32(v[14], rnding);
+ v[14] = _mm_srai_epi32(v[14], bit);
+
+ v[10] = _mm_mullo_epi32(u[10], cospi44);
+ x = _mm_mullo_epi32(u[13], cospi20);
+ v[10] = _mm_add_epi32(v[10], x);
+ v[10] = _mm_add_epi32(v[10], rnding);
+ v[10] = _mm_srai_epi32(v[10], bit);
+
+ v[13] = _mm_mullo_epi32(u[10], cospi20);
+ x = _mm_mullo_epi32(u[13], cospi44);
+ v[13] = _mm_sub_epi32(x, v[13]);
+ v[13] = _mm_add_epi32(v[13], rnding);
+ v[13] = _mm_srai_epi32(v[13], bit);
+
+ v[11] = _mm_mullo_epi32(u[11], cospi12);
+ x = _mm_mullo_epi32(u[12], cospi52);
+ v[11] = _mm_add_epi32(v[11], x);
+ v[11] = _mm_add_epi32(v[11], rnding);
+ v[11] = _mm_srai_epi32(v[11], bit);
+
+ v[12] = _mm_mullo_epi32(u[11], cospi52);
+ x = _mm_mullo_epi32(u[12], cospi12);
+ v[12] = _mm_sub_epi32(x, v[12]);
+ v[12] = _mm_add_epi32(v[12], rnding);
+ v[12] = _mm_srai_epi32(v[12], bit);
+ // Output row r takes the v[] entry listed below (a fixed permutation).
+ out[0 * col_num + col] = v[0];
+ out[1 * col_num + col] = v[8];
+ out[2 * col_num + col] = v[4];
+ out[3 * col_num + col] = v[12];
+ out[4 * col_num + col] = v[2];
+ out[5 * col_num + col] = v[10];
+ out[6 * col_num + col] = v[6];
+ out[7 * col_num + col] = v[14];
+ out[8 * col_num + col] = v[1];
+ out[9 * col_num + col] = v[9];
+ out[10 * col_num + col] = v[5];
+ out[11 * col_num + col] = v[13];
+ out[12 * col_num + col] = v[3];
+ out[13 * col_num + col] = v[11];
+ out[14 * col_num + col] = v[7];
+ out[15 * col_num + col] = v[15];
+ }
+}
+// 16-point forward ADST down each column of a 16x16 block of 32-bit lanes.
+static void fadst16x16_sse4_1(__m128i *in, __m128i *out, int bit) {
+ const int32_t *cospi = cospi_arr[bit - cos_bit_min];  // table picked by |bit|
+ const __m128i cospi2 = _mm_set1_epi32(cospi[2]);
+ const __m128i cospi62 = _mm_set1_epi32(cospi[62]);
+ const __m128i cospi10 = _mm_set1_epi32(cospi[10]);
+ const __m128i cospi54 = _mm_set1_epi32(cospi[54]);
+ const __m128i cospi18 = _mm_set1_epi32(cospi[18]);
+ const __m128i cospi46 = _mm_set1_epi32(cospi[46]);
+ const __m128i cospi26 = _mm_set1_epi32(cospi[26]);
+ const __m128i cospi38 = _mm_set1_epi32(cospi[38]);
+ const __m128i cospi34 = _mm_set1_epi32(cospi[34]);
+ const __m128i cospi30 = _mm_set1_epi32(cospi[30]);
+ const __m128i cospi42 = _mm_set1_epi32(cospi[42]);
+ const __m128i cospi22 = _mm_set1_epi32(cospi[22]);
+ const __m128i cospi50 = _mm_set1_epi32(cospi[50]);
+ const __m128i cospi14 = _mm_set1_epi32(cospi[14]);
+ const __m128i cospi58 = _mm_set1_epi32(cospi[58]);
+ const __m128i cospi6 = _mm_set1_epi32(cospi[6]);
+ const __m128i cospi8 = _mm_set1_epi32(cospi[8]);
+ const __m128i cospi56 = _mm_set1_epi32(cospi[56]);
+ const __m128i cospi40 = _mm_set1_epi32(cospi[40]);
+ const __m128i cospi24 = _mm_set1_epi32(cospi[24]);
+ const __m128i cospim56 = _mm_set1_epi32(-cospi[56]);
+ const __m128i cospim24 = _mm_set1_epi32(-cospi[24]);
+ const __m128i cospi48 = _mm_set1_epi32(cospi[48]);
+ const __m128i cospi16 = _mm_set1_epi32(cospi[16]);
+ const __m128i cospim48 = _mm_set1_epi32(-cospi[48]);
+ const __m128i cospi32 = _mm_set1_epi32(cospi[32]);
+ const __m128i rnding = _mm_set1_epi32(1 << (bit - 1));  // half of (1 << bit)
+ __m128i u[16], v[16], x, y;  // u/v alternate between stages; x, y are scratch
+ const int col_num = 4;  // __m128i vectors per row of |in| and |out|
+ int col;
+ // |in| and |out| are indexed as [row * col_num + col], row = 0..15.
+ // Each pass handles one vector column (4 lanes): columns 0, 1, 2, 3.
+ for (col = 0; col < col_num; ++col) {
+ // stage 0
+ // stage 1 (no separate code: stage 2 reads |in| directly)
+ // stage 2: weight row pairs (15,0), (13,2), (11,4), (9,6), (7,8), (5,10), ...
+ v[0] = _mm_mullo_epi32(in[15 * col_num + col], cospi2);
+ x = _mm_mullo_epi32(in[0 * col_num + col], cospi62);
+ v[0] = _mm_add_epi32(v[0], x);
+ v[0] = _mm_add_epi32(v[0], rnding);
+ v[0] = _mm_srai_epi32(v[0], bit);
+
+ v[1] = _mm_mullo_epi32(in[15 * col_num + col], cospi62);
+ x = _mm_mullo_epi32(in[0 * col_num + col], cospi2);
+ v[1] = _mm_sub_epi32(v[1], x);
+ v[1] = _mm_add_epi32(v[1], rnding);
+ v[1] = _mm_srai_epi32(v[1], bit);
+
+ v[2] = _mm_mullo_epi32(in[13 * col_num + col], cospi10);
+ x = _mm_mullo_epi32(in[2 * col_num + col], cospi54);
+ v[2] = _mm_add_epi32(v[2], x);
+ v[2] = _mm_add_epi32(v[2], rnding);
+ v[2] = _mm_srai_epi32(v[2], bit);
+
+ v[3] = _mm_mullo_epi32(in[13 * col_num + col], cospi54);
+ x = _mm_mullo_epi32(in[2 * col_num + col], cospi10);
+ v[3] = _mm_sub_epi32(v[3], x);
+ v[3] = _mm_add_epi32(v[3], rnding);
+ v[3] = _mm_srai_epi32(v[3], bit);
+
+ v[4] = _mm_mullo_epi32(in[11 * col_num + col], cospi18);
+ x = _mm_mullo_epi32(in[4 * col_num + col], cospi46);
+ v[4] = _mm_add_epi32(v[4], x);
+ v[4] = _mm_add_epi32(v[4], rnding);
+ v[4] = _mm_srai_epi32(v[4], bit);
+
+ v[5] = _mm_mullo_epi32(in[11 * col_num + col], cospi46);
+ x = _mm_mullo_epi32(in[4 * col_num + col], cospi18);
+ v[5] = _mm_sub_epi32(v[5], x);
+ v[5] = _mm_add_epi32(v[5], rnding);
+ v[5] = _mm_srai_epi32(v[5], bit);
+
+ v[6] = _mm_mullo_epi32(in[9 * col_num + col], cospi26);
+ x = _mm_mullo_epi32(in[6 * col_num + col], cospi38);
+ v[6] = _mm_add_epi32(v[6], x);
+ v[6] = _mm_add_epi32(v[6], rnding);
+ v[6] = _mm_srai_epi32(v[6], bit);
+
+ v[7] = _mm_mullo_epi32(in[9 * col_num + col], cospi38);
+ x = _mm_mullo_epi32(in[6 * col_num + col], cospi26);
+ v[7] = _mm_sub_epi32(v[7], x);
+ v[7] = _mm_add_epi32(v[7], rnding);
+ v[7] = _mm_srai_epi32(v[7], bit);
+
+ v[8] = _mm_mullo_epi32(in[7 * col_num + col], cospi34);
+ x = _mm_mullo_epi32(in[8 * col_num + col], cospi30);
+ v[8] = _mm_add_epi32(v[8], x);
+ v[8] = _mm_add_epi32(v[8], rnding);
+ v[8] = _mm_srai_epi32(v[8], bit);
+
+ v[9] = _mm_mullo_epi32(in[7 * col_num + col], cospi30);
+ x = _mm_mullo_epi32(in[8 * col_num + col], cospi34);
+ v[9] = _mm_sub_epi32(v[9], x);
+ v[9] = _mm_add_epi32(v[9], rnding);
+ v[9] = _mm_srai_epi32(v[9], bit);
+
+ v[10] = _mm_mullo_epi32(in[5 * col_num + col], cospi42);
+ x = _mm_mullo_epi32(in[10 * col_num + col], cospi22);
+ v[10] = _mm_add_epi32(v[10], x);
+ v[10] = _mm_add_epi32(v[10], rnding);
+ v[10] = _mm_srai_epi32(v[10], bit);
+
+ v[11] = _mm_mullo_epi32(in[5 * col_num + col], cospi22);
+ x = _mm_mullo_epi32(in[10 * col_num + col], cospi42);
+ v[11] = _mm_sub_epi32(v[11], x);
+ v[11] = _mm_add_epi32(v[11], rnding);
+ v[11] = _mm_srai_epi32(v[11], bit);
+
+ v[12] = _mm_mullo_epi32(in[3 * col_num + col], cospi50);
+ x = _mm_mullo_epi32(in[12 * col_num + col], cospi14);
+ v[12] = _mm_add_epi32(v[12], x);
+ v[12] = _mm_add_epi32(v[12], rnding);
+ v[12] = _mm_srai_epi32(v[12], bit);
+
+ v[13] = _mm_mullo_epi32(in[3 * col_num + col], cospi14);
+ x = _mm_mullo_epi32(in[12 * col_num + col], cospi50);
+ v[13] = _mm_sub_epi32(v[13], x);
+ v[13] = _mm_add_epi32(v[13], rnding);
+ v[13] = _mm_srai_epi32(v[13], bit);
+
+ v[14] = _mm_mullo_epi32(in[1 * col_num + col], cospi58);
+ x = _mm_mullo_epi32(in[14 * col_num + col], cospi6);
+ v[14] = _mm_add_epi32(v[14], x);
+ v[14] = _mm_add_epi32(v[14], rnding);
+ v[14] = _mm_srai_epi32(v[14], bit);
+
+ v[15] = _mm_mullo_epi32(in[1 * col_num + col], cospi6);
+ x = _mm_mullo_epi32(in[14 * col_num + col], cospi58);
+ v[15] = _mm_sub_epi32(v[15], x);
+ v[15] = _mm_add_epi32(v[15], rnding);
+ v[15] = _mm_srai_epi32(v[15], bit);
+
+ // stage 3: u[k] = v[k] + v[k + 8], u[k + 8] = v[k] - v[k + 8] for k = 0..7
+ u[0] = _mm_add_epi32(v[0], v[8]);
+ u[8] = _mm_sub_epi32(v[0], v[8]);
+ u[1] = _mm_add_epi32(v[1], v[9]);
+ u[9] = _mm_sub_epi32(v[1], v[9]);
+ u[2] = _mm_add_epi32(v[2], v[10]);
+ u[10] = _mm_sub_epi32(v[2], v[10]);
+ u[3] = _mm_add_epi32(v[3], v[11]);
+ u[11] = _mm_sub_epi32(v[3], v[11]);
+ u[4] = _mm_add_epi32(v[4], v[12]);
+ u[12] = _mm_sub_epi32(v[4], v[12]);
+ u[5] = _mm_add_epi32(v[5], v[13]);
+ u[13] = _mm_sub_epi32(v[5], v[13]);
+ u[6] = _mm_add_epi32(v[6], v[14]);
+ u[14] = _mm_sub_epi32(v[6], v[14]);
+ u[7] = _mm_add_epi32(v[7], v[15]);
+ u[15] = _mm_sub_epi32(v[7], v[15]);
+
+ // stage 4: 0..7 pass through; weight pairs (8,9), (10,11), (12,13), (14,15)
+ v[0] = u[0];
+ v[1] = u[1];
+ v[2] = u[2];
+ v[3] = u[3];
+ v[4] = u[4];
+ v[5] = u[5];
+ v[6] = u[6];
+ v[7] = u[7];
+
+ v[8] = _mm_mullo_epi32(u[8], cospi8);
+ x = _mm_mullo_epi32(u[9], cospi56);
+ v[8] = _mm_add_epi32(v[8], x);
+ v[8] = _mm_add_epi32(v[8], rnding);
+ v[8] = _mm_srai_epi32(v[8], bit);
+
+ v[9] = _mm_mullo_epi32(u[8], cospi56);
+ x = _mm_mullo_epi32(u[9], cospi8);
+ v[9] = _mm_sub_epi32(v[9], x);
+ v[9] = _mm_add_epi32(v[9], rnding);
+ v[9] = _mm_srai_epi32(v[9], bit);
+
+ v[10] = _mm_mullo_epi32(u[10], cospi40);
+ x = _mm_mullo_epi32(u[11], cospi24);
+ v[10] = _mm_add_epi32(v[10], x);
+ v[10] = _mm_add_epi32(v[10], rnding);
+ v[10] = _mm_srai_epi32(v[10], bit);
+
+ v[11] = _mm_mullo_epi32(u[10], cospi24);
+ x = _mm_mullo_epi32(u[11], cospi40);
+ v[11] = _mm_sub_epi32(v[11], x);
+ v[11] = _mm_add_epi32(v[11], rnding);
+ v[11] = _mm_srai_epi32(v[11], bit);
+
+ v[12] = _mm_mullo_epi32(u[12], cospim56);
+ x = _mm_mullo_epi32(u[13], cospi8);
+ v[12] = _mm_add_epi32(v[12], x);
+ v[12] = _mm_add_epi32(v[12], rnding);
+ v[12] = _mm_srai_epi32(v[12], bit);
+ // v[13] = (cospi[8] * u[12] + cospi[56] * u[13] + rnding) >> bit
+ v[13] = _mm_mullo_epi32(u[12], cospi8);
+ x = _mm_mullo_epi32(u[13], cospim56);
+ v[13] = _mm_sub_epi32(v[13], x);
+ v[13] = _mm_add_epi32(v[13], rnding);
+ v[13] = _mm_srai_epi32(v[13], bit);
+
+ v[14] = _mm_mullo_epi32(u[14], cospim24);
+ x = _mm_mullo_epi32(u[15], cospi40);
+ v[14] = _mm_add_epi32(v[14], x);
+ v[14] = _mm_add_epi32(v[14], rnding);
+ v[14] = _mm_srai_epi32(v[14], bit);
+ // v[15] = (cospi[40] * u[14] + cospi[24] * u[15] + rnding) >> bit
+ v[15] = _mm_mullo_epi32(u[14], cospi40);
+ x = _mm_mullo_epi32(u[15], cospim24);
+ v[15] = _mm_sub_epi32(v[15], x);
+ v[15] = _mm_add_epi32(v[15], rnding);
+ v[15] = _mm_srai_epi32(v[15], bit);
+
+ // stage 5: u[k] = v[k] + v[k + 4], u[k + 4] = v[k] - v[k + 4], k in 0..3, 8..11
+ u[0] = _mm_add_epi32(v[0], v[4]);
+ u[4] = _mm_sub_epi32(v[0], v[4]);
+ u[1] = _mm_add_epi32(v[1], v[5]);
+ u[5] = _mm_sub_epi32(v[1], v[5]);
+ u[2] = _mm_add_epi32(v[2], v[6]);
+ u[6] = _mm_sub_epi32(v[2], v[6]);
+ u[3] = _mm_add_epi32(v[3], v[7]);
+ u[7] = _mm_sub_epi32(v[3], v[7]);
+ u[8] = _mm_add_epi32(v[8], v[12]);
+ u[12] = _mm_sub_epi32(v[8], v[12]);
+ u[9] = _mm_add_epi32(v[9], v[13]);
+ u[13] = _mm_sub_epi32(v[9], v[13]);
+ u[10] = _mm_add_epi32(v[10], v[14]);
+ u[14] = _mm_sub_epi32(v[10], v[14]);
+ u[11] = _mm_add_epi32(v[11], v[15]);
+ u[15] = _mm_sub_epi32(v[11], v[15]);
+
+ // stage 6: weight pairs (4,5), (6,7), (12,13), (14,15) by cospi[16]/cospi[48]
+ v[0] = u[0];
+ v[1] = u[1];
+ v[2] = u[2];
+ v[3] = u[3];
+
+ v[4] = _mm_mullo_epi32(u[4], cospi16);
+ x = _mm_mullo_epi32(u[5], cospi48);
+ v[4] = _mm_add_epi32(v[4], x);
+ v[4] = _mm_add_epi32(v[4], rnding);
+ v[4] = _mm_srai_epi32(v[4], bit);
+
+ v[5] = _mm_mullo_epi32(u[4], cospi48);
+ x = _mm_mullo_epi32(u[5], cospi16);
+ v[5] = _mm_sub_epi32(v[5], x);
+ v[5] = _mm_add_epi32(v[5], rnding);
+ v[5] = _mm_srai_epi32(v[5], bit);
+
+ v[6] = _mm_mullo_epi32(u[6], cospim48);
+ x = _mm_mullo_epi32(u[7], cospi16);
+ v[6] = _mm_add_epi32(v[6], x);
+ v[6] = _mm_add_epi32(v[6], rnding);
+ v[6] = _mm_srai_epi32(v[6], bit);
+ // v[7] = (cospi[16] * u[6] + cospi[48] * u[7] + rnding) >> bit
+ v[7] = _mm_mullo_epi32(u[6], cospi16);
+ x = _mm_mullo_epi32(u[7], cospim48);
+ v[7] = _mm_sub_epi32(v[7], x);
+ v[7] = _mm_add_epi32(v[7], rnding);
+ v[7] = _mm_srai_epi32(v[7], bit);
+
+ v[8] = u[8];
+ v[9] = u[9];
+ v[10] = u[10];
+ v[11] = u[11];
+
+ v[12] = _mm_mullo_epi32(u[12], cospi16);
+ x = _mm_mullo_epi32(u[13], cospi48);
+ v[12] = _mm_add_epi32(v[12], x);
+ v[12] = _mm_add_epi32(v[12], rnding);
+ v[12] = _mm_srai_epi32(v[12], bit);
+
+ v[13] = _mm_mullo_epi32(u[12], cospi48);
+ x = _mm_mullo_epi32(u[13], cospi16);
+ v[13] = _mm_sub_epi32(v[13], x);
+ v[13] = _mm_add_epi32(v[13], rnding);
+ v[13] = _mm_srai_epi32(v[13], bit);
+
+ v[14] = _mm_mullo_epi32(u[14], cospim48);
+ x = _mm_mullo_epi32(u[15], cospi16);
+ v[14] = _mm_add_epi32(v[14], x);
+ v[14] = _mm_add_epi32(v[14], rnding);
+ v[14] = _mm_srai_epi32(v[14], bit);
+ // v[15] = (cospi[16] * u[14] + cospi[48] * u[15] + rnding) >> bit
+ v[15] = _mm_mullo_epi32(u[14], cospi16);
+ x = _mm_mullo_epi32(u[15], cospim48);
+ v[15] = _mm_sub_epi32(v[15], x);
+ v[15] = _mm_add_epi32(v[15], rnding);
+ v[15] = _mm_srai_epi32(v[15], bit);
+
+ // stage 7: u[k] = v[k] + v[k + 2], u[k + 2] = v[k] - v[k + 2] per group of 4
+ u[0] = _mm_add_epi32(v[0], v[2]);
+ u[2] = _mm_sub_epi32(v[0], v[2]);
+ u[1] = _mm_add_epi32(v[1], v[3]);
+ u[3] = _mm_sub_epi32(v[1], v[3]);
+ u[4] = _mm_add_epi32(v[4], v[6]);
+ u[6] = _mm_sub_epi32(v[4], v[6]);
+ u[5] = _mm_add_epi32(v[5], v[7]);
+ u[7] = _mm_sub_epi32(v[5], v[7]);
+ u[8] = _mm_add_epi32(v[8], v[10]);
+ u[10] = _mm_sub_epi32(v[8], v[10]);
+ u[9] = _mm_add_epi32(v[9], v[11]);
+ u[11] = _mm_sub_epi32(v[9], v[11]);
+ u[12] = _mm_add_epi32(v[12], v[14]);
+ u[14] = _mm_sub_epi32(v[12], v[14]);
+ u[13] = _mm_add_epi32(v[13], v[15]);
+ u[15] = _mm_sub_epi32(v[13], v[15]);
+
+ // stage 8: pairs (2,3), (6,7), (10,11), (14,15) -> (y + x, y - x), y/x scaled
+ v[0] = u[0];
+ v[1] = u[1];
+
+ y = _mm_mullo_epi32(u[2], cospi32);
+ x = _mm_mullo_epi32(u[3], cospi32);
+ v[2] = _mm_add_epi32(y, x);
+ v[2] = _mm_add_epi32(v[2], rnding);
+ v[2] = _mm_srai_epi32(v[2], bit);
+
+ v[3] = _mm_sub_epi32(y, x);
+ v[3] = _mm_add_epi32(v[3], rnding);
+ v[3] = _mm_srai_epi32(v[3], bit);
+
+ v[4] = u[4];
+ v[5] = u[5];
+
+ y = _mm_mullo_epi32(u[6], cospi32);
+ x = _mm_mullo_epi32(u[7], cospi32);
+ v[6] = _mm_add_epi32(y, x);
+ v[6] = _mm_add_epi32(v[6], rnding);
+ v[6] = _mm_srai_epi32(v[6], bit);
+
+ v[7] = _mm_sub_epi32(y, x);
+ v[7] = _mm_add_epi32(v[7], rnding);
+ v[7] = _mm_srai_epi32(v[7], bit);
+
+ v[8] = u[8];
+ v[9] = u[9];
+
+ y = _mm_mullo_epi32(u[10], cospi32);
+ x = _mm_mullo_epi32(u[11], cospi32);
+ v[10] = _mm_add_epi32(y, x);
+ v[10] = _mm_add_epi32(v[10], rnding);
+ v[10] = _mm_srai_epi32(v[10], bit);
+
+ v[11] = _mm_sub_epi32(y, x);
+ v[11] = _mm_add_epi32(v[11], rnding);
+ v[11] = _mm_srai_epi32(v[11], bit);
+
+ v[12] = u[12];
+ v[13] = u[13];
+
+ y = _mm_mullo_epi32(u[14], cospi32);
+ x = _mm_mullo_epi32(u[15], cospi32);
+ v[14] = _mm_add_epi32(y, x);
+ v[14] = _mm_add_epi32(v[14], rnding);
+ v[14] = _mm_srai_epi32(v[14], bit);
+
+ v[15] = _mm_sub_epi32(y, x);
+ v[15] = _mm_add_epi32(v[15], rnding);
+ v[15] = _mm_srai_epi32(v[15], bit);
+
+ // stage 9: permuted store; odd output rows are negated (0 - v[...])
+ out[0 * col_num + col] = v[0];
+ out[1 * col_num + col] = _mm_sub_epi32(_mm_set1_epi32(0), v[8]);
+ out[2 * col_num + col] = v[12];
+ out[3 * col_num + col] = _mm_sub_epi32(_mm_set1_epi32(0), v[4]);
+ out[4 * col_num + col] = v[6];
+ out[5 * col_num + col] = _mm_sub_epi32(_mm_set1_epi32(0), v[14]);
+ out[6 * col_num + col] = v[10];
+ out[7 * col_num + col] = _mm_sub_epi32(_mm_set1_epi32(0), v[2]);
+ out[8 * col_num + col] = v[3];
+ out[9 * col_num + col] = _mm_sub_epi32(_mm_set1_epi32(0), v[11]);
+ out[10 * col_num + col] = v[15];
+ out[11 * col_num + col] = _mm_sub_epi32(_mm_set1_epi32(0), v[7]);
+ out[12 * col_num + col] = v[5];
+ out[13 * col_num + col] = _mm_sub_epi32(_mm_set1_epi32(0), v[13]);
+ out[14 * col_num + col] = v[9];
+ out[15 * col_num + col] = _mm_sub_epi32(_mm_set1_epi32(0), v[1]);
+ }
+}
+
+// Rounds a 16x16 block by calling col_txfm_8x8_rounding() with |shift| on
+// the vectors starting at in[0], in[16], in[32] and in[48], i.e. as four
+// 8x8-sized sections rather than as 4 columns.
+static void col_txfm_16x16_rounding(__m128i *in, int shift) {
+  int first;
+  for (first = 0; first < 64; first += 16) {
+    col_txfm_8x8_rounding(in + first, shift);
+  }
+}
+
+// Stores a 16x16 block held in |in| to |output| through four
+// write_buffer_8x8() calls. Call k passes the vectors starting at
+// in[16 * k] and the destination output + 64 * k (64 = 16 * 4).
+static void write_buffer_16x16(const __m128i *in, tran_low_t *output) {
+  int section;
+
+  for (section = 0; section < 4; ++section) {
+    write_buffer_8x8(in + 16 * section, output + 64 * section);
+  }
+}
+
+// 2-D forward transform of a 16x16 block.
+//
+// input:   first sample of the 16x16 block of int16_t samples.
+// coeff:   receives the result through write_buffer_16x16().
+// stride:  offset, in samples, between rows of |input|.
+// tx_type: selects the configuration, the 1-D kernel of each pass and the
+//          flips requested while loading. Unknown values trip assert(0);
+//          where assert() is compiled out the function returns without
+//          writing |coeff|.
+// bd:      unused.
+//
+// Pipeline: load -> first 1-D pass -> rounding by -shift[1] -> transpose ->
+// second 1-D pass -> transpose -> store.
+void vp10_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *coeff,
+                                  int stride, int tx_type, int bd) {
+  __m128i in[64], out[64];
+  const TXFM_2D_CFG *cfg = NULL;
+  int col_is_adst = 0;  // first pass: ADST kernel when set, DCT otherwise
+  int row_is_adst = 0;  // second pass: ADST kernel when set, DCT otherwise
+  int flipud = 0;
+  int fliplr = 0;
+
+  (void)bd;
+
+  // Choose the configuration, the kernel of each pass and the load flips.
+  switch (tx_type) {
+    case DCT_DCT:
+      cfg = &fwd_txfm_2d_cfg_dct_dct_16;
+      break;
+    case ADST_DCT:
+      cfg = &fwd_txfm_2d_cfg_adst_dct_16;
+      col_is_adst = 1;
+      break;
+    case DCT_ADST:
+      cfg = &fwd_txfm_2d_cfg_dct_adst_16;
+      row_is_adst = 1;
+      break;
+    case ADST_ADST:
+      cfg = &fwd_txfm_2d_cfg_adst_adst_16;
+      col_is_adst = 1;
+      row_is_adst = 1;
+      break;
+#if CONFIG_EXT_TX
+    // Flipped variants reuse the ADST configurations and kernels; the flip
+    // itself is requested from load_buffer_16x16().
+    case FLIPADST_DCT:
+      cfg = &fwd_txfm_2d_cfg_adst_dct_16;
+      col_is_adst = 1;
+      flipud = 1;
+      break;
+    case DCT_FLIPADST:
+      cfg = &fwd_txfm_2d_cfg_dct_adst_16;
+      row_is_adst = 1;
+      fliplr = 1;
+      break;
+    case FLIPADST_FLIPADST:
+      cfg = &fwd_txfm_2d_cfg_adst_adst_16;
+      col_is_adst = 1;
+      row_is_adst = 1;
+      flipud = 1;
+      fliplr = 1;
+      break;
+    case ADST_FLIPADST:
+      cfg = &fwd_txfm_2d_cfg_adst_adst_16;
+      col_is_adst = 1;
+      row_is_adst = 1;
+      fliplr = 1;
+      break;
+    case FLIPADST_ADST:
+      cfg = &fwd_txfm_2d_cfg_adst_adst_16;
+      col_is_adst = 1;
+      row_is_adst = 1;
+      flipud = 1;
+      break;
+#endif  // CONFIG_EXT_TX
+    default:
+      // Unsupported type: nothing is transformed.
+      assert(0);
+      return;
+  }
+
+  load_buffer_16x16(input, in, stride, flipud, fliplr, cfg->shift[0]);
+
+  // First pass (cos_bit_col), followed by rounding and a transpose.
+  if (col_is_adst) {
+    fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
+  } else {
+    fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]);
+  }
+  col_txfm_16x16_rounding(out, -cfg->shift[1]);
+  transpose_16x16(out, in);
+
+  // Second pass (cos_bit_row), transposed back before the store.
+  if (row_is_adst) {
+    fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+  } else {
+    fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]);
+  }
+  transpose_16x16(out, in);
+
+  write_buffer_16x16(in, coeff);
+}
diff --git a/av1/encoder/x86/quantize_sse2.c b/av1/encoder/x86/quantize_sse2.c
new file mode 100644
index 0000000..b8cd0c7
--- /dev/null
+++ b/av1/encoder/x86/quantize_sse2.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h>
+#include <xmmintrin.h>
+
+#include "./vp10_rtcd.h"
+#include "aom/vpx_integer.h"
+// SSE2 "fp" quantizer: 16 int16 coefficients per step, aligned loads/stores; zbin_ptr, quant_shift_ptr, scan_ptr are ignored.
+void vp10_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr,
+ int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan_ptr,
+ const int16_t *iscan_ptr) {
+ __m128i zero;
+ __m128i thr;  // dequant >> 1; a 16-coefficient group with no |coeff| above it is zeroed outright
+ int16_t nzflag;  // non-zero when any lane of the current group exceeds thr
+ (void)scan_ptr;
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+
+ coeff_ptr += n_coeffs;
+ iscan_ptr += n_coeffs;
+ qcoeff_ptr += n_coeffs;
+ dqcoeff_ptr += n_coeffs;
+ n_coeffs = -n_coeffs;  // pointers now sit one past the end; the index counts up from -N to 0
+ zero = _mm_setzero_si128();
+
+ if (!skip_block) {
+ __m128i eob;
+ __m128i round, quant, dequant;
+ {
+ __m128i coeff0, coeff1;
+
+ // Setup global values: eight 16-bit lanes of each table (aligned loads)
+ {
+ round = _mm_load_si128((const __m128i *)round_ptr);
+ quant = _mm_load_si128((const __m128i *)quant_ptr);
+ dequant = _mm_load_si128((const __m128i *)dequant_ptr);
+ }
+
+ {
+ __m128i coeff0_sign, coeff1_sign;
+ __m128i qcoeff0, qcoeff1;
+ __m128i qtmp0, qtmp1;
+ // Do DC and first 15 AC
+ coeff0 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs));
+ coeff1 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs) + 1);
+
+ // Poor man's sign extract: sign is 0 or -1 per lane, (x ^ sign) - sign == |x|
+ coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+ round = _mm_unpackhi_epi64(round, round);  // upper four lanes replace the lower four from here on
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);  // high 16 bits of the product, i.e. (x * quant) >> 16
+ quant = _mm_unpackhi_epi64(quant, quant);
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);  // coeff0/coeff1 are reused for the dequantized values
+ dequant = _mm_unpackhi_epi64(dequant, dequant);
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ }
+
+ {
+ // Scan for eob (tests the dequantized values left in coeff0/coeff1)
+ __m128i zero_coeff0, zero_coeff1;
+ __m128i nzero_coeff0, nzero_coeff1;
+ __m128i iscan0, iscan1;
+ __m128i eob1;
+ zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+ nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ // Add one to convert from indices to counts (subtracting the -1 mask adds 1)
+ iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+ iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+ eob = _mm_and_si128(iscan0, nzero_coeff0);
+ eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+ eob = _mm_max_epi16(eob, eob1);
+ }
+ n_coeffs += 8 * 2;
+ }
+
+ thr = _mm_srai_epi16(dequant, 1);  // dequant already holds its upper four lanes in both halves
+
+ // AC only loop
+ while (n_coeffs < 0) {
+ __m128i coeff0, coeff1;
+ {
+ __m128i coeff0_sign, coeff1_sign;
+ __m128i qcoeff0, qcoeff1;
+ __m128i qtmp0, qtmp1;
+
+ coeff0 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs));
+ coeff1 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs) + 1);
+
+ // Poor man's sign extract
+ coeff0_sign = _mm_srai_epi16(coeff0, 15);
+ coeff1_sign = _mm_srai_epi16(coeff1, 15);
+ qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
+ _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
+
+ if (nzflag) {
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ } else {
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
+
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
+ }
+ }
+
+ if (nzflag) {
+ // Scan for eob (skipped for groups that were zeroed above)
+ __m128i zero_coeff0, zero_coeff1;
+ __m128i nzero_coeff0, nzero_coeff1;
+ __m128i iscan0, iscan1;
+ __m128i eob0, eob1;
+ zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
+ zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
+ nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
+ nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ // Add one to convert from indices to counts
+ iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
+ iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
+ eob0 = _mm_and_si128(iscan0, nzero_coeff0);
+ eob1 = _mm_and_si128(iscan1, nzero_coeff1);
+ eob0 = _mm_max_epi16(eob0, eob1);
+ eob = _mm_max_epi16(eob, eob0);
+ }
+ n_coeffs += 8 * 2;
+ }
+
+ // Accumulate EOB: horizontal max over the eight 16-bit lanes
+ {
+ __m128i eob_shuffled;
+ eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ *eob_ptr = _mm_extract_epi16(eob, 1);  // lanes 0 and 1 both hold the maximum at this point
+ }
+ } else {
+ do {  // skip_block: clear both output arrays and report an empty block
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
+ n_coeffs += 8 * 2;
+ } while (n_coeffs < 0);
+ *eob_ptr = 0;
+ }
+}
diff --git a/av1/encoder/x86/quantize_ssse3_x86_64.asm b/av1/encoder/x86/quantize_ssse3_x86_64.asm
new file mode 100644
index 0000000..b8fefa2
--- /dev/null
+++ b/av1/encoder/x86/quantize_ssse3_x86_64.asm
@@ -0,0 +1,201 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%define private_prefix vp10
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION_RODATA
+pw_1: times 8 dw 1
+
+SECTION .text
+; QUANTIZE_FP <name>, <gprs>: emits quantize_<name>; name "fp_32x32" selects the halved-round / doubled-quant variant.
+%macro QUANTIZE_FP 2
+cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
+ shift, qcoeff, dqcoeff, dequant, \
+ eob, scan, iscan
+ cmp dword skipm, 0
+ jne .blank
+
+ ; actual quantize loop - setup pointers, rounders, etc.
+ movifnidn coeffq, coeffmp
+ movifnidn ncoeffq, ncoeffmp
+ mov r2, dequantmp
+ movifnidn zbinq, zbinmp
+ movifnidn roundq, roundmp
+ movifnidn quantq, quantmp
+ mova m1, [roundq] ; m1 = round
+ mova m2, [quantq] ; m2 = quant
+%ifidn %1, fp_32x32
+ pcmpeqw m5, m5
+ psrlw m5, 15 ; m5 = 1 in every word
+ paddw m1, m5
+ psrlw m1, 1 ; m1 = (m1 + 1) / 2
+%endif
+ mova m3, [r2q] ; m3 = dequant
+ mov r3, qcoeffmp
+ mov r4, dqcoeffmp
+ mov r5, iscanmp
+%ifidn %1, fp_32x32
+ psllw m2, 1 ; m2 = quant * 2
+%endif
+ pxor m5, m5 ; m5 = dedicated zero
+
+ lea coeffq, [ coeffq+ncoeffq*2]
+ lea r5q, [ r5q+ncoeffq*2]
+ lea r3q, [ r3q+ncoeffq*2]
+ lea r4q, [r4q+ncoeffq*2]
+ neg ncoeffq ; index now counts up from -n to 0
+
+ ; get DC and first 15 AC coeffs
+ mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+ pcmpeqw m7, m7 ; m7 = -1 in every word
+
+ paddsw m6, m1 ; m6 += round
+ punpckhqdq m1, m1 ; upper four round lanes used from here on
+ paddsw m11, m1 ; m11 += round
+ pmulhw m8, m6, m2 ; m8 = m6*q>>16
+ punpckhqdq m2, m2 ; upper four quant lanes used from here on
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ psignw m8, m9 ; m8 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ mova [r3q+ncoeffq*2+ 0], m8
+ mova [r3q+ncoeffq*2+16], m13
+%ifidn %1, fp_32x32
+ pabsw m8, m8
+ pabsw m13, m13
+%endif
+ pmullw m8, m3 ; dqcoeff[i] = qcoeff[i] * dequant
+ punpckhqdq m3, m3 ; upper four dequant lanes used from here on
+ pmullw m13, m3 ; dqcoeff[i] = qcoeff[i] * dequant
+%ifidn %1, fp_32x32
+ psrlw m8, 1
+ psrlw m13, 1
+ psignw m8, m9
+ psignw m13, m10
+ psrlw m0, m3, 2 ; m0 = skip threshold (dequant >> 2)
+%else
+ psrlw m0, m3, 1 ; m0 = skip threshold (dequant >> 1)
+%endif
+ mova [r4q+ncoeffq*2+ 0], m8
+ mova [r4q+ncoeffq*2+16], m13
+ pcmpeqw m8, m5 ; m8 = dqcoeff[i] == 0
+ pcmpeqw m13, m5 ; m13 = dqcoeff[i] == 0
+ mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = iscan[i]
+ mova m11, [ r5q+ncoeffq*2+16] ; m11 = iscan[i]
+ psubw m6, m7 ; m6 = iscan[i] + 1
+ psubw m11, m7 ; m11 = iscan[i] + 1
+ pandn m8, m6 ; m8 = max(eob)
+ pandn m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m13
+ add ncoeffq, mmsize
+ jz .accumulate_eob
+
+.ac_only_loop:
+ mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+
+ pcmpgtw m7, m6, m0
+ pcmpgtw m12, m11, m0
+ pmovmskb r6d, m7
+ pmovmskb r2d, m12
+
+ or r6, r2
+ jz .skip_iter ; nothing above the threshold: write zeros
+
+ pcmpeqw m7, m7
+
+ paddsw m6, m1 ; m6 += round
+ paddsw m11, m1 ; m11 += round
+ pmulhw m14, m6, m2 ; m14 = m6*q>>16
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ psignw m14, m9 ; m14 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ mova [r3q+ncoeffq*2+ 0], m14
+ mova [r3q+ncoeffq*2+16], m13
+%ifidn %1, fp_32x32
+ pabsw m14, m14
+ pabsw m13, m13
+%endif
+ pmullw m14, m3 ; dqcoeff[i] = qcoeff[i] * dequant
+ pmullw m13, m3 ; dqcoeff[i] = qcoeff[i] * dequant
+%ifidn %1, fp_32x32
+ psrlw m14, 1
+ psrlw m13, 1
+ psignw m14, m9
+ psignw m13, m10
+%endif
+ mova [r4q+ncoeffq*2+ 0], m14
+ mova [r4q+ncoeffq*2+16], m13
+ pcmpeqw m14, m5 ; m14 = dqcoeff[i] == 0
+ pcmpeqw m13, m5 ; m13 = dqcoeff[i] == 0
+ mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = iscan[i]
+ mova m11, [ r5q+ncoeffq*2+16] ; m11 = iscan[i]
+ psubw m6, m7 ; m6 = iscan[i] + 1
+ psubw m11, m7 ; m11 = iscan[i] + 1
+ pandn m14, m6 ; m14 = max(eob)
+ pandn m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m14
+ pmaxsw m8, m13
+ add ncoeffq, mmsize
+ jl .ac_only_loop
+
+ jmp .accumulate_eob
+.skip_iter:
+ mova [r3q+ncoeffq*2+ 0], m5
+ mova [r3q+ncoeffq*2+16], m5
+ mova [r4q+ncoeffq*2+ 0], m5
+ mova [r4q+ncoeffq*2+16], m5
+ add ncoeffq, mmsize
+ jl .ac_only_loop
+
+.accumulate_eob:
+ ; horizontally accumulate/max eobs and write into [eob] memory pointer
+ mov r2, eobmp
+ pshufd m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0x1
+ pmaxsw m8, m7
+ pextrw r6, m8, 0
+ mov [r2], r6w ; eob points at a 16-bit value: store one word only, as .blank does
+ RET
+
+ ; skip-block, i.e. just write all zeroes
+.blank:
+ mov r0, dqcoeffmp
+ movifnidn ncoeffq, ncoeffmp
+ mov r2, qcoeffmp
+ mov r3, eobmp
+
+ lea r0q, [r0q+ncoeffq*2]
+ lea r2q, [r2q+ncoeffq*2]
+ neg ncoeffq
+ pxor m7, m7
+.blank_loop:
+ mova [r0q+ncoeffq*2+ 0], m7
+ mova [r0q+ncoeffq*2+16], m7
+ mova [r2q+ncoeffq*2+ 0], m7
+ mova [r2q+ncoeffq*2+16], m7
+ add ncoeffq, mmsize
+ jl .blank_loop
+ mov word [r3q], 0
+ RET
+%endmacro
+
+INIT_XMM ssse3 ; NOTE(review): presumably x86inc then suffixes the symbols with _ssse3 - confirm
+QUANTIZE_FP fp, 7 ; quantize_fp, 7 general-purpose registers
+QUANTIZE_FP fp_32x32, 7 ; quantize_fp_32x32, takes the %ifidn fp_32x32 branches of the macro
diff --git a/av1/encoder/x86/ssim_opt_x86_64.asm b/av1/encoder/x86/ssim_opt_x86_64.asm
new file mode 100644
index 0000000..29659ee
--- /dev/null
+++ b/av1/encoder/x86/ssim_opt_x86_64.asm
@@ -0,0 +1,216 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "aom_ports/x86_abi_support.asm"
+
+; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr; in: xmm3 = s words, xmm4 = r words; clobbers xmm1-xmm3
+%macro TABULATE_SSIM 0
+ paddusw xmm15, xmm3 ; sum_s
+ paddusw xmm14, xmm4 ; sum_r
+ movdqa xmm1, xmm3
+ pmaddwd xmm1, xmm1 ; s*s, adjacent pairs added into dwords
+ paddd xmm13, xmm1 ; sum_sq_s
+ movdqa xmm2, xmm4
+ pmaddwd xmm2, xmm2 ; r*r, adjacent pairs added into dwords
+ paddd xmm12, xmm2 ; sum_sq_r
+ pmaddwd xmm3, xmm4 ; s*r, adjacent pairs added into dwords
+ paddd xmm11, xmm3 ; sum_sxr
+%endmacro
+
+; Sum the four dwords of register %1 into its low qword (each dword is zero-extended first); needs xmm0 = 0, clobbers xmm2
+%macro SUM_ACROSS_Q 1
+ movdqa xmm2,%1
+ punpckldq %1,xmm0 ; dwords 0,1 -> two qwords
+ punpckhdq xmm2,xmm0 ; dwords 2,3 -> two qwords
+ paddq %1,xmm2
+ movdqa xmm2,%1
+ punpcklqdq %1,xmm0 ; low qword, upper half cleared
+ punpckhqdq xmm2,xmm0 ; high qword moved down
+ paddq %1,xmm2
+%endmacro
+
+; Sum the eight words of register %1 into its low qword (each word is zero-extended first); needs xmm0 = 0, clobbers xmm1, xmm2
+%macro SUM_ACROSS_W 1
+ movdqa xmm1, %1
+ punpcklwd %1,xmm0 ; words 0-3 -> dwords
+ punpckhwd xmm1,xmm0 ; words 4-7 -> dwords
+ paddd %1, xmm1
+ SUM_ACROSS_Q %1
+%endmacro
+;void vp10_ssim_parms_16x16_sse2(
+; unsigned char *s,
+; int sp,
+; unsigned char *r,
+; int rp,
+; unsigned long *sum_s,
+; unsigned long *sum_r,
+; unsigned long *sum_sq_s,
+; unsigned long *sum_sq_r,
+; unsigned long *sum_sxr);
+;
+; TODO: Pass the parameters through a structure; the pxors are probably not
+; needed (the calling app will initialize to 0). Everything could easily fit
+; in sse2 without too much hassle, and psadbw or pavgb can probably give
+; better estimates. At this point this is just meant to be a first pass for
+; calculating all the parms needed for 16x16 ssim, so we can play with dssim
+; as distortion in mode selection code.
+global sym(vp10_ssim_parms_16x16_sse2) PRIVATE
+sym(vp10_ssim_parms_16x16_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 9
+ SAVE_XMM 15
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;s
+ mov rcx, arg(1) ;sp
+ mov rdi, arg(2) ;r
+ mov rax, arg(3) ;rp
+
+ pxor xmm0, xmm0 ;zero, used for byte->word unpacking and by SUM_ACROSS_*
+ pxor xmm15,xmm15 ;sum_s
+ pxor xmm14,xmm14 ;sum_r
+ pxor xmm13,xmm13 ;sum_sq_s
+ pxor xmm12,xmm12 ;sum_sq_r
+ pxor xmm11,xmm11 ;sum_sxr
+
+ mov rdx, 16 ;row counter
+.NextRow:
+
+ ;grab source and reference pixels (16 bytes per row, unaligned loads)
+ movdqu xmm5, [rsi]
+ movdqu xmm6, [rdi]
+ movdqa xmm3, xmm5
+ movdqa xmm4, xmm6
+ punpckhbw xmm3, xmm0 ; high_s
+ punpckhbw xmm4, xmm0 ; high_r
+
+ TABULATE_SSIM
+
+ movdqa xmm3, xmm5
+ movdqa xmm4, xmm6
+ punpcklbw xmm3, xmm0 ; low_s
+ punpcklbw xmm4, xmm0 ; low_r
+
+ TABULATE_SSIM
+
+ add rsi, rcx ; next s row
+ add rdi, rax ; next r row
+
+ dec rdx ; counter
+ jnz .NextRow
+
+ SUM_ACROSS_W xmm15
+ SUM_ACROSS_W xmm14
+ SUM_ACROSS_Q xmm13
+ SUM_ACROSS_Q xmm12
+ SUM_ACROSS_Q xmm11
+
+ mov rdi,arg(4)
+ movd [rdi], xmm15; sum_s (movd stores the low 32 bits only)
+ mov rdi,arg(5)
+ movd [rdi], xmm14; sum_r
+ mov rdi,arg(6)
+ movd [rdi], xmm13; sum_sq_s
+ mov rdi,arg(7)
+ movd [rdi], xmm12; sum_sq_r
+ mov rdi,arg(8)
+ movd [rdi], xmm11; sum_sxr
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+;void vp10_ssim_parms_8x8_sse2(
+; unsigned char *s,
+; int sp,
+; unsigned char *r,
+; int rp,
+; unsigned long *sum_s,
+; unsigned long *sum_r,
+; unsigned long *sum_sq_s,
+; unsigned long *sum_sq_r,
+; unsigned long *sum_sxr);
+;
+; TODO: Pass the parameters through a structure; the pxors are probably not
+; needed (the calling app will initialize to 0). Everything could easily fit
+; in sse2 without too much hassle, and psadbw or pavgb can probably give
+; better estimates. At this point this is just meant to be a first pass for
+; calculating all the parms needed for 8x8 ssim, so we can play with dssim
+; as distortion in mode selection code.
+global sym(vp10_ssim_parms_8x8_sse2) PRIVATE
+sym(vp10_ssim_parms_8x8_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 9
+ SAVE_XMM 15
+ push rsi
+ push rdi
+ ; end prolog
+
+ mov rsi, arg(0) ;s
+ mov rcx, arg(1) ;sp
+ mov rdi, arg(2) ;r
+ mov rax, arg(3) ;rp
+
+ pxor xmm0, xmm0 ;zero, used for byte->word unpacking and by SUM_ACROSS_*
+ pxor xmm15,xmm15 ;sum_s
+ pxor xmm14,xmm14 ;sum_r
+ pxor xmm13,xmm13 ;sum_sq_s
+ pxor xmm12,xmm12 ;sum_sq_r
+ pxor xmm11,xmm11 ;sum_sxr
+
+ mov rdx, 8 ;row counter
+.NextRow:
+
+ ;grab source and reference pixels (8 bytes per row)
+ movq xmm3, [rsi]
+ movq xmm4, [rdi]
+ punpcklbw xmm3, xmm0 ; low_s
+ punpcklbw xmm4, xmm0 ; low_r
+
+ TABULATE_SSIM
+
+ add rsi, rcx ; next s row
+ add rdi, rax ; next r row
+
+ dec rdx ; counter
+ jnz .NextRow
+
+ SUM_ACROSS_W xmm15
+ SUM_ACROSS_W xmm14
+ SUM_ACROSS_Q xmm13
+ SUM_ACROSS_Q xmm12
+ SUM_ACROSS_Q xmm11
+
+ mov rdi,arg(4)
+ movd [rdi], xmm15; sum_s (movd stores the low 32 bits only)
+ mov rdi,arg(5)
+ movd [rdi], xmm14; sum_r
+ mov rdi,arg(6)
+ movd [rdi], xmm13; sum_sq_s
+ mov rdi,arg(7)
+ movd [rdi], xmm12; sum_sq_r
+ mov rdi,arg(8)
+ movd [rdi], xmm11; sum_sxr
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
diff --git a/av1/encoder/x86/temporal_filter_apply_sse2.asm b/av1/encoder/x86/temporal_filter_apply_sse2.asm
new file mode 100644
index 0000000..eabe575
--- /dev/null
+++ b/av1/encoder/x86/temporal_filter_apply_sse2.asm
@@ -0,0 +1,212 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "aom_ports/x86_abi_support.asm"
+
+; void vp10_temporal_filter_apply_sse2 | arg
+; (unsigned char *frame1, | 0
+; unsigned int stride, | 1
+; unsigned char *frame2, | 2
+; unsigned int block_width, | 3
+; unsigned int block_height, | 4
+; int strength, | 5
+; int filter_weight, | 6
+; unsigned int *accumulator, | 7
+; unsigned short *count) | 8
+global sym(vp10_temporal_filter_apply_sse2) PRIVATE
+sym(vp10_temporal_filter_apply_sse2):
+
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 9
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ALIGN_STACK 16, rax
+ %define block_width 0
+ %define block_height 16
+ %define strength 32
+ %define filter_weight 48
+ %define rounding_bit 64
+ %define rbp_backup 80
+ %define stack_size 96
+ sub rsp, stack_size
+ mov [rsp + rbp_backup], rbp ; rbp is reused for the stride below
+ ; end prolog
+
+ mov edx, arg(3)
+ mov [rsp + block_width], rdx
+ mov edx, arg(4)
+ mov [rsp + block_height], rdx
+ movd xmm6, arg(5)
+ movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read
+
+ ; calculate the rounding bit outside the loop
+ ; 0x8000 >> (16 - strength)
+ mov rdx, 16
+ sub rdx, arg(5) ; 16 - strength
+ movq xmm4, rdx ; can't use rdx w/ shift
+ movdqa xmm5, [GLOBAL(_const_top_bit)]
+ psrlw xmm5, xmm4
+ movdqa [rsp + rounding_bit], xmm5
+
+ mov rsi, arg(0) ; src/frame1
+ mov rdx, arg(2) ; predictor frame
+ mov rdi, arg(7) ; accumulator
+ mov rax, arg(8) ; count
+
+ ; dup the filter weight and store for later
+ movd xmm0, arg(6) ; filter_weight
+ pshuflw xmm0, xmm0, 0
+ punpcklwd xmm0, xmm0 ; filter_weight now in all eight words
+ movdqa [rsp + filter_weight], xmm0
+
+ mov rbp, arg(1) ; stride
+ pxor xmm7, xmm7 ; zero for extraction
+
+ mov rcx, [rsp + block_width]
+ imul rcx, [rsp + block_height]
+ add rcx, rdx ; rcx = end of predictor (pred + width * height)
+ cmp dword ptr [rsp + block_width], 8
+ jne .temporal_filter_apply_load_16
+
+.temporal_filter_apply_load_8:
+ movq xmm0, [rsi] ; first row
+ lea rsi, [rsi + rbp] ; += stride
+ punpcklbw xmm0, xmm7 ; first row, 8 pixels as words
+ movq xmm1, [rsi] ; second row
+ lea rsi, [rsi + rbp] ; += stride
+ punpcklbw xmm1, xmm7 ; second row, 8 pixels as words
+ jmp .temporal_filter_apply_load_finished
+
+.temporal_filter_apply_load_16:
+ movdqa xmm0, [rsi] ; src (frame1)
+ lea rsi, [rsi + rbp] ; += stride
+ movdqa xmm1, xmm0
+ punpcklbw xmm0, xmm7 ; src[ 0- 7]
+ punpckhbw xmm1, xmm7 ; src[ 8-15]
+
+.temporal_filter_apply_load_finished:
+ movdqa xmm2, [rdx] ; predictor (frame2)
+ movdqa xmm3, xmm2
+ punpcklbw xmm2, xmm7 ; pred[ 0- 7]
+ punpckhbw xmm3, xmm7 ; pred[ 8-15]
+
+ ; modifier = src_byte - pixel_value
+ psubw xmm0, xmm2 ; src - pred[ 0- 7]
+ psubw xmm1, xmm3 ; src - pred[ 8-15]
+
+ ; modifier *= modifier
+ pmullw xmm0, xmm0 ; modifier[ 0- 7]^2
+ pmullw xmm1, xmm1 ; modifier[ 8-15]^2
+
+ ; modifier *= 3
+ pmullw xmm0, [GLOBAL(_const_3w)]
+ pmullw xmm1, [GLOBAL(_const_3w)]
+
+ ; modifier += 0x8000 >> (16 - strength)
+ paddw xmm0, [rsp + rounding_bit]
+ paddw xmm1, [rsp + rounding_bit]
+
+ ; modifier >>= strength
+ psrlw xmm0, [rsp + strength]
+ psrlw xmm1, [rsp + strength]
+
+ ; modifier = 16 - modifier
+ ; saturation takes care of modifier > 16
+ movdqa xmm3, [GLOBAL(_const_16w)]
+ movdqa xmm2, [GLOBAL(_const_16w)]
+ psubusw xmm3, xmm1
+ psubusw xmm2, xmm0
+
+ ; modifier *= filter_weight
+ pmullw xmm2, [rsp + filter_weight]
+ pmullw xmm3, [rsp + filter_weight]
+
+ ; count
+ movdqa xmm4, [rax]
+ movdqa xmm5, [rax+16]
+ ; += modifier
+ paddw xmm4, xmm2
+ paddw xmm5, xmm3
+ ; write back
+ movdqa [rax], xmm4
+ movdqa [rax+16], xmm5
+ lea rax, [rax + 16*2] ; count += 16*(sizeof(short))
+
+ ; load and extract the predictor up to shorts
+ pxor xmm7, xmm7
+ movdqa xmm0, [rdx]
+ lea rdx, [rdx + 16*1] ; pred += 16*(sizeof(char))
+ movdqa xmm1, xmm0
+ punpcklbw xmm0, xmm7 ; pred[ 0- 7]
+ punpckhbw xmm1, xmm7 ; pred[ 8-15]
+
+ ; modifier *= pixel_value
+ pmullw xmm0, xmm2
+ pmullw xmm1, xmm3
+
+ ; expand to double words
+ movdqa xmm2, xmm0
+ punpcklwd xmm0, xmm7 ; [ 0- 3]
+ punpckhwd xmm2, xmm7 ; [ 4- 7]
+ movdqa xmm3, xmm1
+ punpcklwd xmm1, xmm7 ; [ 8-11]
+ punpckhwd xmm3, xmm7 ; [12-15]
+
+ ; accumulator
+ movdqa xmm4, [rdi]
+ movdqa xmm5, [rdi+16]
+ movdqa xmm6, [rdi+32]
+ movdqa xmm7, [rdi+48] ; xmm7 stops being zero here; it is cleared again before the next pass
+ ; += modifier
+ paddd xmm4, xmm0
+ paddd xmm5, xmm2
+ paddd xmm6, xmm1
+ paddd xmm7, xmm3
+ ; write back
+ movdqa [rdi], xmm4
+ movdqa [rdi+16], xmm5
+ movdqa [rdi+32], xmm6
+ movdqa [rdi+48], xmm7
+ lea rdi, [rdi + 16*4] ; accumulator += 16*(sizeof(int))
+
+ cmp rdx, rcx ; reached the end of the predictor block?
+ je .temporal_filter_apply_epilog
+ pxor xmm7, xmm7 ; zero for extraction
+ cmp dword ptr [rsp + block_width], 16
+ je .temporal_filter_apply_load_16
+ jmp .temporal_filter_apply_load_8
+
+.temporal_filter_apply_epilog:
+ ; begin epilog
+ mov rbp, [rsp + rbp_backup]
+ add rsp, stack_size
+ pop rsp
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+align 16
+_const_3w: ; multiplier applied to the squared difference
+ times 8 dw 3
+align 16
+_const_top_bit: ; shifted right by (16 - strength) to form the rounding term
+ times 8 dw 1<<15
+align 16
+_const_16w: ; modifier = 16 - x (saturating)
+ times 8 dw 16
diff --git a/av1/encoder/x86/vp10_highbd_quantize_sse4.c b/av1/encoder/x86/vp10_highbd_quantize_sse4.c
new file mode 100644
index 0000000..8b05c6a
--- /dev/null
+++ b/av1/encoder/x86/vp10_highbd_quantize_sse4.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <smmintrin.h>
+#include <stdint.h>
+
+#include "./vp10_rtcd.h"
+#include "aom_dsp/vpx_dsp_common.h"
+
+// Coefficient quantization phase 1: sign/abs/round all four lanes, then quantize lanes 0-1 (lanes 2-3 are left for phase 2)
+// param[0-2] : rounding/quan/dequan constants
+static INLINE void quantize_coeff_phase1(__m128i *coeff, const __m128i *param,
+ const int shift, const int scale,
+ __m128i *qcoeff, __m128i *dquan,
+ __m128i *sign) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i one = _mm_set1_epi32(1);
+
+ *sign = _mm_cmplt_epi32(*coeff, zero);
+ *sign = _mm_or_si128(*sign, one);  // -1 for negative lanes, +1 otherwise
+ *coeff = _mm_abs_epi32(*coeff);  // caller's vector is overwritten with |coeff|
+
+ qcoeff[0] = _mm_add_epi32(*coeff, param[0]);
+ qcoeff[1] = _mm_unpackhi_epi32(qcoeff[0], zero);  // lanes 2,3 widened to 64 bits
+ qcoeff[0] = _mm_unpacklo_epi32(qcoeff[0], zero);  // lanes 0,1 widened to 64 bits
+
+ qcoeff[0] = _mm_mul_epi32(qcoeff[0], param[1]);  // 32x32 -> 64-bit products
+ qcoeff[0] = _mm_srli_epi64(qcoeff[0], shift);
+ dquan[0] = _mm_mul_epi32(qcoeff[0], param[2]);
+ dquan[0] = _mm_srli_epi64(dquan[0], scale);
+}
+
+// Coefficient quantization phase 2: quantize lanes 2-3, merge with phase 1's lanes 0-1, restore signs, store 4 values
+static INLINE void quantize_coeff_phase2(__m128i *qcoeff, __m128i *dquan,
+ const __m128i *sign,
+ const __m128i *param, const int shift,
+ const int scale, tran_low_t *qAddr,
+ tran_low_t *dqAddr) {
+ __m128i mask0L = _mm_set_epi32(-1, -1, 0, 0);  // keeps the two high dwords
+ __m128i mask0H = _mm_set_epi32(0, 0, -1, -1);  // keeps the two low dwords
+
+ qcoeff[1] = _mm_mul_epi32(qcoeff[1], param[1]);
+ qcoeff[1] = _mm_srli_epi64(qcoeff[1], shift);
+ dquan[1] = _mm_mul_epi32(qcoeff[1], param[2]);
+ dquan[1] = _mm_srli_epi64(dquan[1], scale);
+
+ // combine L&H: 0xd8 packs the even dwords into positions 0-1, 0x8d into positions 2-3
+ qcoeff[0] = _mm_shuffle_epi32(qcoeff[0], 0xd8);
+ qcoeff[1] = _mm_shuffle_epi32(qcoeff[1], 0x8d);
+
+ qcoeff[0] = _mm_and_si128(qcoeff[0], mask0H);
+ qcoeff[1] = _mm_and_si128(qcoeff[1], mask0L);
+
+ dquan[0] = _mm_shuffle_epi32(dquan[0], 0xd8);
+ dquan[1] = _mm_shuffle_epi32(dquan[1], 0x8d);
+
+ dquan[0] = _mm_and_si128(dquan[0], mask0H);
+ dquan[1] = _mm_and_si128(dquan[1], mask0L);
+
+ qcoeff[0] = _mm_or_si128(qcoeff[0], qcoeff[1]);
+ dquan[0] = _mm_or_si128(dquan[0], dquan[1]);
+
+ qcoeff[0] = _mm_sign_epi32(qcoeff[0], *sign);  // *sign holds +1/-1 per lane
+ dquan[0] = _mm_sign_epi32(dquan[0], *sign);
+
+ _mm_storeu_si128((__m128i *)qAddr, qcoeff[0]);
+ _mm_storeu_si128((__m128i *)dqAddr, dquan[0]);
+}
+
+static INLINE void find_eob(tran_low_t *qcoeff_ptr, const int16_t *iscan,
+ __m128i *eob) {
+ // Per-lane mask: all ones where the 32-bit quantized value is non-zero.
+ const __m128i zeros = _mm_setzero_si128();
+ const __m128i lo4 = _mm_loadu_si128((__m128i const *)qcoeff_ptr);
+ const __m128i hi4 = _mm_loadu_si128((__m128i const *)(qcoeff_ptr + 4));
+ const __m128i lo4_nz = _mm_cmpeq_epi32(_mm_cmpeq_epi32(lo4, zeros), zeros);
+ const __m128i hi4_nz = _mm_cmpeq_epi32(_mm_cmpeq_epi32(hi4, zeros), zeros);
+
+ // Narrow to eight 16-bit lanes; 0 and -1 pass through the saturating pack.
+ const __m128i nz_mask = _mm_packs_epi32(lo4_nz, hi4_nz);
+ // iscan - (-1) == iscan + 1 turns a scan index into a count; lanes holding
+ // a zero coefficient are cleared so they cannot raise the running maximum.
+ const __m128i scan_idx = _mm_loadu_si128((__m128i const *)iscan);
+ const __m128i candidates =
+ _mm_and_si128(_mm_sub_epi16(scan_idx, nz_mask), nz_mask);
+ *eob = _mm_max_epi16(*eob, candidates);
+}
+
+static INLINE uint16_t get_accumulated_eob(__m128i *eob) {
+ // Horizontal max over the eight 16-bit lanes; *eob is updated in place.
+ __m128i acc = *eob;
+ // fold lanes 4-7 onto lanes 0-3
+ acc = _mm_max_epi16(acc, _mm_shuffle_epi32(acc, 0xe));
+ // fold lanes 2-3 onto lanes 0-1
+ acc = _mm_max_epi16(acc, _mm_shufflelo_epi16(acc, 0xe));
+ // fold lane 1 onto lane 0
+ acc = _mm_max_epi16(acc, _mm_shufflelo_epi16(acc, 0x1));
+ *eob = acc;
+ return (uint16_t)_mm_extract_epi16(acc, 0);
+}
+
+void vp10_highbd_quantize_fp_sse4_1(
+ const tran_low_t *coeff_ptr, intptr_t count, int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, int log_scale) {
+ __m128i coeff[2], qcoeff[2], dequant[2], qparam[3], coeff_sign;
+ __m128i eob = _mm_setzero_si128();
+ const tran_low_t *src = coeff_ptr;
+ tran_low_t *quanAddr = qcoeff_ptr;
+ tran_low_t *dquanAddr = dqcoeff_ptr;
+ const int shift = 16 - log_scale;  // right shift applied to (|coeff| + round) * quant
+ const int coeff_stride = 4;  // coefficients handled per phase1/phase2 pair
+ const int quan_stride = coeff_stride;
+ (void)skip_block;  // NOTE(review): redundant, skip_block is tested below
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ (void)scan;
+
+ memset(quanAddr, 0, count * sizeof(quanAddr[0]));  // NOTE(review): no <string.h> include is visible in this file - confirm it arrives via a project header
+ memset(dquanAddr, 0, count * sizeof(dquanAddr[0]));  // outputs are cleared first, so the skip path only sets eob
+
+ if (!skip_block) {
+ coeff[0] = _mm_loadu_si128((__m128i const *)src);
+
+ qparam[0] =
+ _mm_set_epi32(round_ptr[1], round_ptr[1], round_ptr[1], round_ptr[0]);
+ qparam[1] = _mm_set_epi64x(quant_ptr[1], quant_ptr[0]);
+ qparam[2] = _mm_set_epi64x(dequant_ptr[1], dequant_ptr[0]);
+
+ // DC and first 3 AC (lane 0 uses table entry [0], the other lanes entry [1])
+ quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant,
+ &coeff_sign);
+
+ // update round/quan/dquan for AC: every lane now uses table entry [1]
+ qparam[0] = _mm_unpackhi_epi64(qparam[0], qparam[0]);
+ qparam[1] = _mm_set_epi64x(quant_ptr[1], quant_ptr[1]);
+ qparam[2] = _mm_set_epi64x(dequant_ptr[1], dequant_ptr[1]);
+
+ quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
+ log_scale, quanAddr, dquanAddr);
+
+ // next 4 AC
+ coeff[1] = _mm_loadu_si128((__m128i const *)(src + coeff_stride));
+ quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff, dequant,
+ &coeff_sign);
+ quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
+ log_scale, quanAddr + quan_stride,
+ dquanAddr + quan_stride);
+
+ find_eob(quanAddr, iscan, &eob);  // covers the 8 values just written
+
+ count -= 8;
+
+ // loop for the rest of AC, 8 coefficients per pass
+ while (count > 0) {
+ src += coeff_stride << 1;
+ quanAddr += quan_stride << 1;
+ dquanAddr += quan_stride << 1;
+ iscan += quan_stride << 1;
+
+ coeff[0] = _mm_loadu_si128((__m128i const *)src);
+ coeff[1] = _mm_loadu_si128((__m128i const *)(src + coeff_stride));
+
+ quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff,
+ dequant, &coeff_sign);
+ quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
+ log_scale, quanAddr, dquanAddr);
+
+ quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff,
+ dequant, &coeff_sign);
+ quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift,
+ log_scale, quanAddr + quan_stride,
+ dquanAddr + quan_stride);
+
+ find_eob(quanAddr, iscan, &eob);
+
+ count -= 8;
+ }
+ *eob_ptr = get_accumulated_eob(&eob);
+ } else {
+ *eob_ptr = 0;
+ }
+}
diff --git a/av1/encoder/x86/wedge_utils_sse2.c b/av1/encoder/x86/wedge_utils_sse2.c
new file mode 100644
index 0000000..a6be947
--- /dev/null
+++ b/av1/encoder/x86/wedge_utils_sse2.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <immintrin.h>
+
+#include "aom_dsp/x86/synonyms.h"
+
+#include "aom/vpx_integer.h"
+
+#include "av1/common/reconinter.h"
+
+#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
+
+/**
+ * SSE2 version; see vp10_wedge_sse_from_residuals_c. N must be a multiple of
+ * 64 (asserted); each loop iteration consumes 16 elements of r1, d and m. */
+uint64_t vp10_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d,
+ const uint8_t *m, int N) {
+ int n = -N;
+ int n8 = n + 8;
+
+ uint64_t csse;
+ // v_zext_q masks the low 32 bits of each 64-bit lane; -1 sets all 32 bits.
+ const __m128i v_mask_max_w = _mm_set1_epi16(MAX_MASK_VALUE);
+ const __m128i v_zext_q = _mm_set_epi32(0, -1, 0, -1);
+
+ __m128i v_acc0_q = _mm_setzero_si128();
+
+ assert(N % 64 == 0);
+ // Advance to the end of each array; n then runs from -N up to 0.
+ r1 += N;
+ d += N;
+ m += N;
+
+ do {
+ const __m128i v_r0_w = xx_load_128(r1 + n);
+ const __m128i v_r1_w = xx_load_128(r1 + n8);
+ const __m128i v_d0_w = xx_load_128(d + n);
+ const __m128i v_d1_w = xx_load_128(d + n8);
+ const __m128i v_m01_b = xx_load_128(m + n);
+ // Pair each d with r1 and each mask byte (widened) with MAX_MASK_VALUE.
+ const __m128i v_rd0l_w = _mm_unpacklo_epi16(v_d0_w, v_r0_w);
+ const __m128i v_rd0h_w = _mm_unpackhi_epi16(v_d0_w, v_r0_w);
+ const __m128i v_rd1l_w = _mm_unpacklo_epi16(v_d1_w, v_r1_w);
+ const __m128i v_rd1h_w = _mm_unpackhi_epi16(v_d1_w, v_r1_w);
+ const __m128i v_m0_w = _mm_unpacklo_epi8(v_m01_b, _mm_setzero_si128());
+ const __m128i v_m1_w = _mm_unpackhi_epi8(v_m01_b, _mm_setzero_si128());
+
+ const __m128i v_m0l_w = _mm_unpacklo_epi16(v_m0_w, v_mask_max_w);
+ const __m128i v_m0h_w = _mm_unpackhi_epi16(v_m0_w, v_mask_max_w);
+ const __m128i v_m1l_w = _mm_unpacklo_epi16(v_m1_w, v_mask_max_w);
+ const __m128i v_m1h_w = _mm_unpackhi_epi16(v_m1_w, v_mask_max_w);
+ // madd: m * d + MAX_MASK_VALUE * r1 per element, as 32-bit values.
+ const __m128i v_t0l_d = _mm_madd_epi16(v_rd0l_w, v_m0l_w);
+ const __m128i v_t0h_d = _mm_madd_epi16(v_rd0h_w, v_m0h_w);
+ const __m128i v_t1l_d = _mm_madd_epi16(v_rd1l_w, v_m1l_w);
+ const __m128i v_t1h_d = _mm_madd_epi16(v_rd1h_w, v_m1h_w);
+ // Narrow to 16 bits with signed saturation.
+ const __m128i v_t0_w = _mm_packs_epi32(v_t0l_d, v_t0h_d);
+ const __m128i v_t1_w = _mm_packs_epi32(v_t1l_d, v_t1h_d);
+ // Square and add adjacent pairs (32-bit results).
+ const __m128i v_sq0_d = _mm_madd_epi16(v_t0_w, v_t0_w);
+ const __m128i v_sq1_d = _mm_madd_epi16(v_t1_w, v_t1_w);
+ // Zero-extend the even and odd 32-bit sums to 64 bits and add them.
+ const __m128i v_sum0_q = _mm_add_epi64(_mm_and_si128(v_sq0_d, v_zext_q),
+ _mm_srli_epi64(v_sq0_d, 32));
+ const __m128i v_sum1_q = _mm_add_epi64(_mm_and_si128(v_sq1_d, v_zext_q),
+ _mm_srli_epi64(v_sq1_d, 32));
+
+ v_acc0_q = _mm_add_epi64(v_acc0_q, v_sum0_q);
+ v_acc0_q = _mm_add_epi64(v_acc0_q, v_sum1_q);
+
+ n8 += 16;
+ n += 16;
+ } while (n);
+ // Fold the high 64-bit lane into the low lane.
+ v_acc0_q = _mm_add_epi64(v_acc0_q, _mm_srli_si128(v_acc0_q, 8));
+
+#if ARCH_X86_64
+ csse = (uint64_t)_mm_cvtsi128_si64(v_acc0_q);
+#else
+ xx_storel_64(&csse, v_acc0_q);
+#endif
+
+ return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
+}
+
+/**
+ * SSE2 version; see vp10_wedge_sign_from_residuals_c. Returns whether the sum
+ * of ds[i] * m[i] over N elements exceeds limit. 64 elements per iteration. */
+int vp10_wedge_sign_from_residuals_sse2(const int16_t *ds, const uint8_t *m,
+ int N, int64_t limit) {
+ int64_t acc;
+
+ __m128i v_sign_d;
+ __m128i v_acc0_d = _mm_setzero_si128();
+ __m128i v_acc1_d = _mm_setzero_si128();
+ __m128i v_acc_q;
+
+ // Input size limited to 8192 by the use of 32 bit accumulators and m
+ // being between [0, 64]. Overflow might happen at larger sizes,
+ // though it is practically impossible on real video input.
+ assert(N < 8192);
+ assert(N % 64 == 0);
+
+ do {
+ const __m128i v_m01_b = xx_load_128(m);
+ const __m128i v_m23_b = xx_load_128(m + 16);
+ const __m128i v_m45_b = xx_load_128(m + 32);
+ const __m128i v_m67_b = xx_load_128(m + 48);
+
+ const __m128i v_d0_w = xx_load_128(ds);
+ const __m128i v_d1_w = xx_load_128(ds + 8);
+ const __m128i v_d2_w = xx_load_128(ds + 16);
+ const __m128i v_d3_w = xx_load_128(ds + 24);
+ const __m128i v_d4_w = xx_load_128(ds + 32);
+ const __m128i v_d5_w = xx_load_128(ds + 40);
+ const __m128i v_d6_w = xx_load_128(ds + 48);
+ const __m128i v_d7_w = xx_load_128(ds + 56);
+ // Widen the mask bytes to 16 bits so they can be multiplied with ds.
+ const __m128i v_m0_w = _mm_unpacklo_epi8(v_m01_b, _mm_setzero_si128());
+ const __m128i v_m1_w = _mm_unpackhi_epi8(v_m01_b, _mm_setzero_si128());
+ const __m128i v_m2_w = _mm_unpacklo_epi8(v_m23_b, _mm_setzero_si128());
+ const __m128i v_m3_w = _mm_unpackhi_epi8(v_m23_b, _mm_setzero_si128());
+ const __m128i v_m4_w = _mm_unpacklo_epi8(v_m45_b, _mm_setzero_si128());
+ const __m128i v_m5_w = _mm_unpackhi_epi8(v_m45_b, _mm_setzero_si128());
+ const __m128i v_m6_w = _mm_unpacklo_epi8(v_m67_b, _mm_setzero_si128());
+ const __m128i v_m7_w = _mm_unpackhi_epi8(v_m67_b, _mm_setzero_si128());
+
+ const __m128i v_p0_d = _mm_madd_epi16(v_d0_w, v_m0_w);
+ const __m128i v_p1_d = _mm_madd_epi16(v_d1_w, v_m1_w);
+ const __m128i v_p2_d = _mm_madd_epi16(v_d2_w, v_m2_w);
+ const __m128i v_p3_d = _mm_madd_epi16(v_d3_w, v_m3_w);
+ const __m128i v_p4_d = _mm_madd_epi16(v_d4_w, v_m4_w);
+ const __m128i v_p5_d = _mm_madd_epi16(v_d5_w, v_m5_w);
+ const __m128i v_p6_d = _mm_madd_epi16(v_d6_w, v_m6_w);
+ const __m128i v_p7_d = _mm_madd_epi16(v_d7_w, v_m7_w);
+
+ const __m128i v_p01_d = _mm_add_epi32(v_p0_d, v_p1_d);
+ const __m128i v_p23_d = _mm_add_epi32(v_p2_d, v_p3_d);
+ const __m128i v_p45_d = _mm_add_epi32(v_p4_d, v_p5_d);
+ const __m128i v_p67_d = _mm_add_epi32(v_p6_d, v_p7_d);
+
+ const __m128i v_p0123_d = _mm_add_epi32(v_p01_d, v_p23_d);
+ const __m128i v_p4567_d = _mm_add_epi32(v_p45_d, v_p67_d);
+
+ v_acc0_d = _mm_add_epi32(v_acc0_d, v_p0123_d);
+ v_acc1_d = _mm_add_epi32(v_acc1_d, v_p4567_d);
+
+ ds += 64;
+ m += 64;
+
+ N -= 64;
+ } while (N);
+ // Sign-extend the 32-bit lanes to 64 bits by interleaving with a sign mask.
+ v_sign_d = _mm_cmplt_epi32(v_acc0_d, _mm_setzero_si128());
+ v_acc0_d = _mm_add_epi64(_mm_unpacklo_epi32(v_acc0_d, v_sign_d),
+ _mm_unpackhi_epi32(v_acc0_d, v_sign_d));
+
+ v_sign_d = _mm_cmplt_epi32(v_acc1_d, _mm_setzero_si128());
+ v_acc1_d = _mm_add_epi64(_mm_unpacklo_epi32(v_acc1_d, v_sign_d),
+ _mm_unpackhi_epi32(v_acc1_d, v_sign_d));
+
+ v_acc_q = _mm_add_epi64(v_acc0_d, v_acc1_d);
+ // Fold the high 64-bit lane into the low lane.
+ v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8));
+
+#if ARCH_X86_64
+ acc = (int64_t)_mm_cvtsi128_si64(v_acc_q);
+#else
+ xx_storel_64(&acc, v_acc_q);
+#endif
+
+ return acc > limit;
+}
+
+// Negate under mask: (v ^ m) - m gives -v where m is 0xffff and v where m is 0.
+static INLINE __m128i negm_epi16(__m128i v_v_w, __m128i v_mask_w) {
+ return _mm_sub_epi16(_mm_xor_si128(v_v_w, v_mask_w), v_mask_w);
+}
+
+/**
+ * See vp10_wedge_compute_delta_squares_c. Stores a[i]*a[i] - b[i]*b[i],
+ * saturated to int16, in d[i]; N must be a multiple of 64 (asserted). */
+void vp10_wedge_compute_delta_squares_sse2(int16_t *d, const int16_t *a,
+ const int16_t *b, int N) {
+ const __m128i v_neg_w =
+ _mm_set_epi16(-1, 0, -1, 0, -1, 0, -1, 0);  // all-ones in the odd words
+
+ assert(N % 64 == 0);
+
+ do {
+ const __m128i v_a0_w = xx_load_128(a);
+ const __m128i v_b0_w = xx_load_128(b);
+ const __m128i v_a1_w = xx_load_128(a + 8);
+ const __m128i v_b1_w = xx_load_128(b + 8);
+ const __m128i v_a2_w = xx_load_128(a + 16);
+ const __m128i v_b2_w = xx_load_128(b + 16);
+ const __m128i v_a3_w = xx_load_128(a + 24);
+ const __m128i v_b3_w = xx_load_128(b + 24);
+ // Interleave into (a, b) pairs: a in the even words, b in the odd words.
+ const __m128i v_ab0l_w = _mm_unpacklo_epi16(v_a0_w, v_b0_w);
+ const __m128i v_ab0h_w = _mm_unpackhi_epi16(v_a0_w, v_b0_w);
+ const __m128i v_ab1l_w = _mm_unpacklo_epi16(v_a1_w, v_b1_w);
+ const __m128i v_ab1h_w = _mm_unpackhi_epi16(v_a1_w, v_b1_w);
+ const __m128i v_ab2l_w = _mm_unpacklo_epi16(v_a2_w, v_b2_w);
+ const __m128i v_ab2h_w = _mm_unpackhi_epi16(v_a2_w, v_b2_w);
+ const __m128i v_ab3l_w = _mm_unpacklo_epi16(v_a3_w, v_b3_w);
+ const __m128i v_ab3h_w = _mm_unpackhi_epi16(v_a3_w, v_b3_w);
+
+ // Negate top word of pairs
+ const __m128i v_abl0n_w = negm_epi16(v_ab0l_w, v_neg_w);
+ const __m128i v_abh0n_w = negm_epi16(v_ab0h_w, v_neg_w);
+ const __m128i v_abl1n_w = negm_epi16(v_ab1l_w, v_neg_w);
+ const __m128i v_abh1n_w = negm_epi16(v_ab1h_w, v_neg_w);
+ const __m128i v_abl2n_w = negm_epi16(v_ab2l_w, v_neg_w);
+ const __m128i v_abh2n_w = negm_epi16(v_ab2h_w, v_neg_w);
+ const __m128i v_abl3n_w = negm_epi16(v_ab3l_w, v_neg_w);
+ const __m128i v_abh3n_w = negm_epi16(v_ab3h_w, v_neg_w);
+ // madd of (a, b) with (a, -b) yields a*a - b*b as 32-bit values.
+ const __m128i v_r0l_w = _mm_madd_epi16(v_ab0l_w, v_abl0n_w);
+ const __m128i v_r0h_w = _mm_madd_epi16(v_ab0h_w, v_abh0n_w);
+ const __m128i v_r1l_w = _mm_madd_epi16(v_ab1l_w, v_abl1n_w);
+ const __m128i v_r1h_w = _mm_madd_epi16(v_ab1h_w, v_abh1n_w);
+ const __m128i v_r2l_w = _mm_madd_epi16(v_ab2l_w, v_abl2n_w);
+ const __m128i v_r2h_w = _mm_madd_epi16(v_ab2h_w, v_abh2n_w);
+ const __m128i v_r3l_w = _mm_madd_epi16(v_ab3l_w, v_abl3n_w);
+ const __m128i v_r3h_w = _mm_madd_epi16(v_ab3h_w, v_abh3n_w);
+ // Narrow back to 16 bits with signed saturation.
+ const __m128i v_r0_w = _mm_packs_epi32(v_r0l_w, v_r0h_w);
+ const __m128i v_r1_w = _mm_packs_epi32(v_r1l_w, v_r1h_w);
+ const __m128i v_r2_w = _mm_packs_epi32(v_r2l_w, v_r2h_w);
+ const __m128i v_r3_w = _mm_packs_epi32(v_r3l_w, v_r3h_w);
+
+ xx_store_128(d, v_r0_w);
+ xx_store_128(d + 8, v_r1_w);
+ xx_store_128(d + 16, v_r2_w);
+ xx_store_128(d + 24, v_r3_w);
+
+ a += 32;
+ b += 32;
+ d += 32;
+ N -= 32;
+ } while (N);
+}
diff --git a/av1/exports_dec b/av1/exports_dec
new file mode 100644
index 0000000..71c8369
--- /dev/null
+++ b/av1/exports_dec
@@ -0,0 +1,2 @@
+data vpx_codec_vp10_dx_algo
+text vpx_codec_vp10_dx
diff --git a/av1/exports_enc b/av1/exports_enc
new file mode 100644
index 0000000..d1644f2
--- /dev/null
+++ b/av1/exports_enc
@@ -0,0 +1,2 @@
+data vpx_codec_vp10_cx_algo
+text vpx_codec_vp10_cx
diff --git a/av1/vp10_common.mk b/av1/vp10_common.mk
new file mode 100644
index 0000000..e776a8a
--- /dev/null
+++ b/av1/vp10_common.mk
@@ -0,0 +1,139 @@
+##
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+# Files appended to VP10_COMMON_SRCS; paths are relative to this makefile.
+VP10_COMMON_SRCS-yes += vp10_common.mk
+VP10_COMMON_SRCS-yes += vp10_iface_common.h
+VP10_COMMON_SRCS-yes += common/ans.h
+VP10_COMMON_SRCS-yes += common/alloccommon.c
+VP10_COMMON_SRCS-yes += common/blockd.c
+VP10_COMMON_SRCS-yes += common/debugmodes.c
+VP10_COMMON_SRCS-yes += common/divide.h
+VP10_COMMON_SRCS-yes += common/entropy.c
+VP10_COMMON_SRCS-yes += common/entropymode.c
+VP10_COMMON_SRCS-yes += common/entropymv.c
+VP10_COMMON_SRCS-yes += common/frame_buffers.c
+VP10_COMMON_SRCS-yes += common/frame_buffers.h
+VP10_COMMON_SRCS-yes += common/alloccommon.h
+VP10_COMMON_SRCS-yes += common/blockd.h
+VP10_COMMON_SRCS-yes += common/common.h
+VP10_COMMON_SRCS-yes += common/entropy.h
+VP10_COMMON_SRCS-yes += common/entropymode.h
+VP10_COMMON_SRCS-yes += common/entropymv.h
+VP10_COMMON_SRCS-yes += common/enums.h
+VP10_COMMON_SRCS-yes += common/filter.h
+VP10_COMMON_SRCS-yes += common/filter.c
+VP10_COMMON_SRCS-yes += common/idct.h
+VP10_COMMON_SRCS-yes += common/idct.c
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm.h
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm.c
+VP10_COMMON_SRCS-yes += common/loopfilter.h
+VP10_COMMON_SRCS-yes += common/thread_common.h
+VP10_COMMON_SRCS-yes += common/mv.h
+VP10_COMMON_SRCS-yes += common/onyxc_int.h
+VP10_COMMON_SRCS-yes += common/pred_common.h
+VP10_COMMON_SRCS-yes += common/pred_common.c
+VP10_COMMON_SRCS-yes += common/quant_common.h
+VP10_COMMON_SRCS-yes += common/reconinter.h
+VP10_COMMON_SRCS-yes += common/reconintra.h
+VP10_COMMON_SRCS-yes += common/vp10_rtcd.c
+VP10_COMMON_SRCS-yes += common/vp10_rtcd_defs.pl
+VP10_COMMON_SRCS-yes += common/scale.h
+VP10_COMMON_SRCS-yes += common/scale.c
+VP10_COMMON_SRCS-yes += common/seg_common.h
+VP10_COMMON_SRCS-yes += common/seg_common.c
+VP10_COMMON_SRCS-yes += common/tile_common.h
+VP10_COMMON_SRCS-yes += common/tile_common.c
+VP10_COMMON_SRCS-yes += common/loopfilter.c
+VP10_COMMON_SRCS-yes += common/thread_common.c
+VP10_COMMON_SRCS-yes += common/mvref_common.c
+VP10_COMMON_SRCS-yes += common/mvref_common.h
+VP10_COMMON_SRCS-yes += common/quant_common.c
+VP10_COMMON_SRCS-yes += common/reconinter.c
+VP10_COMMON_SRCS-yes += common/reconintra.c
+VP10_COMMON_SRCS-yes += common/restoration.h
+VP10_COMMON_SRCS-yes += common/common_data.h
+VP10_COMMON_SRCS-yes += common/scan.c
+VP10_COMMON_SRCS-yes += common/scan.h
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.h
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.c
+VP10_COMMON_SRCS-yes += common/vp10_txfm.h
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm1d.h
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm1d.c
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm1d.h
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm1d.c
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d.c
+VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm2d_cfg.h
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d.c
+VP10_COMMON_SRCS-yes += common/vp10_inv_txfm2d_cfg.h
+VP10_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp10_convolve_ssse3.c
+VP10_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp10_convolve_filters_ssse3.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP10_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp10_highbd_convolve_sse4.c
+VP10_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp10_highbd_convolve_filters_sse4.c
+endif
+VP10_COMMON_SRCS-yes += common/vp10_convolve.c
+VP10_COMMON_SRCS-yes += common/vp10_convolve.h
+VP10_COMMON_SRCS-$(CONFIG_ANS) += common/ans.h
+VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.h
+VP10_COMMON_SRCS-$(CONFIG_ANS) += common/divide.c
+VP10_COMMON_SRCS-$(CONFIG_LOOP_RESTORATION) += common/restoration.h
+VP10_COMMON_SRCS-$(CONFIG_LOOP_RESTORATION) += common/restoration.c
+ifeq (yes,$(filter yes,$(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION)))
+VP10_COMMON_SRCS-yes += common/warped_motion.h
+VP10_COMMON_SRCS-yes += common/warped_motion.c
+endif
+VP10_COMMON_SRCS-yes += common/clpf.c
+VP10_COMMON_SRCS-yes += common/clpf.h
+ifeq ($(CONFIG_DERING),yes)
+VP10_COMMON_SRCS-yes += common/od_dering.c
+VP10_COMMON_SRCS-yes += common/od_dering.h
+VP10_COMMON_SRCS-yes += common/dering.c
+VP10_COMMON_SRCS-yes += common/dering.h
+endif
+VP10_COMMON_SRCS-yes += common/odintrin.c
+VP10_COMMON_SRCS-yes += common/odintrin.h
+# DSPR2 inverse transforms are only listed when high bit depth is disabled.
+ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP10_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans4_dspr2.c
+VP10_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans8_dspr2.c
+VP10_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans16_dspr2.c
+endif
+
+# common (msa)
+VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct4x4_msa.c
+VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct8x8_msa.c
+VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct16x16_msa.c
+# x86 transform sources, gated on SSE2 / SSE4.1 availability.
+VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_intrin_sse2.c
+VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm_sse2.c
+VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_dct32x32_impl_sse2.h
+VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm_impl_sse2.h
+VP10_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp10_txfm1d_sse4.h
+VP10_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp10_fwd_txfm1d_sse4.c
+VP10_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp10_fwd_txfm2d_sse4.c
+
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP10_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/highbd_txfm_utility_sse4.h
+endif
+# NEON inverse hybrid transforms are only listed without high bit depth.
+ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP10_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iht4x4_add_neon.c
+VP10_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iht8x8_add_neon.c
+endif
+
+ifeq ($(CONFIG_EXT_INTRA),yes)
+VP10_COMMON_SRCS-yes += common/intra_filters.h
+VP10_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/reconintra_sse4.c
+endif
+
+VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_inv_txfm_sse2.c
+VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_inv_txfm_sse2.h
+# Instantiate rtcd_h_template for vp10_rtcd from its definitions script.
+$(eval $(call rtcd_h_template,vp10_rtcd,av1/common/vp10_rtcd_defs.pl))
diff --git a/av1/vp10_cx_iface.c b/av1/vp10_cx_iface.c
new file mode 100644
index 0000000..34dd428
--- /dev/null
+++ b/av1/vp10_cx_iface.c
@@ -0,0 +1,1433 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "./vpx_config.h"
+#include "aom/vpx_encoder.h"
+#include "aom_ports/vpx_once.h"
+#include "aom_ports/system_state.h"
+#include "aom/internal/vpx_codec_internal.h"
+#include "./vpx_version.h"
+#include "av1/encoder/encoder.h"
+#include "aom/vp8cx.h"
+#include "av1/encoder/firstpass.h"
+#include "av1/vp10_iface_common.h"
+// Codec-specific encoder settings kept next to the public vpx_codec_enc_cfg_t.
+struct vp10_extracfg {
+ int cpu_used; // validated to [-8, 8]; abs() of it becomes oxcf->speed
+ unsigned int enable_auto_alt_ref; // validated to [0, 2]
+#if CONFIG_EXT_REFS
+ unsigned int enable_auto_bwd_ref; // validated to [0, 2]
+#endif // CONFIG_EXT_REFS
+ unsigned int noise_sensitivity; // validated to [0, 6]
+ unsigned int sharpness; // validated to [0, 7]
+ unsigned int static_thresh; // copied to oxcf->encode_breakout
+ unsigned int tile_columns;
+ unsigned int tile_rows;
+ unsigned int arnr_max_frames; // validated to [0, 15]
+ unsigned int arnr_strength; // validated to [0, 6]
+ unsigned int min_gf_interval; // 0 -> default decision
+ unsigned int max_gf_interval; // 0 -> default decision
+ vpx_tune_metric tuning;
+ unsigned int cq_level; // constrained quality level
+ unsigned int rc_max_intra_bitrate_pct;
+ unsigned int rc_max_inter_bitrate_pct;
+ unsigned int gf_cbr_boost_pct;
+ unsigned int lossless; // boolean; forces best/worst allowed q to 0
+#if CONFIG_AOM_QM
+ unsigned int enable_qm;
+ unsigned int qm_min;
+ unsigned int qm_max;
+#endif
+ unsigned int frame_parallel_decoding_mode;
+ AQ_MODE aq_mode; // validated to [0, AQ_MODE_COUNT - 1]
+ unsigned int frame_periodic_boost; // validated to [0, 1]
+ vpx_bit_depth_t bit_depth;
+ vpx_tune_content content;
+ vpx_color_space_t color_space;
+ int color_range; // validated to [0, 1]
+ int render_width;
+ int render_height;
+ vpx_superblock_size_t superblock_size;
+};
+// Default vp10_extracfg values, listed in field declaration order.
+static struct vp10_extracfg default_extra_cfg = {
+ 0, // cpu_used
+ 1, // enable_auto_alt_ref
+#if CONFIG_EXT_REFS
+ 0, // enable_auto_bwd_ref
+#endif // CONFIG_EXT_REFS
+ 0, // noise_sensitivity
+ 0, // sharpness
+ 0, // static_thresh
+#if CONFIG_EXT_TILE
+ UINT_MAX, // tile_columns
+ UINT_MAX, // tile_rows
+#else
+ 0, // tile_columns
+ 0, // tile_rows
+#endif // CONFIG_EXT_TILE
+ 7, // arnr_max_frames
+ 5, // arnr_strength
+ 0, // min_gf_interval; 0 -> default decision
+ 0, // max_gf_interval; 0 -> default decision
+ VPX_TUNE_PSNR, // tuning
+ 10, // cq_level
+ 0, // rc_max_intra_bitrate_pct
+ 0, // rc_max_inter_bitrate_pct
+ 0, // gf_cbr_boost_pct
+ 0, // lossless
+#if CONFIG_AOM_QM
+ 0, // enable_qm
+ DEFAULT_QM_FIRST, // qm_min
+ DEFAULT_QM_LAST, // qm_max
+#endif
+ 1, // frame_parallel_decoding_mode
+ NO_AQ, // aq_mode
+ 0, // frame_periodic_boost
+ VPX_BITS_8, // bit_depth
+ VPX_CONTENT_DEFAULT, // content
+ VPX_CS_UNKNOWN, // color_space
+ 0, // color_range
+ 0, // render_width
+ 0, // render_height
+ VPX_SUPERBLOCK_SIZE_DYNAMIC // superblock_size
+};
+// Private encoder context behind the vpx_codec_alg_priv_t handle.
+struct vpx_codec_alg_priv {
+ vpx_codec_priv_t base; // base.err_detail receives error detail strings
+ vpx_codec_enc_cfg_t cfg; // public config; validate_img compares frames to it
+ struct vp10_extracfg extra_cfg; // codec-specific settings
+ VP10EncoderConfig oxcf; // NOTE(review): presumably set_encoder_config output
+ VP10_COMP *cpi;
+ unsigned char *cx_data;
+ size_t cx_data_sz;
+ unsigned char *pending_cx_data;
+ size_t pending_cx_data_sz;
+ int pending_frame_count;
+ size_t pending_frame_sizes[8];
+ vpx_image_t preview_img;
+ vpx_enc_frame_flags_t next_frame_flags;
+ vp8_postproc_cfg_t preview_ppcfg;
+ vpx_codec_pkt_list_decl(256) pkt_list;
+ unsigned int fixed_kf_cntr;
+ // BufferPool that holds all reference frames.
+ BufferPool *buffer_pool;
+};
+
+static vpx_codec_err_t update_error_state(
+ vpx_codec_alg_priv_t *ctx, const struct vpx_internal_error_info *error) {
+ const vpx_codec_err_t res = error->error_code;
+
+ if (res != VPX_CODEC_OK)
+ ctx->base.err_detail = error->has_detail ? error->detail : NULL;
+
+ return res;
+}
+
+#undef ERROR
+#define ERROR(str) \
+ do { \
+ ctx->base.err_detail = str; \
+ return VPX_CODEC_INVALID_PARAM; \
+ } while (0)
+
+#define RANGE_CHECK(p, memb, lo, hi) \
+ do { \
+ if (!(((p)->memb == lo || (p)->memb > (lo)) && (p)->memb <= hi)) \
+ ERROR(#memb " out of range [" #lo ".." #hi "]"); \
+ } while (0)
+
+#define RANGE_CHECK_HI(p, memb, hi) \
+ do { \
+ if (!((p)->memb <= (hi))) ERROR(#memb " out of range [.." #hi "]"); \
+ } while (0)
+
+#define RANGE_CHECK_LO(p, memb, lo) \
+ do { \
+ if (!((p)->memb >= (lo))) ERROR(#memb " out of range [" #lo "..]"); \
+ } while (0)
+
+#define RANGE_CHECK_BOOL(p, memb) \
+ do { \
+ if (!!((p)->memb) != (p)->memb) ERROR(#memb " expected boolean"); \
+ } while (0)
+// Range-checks cfg and extra_cfg; any failure returns through ERROR().
+static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
+ const vpx_codec_enc_cfg_t *cfg,
+ const struct vp10_extracfg *extra_cfg) {
+ RANGE_CHECK(cfg, g_w, 1, 65535); // 16 bits available
+ RANGE_CHECK(cfg, g_h, 1, 65535); // 16 bits available
+ RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
+ RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
+ RANGE_CHECK_HI(cfg, g_profile, 3);
+
+ RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
+ RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
+ RANGE_CHECK_BOOL(extra_cfg, lossless);
+ RANGE_CHECK(extra_cfg, aq_mode, 0, AQ_MODE_COUNT - 1);
+ RANGE_CHECK(extra_cfg, frame_periodic_boost, 0, 1);
+ RANGE_CHECK_HI(cfg, g_threads, 64);
+ RANGE_CHECK_HI(cfg, g_lag_in_frames, MAX_LAG_BUFFERS);
+ RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q);
+ RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100);
+ RANGE_CHECK_HI(cfg, rc_overshoot_pct, 100);
+ RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
+ RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO);
+ RANGE_CHECK_BOOL(cfg, rc_resize_allowed);
+ RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100);
+ RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100);
+ RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100);
+ RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS);
+ RANGE_CHECK(extra_cfg, min_gf_interval, 0, (MAX_LAG_BUFFERS - 1));
+ RANGE_CHECK(extra_cfg, max_gf_interval, 0, (MAX_LAG_BUFFERS - 1));
+ if (extra_cfg->max_gf_interval > 0) {
+ RANGE_CHECK(extra_cfg, max_gf_interval, 2, (MAX_LAG_BUFFERS - 1));
+ }
+ if (extra_cfg->min_gf_interval > 0 && extra_cfg->max_gf_interval > 0) {
+ RANGE_CHECK(extra_cfg, max_gf_interval, extra_cfg->min_gf_interval,
+ (MAX_LAG_BUFFERS - 1));
+ }
+ // With resizing enabled, the scaled size may not exceed the source size.
+ if (cfg->rc_resize_allowed == 1) {
+ RANGE_CHECK(cfg, rc_scaled_width, 0, cfg->g_w);
+ RANGE_CHECK(cfg, rc_scaled_height, 0, cfg->g_h);
+ }
+
+ // VP9 does not support a lower bound on the keyframe interval in
+ // automatic keyframe placement mode.
+ if (cfg->kf_mode != VPX_KF_DISABLED && cfg->kf_min_dist != cfg->kf_max_dist &&
+ cfg->kf_min_dist > 0)
+ ERROR(
+ "kf_min_dist not supported in auto mode, use 0 "
+ "or kf_max_dist instead.");
+
+ RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
+#if CONFIG_EXT_REFS
+ RANGE_CHECK(extra_cfg, enable_auto_bwd_ref, 0, 2);
+#endif // CONFIG_EXT_REFS
+ RANGE_CHECK(extra_cfg, cpu_used, -8, 8);
+ RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
+ RANGE_CHECK(extra_cfg, superblock_size, VPX_SUPERBLOCK_SIZE_64X64,
+ VPX_SUPERBLOCK_SIZE_DYNAMIC);
+#if CONFIG_EXT_TILE
+// TODO(any): Warning. If CONFIG_EXT_TILE is true, tile_columns really
+// means tile_width, and tile_rows really means tile_height. The interface
+// should be sanitized.
+#if CONFIG_EXT_PARTITION
+ if (extra_cfg->superblock_size != VPX_SUPERBLOCK_SIZE_64X64) {
+ if (extra_cfg->tile_columns != UINT_MAX)
+ RANGE_CHECK(extra_cfg, tile_columns, 1, 32);
+ if (extra_cfg->tile_rows != UINT_MAX)
+ RANGE_CHECK(extra_cfg, tile_rows, 1, 32);
+ } else
+#endif // CONFIG_EXT_PARTITION
+ {
+ if (extra_cfg->tile_columns != UINT_MAX)
+ RANGE_CHECK(extra_cfg, tile_columns, 1, 64);
+ if (extra_cfg->tile_rows != UINT_MAX)
+ RANGE_CHECK(extra_cfg, tile_rows, 1, 64);
+ }
+#else
+ RANGE_CHECK(extra_cfg, tile_columns, 0, 6);
+ RANGE_CHECK(extra_cfg, tile_rows, 0, 2);
+#endif // CONFIG_EXT_TILE
+ RANGE_CHECK_HI(extra_cfg, sharpness, 7);
+ RANGE_CHECK(extra_cfg, arnr_max_frames, 0, 15);
+ RANGE_CHECK_HI(extra_cfg, arnr_strength, 6);
+ RANGE_CHECK(extra_cfg, cq_level, 0, 63);
+ RANGE_CHECK(cfg, g_bit_depth, VPX_BITS_8, VPX_BITS_12);
+ RANGE_CHECK(cfg, g_input_bit_depth, 8, 12);
+ RANGE_CHECK(extra_cfg, content, VPX_CONTENT_DEFAULT, VPX_CONTENT_INVALID - 1);
+
+ // TODO(yaowu): remove this when ssim tuning is implemented for vp10
+ if (extra_cfg->tuning == VPX_TUNE_SSIM)
+ ERROR("Option --tune=ssim is not currently supported in VP10.");
+ // The last pass needs a complete, well-formed first-pass stats buffer.
+ if (cfg->g_pass == VPX_RC_LAST_PASS) {
+ const size_t packet_sz = sizeof(FIRSTPASS_STATS);
+ const int n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz);
+ const FIRSTPASS_STATS *stats;
+
+ if (cfg->rc_twopass_stats_in.buf == NULL)
+ ERROR("rc_twopass_stats_in.buf not set.");
+
+ if (cfg->rc_twopass_stats_in.sz % packet_sz)
+ ERROR("rc_twopass_stats_in.sz indicates truncated packet.");
+
+ if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz)
+ ERROR("rc_twopass_stats_in requires at least two packets.");
+ // The final packet's count must equal the number of packets before it.
+ stats =
+ (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + n_packets - 1;
+
+ if ((int)(stats->count + 0.5) != n_packets - 1)
+ ERROR("rc_twopass_stats_in missing EOS stats packet");
+ }
+ // Profiles 0/1 are 8-bit only; profiles above 1 require more than 8 bits.
+#if !CONFIG_VP9_HIGHBITDEPTH
+ if (cfg->g_profile > (unsigned int)PROFILE_1) {
+ ERROR("Profile > 1 not supported in this build configuration");
+ }
+#endif
+ if (cfg->g_profile <= (unsigned int)PROFILE_1 &&
+ cfg->g_bit_depth > VPX_BITS_8) {
+ ERROR("Codec high bit-depth not supported in profile < 2");
+ }
+ if (cfg->g_profile <= (unsigned int)PROFILE_1 && cfg->g_input_bit_depth > 8) {
+ ERROR("Source high bit-depth not supported in profile < 2");
+ }
+ if (cfg->g_profile > (unsigned int)PROFILE_1 &&
+ cfg->g_bit_depth == VPX_BITS_8) {
+ ERROR("Codec bit-depth 8 not supported in profile > 1");
+ }
+ RANGE_CHECK(extra_cfg, color_space, VPX_CS_UNKNOWN, VPX_CS_SRGB);
+ RANGE_CHECK(extra_cfg, color_range, 0, 1);
+ return VPX_CODEC_OK;
+}
+// Rejects formats the profile disallows and frames whose size differs from cfg.
+static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx,
+ const vpx_image_t *img) {
+ switch (img->fmt) {
+ case VPX_IMG_FMT_YV12:
+ case VPX_IMG_FMT_I420:
+ case VPX_IMG_FMT_I42016: break; // accepted regardless of profile
+ case VPX_IMG_FMT_I422:
+ case VPX_IMG_FMT_I444:
+ case VPX_IMG_FMT_I440:
+ if (ctx->cfg.g_profile != (unsigned int)PROFILE_1) {
+ ERROR(
+ "Invalid image format. I422, I444, I440 images are "
+ "not supported in profile.");
+ }
+ break;
+ case VPX_IMG_FMT_I42216:
+ case VPX_IMG_FMT_I44416:
+ case VPX_IMG_FMT_I44016:
+ if (ctx->cfg.g_profile != (unsigned int)PROFILE_1 &&
+ ctx->cfg.g_profile != (unsigned int)PROFILE_3) {
+ ERROR(
+ "Invalid image format. 16-bit I422, I444, I440 images are "
+ "not supported in profile.");
+ }
+ break;
+ default:
+ ERROR(
+ "Invalid image format. Only YV12, I420, I422, I444 images are "
+ "supported.");
+ break;
+ }
+ // The display size must equal the size the encoder was configured with.
+ if (img->d_w != ctx->cfg.g_w || img->d_h != ctx->cfg.g_h)
+ ERROR("Image size must match encoder init configuration size");
+
+ return VPX_CODEC_OK;
+}
+// Maps img->fmt to a bits-per-sample figure; unknown formats assert, then 0.
+static int get_image_bps(const vpx_image_t *img) {
+ switch (img->fmt) {
+ case VPX_IMG_FMT_YV12:
+ case VPX_IMG_FMT_I420: return 12;
+ case VPX_IMG_FMT_I422: return 16;
+ case VPX_IMG_FMT_I444: return 24;
+ case VPX_IMG_FMT_I440: return 16;
+ case VPX_IMG_FMT_I42016: return 24; // 16-bit variants return twice as much
+ case VPX_IMG_FMT_I42216: return 32;
+ case VPX_IMG_FMT_I44416: return 48;
+ case VPX_IMG_FMT_I44016: return 32;
+ default: assert(0 && "Invalid image format"); break;
+ }
+ return 0;
+}
+// Fills oxcf from the public cfg and extra_cfg; always returns VPX_CODEC_OK.
+static vpx_codec_err_t set_encoder_config(
+ VP10EncoderConfig *oxcf, const vpx_codec_enc_cfg_t *cfg,
+ const struct vp10_extracfg *extra_cfg) {
+ const int is_vbr = cfg->rc_end_usage == VPX_VBR;
+ oxcf->profile = cfg->g_profile;
+ oxcf->max_threads = (int)cfg->g_threads;
+ oxcf->width = cfg->g_w;
+ oxcf->height = cfg->g_h;
+ oxcf->bit_depth = cfg->g_bit_depth;
+ oxcf->input_bit_depth = cfg->g_input_bit_depth;
+ // guess a frame rate if out of whack, use 30
+ oxcf->init_framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num;
+ if (oxcf->init_framerate > 180) oxcf->init_framerate = 30;
+
+ oxcf->mode = GOOD;
+ // Map the public pass enum onto pass numbers 0 / 1 / 2.
+ switch (cfg->g_pass) {
+ case VPX_RC_ONE_PASS: oxcf->pass = 0; break;
+ case VPX_RC_FIRST_PASS: oxcf->pass = 1; break;
+ case VPX_RC_LAST_PASS: oxcf->pass = 2; break;
+ }
+ // The first pass always runs with zero lag.
+ oxcf->lag_in_frames =
+ cfg->g_pass == VPX_RC_FIRST_PASS ? 0 : cfg->g_lag_in_frames;
+ oxcf->rc_mode = cfg->rc_end_usage;
+ // NOTE(review): if rc_target_bitrate is 32-bit this multiply can wrap - confirm.
+ // Convert target bandwidth from Kbit/s to Bit/s
+ oxcf->target_bandwidth = 1000 * cfg->rc_target_bitrate;
+ oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct;
+ oxcf->rc_max_inter_bitrate_pct = extra_cfg->rc_max_inter_bitrate_pct;
+ oxcf->gf_cbr_boost_pct = extra_cfg->gf_cbr_boost_pct;
+ // Lossless pins both quantizer bounds to 0.
+ oxcf->best_allowed_q =
+ extra_cfg->lossless ? 0 : vp10_quantizer_to_qindex(cfg->rc_min_quantizer);
+ oxcf->worst_allowed_q =
+ extra_cfg->lossless ? 0 : vp10_quantizer_to_qindex(cfg->rc_max_quantizer);
+ oxcf->cq_level = vp10_quantizer_to_qindex(extra_cfg->cq_level);
+ oxcf->fixed_q = -1;
+
+#if CONFIG_AOM_QM
+ oxcf->using_qm = extra_cfg->enable_qm;
+ oxcf->qm_minlevel = extra_cfg->qm_min;
+ oxcf->qm_maxlevel = extra_cfg->qm_max;
+#endif
+
+ oxcf->under_shoot_pct = cfg->rc_undershoot_pct;
+ oxcf->over_shoot_pct = cfg->rc_overshoot_pct;
+ // A zero scaled width or height selects dynamic instead of fixed resizing.
+ oxcf->scaled_frame_width = cfg->rc_scaled_width;
+ oxcf->scaled_frame_height = cfg->rc_scaled_height;
+ if (cfg->rc_resize_allowed == 1) {
+ oxcf->resize_mode =
+ (oxcf->scaled_frame_width == 0 || oxcf->scaled_frame_height == 0)
+ ? RESIZE_DYNAMIC
+ : RESIZE_FIXED;
+ } else {
+ oxcf->resize_mode = RESIZE_NONE;
+ }
+ // VBR ignores the configured buffer sizes and uses fixed values instead.
+ oxcf->maximum_buffer_size_ms = is_vbr ? 240000 : cfg->rc_buf_sz;
+ oxcf->starting_buffer_level_ms = is_vbr ? 60000 : cfg->rc_buf_initial_sz;
+ oxcf->optimal_buffer_level_ms = is_vbr ? 60000 : cfg->rc_buf_optimal_sz;
+
+ oxcf->drop_frames_water_mark = cfg->rc_dropframe_thresh;
+
+ oxcf->two_pass_vbrbias = cfg->rc_2pass_vbr_bias_pct;
+ oxcf->two_pass_vbrmin_section = cfg->rc_2pass_vbr_minsection_pct;
+ oxcf->two_pass_vbrmax_section = cfg->rc_2pass_vbr_maxsection_pct;
+ // Auto keyframes need kf_mode AUTO and differing min/max distances.
+ oxcf->auto_key =
+ cfg->kf_mode == VPX_KF_AUTO && cfg->kf_min_dist != cfg->kf_max_dist;
+
+ oxcf->key_freq = cfg->kf_max_dist;
+ // Only the magnitude of cpu_used is kept as the speed setting.
+ oxcf->speed = abs(extra_cfg->cpu_used);
+ oxcf->encode_breakout = extra_cfg->static_thresh;
+ oxcf->enable_auto_arf = extra_cfg->enable_auto_alt_ref;
+#if CONFIG_EXT_REFS
+ oxcf->enable_auto_brf = extra_cfg->enable_auto_bwd_ref;
+#endif // CONFIG_EXT_REFS
+ oxcf->noise_sensitivity = extra_cfg->noise_sensitivity;
+ oxcf->sharpness = extra_cfg->sharpness;
+
+ oxcf->two_pass_stats_in = cfg->rc_twopass_stats_in;
+
+#if CONFIG_FP_MB_STATS
+ oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in;
+#endif
+
+ oxcf->color_space = extra_cfg->color_space;
+ oxcf->color_range = extra_cfg->color_range;
+ oxcf->render_width = extra_cfg->render_width;
+ oxcf->render_height = extra_cfg->render_height;
+ oxcf->arnr_max_frames = extra_cfg->arnr_max_frames;
+ oxcf->arnr_strength = extra_cfg->arnr_strength;
+ oxcf->min_gf_interval = extra_cfg->min_gf_interval;
+ oxcf->max_gf_interval = extra_cfg->max_gf_interval;
+
+ oxcf->tuning = extra_cfg->tuning;
+ oxcf->content = extra_cfg->content;
+
+#if CONFIG_EXT_PARTITION
+ oxcf->superblock_size = extra_cfg->superblock_size;
+#endif // CONFIG_EXT_PARTITION
+ // With CONFIG_EXT_TILE the tile values are clamped to a per-size maximum.
+#if CONFIG_EXT_TILE
+ {
+#if CONFIG_EXT_PARTITION
+ const unsigned int max =
+ extra_cfg->superblock_size == VPX_SUPERBLOCK_SIZE_64X64 ? 64 : 32;
+#else
+ const unsigned int max = 64;
+#endif // CONFIG_EXT_PARTITION
+ oxcf->tile_columns = VPXMIN(extra_cfg->tile_columns, max);
+ oxcf->tile_rows = VPXMIN(extra_cfg->tile_rows, max);
+ }
+#else
+ oxcf->tile_columns = extra_cfg->tile_columns;
+ oxcf->tile_rows = extra_cfg->tile_rows;
+#endif // CONFIG_EXT_TILE
+
+ oxcf->error_resilient_mode = cfg->g_error_resilient;
+ oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode;
+
+ oxcf->aq_mode = extra_cfg->aq_mode;
+
+ oxcf->frame_periodic_boost = extra_cfg->frame_periodic_boost;
+
+ /*
+ printf("Current VP9 Settings: \n");
+ printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
+ printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity);
+ printf("sharpness: %d\n", oxcf->sharpness);
+ printf("cpu_used: %d\n", oxcf->cpu_used);
+ printf("Mode: %d\n", oxcf->mode);
+ printf("auto_key: %d\n", oxcf->auto_key);
+ printf("key_freq: %d\n", oxcf->key_freq);
+ printf("end_usage: %d\n", oxcf->end_usage);
+ printf("under_shoot_pct: %d\n", oxcf->under_shoot_pct);
+ printf("over_shoot_pct: %d\n", oxcf->over_shoot_pct);
+ printf("starting_buffer_level: %d\n", oxcf->starting_buffer_level);
+ printf("optimal_buffer_level: %d\n", oxcf->optimal_buffer_level);
+ printf("maximum_buffer_size: %d\n", oxcf->maximum_buffer_size);
+ printf("fixed_q: %d\n", oxcf->fixed_q);
+ printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q);
+ printf("best_allowed_q: %d\n", oxcf->best_allowed_q);
+ printf("allow_spatial_resampling: %d\n", oxcf->allow_spatial_resampling);
+ printf("scaled_frame_width: %d\n", oxcf->scaled_frame_width);
+ printf("scaled_frame_height: %d\n", oxcf->scaled_frame_height);
+ printf("two_pass_vbrbias: %d\n", oxcf->two_pass_vbrbias);
+ printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section);
+ printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
+ printf("lag_in_frames: %d\n", oxcf->lag_in_frames);
+ printf("enable_auto_arf: %d\n", oxcf->enable_auto_arf);
+ printf("Version: %d\n", oxcf->Version);
+ printf("encode_breakout: %d\n", oxcf->encode_breakout);
+ printf("error resilient: %d\n", oxcf->error_resilient_mode);
+ printf("frame parallel detokenization: %d\n",
+ oxcf->frame_parallel_decoding_mode);
+ */
+ return VPX_CODEC_OK;
+}
+
+// Applies a new encoder configuration |cfg| to the already-created encoder
+// held by |ctx|.
+//
+// Returns the result of validate_config(). The ERROR() uses below are
+// presumably early returns that record an error detail string (the macro is
+// defined outside this block -- confirm).
+static vpx_codec_err_t encoder_set_config(vpx_codec_alg_priv_t *ctx,
+ const vpx_codec_enc_cfg_t *cfg) {
+ vpx_codec_err_t res;
+ int force_key = 0;
+
+ // The frame dimensions differ from the last accepted configuration.
+ if (cfg->g_w != ctx->cfg.g_w || cfg->g_h != ctx->cfg.g_h) {
+ // Resizing is rejected when lagged encoding or multi-pass is requested.
+ if (cfg->g_lag_in_frames > 1 || cfg->g_pass != VPX_RC_ONE_PASS)
+ ERROR("Cannot change width or height after initialization");
+ // Request a key frame when the new size fails valid_ref_frame_size()
+ // against the old one, or exceeds the (non-zero) initial dimensions
+ // recorded in the compressor.
+ if (!valid_ref_frame_size(ctx->cfg.g_w, ctx->cfg.g_h, cfg->g_w, cfg->g_h) ||
+ (ctx->cpi->initial_width && (int)cfg->g_w > ctx->cpi->initial_width) ||
+ (ctx->cpi->initial_height && (int)cfg->g_h > ctx->cpi->initial_height))
+ force_key = 1;
+ }
+
+ // Prevent increasing lag_in_frames. This check is stricter than it needs
+ // to be -- the limit is not increasing past the first lag_in_frames
+ // value, but we don't track the initial config, only the last successful
+ // config.
+ if (cfg->g_lag_in_frames > ctx->cfg.g_lag_in_frames)
+ ERROR("Cannot increase lag_in_frames");
+
+ res = validate_config(ctx, cfg, &ctx->extra_cfg);
+
+ if (res == VPX_CODEC_OK) {
+ // Commit the new config, rebuild the internal encoder config from it and
+ // push it to the compressor.
+ ctx->cfg = *cfg;
+ set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
+ // On profile change, request a key frame
+ // (compared before vp10_change_config() is given the new config).
+ force_key |= ctx->cpi->common.profile != ctx->oxcf.profile;
+ vp10_change_config(ctx->cpi, &ctx->oxcf);
+ }
+
+ // The key-frame request is stored for the next submitted frame; note that
+ // it is recorded even when validation failed.
+ if (force_key) ctx->next_frame_flags |= VPX_EFLAG_FORCE_KF;
+
+ return res;
+}
+
+// Handler for VP8E_GET_LAST_QUANTIZER: stores vp10_get_quantizer() for the
+// encoder in the caller-supplied int. Returns VPX_CODEC_INVALID_PARAM when
+// the output pointer is NULL.
+static vpx_codec_err_t ctrl_get_quantizer(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  int *const out = va_arg(args, int *);
+  if (out != NULL) {
+    *out = vp10_get_quantizer(ctx->cpi);
+    return VPX_CODEC_OK;
+  }
+  return VPX_CODEC_INVALID_PARAM;
+}
+
+// Handler for VP8E_GET_LAST_QUANTIZER_64: like ctrl_get_quantizer(), but the
+// value is first converted with vp10_qindex_to_quantizer(). Returns
+// VPX_CODEC_INVALID_PARAM when the output pointer is NULL.
+static vpx_codec_err_t ctrl_get_quantizer64(vpx_codec_alg_priv_t *ctx,
+                                            va_list args) {
+  int *const out = va_arg(args, int *);
+  if (out != NULL) {
+    const int qindex = vp10_get_quantizer(ctx->cpi);
+    *out = vp10_qindex_to_quantizer(qindex);
+    return VPX_CODEC_OK;
+  }
+  return VPX_CODEC_INVALID_PARAM;
+}
+
+// Validates |extra_cfg| against the current base config and, only if it is
+// accepted, commits it to |ctx|, rebuilds the internal encoder config and
+// pushes that to the compressor. Returns the validation result.
+static vpx_codec_err_t update_extra_cfg(vpx_codec_alg_priv_t *ctx,
+                                        const struct vp10_extracfg *extra_cfg) {
+  const vpx_codec_err_t status = validate_config(ctx, &ctx->cfg, extra_cfg);
+  if (status != VPX_CODEC_OK) return status;
+
+  ctx->extra_cfg = *extra_cfg;
+  set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
+  vp10_change_config(ctx->cpi, &ctx->oxcf);
+  return status;
+}
+
+// Handler for VP8E_SET_CPUUSED: overrides cpu_used in a copy of the extra
+// config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_cpuused(vpx_codec_alg_priv_t *ctx,
+                                        va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.cpu_used = CAST(VP8E_SET_CPUUSED, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP8E_SET_ENABLEAUTOALTREF: overrides enable_auto_alt_ref in a
+// copy of the extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_enable_auto_alt_ref(vpx_codec_alg_priv_t *ctx,
+                                                    va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.enable_auto_alt_ref = CAST(VP8E_SET_ENABLEAUTOALTREF, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+#if CONFIG_EXT_REFS
+// Handler for VP8E_SET_ENABLEAUTOBWDREF: overrides enable_auto_bwd_ref in a
+// copy of the extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_enable_auto_bwd_ref(vpx_codec_alg_priv_t *ctx,
+                                                    va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.enable_auto_bwd_ref = CAST(VP8E_SET_ENABLEAUTOBWDREF, args);
+  return update_extra_cfg(ctx, &updated);
+}
+#endif // CONFIG_EXT_REFS
+
+// Handler for VP9E_SET_NOISE_SENSITIVITY: overrides noise_sensitivity in a
+// copy of the extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_noise_sensitivity(vpx_codec_alg_priv_t *ctx,
+                                                  va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.noise_sensitivity = CAST(VP9E_SET_NOISE_SENSITIVITY, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP8E_SET_SHARPNESS: overrides sharpness in a copy of the extra
+// config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_sharpness(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.sharpness = CAST(VP8E_SET_SHARPNESS, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP8E_SET_STATIC_THRESHOLD: overrides static_thresh in a copy of
+// the extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_static_thresh(vpx_codec_alg_priv_t *ctx,
+                                              va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.static_thresh = CAST(VP8E_SET_STATIC_THRESHOLD, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_TILE_COLUMNS: overrides tile_columns in a copy of the
+// extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_tile_columns(vpx_codec_alg_priv_t *ctx,
+                                             va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.tile_columns = CAST(VP9E_SET_TILE_COLUMNS, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_TILE_ROWS: overrides tile_rows in a copy of the extra
+// config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_tile_rows(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.tile_rows = CAST(VP9E_SET_TILE_ROWS, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP8E_SET_ARNR_MAXFRAMES: overrides arnr_max_frames in a copy of
+// the extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_arnr_max_frames(vpx_codec_alg_priv_t *ctx,
+                                                va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.arnr_max_frames = CAST(VP8E_SET_ARNR_MAXFRAMES, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP8E_SET_ARNR_STRENGTH: overrides arnr_strength in a copy of
+// the extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_arnr_strength(vpx_codec_alg_priv_t *ctx,
+                                              va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.arnr_strength = CAST(VP8E_SET_ARNR_STRENGTH, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP8E_SET_ARNR_TYPE: accepted but ignored. Both arguments are
+// unused and the call always reports success.
+static vpx_codec_err_t ctrl_set_arnr_type(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  (void)args;
+  (void)ctx;
+  return VPX_CODEC_OK;
+}
+
+// Handler for VP8E_SET_TUNING: overrides tuning in a copy of the extra config
+// and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_tuning(vpx_codec_alg_priv_t *ctx,
+                                       va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.tuning = CAST(VP8E_SET_TUNING, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP8E_SET_CQ_LEVEL: overrides cq_level in a copy of the extra
+// config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_cq_level(vpx_codec_alg_priv_t *ctx,
+                                         va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.cq_level = CAST(VP8E_SET_CQ_LEVEL, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP8E_SET_MAX_INTRA_BITRATE_PCT: overrides
+// rc_max_intra_bitrate_pct in a copy of the extra config and applies the copy
+// through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_rc_max_intra_bitrate_pct(
+    vpx_codec_alg_priv_t *ctx, va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.rc_max_intra_bitrate_pct = CAST(VP8E_SET_MAX_INTRA_BITRATE_PCT, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_MAX_INTER_BITRATE_PCT (note: the argument is read with
+// the VP8E_SET_MAX_INTER_BITRATE_PCT cast id). Overrides
+// rc_max_inter_bitrate_pct in a copy of the extra config and applies the copy
+// through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_rc_max_inter_bitrate_pct(
+    vpx_codec_alg_priv_t *ctx, va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.rc_max_inter_bitrate_pct = CAST(VP8E_SET_MAX_INTER_BITRATE_PCT, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_GF_CBR_BOOST_PCT: overrides gf_cbr_boost_pct in a copy
+// of the extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_rc_gf_cbr_boost_pct(vpx_codec_alg_priv_t *ctx,
+                                                    va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.gf_cbr_boost_pct = CAST(VP9E_SET_GF_CBR_BOOST_PCT, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_LOSSLESS: overrides lossless in a copy of the extra
+// config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_lossless(vpx_codec_alg_priv_t *ctx,
+                                         va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.lossless = CAST(VP9E_SET_LOSSLESS, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+#if CONFIG_AOM_QM
+// Handler for VP9E_SET_ENABLE_QM: overrides enable_qm in a copy of the extra
+// config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_enable_qm(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.enable_qm = CAST(VP9E_SET_ENABLE_QM, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_QM_MIN: overrides qm_min in a copy of the extra config
+// and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_qm_min(vpx_codec_alg_priv_t *ctx,
+                                       va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.qm_min = CAST(VP9E_SET_QM_MIN, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_QM_MAX: overrides qm_max in a copy of the extra config
+// and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_qm_max(vpx_codec_alg_priv_t *ctx,
+                                       va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.qm_max = CAST(VP9E_SET_QM_MAX, args);
+  return update_extra_cfg(ctx, &updated);
+}
+#endif
+
+// Handler for VP9E_SET_FRAME_PARALLEL_DECODING: overrides
+// frame_parallel_decoding_mode in a copy of the extra config and applies the
+// copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_frame_parallel_decoding_mode(
+    vpx_codec_alg_priv_t *ctx, va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.frame_parallel_decoding_mode =
+      CAST(VP9E_SET_FRAME_PARALLEL_DECODING, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_AQ_MODE: overrides aq_mode in a copy of the extra
+// config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_aq_mode(vpx_codec_alg_priv_t *ctx,
+                                        va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.aq_mode = CAST(VP9E_SET_AQ_MODE, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_MIN_GF_INTERVAL: overrides min_gf_interval in a copy
+// of the extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_min_gf_interval(vpx_codec_alg_priv_t *ctx,
+                                                va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.min_gf_interval = CAST(VP9E_SET_MIN_GF_INTERVAL, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_MAX_GF_INTERVAL: overrides max_gf_interval in a copy
+// of the extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_max_gf_interval(vpx_codec_alg_priv_t *ctx,
+                                                va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.max_gf_interval = CAST(VP9E_SET_MAX_GF_INTERVAL, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_FRAME_PERIODIC_BOOST: overrides frame_periodic_boost
+// in a copy of the extra config and applies the copy through
+// update_extra_cfg().
+static vpx_codec_err_t ctrl_set_frame_periodic_boost(vpx_codec_alg_priv_t *ctx,
+                                                     va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.frame_periodic_boost = CAST(VP9E_SET_FRAME_PERIODIC_BOOST, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Creates the encoder's private state the first time it is called for |ctx|.
+//
+// Allocates the private struct and its buffer pool, takes an internal copy of
+// the caller's encoder config, installs the default extra config, validates
+// the combination and, if valid, creates the compressor. Does nothing and
+// returns VPX_CODEC_OK when ctx->priv is already set. |data| is unused.
+//
+// On an error return the partially built state stays attached to ctx->priv;
+// nothing is released here.
+static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
+                                    vpx_codec_priv_enc_mr_cfg_t *data) {
+  vpx_codec_alg_priv_t *priv;
+  vpx_codec_err_t res;
+  (void)data;
+
+  if (ctx->priv != NULL) return VPX_CODEC_OK;
+
+  priv = vpx_calloc(1, sizeof(*priv));
+  if (priv == NULL) return VPX_CODEC_MEM_ERROR;
+
+  ctx->priv = (vpx_codec_priv_t *)priv;
+  ctx->priv->init_flags = ctx->init_flags;
+  ctx->priv->enc.total_encoders = 1;
+
+  priv->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool));
+  if (priv->buffer_pool == NULL) return VPX_CODEC_MEM_ERROR;
+
+#if CONFIG_MULTITHREAD
+  if (pthread_mutex_init(&priv->buffer_pool->pool_mutex, NULL)) {
+    return VPX_CODEC_MEM_ERROR;
+  }
+#endif
+
+  if (ctx->config.enc) {
+    // Update the reference to the config structure to an internal copy.
+    priv->cfg = *ctx->config.enc;
+    ctx->config.enc = &priv->cfg;
+  }
+
+  priv->extra_cfg = default_extra_cfg;
+  once(vp10_initialize_enc);
+
+  res = validate_config(priv, &priv->cfg, &priv->extra_cfg);
+  if (res != VPX_CODEC_OK) return res;
+
+  set_encoder_config(&priv->oxcf, &priv->cfg, &priv->extra_cfg);
+#if CONFIG_VP9_HIGHBITDEPTH
+  priv->oxcf.use_highbitdepth =
+      (ctx->init_flags & VPX_CODEC_USE_HIGHBITDEPTH) ? 1 : 0;
+#endif
+  priv->cpi = vp10_create_compressor(&priv->oxcf, priv->buffer_pool);
+  if (priv->cpi == NULL) return VPX_CODEC_MEM_ERROR;
+
+  // Encoded packets are collected in the private packet list.
+  priv->cpi->output_pkt_list = &priv->pkt_list.head;
+  return res;
+}
+
+// Releases everything owned by the encoder instance: the compressed-data
+// buffer, the compressor, the buffer pool (and its mutex when multithreading
+// is configured) and finally |ctx| itself. Always returns VPX_CODEC_OK.
+static vpx_codec_err_t encoder_destroy(vpx_codec_alg_priv_t *ctx) {
+  BufferPool *const pool = ctx->buffer_pool;
+
+  free(ctx->cx_data);
+  vp10_remove_compressor(ctx->cpi);
+#if CONFIG_MULTITHREAD
+  pthread_mutex_destroy(&pool->pool_mutex);
+#endif
+  vpx_free(pool);
+  vpx_free(ctx);
+  return VPX_CODEC_OK;
+}
+
+// Chooses the encoder MODE (BEST / GOOD / REALTIME) from the pass type and
+// the caller's |deadline|, and pushes the change to the compressor only when
+// the mode actually differs from the current one.
+//
+//   one pass : no deadline -> BEST; deadline longer than the frame's display
+//              time (in microseconds) -> GOOD; otherwise REALTIME.
+//   last pass: deadline -> GOOD, no deadline -> BEST.
+//   otherwise: BEST.
+static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
+                                    unsigned long duration,
+                                    unsigned long deadline) {
+  const int has_deadline = deadline > 0;
+  MODE new_mode = BEST;
+
+  if (ctx->cfg.g_pass == VPX_RC_ONE_PASS) {
+    if (has_deadline) {
+      const vpx_codec_enc_cfg_t *const cfg = &ctx->cfg;
+
+      // Convert duration parameter from stream timebase to microseconds.
+      const uint64_t duration_us = (uint64_t)duration * 1000000 *
+                                   (uint64_t)cfg->g_timebase.num /
+                                   (uint64_t)cfg->g_timebase.den;
+
+      // If the deadline is more than the duration this frame is to be shown,
+      // use good quality mode. Otherwise use realtime mode.
+      if (deadline > duration_us) {
+        new_mode = GOOD;
+      } else {
+        new_mode = REALTIME;
+      }
+    }
+  } else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS) {
+    if (has_deadline) new_mode = GOOD;
+  }
+
+  if (ctx->oxcf.mode == new_mode) return;
+
+  ctx->oxcf.mode = new_mode;
+  vp10_change_config(ctx->cpi, &ctx->oxcf);
+}
+
+// Turn on to test if supplemental superframe data breaks decoding
+// #define TEST_SUPPLEMENTAL_SUPERFRAME_DATA
+// Appends a superframe index to the pending compressed data in |ctx|.
+//
+// Layout written: one marker byte, then for each pending frame except the
+// last its (size - 1) as (mag + 1) little-endian bytes, then the marker byte
+// again. The marker is 0xc0 | (mag << 3) | (frame_count - 1), where mag is
+// the smallest value in 0..3 such that every stored size fits in (mag + 1)
+// bytes.
+//
+// The index is written, and ctx->pending_cx_data_sz advanced, only if it
+// fits in the remaining buffer space. Returns the index size in bytes
+// whether or not it was written.
+static int write_superframe_index(vpx_codec_alg_priv_t *ctx) {
+  uint8_t marker = 0xc0;
+  unsigned int mask;
+  int mag, index_sz;
+  int i;
+  size_t max_frame_sz = 0;
+
+  assert(ctx->pending_frame_count);
+  assert(ctx->pending_frame_count <= 8);
+
+  // Add the number of frames to the marker byte
+  marker |= ctx->pending_frame_count - 1;
+  // Only the first count - 1 sizes are stored, so only they bound the
+  // magnitude.
+  for (i = 0; i < ctx->pending_frame_count - 1; i++) {
+    const size_t frame_sz = (unsigned int)ctx->pending_frame_sizes[i] - 1;
+    max_frame_sz = frame_sz > max_frame_sz ? frame_sz : max_frame_sz;
+  }
+
+  // Choose the magnitude
+  for (mag = 0, mask = 0xff; mag < 4; mag++) {
+    if (max_frame_sz <= mask) break;
+    mask <<= 8;
+    mask |= 0xff;
+  }
+  marker |= mag << 3;
+
+  // Write the index
+  index_sz = 2 + (mag + 1) * (ctx->pending_frame_count - 1);
+  if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) {
+    uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz;
+    // Reuses the function-scope |i|; the former inner declaration shadowed it.
+    int j;
+#ifdef TEST_SUPPLEMENTAL_SUPERFRAME_DATA
+    uint8_t marker_test = 0xc0;
+    int mag_test = 2;     // 1 - 4
+    int frames_test = 4;  // 1 - 8
+    int index_sz_test = 2 + mag_test * frames_test;
+    marker_test |= frames_test - 1;
+    marker_test |= (mag_test - 1) << 3;
+    *x++ = marker_test;
+    for (i = 0; i < mag_test * frames_test; ++i)
+      *x++ = 0;  // fill up with arbitrary data
+    *x++ = marker_test;
+    ctx->pending_cx_data_sz += index_sz_test;
+    printf("Added supplemental superframe data\n");
+#endif
+
+    *x++ = marker;
+    for (i = 0; i < ctx->pending_frame_count - 1; i++) {
+      unsigned int this_sz;
+
+      assert(ctx->pending_frame_sizes[i] > 0);
+      this_sz = (unsigned int)ctx->pending_frame_sizes[i] - 1;
+      // Least-significant byte first.
+      for (j = 0; j <= mag; j++) {
+        *x++ = this_sz & 0xff;
+        this_sz >>= 8;
+      }
+    }
+    *x++ = marker;
+    ctx->pending_cx_data_sz += index_sz;
+#ifdef TEST_SUPPLEMENTAL_SUPERFRAME_DATA
+    index_sz += index_sz_test;
+#endif
+  }
+  return index_sz;
+}
+
+// vp9 uses 10,000,000 ticks/second as time stamp
+#define TICKS_PER_SEC 10000000LL
+
+// Converts |n| units of |timebase| (num / den seconds each) into ticks at
+// TICKS_PER_SEC, truncating the division.
+static int64_t timebase_units_to_ticks(const vpx_rational_t *timebase,
+                                       int64_t n) {
+  const int64_t scaled = n * TICKS_PER_SEC * timebase->num;
+  return scaled / timebase->den;
+}
+
+// Converts |n| ticks (TICKS_PER_SEC per second) back into units of
+// |timebase|, adding a rounding term of TICKS_PER_SEC * num / 2 - 1 before
+// the divisions.
+static int64_t ticks_to_timebase_units(const vpx_rational_t *timebase,
+                                       int64_t n) {
+  const int64_t rounding = TICKS_PER_SEC * timebase->num / 2 - 1;
+  const int64_t numerator = n * timebase->den + rounding;
+  return numerator / timebase->num / TICKS_PER_SEC;
+}
+
+// Builds the packet flags for an encoded frame: the library flags shifted
+// into bits 16 and up, plus VPX_FRAME_IS_KEY when FRAMEFLAGS_KEY is set in
+// |lib_flags| and VPX_FRAME_IS_DROPPABLE when cpi->droppable is set.
+static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP10_COMP *cpi,
+                                                   unsigned int lib_flags) {
+  vpx_codec_frame_flags_t pkt_flags = lib_flags << 16;
+
+  if (lib_flags & FRAMEFLAGS_KEY) {
+    pkt_flags |= VPX_FRAME_IS_KEY;
+  }
+  if (cpi->droppable) {
+    pkt_flags |= VPX_FRAME_IS_DROPPABLE;
+  }
+  return pkt_flags;
+}
+
+// Submits one raw frame (or a flush request when |img| is NULL) to the
+// compressor and collects every compressed frame it produces into the
+// packet list of |ctx|.
+//
+//   img       frame to encode, or NULL to drain buffered frames.
+//   pts       presentation timestamp in timebase units.
+//   duration  frame duration in timebase units.
+//   enc_flags per-frame encoding flags.
+//   deadline  time budget, used only to pick the encoder mode.
+//
+// Frames that are not shown are held back and emitted together with the next
+// shown frame as a single packet followed by a superframe index.
+//
+// Returns VPX_CODEC_OK on success, VPX_CODEC_INVALID_PARAM when there is no
+// compressor or the flags conflict, VPX_CODEC_MEM_ERROR when the output
+// buffer cannot be allocated, or the error reported by the compressor.
+static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
+                                      const vpx_image_t *img,
+                                      vpx_codec_pts_t pts,
+                                      unsigned long duration,
+                                      vpx_enc_frame_flags_t enc_flags,
+                                      unsigned long deadline) {
+  // volatile: these are read after a possible longjmp back to the setjmp
+  // below.
+  volatile vpx_codec_err_t res = VPX_CODEC_OK;
+  volatile vpx_enc_frame_flags_t flags = enc_flags;
+  VP10_COMP *const cpi = ctx->cpi;
+  const vpx_rational_t *const timebase = &ctx->cfg.g_timebase;
+  size_t data_sz;
+
+  if (cpi == NULL) return VPX_CODEC_INVALID_PARAM;
+
+  if (img != NULL) {
+    res = validate_img(ctx, img);
+    // TODO(jzern) the checks related to cpi's validity should be treated as a
+    // failure condition, encoder setup is done fully in init() currently.
+    if (res == VPX_CODEC_OK) {
+      // The product is computed in size_t: in unsigned int arithmetic
+      // width * height * bits-per-sample can wrap for large frames and lead
+      // to an undersized output buffer.
+#if CONFIG_EXT_REFS
+      data_sz = (size_t)ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img);
+#else
+      // There's no codec control for multiple alt-refs so check the encoder
+      // instance for its status to determine the compressed data size.
+      data_sz = (size_t)ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 *
+                (cpi->multi_arf_allowed ? 8 : 2);
+#endif  // CONFIG_EXT_REFS
+      if (data_sz < 4096) data_sz = 4096;
+      // Grow (never shrink) the compressed-data buffer.
+      if (ctx->cx_data == NULL || ctx->cx_data_sz < data_sz) {
+        ctx->cx_data_sz = data_sz;
+        free(ctx->cx_data);
+        ctx->cx_data = (unsigned char *)malloc(ctx->cx_data_sz);
+        if (ctx->cx_data == NULL) {
+          return VPX_CODEC_MEM_ERROR;
+        }
+      }
+    }
+  }
+
+  pick_quickcompress_mode(ctx, duration, deadline);
+  vpx_codec_pkt_list_init(&ctx->pkt_list);
+
+  // Handle Flags
+  if (((flags & VP8_EFLAG_NO_UPD_GF) && (flags & VP8_EFLAG_FORCE_GF)) ||
+      ((flags & VP8_EFLAG_NO_UPD_ARF) && (flags & VP8_EFLAG_FORCE_ARF))) {
+    ctx->base.err_detail = "Conflicting flags.";
+    return VPX_CODEC_INVALID_PARAM;
+  }
+
+  // Error landing point: reached through a longjmp to cpi->common.error.jmp
+  // (presumably raised by vpx_internal_error() -- confirm).
+  if (setjmp(cpi->common.error.jmp)) {
+    cpi->common.error.setjmp = 0;
+    res = update_error_state(ctx, &cpi->common.error);
+    vpx_clear_system_state();
+    return res;
+  }
+  cpi->common.error.setjmp = 1;
+
+  vp10_apply_encoding_flags(cpi, flags);
+
+  // Handle fixed keyframe intervals
+  if (ctx->cfg.kf_mode == VPX_KF_AUTO &&
+      ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) {
+    if (++ctx->fixed_kf_cntr > ctx->cfg.kf_min_dist) {
+      flags |= VPX_EFLAG_FORCE_KF;
+      ctx->fixed_kf_cntr = 1;
+    }
+  }
+
+  if (res == VPX_CODEC_OK) {
+    unsigned int lib_flags = 0;
+    YV12_BUFFER_CONFIG sd;
+    int64_t dst_time_stamp = timebase_units_to_ticks(timebase, pts);
+    int64_t dst_end_time_stamp =
+        timebase_units_to_ticks(timebase, pts + duration);
+    size_t size, cx_data_sz;
+    unsigned char *cx_data;
+
+    // Set up internal flags
+    if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) cpi->b_calculate_psnr = 1;
+
+    if (img != NULL) {
+      res = image2yuvconfig(img, &sd);
+
+      // Store the original flags in to the frame buffer. Will extract the
+      // key frame flag when we actually encode this frame.
+      if (vp10_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd,
+                                 dst_time_stamp, dst_end_time_stamp)) {
+        res = update_error_state(ctx, &cpi->common.error);
+      }
+      // Flags queued by encoder_set_config() apply to one frame only.
+      ctx->next_frame_flags = 0;
+    }
+
+    cx_data = ctx->cx_data;
+    cx_data_sz = ctx->cx_data_sz;
+
+    /* Any pending invisible frames? */
+    if (ctx->pending_cx_data) {
+      // Move held-back data to the start of the (possibly new) buffer and
+      // continue writing after it.
+      memmove(cx_data, ctx->pending_cx_data, ctx->pending_cx_data_sz);
+      ctx->pending_cx_data = cx_data;
+      cx_data += ctx->pending_cx_data_sz;
+      cx_data_sz -= ctx->pending_cx_data_sz;
+
+      /* TODO: this is a minimal check, the underlying codec doesn't respect
+       * the buffer size anyway.
+       */
+      if (cx_data_sz < ctx->cx_data_sz / 2) {
+        vpx_internal_error(&cpi->common.error, VPX_CODEC_ERROR,
+                           "Compressed data buffer too small");
+        return VPX_CODEC_ERROR;
+      }
+    }
+
+    // Pull compressed frames while at least half of the buffer is free; the
+    // last argument asks for a flush when no new image was supplied.
+    while (cx_data_sz >= ctx->cx_data_sz / 2 &&
+           -1 != vp10_get_compressed_data(cpi, &lib_flags, &size, cx_data,
+                                          &dst_time_stamp, &dst_end_time_stamp,
+                                          !img)) {
+      if (size) {
+        vpx_codec_cx_pkt_t pkt;
+
+        // Pack invisible frames with the next visible frame
+        if (!cpi->common.show_frame) {
+          if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
+          ctx->pending_cx_data_sz += size;
+          ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+          cx_data += size;
+          cx_data_sz -= size;
+
+          continue;
+        }
+
+        // Add the frame packet to the list of returned packets.
+        pkt.kind = VPX_CODEC_CX_FRAME_PKT;
+        pkt.data.frame.pts = ticks_to_timebase_units(timebase, dst_time_stamp);
+        pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
+            timebase, dst_end_time_stamp - dst_time_stamp);
+        pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
+
+        if (ctx->pending_cx_data) {
+          // Emit the held-back frames plus this one as a single packet,
+          // followed by the superframe index.
+          ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+          ctx->pending_cx_data_sz += size;
+          size += write_superframe_index(ctx);
+          pkt.data.frame.buf = ctx->pending_cx_data;
+          pkt.data.frame.sz = ctx->pending_cx_data_sz;
+          ctx->pending_cx_data = NULL;
+          ctx->pending_cx_data_sz = 0;
+          ctx->pending_frame_count = 0;
+        } else {
+          pkt.data.frame.buf = cx_data;
+          pkt.data.frame.sz = size;
+        }
+        pkt.data.frame.partition_id = -1;
+
+        vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
+
+        cx_data += size;
+        cx_data_sz -= size;
+      }
+    }
+  }
+
+  cpi->common.error.setjmp = 0;
+  return res;
+}
+
+// Returns the next packet from the encoder's packet list, using |iter| as
+// the iteration cursor passed to vpx_codec_pkt_list_get().
+static const vpx_codec_cx_pkt_t *encoder_get_cxdata(vpx_codec_alg_priv_t *ctx,
+                                                    vpx_codec_iter_t *iter) {
+  const vpx_codec_cx_pkt_t *const next_pkt =
+      vpx_codec_pkt_list_get(&ctx->pkt_list.head, iter);
+  return next_pkt;
+}
+
+// Handler for VP8_SET_REFERENCE: converts the caller's image into a YV12
+// buffer description and hands it to vp10_set_reference_enc() for the
+// selected reference frame. Returns VPX_CODEC_INVALID_PARAM for a NULL
+// argument; the result of the set call itself is not checked.
+static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  vpx_ref_frame_t *const ref = va_arg(args, vpx_ref_frame_t *);
+  YV12_BUFFER_CONFIG sd;
+
+  if (ref == NULL) return VPX_CODEC_INVALID_PARAM;
+
+  image2yuvconfig(&ref->img, &sd);
+  vp10_set_reference_enc(ctx->cpi, ref_frame_to_vp10_reframe(ref->frame_type),
+                         &sd);
+  return VPX_CODEC_OK;
+}
+
+// Handler for VP8_COPY_REFERENCE: converts the caller's image into a YV12
+// buffer description and hands it to vp10_copy_reference_enc() for the
+// selected reference frame. Returns VPX_CODEC_INVALID_PARAM for a NULL
+// argument; the result of the copy call itself is not checked.
+static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx,
+                                           va_list args) {
+  vpx_ref_frame_t *const ref = va_arg(args, vpx_ref_frame_t *);
+  YV12_BUFFER_CONFIG sd;
+
+  if (ref == NULL) return VPX_CODEC_INVALID_PARAM;
+
+  image2yuvconfig(&ref->img, &sd);
+  vp10_copy_reference_enc(ctx->cpi, ref_frame_to_vp10_reframe(ref->frame_type),
+                          &sd);
+  return VPX_CODEC_OK;
+}
+
+// Handler for VP9_GET_REFERENCE: looks up the reference frame buffer with
+// index frame->idx and describes it in frame->img. Returns
+// VPX_CODEC_INVALID_PARAM for a NULL argument and VPX_CODEC_ERROR when no
+// buffer is found for that index.
+static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  vp9_ref_frame_t *const ref = va_arg(args, vp9_ref_frame_t *);
+  YV12_BUFFER_CONFIG *fb;
+
+  if (ref == NULL) return VPX_CODEC_INVALID_PARAM;
+
+  fb = get_ref_frame(&ctx->cpi->common, ref->idx);
+  if (fb == NULL) return VPX_CODEC_ERROR;
+
+  yuvconfig2image(&ref->img, fb, NULL);
+  return VPX_CODEC_OK;
+}
+
+// Handler for VP10_GET_NEW_FRAME_IMAGE: fills the caller's image from the
+// buffer reported by vp10_get_last_show_frame(). Returns
+// VPX_CODEC_INVALID_PARAM for a NULL argument and VPX_CODEC_ERROR when that
+// call reports a non-zero status.
+static vpx_codec_err_t ctrl_get_new_frame_image(vpx_codec_alg_priv_t *ctx,
+                                                va_list args) {
+  vpx_image_t *const out_img = va_arg(args, vpx_image_t *);
+  YV12_BUFFER_CONFIG last_shown;
+
+  if (out_img == NULL) return VPX_CODEC_INVALID_PARAM;
+
+  if (vp10_get_last_show_frame(ctx->cpi, &last_shown) != 0)
+    return VPX_CODEC_ERROR;
+
+  yuvconfig2image(out_img, &last_shown, NULL);
+  return VPX_CODEC_OK;
+}
+
+// Handler for VP8_SET_POSTPROC: not supported by this encoder. Both
+// arguments are unused and the call always returns VPX_CODEC_INCAPABLE.
+static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  (void)args;
+  (void)ctx;
+  return VPX_CODEC_INCAPABLE;
+}
+
+// Returns a pointer to ctx->preview_img filled from the compressor's preview
+// frame, or NULL when vp10_get_preview_raw_frame() reports a non-zero status.
+// The returned image is stored inside |ctx|.
+static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) {
+  YV12_BUFFER_CONFIG preview;
+
+  if (vp10_get_preview_raw_frame(ctx->cpi, &preview) != 0) return NULL;
+
+  yuvconfig2image(&ctx->preview_img, &preview, NULL);
+  return &ctx->preview_img;
+}
+
+// Handler for VP8E_USE_REFERENCE: forwards the integer argument to
+// vp10_use_as_reference(). Always returns VPX_CODEC_OK.
+static vpx_codec_err_t ctrl_use_reference(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  const int ref_flags = va_arg(args, int);
+  vp10_use_as_reference(ctx->cpi, ref_flags);
+  return VPX_CODEC_OK;
+}
+
+// Handler for VP8E_SET_ROI_MAP: not implemented. Both arguments are unused
+// and the call always returns VPX_CODEC_INVALID_PARAM.
+static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx,
+                                        va_list args) {
+  (void)args;
+  (void)ctx;
+
+  // TODO(yaowu): Need to re-implement and test for VP9.
+  return VPX_CODEC_INVALID_PARAM;
+}
+
+// Handler for VP8E_SET_ACTIVEMAP: passes the caller's active map and its
+// rows/cols to vp10_set_active_map(). Returns VPX_CODEC_OK when that call
+// returns zero, and VPX_CODEC_INVALID_PARAM otherwise or for a NULL argument.
+static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx,
+                                           va_list args) {
+  vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *);
+
+  if (!map) return VPX_CODEC_INVALID_PARAM;
+
+  if (vp10_set_active_map(ctx->cpi, map->active_map, (int)map->rows,
+                          (int)map->cols))
+    return VPX_CODEC_INVALID_PARAM;
+
+  return VPX_CODEC_OK;
+}
+
+// Handler for VP9E_GET_ACTIVEMAP: passes the caller's active map buffer and
+// its rows/cols to vp10_get_active_map(). Returns VPX_CODEC_OK when that call
+// returns zero, and VPX_CODEC_INVALID_PARAM otherwise or for a NULL argument.
+static vpx_codec_err_t ctrl_get_active_map(vpx_codec_alg_priv_t *ctx,
+                                           va_list args) {
+  vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *);
+
+  if (!map) return VPX_CODEC_INVALID_PARAM;
+
+  if (vp10_get_active_map(ctx->cpi, map->active_map, (int)map->rows,
+                          (int)map->cols))
+    return VPX_CODEC_INVALID_PARAM;
+
+  return VPX_CODEC_OK;
+}
+
+// Handler for VP8E_SET_SCALEMODE: forwards the requested horizontal and
+// vertical scaling modes to vp10_set_internal_size(). Returns VPX_CODEC_OK
+// when that call returns zero, and VPX_CODEC_INVALID_PARAM otherwise or for
+// a NULL argument.
+static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx,
+                                           va_list args) {
+  vpx_scaling_mode_t *const mode = va_arg(args, vpx_scaling_mode_t *);
+  int status;
+
+  if (!mode) return VPX_CODEC_INVALID_PARAM;
+
+  status = vp10_set_internal_size(ctx->cpi, (VPX_SCALING)mode->h_scaling_mode,
+                                  (VPX_SCALING)mode->v_scaling_mode);
+  if (status == 0) return VPX_CODEC_OK;
+  return VPX_CODEC_INVALID_PARAM;
+}
+
+// Handler for VP9E_SET_TUNE_CONTENT: overrides content in a copy of the extra
+// config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_tune_content(vpx_codec_alg_priv_t *ctx,
+                                             va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.content = CAST(VP9E_SET_TUNE_CONTENT, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_COLOR_SPACE: overrides color_space in a copy of the
+// extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_color_space(vpx_codec_alg_priv_t *ctx,
+                                            va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.color_space = CAST(VP9E_SET_COLOR_SPACE, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_COLOR_RANGE: overrides color_range in a copy of the
+// extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_color_range(vpx_codec_alg_priv_t *ctx,
+                                            va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.color_range = CAST(VP9E_SET_COLOR_RANGE, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Handler for VP9E_SET_RENDER_SIZE: the argument is a pointer to two ints,
+// render width followed by render height. Both are written to a copy of the
+// extra config, which is then applied through update_extra_cfg().
+//
+// Returns VPX_CODEC_INVALID_PARAM when the pointer is NULL (previously a
+// NULL argument was dereferenced), otherwise the result of
+// update_extra_cfg().
+static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx,
+                                            va_list args) {
+  struct vp10_extracfg extra_cfg = ctx->extra_cfg;
+  int *const render_size = va_arg(args, int *);
+  if (render_size == NULL) return VPX_CODEC_INVALID_PARAM;
+  extra_cfg.render_width = render_size[0];
+  extra_cfg.render_height = render_size[1];
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+
+// Handler for VP10E_SET_SUPERBLOCK_SIZE: overrides superblock_size in a copy
+// of the extra config and applies the copy through update_extra_cfg().
+static vpx_codec_err_t ctrl_set_superblock_size(vpx_codec_alg_priv_t *ctx,
+                                                va_list args) {
+  struct vp10_extracfg updated = ctx->extra_cfg;
+  updated.superblock_size = CAST(VP10E_SET_SUPERBLOCK_SIZE, args);
+  return update_extra_cfg(ctx, &updated);
+}
+
+// Table pairing each supported codec control id with the function that
+// handles it. The list ends with the { -1, NULL } sentinel entry.
+static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
+ { VP8_COPY_REFERENCE, ctrl_copy_reference },
+ { VP8E_USE_REFERENCE, ctrl_use_reference },
+
+ // Setters
+ { VP8_SET_REFERENCE, ctrl_set_reference },
+ { VP8_SET_POSTPROC, ctrl_set_previewpp },
+ { VP8E_SET_ROI_MAP, ctrl_set_roi_map },
+ { VP8E_SET_ACTIVEMAP, ctrl_set_active_map },
+ { VP8E_SET_SCALEMODE, ctrl_set_scale_mode },
+ { VP8E_SET_CPUUSED, ctrl_set_cpuused },
+ { VP8E_SET_ENABLEAUTOALTREF, ctrl_set_enable_auto_alt_ref },
+#if CONFIG_EXT_REFS
+ { VP8E_SET_ENABLEAUTOBWDREF, ctrl_set_enable_auto_bwd_ref },
+#endif // CONFIG_EXT_REFS
+ { VP8E_SET_SHARPNESS, ctrl_set_sharpness },
+ { VP8E_SET_STATIC_THRESHOLD, ctrl_set_static_thresh },
+ { VP9E_SET_TILE_COLUMNS, ctrl_set_tile_columns },
+ { VP9E_SET_TILE_ROWS, ctrl_set_tile_rows },
+ { VP8E_SET_ARNR_MAXFRAMES, ctrl_set_arnr_max_frames },
+ { VP8E_SET_ARNR_STRENGTH, ctrl_set_arnr_strength },
+ { VP8E_SET_ARNR_TYPE, ctrl_set_arnr_type },
+ { VP8E_SET_TUNING, ctrl_set_tuning },
+ { VP8E_SET_CQ_LEVEL, ctrl_set_cq_level },
+ { VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_rc_max_intra_bitrate_pct },
+ { VP9E_SET_MAX_INTER_BITRATE_PCT, ctrl_set_rc_max_inter_bitrate_pct },
+ { VP9E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct },
+ { VP9E_SET_LOSSLESS, ctrl_set_lossless },
+#if CONFIG_AOM_QM
+ { VP9E_SET_ENABLE_QM, ctrl_set_enable_qm },
+ { VP9E_SET_QM_MIN, ctrl_set_qm_min },
+ { VP9E_SET_QM_MAX, ctrl_set_qm_max },
+#endif
+ { VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_frame_parallel_decoding_mode },
+ { VP9E_SET_AQ_MODE, ctrl_set_aq_mode },
+ { VP9E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost },
+ { VP9E_SET_TUNE_CONTENT, ctrl_set_tune_content },
+ { VP9E_SET_COLOR_SPACE, ctrl_set_color_space },
+ { VP9E_SET_COLOR_RANGE, ctrl_set_color_range },
+ { VP9E_SET_NOISE_SENSITIVITY, ctrl_set_noise_sensitivity },
+ { VP9E_SET_MIN_GF_INTERVAL, ctrl_set_min_gf_interval },
+ { VP9E_SET_MAX_GF_INTERVAL, ctrl_set_max_gf_interval },
+ { VP9E_SET_RENDER_SIZE, ctrl_set_render_size },
+ { VP10E_SET_SUPERBLOCK_SIZE, ctrl_set_superblock_size },
+
+ // Getters
+ { VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer },
+ { VP8E_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64 },
+ { VP9_GET_REFERENCE, ctrl_get_reference },
+ { VP9E_GET_ACTIVEMAP, ctrl_get_active_map },
+ { VP10_GET_NEW_FRAME_IMAGE, ctrl_get_new_frame_image },
+
+ { -1, NULL },
+};
+
+// Default encoder configuration, one entry per usage; only usage 0 is
+// defined. The initializer is positional, so the trailing comments name the
+// configuration field each value is intended for.
+static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
+ { 0,
+ {
+ // NOLINT
+ 0, // g_usage
+ 8, // g_threads
+ 0, // g_profile
+
+ 320, // g_w
+ 240, // g_h
+ VPX_BITS_8, // g_bit_depth
+ 8, // g_input_bit_depth
+
+ { 1, 30 }, // g_timebase
+
+ 0, // g_error_resilient
+
+ VPX_RC_ONE_PASS, // g_pass
+
+ 25, // g_lag_in_frames
+
+ 0, // rc_dropframe_thresh
+ 0, // rc_resize_allowed
+ 0, // rc_scaled_width
+ 0, // rc_scaled_height
+ 60, // rc_resize_down_threshold
+ 30, // rc_resize_up_threshold
+
+ VPX_VBR, // rc_end_usage
+ { NULL, 0 }, // rc_twopass_stats_in
+ { NULL, 0 }, // rc_firstpass_mb_stats_in
+ 256, // rc_target_bandwidth
+ 0, // rc_min_quantizer
+ 63, // rc_max_quantizer
+ 25, // rc_undershoot_pct
+ 25, // rc_overshoot_pct
+
+ 6000, // rc_max_buffer_size
+ 4000, // rc_buffer_initial_size
+ 5000, // rc_buffer_optimal_size
+
+ 50, // rc_two_pass_vbrbias
+ 0, // rc_two_pass_vbrmin_section
+ 2000, // rc_two_pass_vbrmax_section
+
+ // keyframing settings (kf)
+ VPX_KF_AUTO, // kf_mode
+ 0, // kf_min_dist
+ 9999, // kf_max_dist
+ } },
+};
+
+#ifndef VERSION_STRING
+#define VERSION_STRING
+#endif
+// Interface descriptor for the VP10 encoder: name, ABI version, capability
+// flags, init/destroy entry points, the control table, an all-NULL decoder
+// function group, and the encoder function group.
+CODEC_INTERFACE(vpx_codec_vp10_cx) = {
+ "WebM Project VP10 Encoder" VERSION_STRING,
+ VPX_CODEC_INTERNAL_ABI_VERSION,
+#if CONFIG_VP9_HIGHBITDEPTH
+ VPX_CODEC_CAP_HIGHBITDEPTH |
+#endif
+ VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR, // vpx_codec_caps_t
+ encoder_init, // vpx_codec_init_fn_t
+ encoder_destroy, // vpx_codec_destroy_fn_t
+ encoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t
+ {
+ // NOLINT
+ // Decoder entry points: unused by the encoder interface.
+ NULL, // vpx_codec_peek_si_fn_t
+ NULL, // vpx_codec_get_si_fn_t
+ NULL, // vpx_codec_decode_fn_t
+ NULL, // vpx_codec_frame_get_fn_t
+ NULL // vpx_codec_set_fb_fn_t
+ },
+ {
+ // NOLINT
+ // Encoder entry points.
+ 1, // 1 cfg map
+ encoder_usage_cfg_map, // vpx_codec_enc_cfg_map_t
+ encoder_encode, // vpx_codec_encode_fn_t
+ encoder_get_cxdata, // vpx_codec_get_cx_data_fn_t
+ encoder_set_config, // vpx_codec_enc_config_set_fn_t
+ NULL, // vpx_codec_get_global_headers_fn_t
+ encoder_get_preview, // vpx_codec_get_preview_frame_fn_t
+ NULL // vpx_codec_enc_mr_get_mem_loc_fn_t
+ }
+};
diff --git a/av1/vp10_dx_iface.c b/av1/vp10_dx_iface.c
new file mode 100644
index 0000000..9e17c5a
--- /dev/null
+++ b/av1/vp10_dx_iface.c
@@ -0,0 +1,1159 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "./vpx_config.h"
+#include "./vpx_version.h"
+
+#include "aom/internal/vpx_codec_internal.h"
+#include "aom/vp8dx.h"
+#include "aom/vpx_decoder.h"
+#include "aom_dsp/bitreader_buffer.h"
+#include "aom_dsp/vpx_dsp_common.h"
+#include "aom_util/vpx_thread.h"
+
+#include "av1/common/alloccommon.h"
+#include "av1/common/frame_buffers.h"
+#include "av1/common/enums.h"
+
+#include "av1/decoder/decoder.h"
+#include "av1/decoder/decodeframe.h"
+
+#include "av1/vp10_iface_common.h"
+
+typedef vpx_codec_stream_info_t vp10_stream_info_t; // Plain alias of the generic stream info.
+
+// This limit is due to framebuffer numbers.
+// TODO(hkuang): Remove this limit after implementing ondemand framebuffers.
+#define FRAME_CACHE_SIZE 6 // Cache maximum 6 decoded frames.
+
+typedef struct cache_frame {
+ int fb_idx; // Frame buffer index (cm->new_fb_idx) of the cached frame.
+ vpx_image_t img; // Image descriptor built from the decoded frame.
+} cache_frame;
+
+struct vpx_codec_alg_priv {
+ vpx_codec_priv_t base;
+ vpx_codec_dec_cfg_t cfg; // Private copy of the caller's decoder config.
+ vp10_stream_info_t si; // Stream info filled in by the header peek.
+ int postproc_cfg_set;
+ vp8_postproc_cfg_t postproc_cfg;
+ vpx_decrypt_cb decrypt_cb; // Optional decrypt callback passed on to the decoder.
+ void *decrypt_state;
+ vpx_image_t img; // Image returned by decoder_get_frame() outside the cache.
+ int img_avail;
+ int flushed; // Set by a NULL/0 decode call, cleared by the next real one.
+ int invert_tile_order;
+ int last_show_frame; // Index of last output frame; -1 until one is output.
+ int byte_alignment;
+ int skip_loop_filter;
+ int decode_tile_row;
+ int decode_tile_col;
+
+ // Frame parallel related.
+ int frame_parallel_decode; // frame-based threading.
+ VPxWorker *frame_workers; // NULL until the first decode call creates them.
+ int num_frame_workers; // 1 in serial mode, else cfg.threads (capped).
+ int next_submit_worker_id; // Worker that takes the next submitted frame.
+ int last_submit_worker_id; // Worker whose context is copied to the next one.
+ int next_output_worker_id; // Worker that is synced next for output.
+ int available_threads; // Workers not currently holding a submitted frame.
+ cache_frame frame_cache[FRAME_CACHE_SIZE]; // Ring of frames awaiting output.
+ int frame_cache_write; // Ring slot written next.
+ int frame_cache_read; // Ring slot read next.
+ int num_cache_frames; // Number of occupied ring slots.
+ int need_resync; // wait for key/intra-only frame
+ // BufferPool that holds all reference frames. Shared by all the FrameWorkers.
+ BufferPool *buffer_pool;
+
+ // External frame buffer info to save for VP10 common.
+ void *ext_priv; // Private data associated with the external frame buffers.
+ vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb;
+ vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb;
+};
+
+// Allocates the decoder's private context the first time it is called on
+// |ctx|; a context that already has one is left untouched. |data| is unused.
+// Returns VPX_CODEC_MEM_ERROR if the allocation fails, else VPX_CODEC_OK.
+static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx,
+                                    vpx_codec_priv_enc_mr_cfg_t *data) {
+  vpx_codec_alg_priv_t *priv;
+  (void)data;
+
+  // Only the vpx_codec_alg_priv_t structure is allocated here. More memory
+  // may be required at the time the stream information becomes known.
+  if (ctx->priv) return VPX_CODEC_OK;
+
+  priv = (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv));
+  if (priv == NULL) return VPX_CODEC_MEM_ERROR;
+
+  ctx->priv = (vpx_codec_priv_t *)priv;
+  ctx->priv->init_flags = ctx->init_flags;
+  priv->si.sz = sizeof(priv->si);
+  priv->flushed = 0;
+  // Frame parallel decode needs the init flag and more than one thread.
+  priv->frame_parallel_decode =
+      ctx->config.dec && ctx->config.dec->threads > 1 &&
+      (ctx->init_flags & VPX_CODEC_USE_FRAME_THREADING);
+  // Keep a private copy of the caller's config and point the context at it.
+  if (ctx->config.dec) {
+    priv->cfg = *ctx->config.dec;
+    ctx->config.dec = &priv->cfg;
+  }
+  return VPX_CODEC_OK;
+}
+
+// Stops every frame worker, releases the decoder instance and buffers owned
+// by each one, then frees the shared buffer pool and |ctx| itself.
+static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) {
+  if (ctx->frame_workers != NULL) {
+    int w;
+    for (w = 0; w < ctx->num_frame_workers; ++w) {
+      VPxWorker *const worker = &ctx->frame_workers[w];
+      FrameWorkerData *const fwd = (FrameWorkerData *)worker->data1;
+      vpx_get_worker_interface()->end(worker);
+      vp10_remove_common(&fwd->pbi->common);
+#if CONFIG_LOOP_RESTORATION
+      vp10_free_restoration_buffers(&fwd->pbi->common);
+#endif  // CONFIG_LOOP_RESTORATION
+      vp10_decoder_remove(fwd->pbi);
+      vpx_free(fwd->scratch_buffer);
+#if CONFIG_MULTITHREAD
+      pthread_mutex_destroy(&fwd->stats_mutex);
+      pthread_cond_destroy(&fwd->stats_cond);
+#endif
+      vpx_free(fwd);
+    }
+#if CONFIG_MULTITHREAD
+    pthread_mutex_destroy(&ctx->buffer_pool->pool_mutex);
+#endif
+  }
+
+  if (ctx->buffer_pool) {
+    vp10_free_ref_frame_buffers(ctx->buffer_pool);
+    vp10_free_internal_frame_buffers(&ctx->buffer_pool->int_frame_buffers);
+  }
+  vpx_free(ctx->frame_workers);
+  vpx_free(ctx->buffer_pool);
+  vpx_free(ctx);
+  return VPX_CODEC_OK;
+}
+
+// Advances |rb| past the bit-depth, color-space and subsampling fields.
+// Returns 0 when the color space is sRGB and the profile is neither 1 nor 3,
+// and 1 otherwise.
+static int parse_bitdepth_colorspace_sampling(BITSTREAM_PROFILE profile,
+                                              struct vpx_read_bit_buffer *rb) {
+  const int odd_profile = (profile == PROFILE_1 || profile == PROFILE_3);
+  vpx_color_space_t cs;
+  if (profile >= PROFILE_2) rb->bit_offset += 1;  // Bit-depth 10 or 12.
+  cs = (vpx_color_space_t)vpx_rb_read_literal(rb, 3);
+
+  if (cs == VPX_CS_SRGB) {
+    // RGB is only available in version 1.
+    if (!odd_profile) return 0;
+    rb->bit_offset += 1;  // unused
+    return 1;
+  }
+
+  rb->bit_offset += 1;  // [16,235] (including xvycc) vs [0,255] range.
+  if (odd_profile) rb->bit_offset += 3;  // subsampling x/y (2) + unused (1).
+  return 1;
+}
+
+static vpx_codec_err_t decoder_peek_si_internal(
+ const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si,
+ int *is_intra_only, vpx_decrypt_cb decrypt_cb, void *decrypt_state) {
+ int intra_only_flag = 0;
+ uint8_t clear_buffer[9]; // Decrypted copy of at most the first 9 bytes.
+
+ if (data + data_sz <= data) return VPX_CODEC_INVALID_PARAM; // Rejects data_sz == 0 and address wrap.
+
+ si->is_kf = 0;
+ si->w = si->h = 0; // Stay zero unless a frame size is parsed below.
+
+ if (decrypt_cb) {
+ data_sz = VPXMIN(sizeof(clear_buffer), data_sz);
+ decrypt_cb(decrypt_state, data, clear_buffer, data_sz);
+ data = clear_buffer; // Parse the decrypted bytes from here on.
+ }
+
+ {
+ int show_frame;
+ int error_resilient;
+ struct vpx_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL };
+ const int frame_marker = vpx_rb_read_literal(&rb, 2);
+ const BITSTREAM_PROFILE profile = vp10_read_profile(&rb);
+
+ if (frame_marker != VPX_FRAME_MARKER) return VPX_CODEC_UNSUP_BITSTREAM;
+
+ if (profile >= MAX_PROFILES) return VPX_CODEC_UNSUP_BITSTREAM;
+
+ if ((profile >= 2 && data_sz <= 1) || data_sz < 1)
+ return VPX_CODEC_UNSUP_BITSTREAM;
+
+ if (vpx_rb_read_bit(&rb)) { // show an existing frame
+ vpx_rb_read_literal(&rb, 3); // Frame buffer to show.
+ return VPX_CODEC_OK; // Returns before |*is_intra_only| is written.
+ }
+
+ if (data_sz <= 8) return VPX_CODEC_UNSUP_BITSTREAM;
+
+ si->is_kf = !vpx_rb_read_bit(&rb);
+ show_frame = vpx_rb_read_bit(&rb);
+ error_resilient = vpx_rb_read_bit(&rb);
+
+ if (si->is_kf) {
+ if (!vp10_read_sync_code(&rb)) return VPX_CODEC_UNSUP_BITSTREAM;
+
+ if (!parse_bitdepth_colorspace_sampling(profile, &rb))
+ return VPX_CODEC_UNSUP_BITSTREAM;
+ vp10_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
+ } else {
+ intra_only_flag = show_frame ? 0 : vpx_rb_read_bit(&rb); // Only read for hidden frames.
+
+ rb.bit_offset += error_resilient ? 0 : 2; // reset_frame_context
+
+ if (intra_only_flag) {
+ if (!vp10_read_sync_code(&rb)) return VPX_CODEC_UNSUP_BITSTREAM;
+ if (profile > PROFILE_0) {
+ if (!parse_bitdepth_colorspace_sampling(profile, &rb))
+ return VPX_CODEC_UNSUP_BITSTREAM;
+ }
+ rb.bit_offset += REF_FRAMES; // refresh_frame_flags
+ vp10_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
+ }
+ }
+ }
+ if (is_intra_only != NULL) *is_intra_only = intra_only_flag; // Optional output.
+ return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t decoder_peek_si(const uint8_t *data,
+ unsigned int data_sz,
+ vpx_codec_stream_info_t *si) {
+ return decoder_peek_si_internal(data, data_sz, si, NULL, NULL, NULL); // No intra-only output, no decryption.
+}
+
+// Copies the cached stream info into |si| and records the copied size in
+// si->sz. vp10_stream_info_t aliases vpx_codec_stream_info_t in this file.
+static vpx_codec_err_t decoder_get_si(vpx_codec_alg_priv_t *ctx,
+                                      vpx_codec_stream_info_t *si) {
+  size_t n = sizeof(vpx_codec_stream_info_t);
+  if (si->sz >= sizeof(vp10_stream_info_t)) n = sizeof(vp10_stream_info_t);
+  memcpy(si, &ctx->si, n);
+  si->sz = (unsigned int)n;
+  return VPX_CODEC_OK;
+}
+
+static void set_error_detail(vpx_codec_alg_priv_t *ctx,
+ const char *const error) {
+ ctx->base.err_detail = error; // Stores the pointer only; the text is not copied.
+}
+
+// Records |error|'s detail (if any) when a code is set; returns the code.
+static vpx_codec_err_t update_error_state(
+    vpx_codec_alg_priv_t *ctx, const struct vpx_internal_error_info *error) {
+  const vpx_codec_err_t code = error->error_code;
+  if (code) set_error_detail(ctx, error->has_detail ? error->detail : NULL);
+  return code;
+}
+
+// Points every frame worker's buffer pool at the frame buffer callbacks: the
+// application's pair when both were registered, otherwise the internal ones.
+static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) {
+  const int use_ext_fb =
+      ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL;
+  int w;
+  for (w = 0; w < ctx->num_frame_workers; ++w) {
+    FrameWorkerData *const fwd = (FrameWorkerData *)ctx->frame_workers[w].data1;
+    VP10_COMMON *const cm = &fwd->pbi->common;
+    BufferPool *const pool = cm->buffer_pool;
+
+    cm->new_fb_idx = INVALID_IDX;
+    cm->byte_alignment = ctx->byte_alignment;
+    cm->skip_loop_filter = ctx->skip_loop_filter;
+
+    if (use_ext_fb) {
+      pool->get_fb_cb = ctx->get_ext_fb_cb;
+      pool->release_fb_cb = ctx->release_ext_fb_cb;
+      pool->cb_priv = ctx->ext_priv;
+      continue;
+    }
+    pool->get_fb_cb = vp10_get_frame_buffer;
+    pool->release_fb_cb = vp10_release_frame_buffer;
+    if (vp10_alloc_internal_frame_buffers(&pool->int_frame_buffers))
+      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                         "Failed to initialize internal frame buffers");
+    pool->cb_priv = &pool->int_frame_buffers;
+  }
+}
+
+static void set_default_ppflags(vp8_postproc_cfg_t *cfg) { // Default postproc settings.
+ cfg->post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK;
+ cfg->deblocking_level = 4;
+ cfg->noise_level = 0;
+}
+
+static int frame_worker_hook(void *arg1, void *arg2) { // arg1: FrameWorkerData; arg2 unused.
+ FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1;
+ const uint8_t *data = frame_worker_data->data;
+ (void)arg2;
+
+ frame_worker_data->result = vp10_receive_compressed_data(
+ frame_worker_data->pbi, frame_worker_data->data_size, &data);
+ frame_worker_data->data_end = data; // Position reported back by the decode call.
+
+ if (frame_worker_data->pbi->common.frame_parallel_decode) {
+ // In frame parallel decoding, a worker thread must successfully decode all
+ // the compressed data.
+ if (frame_worker_data->result != 0 ||
+ frame_worker_data->data + frame_worker_data->data_size - 1 > data) {
+ VPxWorker *const worker = frame_worker_data->pbi->frame_worker_owner;
+ BufferPool *const pool = frame_worker_data->pbi->common.buffer_pool;
+ // Signal all the other threads that are waiting for this frame.
+ vp10_frameworker_lock_stats(worker);
+ frame_worker_data->frame_context_ready = 1;
+ lock_buffer_pool(pool);
+ frame_worker_data->pbi->cur_buf->buf.corrupted = 1;
+ unlock_buffer_pool(pool);
+ frame_worker_data->pbi->need_resync = 1;
+ vp10_frameworker_signal_stats(worker);
+ vp10_frameworker_unlock_stats(worker);
+ return 0; // Failure value; the success path below returns 1.
+ }
+ } else if (frame_worker_data->result != 0) {
+ // Check decode result in serial decode.
+ frame_worker_data->pbi->cur_buf->buf.corrupted = 1;
+ frame_worker_data->pbi->need_resync = 1;
+ }
+ return !frame_worker_data->result; // 1 when result == 0, otherwise 0.
+}
+
+static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { // Builds the buffer pool and frame workers.
+ int i;
+ const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+
+ ctx->last_show_frame = -1; // No frame has been output yet.
+ ctx->next_submit_worker_id = 0;
+ ctx->last_submit_worker_id = 0;
+ ctx->next_output_worker_id = 0;
+ ctx->frame_cache_read = 0;
+ ctx->frame_cache_write = 0;
+ ctx->num_cache_frames = 0;
+ ctx->need_resync = 1; // Output is withheld until check_resync() clears this.
+ ctx->num_frame_workers =
+ (ctx->frame_parallel_decode == 1) ? ctx->cfg.threads : 1;
+ if (ctx->num_frame_workers > MAX_DECODE_THREADS)
+ ctx->num_frame_workers = MAX_DECODE_THREADS;
+ ctx->available_threads = ctx->num_frame_workers;
+ ctx->flushed = 0;
+
+ ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool));
+ if (ctx->buffer_pool == NULL) return VPX_CODEC_MEM_ERROR;
+
+#if CONFIG_MULTITHREAD
+ if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) {
+ set_error_detail(ctx, "Failed to allocate buffer pool mutex");
+ return VPX_CODEC_MEM_ERROR;
+ }
+#endif
+
+ ctx->frame_workers = (VPxWorker *)vpx_malloc(ctx->num_frame_workers *
+ sizeof(*ctx->frame_workers));
+ if (ctx->frame_workers == NULL) {
+ set_error_detail(ctx, "Failed to allocate frame_workers");
+ return VPX_CODEC_MEM_ERROR;
+ }
+
+ for (i = 0; i < ctx->num_frame_workers; ++i) {
+ VPxWorker *const worker = &ctx->frame_workers[i];
+ FrameWorkerData *frame_worker_data = NULL;
+ winterface->init(worker);
+ worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData));
+ if (worker->data1 == NULL) {
+ set_error_detail(ctx, "Failed to allocate frame_worker_data");
+ return VPX_CODEC_MEM_ERROR; // NOTE(review): later workers stay uninitialized and nothing is released here; confirm teardown copes.
+ }
+ frame_worker_data = (FrameWorkerData *)worker->data1;
+ frame_worker_data->pbi = vp10_decoder_create(ctx->buffer_pool);
+ if (frame_worker_data->pbi == NULL) {
+ set_error_detail(ctx, "Failed to allocate frame_worker_data"); // NOTE(review): same text as the data1 failure above.
+ return VPX_CODEC_MEM_ERROR;
+ }
+ frame_worker_data->pbi->frame_worker_owner = worker;
+ frame_worker_data->worker_id = i;
+ frame_worker_data->scratch_buffer = NULL;
+ frame_worker_data->scratch_buffer_size = 0;
+ frame_worker_data->frame_context_ready = 0;
+ frame_worker_data->received_frame = 0;
+#if CONFIG_MULTITHREAD
+ if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) {
+ set_error_detail(ctx, "Failed to allocate frame_worker_data mutex");
+ return VPX_CODEC_MEM_ERROR;
+ }
+
+ if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) {
+ set_error_detail(ctx, "Failed to allocate frame_worker_data cond");
+ return VPX_CODEC_MEM_ERROR;
+ }
+#endif
+ // If decoding in serial mode, FrameWorker thread could create tile worker
+ // thread or loopfilter thread.
+ frame_worker_data->pbi->max_threads =
+ (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0;
+
+ frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order;
+ frame_worker_data->pbi->common.frame_parallel_decode =
+ ctx->frame_parallel_decode;
+ worker->hook = (VPxWorkerHook)frame_worker_hook;
+ if (!winterface->reset(worker)) {
+ set_error_detail(ctx, "Frame Worker thread creation failed");
+ return VPX_CODEC_MEM_ERROR;
+ }
+ }
+
+ // If postprocessing was enabled by the application and a
+ // configuration has not been provided, default it.
+ if (!ctx->postproc_cfg_set && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
+ set_default_ppflags(&ctx->postproc_cfg);
+
+ init_buffer_callbacks(ctx); // Installs the frame buffer callbacks on each worker's pool.
+
+ return VPX_CODEC_OK;
+}
+
+// Clears ctx->need_resync once |pbi| decoded a key or intra-only frame.
+static INLINE void check_resync(vpx_codec_alg_priv_t *const ctx,
+                                const VP10Decoder *const pbi) {
+  if (ctx->need_resync != 1 || pbi->need_resync != 0) return;
+  if (pbi->common.intra_only || pbi->common.frame_type == KEY_FRAME)
+    ctx->need_resync = 0;
+}
+
+// Hands one compressed frame to a worker. Serial mode decodes it before
+// returning and advances |*data| past the consumed bytes; frame parallel mode
+// copies it into the next worker's scratch buffer and launches that worker.
+static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
+                                  const uint8_t **data, unsigned int data_sz,
+                                  void *user_priv, int64_t deadline) {
+  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+  (void)deadline;
+
+  // Determine the stream parameters. Note that we rely on peek_si to validate
+  // that we have a buffer that does not wrap around the top of the heap.
+  if (!ctx->si.h) {
+    int is_intra_only = 0;
+    const vpx_codec_err_t res =
+        decoder_peek_si_internal(*data, data_sz, &ctx->si, &is_intra_only,
+                                 ctx->decrypt_cb, ctx->decrypt_state);
+    if (res != VPX_CODEC_OK) return res;
+    if (!ctx->si.is_kf && !is_intra_only) return VPX_CODEC_ERROR;
+  }
+
+  if (!ctx->frame_parallel_decode) {
+    VPxWorker *const worker = ctx->frame_workers;
+    FrameWorkerData *const fwd = (FrameWorkerData *)worker->data1;
+    fwd->data = *data;
+    fwd->data_size = data_sz;
+    fwd->user_priv = user_priv;
+    fwd->received_frame = 1;
+
+    // Set these even if already initialized. The caller may have changed the
+    // decrypt config between frames.
+    fwd->pbi->decrypt_cb = ctx->decrypt_cb;
+    fwd->pbi->decrypt_state = ctx->decrypt_state;
+
+#if CONFIG_EXT_TILE
+    fwd->pbi->dec_tile_row = ctx->decode_tile_row;
+    fwd->pbi->dec_tile_col = ctx->decode_tile_col;
+#endif  // CONFIG_EXT_TILE
+
+    worker->had_error = 0;
+    winterface->execute(worker);
+
+    // Update data pointer after decode.
+    *data = fwd->data_end;
+    if (worker->had_error)
+      return update_error_state(ctx, &fwd->pbi->common.error);
+
+    check_resync(ctx, fwd->pbi);
+  } else {
+    VPxWorker *const worker = &ctx->frame_workers[ctx->next_submit_worker_id];
+    FrameWorkerData *const fwd = (FrameWorkerData *)worker->data1;
+    // Copy context from last worker thread to next worker thread.
+    if (ctx->next_submit_worker_id != ctx->last_submit_worker_id)
+      vp10_frameworker_copy_context(
+          worker, &ctx->frame_workers[ctx->last_submit_worker_id]);
+
+    fwd->pbi->ready_for_new_data = 0;
+    // Copy the compressed data into worker's internal buffer.
+    // TODO(hkuang): Will all the workers allocate the same size
+    // as the size of the first intra frame be better? This will
+    // avoid too many deallocate and allocate.
+    if (fwd->scratch_buffer_size < data_sz) {
+      // Grow through a temporary: if the reallocation fails, the old buffer
+      // stays owned by the worker instead of being overwritten with NULL.
+      uint8_t *const buf = (uint8_t *)vpx_realloc(fwd->scratch_buffer, data_sz);
+      if (buf == NULL) {
+        set_error_detail(ctx, "Failed to reallocate scratch buffer");
+        return VPX_CODEC_MEM_ERROR;
+      }
+      fwd->scratch_buffer = buf;
+      fwd->scratch_buffer_size = data_sz;
+    }
+    fwd->data_size = data_sz;
+    memcpy(fwd->scratch_buffer, *data, data_sz);
+
+    fwd->frame_decoded = 0;
+    fwd->frame_context_ready = 0;
+    fwd->received_frame = 1;
+    fwd->data = fwd->scratch_buffer;
+    fwd->user_priv = user_priv;
+
+    if (ctx->next_submit_worker_id != ctx->last_submit_worker_id)
+      ctx->last_submit_worker_id =
+          (ctx->last_submit_worker_id + 1) % ctx->num_frame_workers;
+    ctx->next_submit_worker_id =
+        (ctx->next_submit_worker_id + 1) % ctx->num_frame_workers;
+    --ctx->available_threads;
+    worker->had_error = 0;
+    winterface->launch(worker);
+  }
+
+  return VPX_CODEC_OK;
+}
+
+// Waits for the next output worker, returns it to the available pool and, if
+// it produced a frame, appends that frame to the output cache ring.
+static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) {
+  YV12_BUFFER_CONFIG sd;
+  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+  VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
+  FrameWorkerData *const fwd = (FrameWorkerData *)worker->data1;
+  VP10_COMMON *const cm = &fwd->pbi->common;
+
+  ctx->next_output_worker_id =
+      (ctx->next_output_worker_id + 1) % ctx->num_frame_workers;
+  // TODO(hkuang): Add worker error handling here.
+  winterface->sync(worker);
+  fwd->received_frame = 0;
+  ++ctx->available_threads;
+  check_resync(ctx, fwd->pbi);
+  if (vp10_get_raw_frame(fwd->pbi, &sd) == 0) {
+    cache_frame *const slot = &ctx->frame_cache[ctx->frame_cache_write];
+    slot->fb_idx = cm->new_fb_idx;
+    yuvconfig2image(&slot->img, &sd, fwd->user_priv);
+    slot->img.fb_priv =
+        cm->buffer_pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
+    ctx->frame_cache_write = (ctx->frame_cache_write + 1) % FRAME_CACHE_SIZE;
+    ++ctx->num_cache_frames;
+  }
+}
+
+static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx,
+ const uint8_t *data, unsigned int data_sz,
+ void *user_priv, long deadline) { // Decodes one buffer, which may be a superframe.
+ const uint8_t *data_start = data;
+ const uint8_t *const data_end = data + data_sz;
+ vpx_codec_err_t res;
+ uint32_t frame_sizes[8]; // Filled by the superframe index parser.
+ int frame_count;
+
+ if (data == NULL && data_sz == 0) { // Flush request.
+ ctx->flushed = 1;
+ return VPX_CODEC_OK;
+ }
+
+ // Reset flushed when receiving a valid frame.
+ ctx->flushed = 0;
+
+ // Initialize the decoder workers on the first frame.
+ if (ctx->frame_workers == NULL) {
+ const vpx_codec_err_t res = init_decoder(ctx);
+ if (res != VPX_CODEC_OK) return res;
+ }
+
+ res = vp10_parse_superframe_index(data, data_sz, frame_sizes, &frame_count,
+ ctx->decrypt_cb, ctx->decrypt_state);
+ if (res != VPX_CODEC_OK) return res;
+
+ if (ctx->frame_parallel_decode) {
+ // Decode in frame parallel mode. When decoding in this mode, the frame
+ // passed to the decoder must be either a normal frame or a superframe with
+ // superframe index so the decoder could get each frame's start position
+ // in the superframe.
+ if (frame_count > 0) {
+ int i;
+
+ for (i = 0; i < frame_count; ++i) {
+ const uint8_t *data_start_copy = data_start;
+ const uint32_t frame_size = frame_sizes[i];
+ if (data_start < data ||
+ frame_size > (uint32_t)(data_end - data_start)) {
+ set_error_detail(ctx, "Invalid frame size in index");
+ return VPX_CODEC_CORRUPT_FRAME;
+ }
+
+ if (ctx->available_threads == 0) {
+ // No more threads for decoding. Wait until the next output worker
+ // finishes decoding. Then copy the decoded frame into cache.
+ if (ctx->num_cache_frames < FRAME_CACHE_SIZE) {
+ wait_worker_and_cache_frame(ctx);
+ } else {
+ // TODO(hkuang): Add unit test to test this path.
+ set_error_detail(ctx, "Frame output cache is full.");
+ return VPX_CODEC_ERROR;
+ }
+ }
+
+ res =
+ decode_one(ctx, &data_start_copy, frame_size, user_priv, deadline);
+ if (res != VPX_CODEC_OK) return res;
+ data_start += frame_size; // Advance by the indexed size, not by bytes consumed.
+ }
+ } else {
+ if (ctx->available_threads == 0) {
+ // No more threads for decoding. Wait until the next output worker
+ // finishes decoding. Then copy the decoded frame into cache.
+ if (ctx->num_cache_frames < FRAME_CACHE_SIZE) {
+ wait_worker_and_cache_frame(ctx);
+ } else {
+ // TODO(hkuang): Add unit test to test this path.
+ set_error_detail(ctx, "Frame output cache is full.");
+ return VPX_CODEC_ERROR;
+ }
+ }
+
+ res = decode_one(ctx, &data, data_sz, user_priv, deadline);
+ if (res != VPX_CODEC_OK) return res;
+ }
+ } else {
+ // Decode in serial mode.
+ if (frame_count > 0) {
+ int i;
+
+ for (i = 0; i < frame_count; ++i) {
+ const uint8_t *data_start_copy = data_start;
+ const uint32_t frame_size = frame_sizes[i];
+ vpx_codec_err_t res; // Shadows the function-level |res|.
+ if (data_start < data ||
+ frame_size > (uint32_t)(data_end - data_start)) {
+ set_error_detail(ctx, "Invalid frame size in index");
+ return VPX_CODEC_CORRUPT_FRAME;
+ }
+
+ res =
+ decode_one(ctx, &data_start_copy, frame_size, user_priv, deadline);
+ if (res != VPX_CODEC_OK) return res;
+
+ data_start += frame_size;
+ }
+ } else {
+ while (data_start < data_end) {
+ const uint32_t frame_size = (uint32_t)(data_end - data_start);
+ const vpx_codec_err_t res =
+ decode_one(ctx, &data_start, frame_size, user_priv, deadline);
+ if (res != VPX_CODEC_OK) return res;
+
+ // Account for suboptimal termination by the encoder.
+ while (data_start < data_end) {
+ const uint8_t marker =
+ read_marker(ctx->decrypt_cb, ctx->decrypt_state, data_start);
+ if (marker) break;
+ ++data_start;
+ }
+ }
+ }
+ }
+
+ return res; // VPX_CODEC_OK here: every failure above returned early.
+}
+
+// In frame parallel mode, drops the reference held on the frame that was
+// output last, if there is one.
+static void release_last_output_frame(vpx_codec_alg_priv_t *ctx) {
+  BufferPool *const pool = ctx->buffer_pool;
+  if (!ctx->frame_parallel_decode || ctx->last_show_frame < 0) return;
+
+  lock_buffer_pool(pool);
+  decrease_ref_count(ctx->last_show_frame, pool->frame_bufs, pool);
+  unlock_buffer_pool(pool);
+}
+
+static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
+ vpx_codec_iter_t *iter) {
+ vpx_image_t *img = NULL;
+
+ // Only return frame when all the cpu are busy or
+ // application flushed the decoder in frame parallel decode.
+ if (ctx->frame_parallel_decode && ctx->available_threads > 0 &&
+ !ctx->flushed) {
+ return NULL;
+ }
+
+ // Output the frames in the cache first.
+ if (ctx->num_cache_frames > 0) {
+ release_last_output_frame(ctx);
+ ctx->last_show_frame = ctx->frame_cache[ctx->frame_cache_read].fb_idx;
+ if (ctx->need_resync) return NULL; // The cached frame stays queued in this case.
+ img = &ctx->frame_cache[ctx->frame_cache_read].img;
+ ctx->frame_cache_read = (ctx->frame_cache_read + 1) % FRAME_CACHE_SIZE;
+ --ctx->num_cache_frames;
+ return img;
+ }
+
+ // iter acts as a flip flop, so an image is only returned on the first
+ // call to get_frame.
+ if (*iter == NULL && ctx->frame_workers != NULL) {
+ do {
+ YV12_BUFFER_CONFIG sd;
+ const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+ VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
+ FrameWorkerData *const frame_worker_data =
+ (FrameWorkerData *)worker->data1;
+ ctx->next_output_worker_id =
+ (ctx->next_output_worker_id + 1) % ctx->num_frame_workers;
+ // Wait for the frame from worker thread.
+ if (winterface->sync(worker)) {
+ // Check if worker has received any frames.
+ if (frame_worker_data->received_frame == 1) {
+ ++ctx->available_threads;
+ frame_worker_data->received_frame = 0;
+ check_resync(ctx, frame_worker_data->pbi);
+ }
+ if (vp10_get_raw_frame(frame_worker_data->pbi, &sd) == 0) {
+ VP10_COMMON *const cm = &frame_worker_data->pbi->common;
+ RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+ release_last_output_frame(ctx);
+ ctx->last_show_frame = frame_worker_data->pbi->common.new_fb_idx;
+ if (ctx->need_resync) return NULL;
+ yuvconfig2image(&ctx->img, &sd, frame_worker_data->user_priv);
+
+#if CONFIG_EXT_TILE
+ if (frame_worker_data->pbi->dec_tile_row >= 0) { // Crop the output to one tile row.
+ const int tile_row =
+ VPXMIN(frame_worker_data->pbi->dec_tile_row, cm->tile_rows - 1);
+ const int mi_row = tile_row * cm->tile_height;
+ const int ssy = ctx->img.y_chroma_shift;
+ int plane;
+ ctx->img.planes[0] += mi_row * MI_SIZE * ctx->img.stride[0];
+ for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+ ctx->img.planes[plane] +=
+ mi_row * (MI_SIZE >> ssy) * ctx->img.stride[plane];
+ }
+ ctx->img.d_h =
+ VPXMIN(cm->tile_height, cm->mi_rows - mi_row) * MI_SIZE;
+ }
+
+ if (frame_worker_data->pbi->dec_tile_col >= 0) { // Crop the output to one tile column.
+ const int tile_col =
+ VPXMIN(frame_worker_data->pbi->dec_tile_col, cm->tile_cols - 1);
+ const int mi_col = tile_col * cm->tile_width;
+ const int ssx = ctx->img.x_chroma_shift;
+ int plane;
+ ctx->img.planes[0] += mi_col * MI_SIZE;
+ for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
+ ctx->img.planes[plane] += mi_col * (MI_SIZE >> ssx);
+ }
+ ctx->img.d_w =
+ VPXMIN(cm->tile_width, cm->mi_cols - mi_col) * MI_SIZE;
+ }
+#endif // CONFIG_EXT_TILE
+
+ ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
+ img = &ctx->img;
+ return img;
+ }
+ } else {
+ // Decoding failed. Release the worker thread.
+ frame_worker_data->received_frame = 0;
+ ++ctx->available_threads;
+ ctx->need_resync = 1;
+ if (ctx->flushed != 1) return NULL;
+ }
+ } while (ctx->next_output_worker_id != ctx->next_submit_worker_id); // Stop once every submitted worker was visited.
+ }
+ return NULL;
+}
+
+// Registers application-supplied frame buffer callbacks. Both callbacks are
+// required, and they can only be installed before the frame workers exist.
+static vpx_codec_err_t decoder_set_fb_fn(
+    vpx_codec_alg_priv_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get,
+    vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) {
+  if (cb_get == NULL || cb_release == NULL) return VPX_CODEC_INVALID_PARAM;
+
+  // If the decoder has already been initialized, do not accept changes to
+  // the frame buffer functions.
+  if (ctx->frame_workers != NULL) return VPX_CODEC_ERROR;
+
+  ctx->get_ext_fb_cb = cb_get;
+  ctx->release_ext_fb_cb = cb_release;
+  ctx->ext_priv = cb_priv;
+  return VPX_CODEC_OK;
+}
+
+// Control handler: converts the image in the vpx_ref_frame_t passed through
+// |args| and hands it to vp10_set_reference_dec(). Serial decode only.
+static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *);
+  FrameWorkerData *frame_worker_data;
+  YV12_BUFFER_CONFIG sd;
+
+  // Only support this function in serial decode.
+  if (ctx->frame_parallel_decode) {
+    set_error_detail(ctx, "Not supported in frame parallel decode");
+    return VPX_CODEC_INCAPABLE;
+  }
+  if (frame == NULL) return VPX_CODEC_INVALID_PARAM;
+  // The workers only exist after the first decode call; fail cleanly before.
+  if (ctx->frame_workers == NULL) return VPX_CODEC_ERROR;
+
+  frame_worker_data = (FrameWorkerData *)ctx->frame_workers->data1;
+  image2yuvconfig(&frame->img, &sd);
+  return vp10_set_reference_dec(&frame_worker_data->pbi->common,
+                                ref_frame_to_vp10_reframe(frame->frame_type),
+                                &sd);
+}
+
+// Control handler: converts the image in the vpx_ref_frame_t passed through
+// |args| and hands it to vp10_copy_reference_dec(). Serial decode only.
+static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx,
+                                           va_list args) {
+  vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *);
+  FrameWorkerData *frame_worker_data;
+  YV12_BUFFER_CONFIG sd;
+
+  // Only support this function in serial decode.
+  if (ctx->frame_parallel_decode) {
+    set_error_detail(ctx, "Not supported in frame parallel decode");
+    return VPX_CODEC_INCAPABLE;
+  }
+  if (frame == NULL) return VPX_CODEC_INVALID_PARAM;
+  // The workers only exist after the first decode call; fail cleanly before.
+  if (ctx->frame_workers == NULL) return VPX_CODEC_ERROR;
+
+  frame_worker_data = (FrameWorkerData *)ctx->frame_workers->data1;
+  image2yuvconfig(&frame->img, &sd);
+  return vp10_copy_reference_dec(frame_worker_data->pbi,
+                                 (VPX_REFFRAME)frame->frame_type, &sd);
+}
+
+// Control handler: fills data->img from the reference frame selected by
+// data->idx, |data| being the vp9_ref_frame_t in |args|. Serial decode only.
+static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  vp9_ref_frame_t *const data = va_arg(args, vp9_ref_frame_t *);
+  FrameWorkerData *frame_worker_data;
+  YV12_BUFFER_CONFIG *fb;
+
+  // Only support this function in serial decode.
+  if (ctx->frame_parallel_decode) {
+    set_error_detail(ctx, "Not supported in frame parallel decode");
+    return VPX_CODEC_INCAPABLE;
+  }
+  if (data == NULL) return VPX_CODEC_INVALID_PARAM;
+  // The workers only exist after the first decode call; fail cleanly before.
+  if (ctx->frame_workers == NULL) return VPX_CODEC_ERROR;
+  frame_worker_data = (FrameWorkerData *)ctx->frame_workers->data1;
+  fb = get_ref_frame(&frame_worker_data->pbi->common, data->idx);
+  if (fb == NULL) return VPX_CODEC_ERROR;
+  yuvconfig2image(&data->img, fb, NULL);
+  return VPX_CODEC_OK;
+}
+
+// Control handler: fills the vpx_image_t passed through |args| from the frame
+// returned by vp10_get_frame_to_show(). Serial decode only. Returns
+// VPX_CODEC_ERROR when no worker exists yet or no frame is available.
+static vpx_codec_err_t ctrl_get_new_frame_image(vpx_codec_alg_priv_t *ctx,
+                                                va_list args) {
+  vpx_image_t *const new_img = va_arg(args, vpx_image_t *);
+  FrameWorkerData *frame_worker_data;
+  YV12_BUFFER_CONFIG new_frame;
+
+  // Only support this function in serial decode.
+  if (ctx->frame_parallel_decode) {
+    set_error_detail(ctx, "Not supported in frame parallel decode");
+    return VPX_CODEC_INCAPABLE;
+  }
+  if (new_img == NULL) return VPX_CODEC_INVALID_PARAM;
+  // The workers only exist after the first decode call; fail cleanly before.
+  if (ctx->frame_workers == NULL) return VPX_CODEC_ERROR;
+
+  frame_worker_data = (FrameWorkerData *)ctx->frame_workers->data1;
+  if (vp10_get_frame_to_show(frame_worker_data->pbi, &new_frame) != 0)
+    return VPX_CODEC_ERROR;
+
+  yuvconfig2image(new_img, &new_frame, NULL);
+  return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ (void)ctx;
+ (void)args;
+ return VPX_CODEC_INCAPABLE; // Always: this handler ignores both arguments.
+}
+
+static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ (void)ctx;
+ (void)args;
+ return VPX_CODEC_INCAPABLE; // Always: this handler ignores both arguments.
+}
+
+// Control handler: stores the first frame worker's refresh_frame_flags in the
+// int passed through |args|. Only available in serial decode. Returns
+// VPX_CODEC_ERROR while no frame worker exists.
+static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx,
+                                                 va_list args) {
+  int *const update_info = va_arg(args, int *);
+  FrameWorkerData *fwd;
+
+  // Only support this function in serial decode.
+  if (ctx->frame_parallel_decode) {
+    set_error_detail(ctx, "Not supported in frame parallel decode");
+    return VPX_CODEC_INCAPABLE;
+  }
+
+  // A missing destination is a caller error.
+  if (update_info == NULL) return VPX_CODEC_INVALID_PARAM;
+
+  // Without frame workers there is nothing to query.
+  if (ctx->frame_workers == NULL) return VPX_CODEC_ERROR;
+
+  fwd = (FrameWorkerData *)ctx->frame_workers->data1;
+  *update_info = fwd->pbi->refresh_frame_flags;
+  return VPX_CODEC_OK;
+}
+
+// Control handler: copies the corrupted flag of the last output frame into
+// the int passed through |args|. The int is left unchanged while no frame
+// has been output (last_show_frame < 0).
+static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx,
+                                                va_list args) {
+  int *const corrupted = va_arg(args, int *);
+  FrameWorkerData *fwd;
+  RefCntBuffer *frame_bufs;
+
+  // A missing destination is a caller error.
+  if (corrupted == NULL) return VPX_CODEC_INVALID_PARAM;
+  // Without frame workers there is nothing to query.
+  if (ctx->frame_workers == NULL) return VPX_CODEC_ERROR;
+
+  fwd = (FrameWorkerData *)ctx->frame_workers->data1;
+  frame_bufs = fwd->pbi->common.buffer_pool->frame_bufs;
+  // frame_to_show must be set before any corruption state is reported.
+  if (fwd->pbi->common.frame_to_show == NULL) return VPX_CODEC_ERROR;
+
+  if (ctx->last_show_frame >= 0)
+    *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted;
+  return VPX_CODEC_OK;
+}
+
+// Control handler: writes the first frame worker's frame width and height to
+// elements 0 and 1 of the int array passed through |args|. Only available in
+// serial decode. Returns VPX_CODEC_ERROR while no frame worker exists.
+static vpx_codec_err_t ctrl_get_frame_size(vpx_codec_alg_priv_t *ctx,
+                                           va_list args) {
+  int *const frame_size = va_arg(args, int *);
+  FrameWorkerData *fwd;
+  const VP10_COMMON *cm;
+
+  // Only support this function in serial decode.
+  if (ctx->frame_parallel_decode) {
+    set_error_detail(ctx, "Not supported in frame parallel decode");
+    return VPX_CODEC_INCAPABLE;
+  }
+
+  // A missing destination is a caller error.
+  if (frame_size == NULL) return VPX_CODEC_INVALID_PARAM;
+  // Without frame workers there is nothing to query.
+  if (ctx->frame_workers == NULL) return VPX_CODEC_ERROR;
+
+  fwd = (FrameWorkerData *)ctx->frame_workers->data1;
+  cm = &fwd->pbi->common;
+  frame_size[0] = cm->width;
+  frame_size[1] = cm->height;
+  return VPX_CODEC_OK;
+}
+
+// Control handler: stores the decoder's render width and height in
+// render_size[0] and render_size[1] of the caller's int array.
+// Returns INCAPABLE in frame parallel decode, INVALID_PARAM for a NULL output
+// pointer, ERROR when there is no frame worker, and OK otherwise.
+static vpx_codec_err_t ctrl_get_render_size(vpx_codec_alg_priv_t *ctx,
+                                            va_list args) {
+  int *const size_out = va_arg(args, int *);
+  const FrameWorkerData *worker_data;
+  const VP10_COMMON *common;
+
+  // Only support this function in serial decode.
+  if (ctx->frame_parallel_decode) {
+    set_error_detail(ctx, "Not supported in frame parallel decode");
+    return VPX_CODEC_INCAPABLE;
+  }
+
+  if (size_out == NULL) return VPX_CODEC_INVALID_PARAM;
+  if (ctx->frame_workers == NULL) return VPX_CODEC_ERROR;
+
+  // The dimensions come from the common state of the first frame worker.
+  worker_data = (const FrameWorkerData *)ctx->frame_workers->data1;
+  common = &worker_data->pbi->common;
+  size_out[0] = common->render_width;
+  size_out[1] = common->render_height;
+  return VPX_CODEC_OK;
+}
+
+// Control handler: stores the bit depth of the next output worker's decoder
+// in the caller's unsigned int. Returns INVALID_PARAM for a NULL output
+// pointer and ERROR when ctx->frame_workers is NULL.
+static vpx_codec_err_t ctrl_get_bit_depth(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  unsigned int *const bit_depth = va_arg(args, unsigned int *);
+  const VPxWorker *worker;
+  const FrameWorkerData *frame_worker_data;
+
+  if (bit_depth == NULL) return VPX_CODEC_INVALID_PARAM;
+  // Test the array itself: the address of an element is never NULL for a
+  // valid array, and indexing a NULL array is undefined behavior.
+  if (ctx->frame_workers == NULL) return VPX_CODEC_ERROR;
+
+  worker = &ctx->frame_workers[ctx->next_output_worker_id];
+  frame_worker_data = (const FrameWorkerData *)worker->data1;
+  *bit_depth = frame_worker_data->pbi->common.bit_depth;
+  return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t ctrl_set_invert_tile_order(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->invert_tile_order = va_arg(args, int);  // stored as given, not validated
+ return VPX_CODEC_OK;  // this control has no failure path
+}
+
+static vpx_codec_err_t ctrl_set_decryptor(vpx_codec_alg_priv_t *ctx,
+                                          va_list args) {
+  const vpx_decrypt_init *const cfg = va_arg(args, vpx_decrypt_init *);
+  ctx->decrypt_cb = (cfg != NULL) ? cfg->decrypt_cb : NULL;  // NULL cfg clears
+  ctx->decrypt_state = (cfg != NULL) ? cfg->decrypt_state : NULL;
+  return VPX_CODEC_OK;
+}
+
+// Control handler: accepts 0 (legacy) or a power of two in [32, 1024], stores
+// it in ctx and, when frame workers exist, in the first worker's common state.
+static vpx_codec_err_t ctrl_set_byte_alignment(vpx_codec_alg_priv_t *ctx,
+                                               va_list args) {
+  enum { kLegacyAlignment = 0, kMinAlignment = 32, kMaxAlignment = 1024 };
+  const int byte_alignment = va_arg(args, int);
+
+  if (byte_alignment != kLegacyAlignment) {
+    if (byte_alignment < kMinAlignment || byte_alignment > kMaxAlignment)
+      return VPX_CODEC_INVALID_PARAM;
+    if ((byte_alignment & (byte_alignment - 1)) != 0)  // not a power of two
+      return VPX_CODEC_INVALID_PARAM;
+  }
+
+  ctx->byte_alignment = byte_alignment;
+  if (ctx->frame_workers != NULL) {
+    FrameWorkerData *const data = (FrameWorkerData *)ctx->frame_workers->data1;
+    data->pbi->common.byte_alignment = byte_alignment;
+  }
+  return VPX_CODEC_OK;
+}
+
+// Control handler: stores the int argument in ctx->skip_loop_filter and, when
+// frame workers exist, copies it into the first worker's common state.
+static vpx_codec_err_t ctrl_set_skip_loop_filter(vpx_codec_alg_priv_t *ctx,
+                                                 va_list args) {
+  ctx->skip_loop_filter = va_arg(args, int);
+  if (ctx->frame_workers != NULL) {
+    FrameWorkerData *const data = (FrameWorkerData *)ctx->frame_workers->data1;
+    data->pbi->common.skip_loop_filter = ctx->skip_loop_filter;
+  }
+
+  return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t ctrl_set_decode_tile_row(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->decode_tile_row = va_arg(args, int);  // stored as given, not validated
+ return VPX_CODEC_OK;  // this control has no failure path
+}
+
+static vpx_codec_err_t ctrl_set_decode_tile_col(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->decode_tile_col = va_arg(args, int);  // stored as given, not validated
+ return VPX_CODEC_OK;  // this control has no failure path
+}
+
+static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {  // control ID -> handler
+ { VP8_COPY_REFERENCE, ctrl_copy_reference },
+
+ // Setters
+ { VP8_SET_REFERENCE, ctrl_set_reference },
+ { VP8_SET_POSTPROC, ctrl_set_postproc },
+ { VP8_SET_DBG_COLOR_REF_FRAME, ctrl_set_dbg_options },  // the four debug
+ { VP8_SET_DBG_COLOR_MB_MODES, ctrl_set_dbg_options },   // controls share one
+ { VP8_SET_DBG_COLOR_B_MODES, ctrl_set_dbg_options },    // handler, which
+ { VP8_SET_DBG_DISPLAY_MV, ctrl_set_dbg_options },       // reports INCAPABLE
+ { VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order },
+ { VPXD_SET_DECRYPTOR, ctrl_set_decryptor },
+ { VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment },
+ { VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter },
+ { VP10_SET_DECODE_TILE_ROW, ctrl_set_decode_tile_row },
+ { VP10_SET_DECODE_TILE_COL, ctrl_set_decode_tile_col },
+
+ // Getters
+ { VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates },
+ { VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted },
+ { VP9_GET_REFERENCE, ctrl_get_reference },
+ { VP9D_GET_DISPLAY_SIZE, ctrl_get_render_size },  // served by the render size
+ { VP9D_GET_BIT_DEPTH, ctrl_get_bit_depth },
+ { VP9D_GET_FRAME_SIZE, ctrl_get_frame_size },
+ { VP10_GET_NEW_FRAME_IMAGE, ctrl_get_new_frame_image },
+
+ { -1, NULL },  // NOTE(review): presumably the end-of-table sentinel; confirm
+};
+
+#ifndef VERSION_STRING  // fall back to an empty suffix for the name string
+#define VERSION_STRING
+#endif
+CODEC_INTERFACE(vpx_codec_vp10_dx) = {
+ "WebM Project VP10 Decoder" VERSION_STRING,  // name, with optional suffix
+ VPX_CODEC_INTERNAL_ABI_VERSION,
+ VPX_CODEC_CAP_DECODER |
+ VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // vpx_codec_caps_t
+ decoder_init, // vpx_codec_init_fn_t
+ decoder_destroy, // vpx_codec_destroy_fn_t
+ decoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t
+ {  // decoder entry points
+ // NOLINT
+ decoder_peek_si, // vpx_codec_peek_si_fn_t
+ decoder_get_si, // vpx_codec_get_si_fn_t
+ decoder_decode, // vpx_codec_decode_fn_t
+ decoder_get_frame, // vpx_codec_frame_get_fn_t
+ decoder_set_fb_fn, // vpx_codec_set_fb_fn_t
+ },
+ {  // encoder entry points: every slot is 0 or NULL in this decoder interface
+ // NOLINT
+ 0,  // NOTE(review): presumably the number of encoder config maps; confirm
+ NULL, // vpx_codec_enc_cfg_map_t
+ NULL, // vpx_codec_encode_fn_t
+ NULL, // vpx_codec_get_cx_data_fn_t
+ NULL, // vpx_codec_enc_config_set_fn_t
+ NULL, // vpx_codec_get_global_headers_fn_t
+ NULL, // vpx_codec_get_preview_frame_fn_t
+ NULL // vpx_codec_enc_mr_get_mem_loc_fn_t
+ }
+};
diff --git a/av1/vp10_iface_common.h b/av1/vp10_iface_common.h
new file mode 100644
index 0000000..37a9cc1
--- /dev/null
+++ b/av1/vp10_iface_common.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP10_VP10_IFACE_COMMON_H_
+#define VP10_VP10_IFACE_COMMON_H_
+
+#include "aom_ports/mem.h"
+
+static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
+                            void *user_priv) {
+  /** vpx_img_wrap() cannot express independent Y/U/V strides or the other
+   * alignment adjustments a YV12_BUFFER_CONFIG may carry, so every field of
+   * the image descriptor is filled in by hand. */
+  const int ss_x = yv12->subsampling_x != 0;
+  const int ss_y = yv12->subsampling_y != 0;
+
+  // Choose the 8-bit format and its bps value from the chroma subsampling.
+  if (!ss_x && !ss_y) {
+    img->fmt = VPX_IMG_FMT_I444;
+    img->bps = 24;
+  } else if (!ss_y) {
+    img->fmt = VPX_IMG_FMT_I422;
+    img->bps = 16;
+  } else if (!ss_x) {
+    img->fmt = VPX_IMG_FMT_I440;
+    img->bps = 16;
+  } else {
+    img->fmt = VPX_IMG_FMT_I420;
+    img->bps = 12;
+  }
+  // Colorimetry; 8 bits unless overridden below for high bit depth buffers.
+  img->cs = yv12->color_space;
+  img->range = yv12->color_range;
+  img->bit_depth = 8;
+  // Stored size (full luma stride, bordered height), display and render size.
+  img->w = yv12->y_stride;
+  img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VPX_ENC_BORDER_IN_PIXELS, 3);
+  img->d_w = yv12->y_crop_width;
+  img->d_h = yv12->y_crop_height;
+  img->r_w = yv12->render_width;
+  img->r_h = yv12->render_height;
+  img->x_chroma_shift = yv12->subsampling_x;
+  img->y_chroma_shift = yv12->subsampling_y;
+  // Plane pointers and strides for the 8-bit case; there is no alpha plane.
+  img->planes[VPX_PLANE_Y] = yv12->y_buffer;
+  img->planes[VPX_PLANE_U] = yv12->u_buffer;
+  img->planes[VPX_PLANE_V] = yv12->v_buffer;
+  img->planes[VPX_PLANE_ALPHA] = NULL;
+  img->stride[VPX_PLANE_Y] = yv12->y_stride;
+  img->stride[VPX_PLANE_U] = yv12->uv_stride;
+  img->stride[VPX_PLANE_V] = yv12->uv_stride;
+  img->stride[VPX_PLANE_ALPHA] = yv12->y_stride;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) {
+    // vpx_image_t works with byte strides and a pointer to the first byte of
+    // the image, so the pointers are converted and the strides doubled.
+    img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH);
+    img->bit_depth = yv12->bit_depth;
+    img->planes[VPX_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer);
+    img->planes[VPX_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer);
+    img->planes[VPX_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer);
+    img->planes[VPX_PLANE_ALPHA] = NULL;
+    img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride;
+    img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride;
+    img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride;
+    img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  img->user_priv = user_priv;
+  img->img_data = yv12->buffer_alloc;
+  img->img_data_owner = 0;
+  img->self_allocd = 0;
+}
+
+static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
+                                       YV12_BUFFER_CONFIG *yv12) {
+  // Plane pointers are taken as-is; the high bit depth path converts them.
+  yv12->y_buffer = img->planes[VPX_PLANE_Y];
+  yv12->u_buffer = img->planes[VPX_PLANE_U];
+  yv12->v_buffer = img->planes[VPX_PLANE_V];
+
+  // Luma: the display size serves as both the full and the cropped size.
+  yv12->y_width = img->d_w;
+  yv12->y_height = img->d_h;
+  yv12->y_crop_width = img->d_w;
+  yv12->y_crop_height = img->d_h;
+  yv12->render_width = img->r_w;
+  yv12->render_height = img->r_h;
+
+  // Chroma: halved (rounding up) only when the chroma shift is exactly 1.
+  yv12->uv_width = yv12->y_width;
+  if (img->x_chroma_shift == 1) yv12->uv_width = (1 + yv12->y_width) / 2;
+  yv12->uv_height = yv12->y_height;
+  if (img->y_chroma_shift == 1) yv12->uv_height = (1 + yv12->y_height) / 2;
+  yv12->uv_crop_width = yv12->uv_width;
+  yv12->uv_crop_height = yv12->uv_height;
+  yv12->subsampling_x = img->x_chroma_shift;
+  yv12->subsampling_y = img->y_chroma_shift;
+
+  yv12->color_space = img->cs;
+  yv12->color_range = img->range;
+  yv12->y_stride = img->stride[VPX_PLANE_Y];
+  yv12->uv_stride = img->stride[VPX_PLANE_U];
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
+    // vpx_image_t describes high bit depth data in bytes: planes hold the
+    // uint8 address of the first sample and strides count uint8s per row.
+    // YV12_BUFFER_CONFIG works in samples: y_buffer, u_buffer and v_buffer
+    // point to uint16 data, and stride and border count uint16s, so the
+    // address arithmetic in the main body of the code works unchanged.
+    // Before any pixel operation the address has to be cast to a uint16
+    // pointer and its value doubled.
+    yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer);
+    yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer);
+    yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer);
+    yv12->y_stride >>= 1;
+    yv12->uv_stride >>= 1;
+    yv12->flags = YV12_FLAG_HIGHBITDEPTH;
+  } else {
+    yv12->flags = 0;
+  }
+  yv12->border = (yv12->y_stride - img->w) / 2;
+#else
+  yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  return VPX_CODEC_OK;
+}
+
+// Maps a VP8 reference frame type onto the matching VPX_*_FLAG value.
+static VPX_REFFRAME ref_frame_to_vp10_reframe(vpx_ref_frame_type_t frame) {
+  if (frame == VP8_LAST_FRAME) return VPX_LAST_FLAG;
+  if (frame == VP8_GOLD_FRAME) return VPX_GOLD_FLAG;
+  if (frame == VP8_ALTR_FRAME) return VPX_ALT_FLAG;
+  // Any other value trips the assert; without assertions LAST is returned.
+  assert(0 && "Invalid Reference Frame");
+  return VPX_LAST_FLAG;
+}
+#endif // VP10_VP10_IFACE_COMMON_H_
diff --git a/av1/vp10cx.mk b/av1/vp10cx.mk
new file mode 100644
index 0000000..e4d40c8
--- /dev/null
+++ b/av1/vp10cx.mk
@@ -0,0 +1,147 @@
+##
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+VP10_CX_EXPORTS += exports_enc
+# Inherit the common source lists and the lists of sources to remove.
+VP10_CX_SRCS-yes += $(VP10_COMMON_SRCS-yes)
+VP10_CX_SRCS-no += $(VP10_COMMON_SRCS-no)
+VP10_CX_SRCS_REMOVE-yes += $(VP10_COMMON_SRCS_REMOVE-yes)
+VP10_CX_SRCS_REMOVE-no += $(VP10_COMMON_SRCS_REMOVE-no)
+
+VP10_CX_SRCS-yes += vp10_cx_iface.c
+# Files added to VP10_CX_SRCS-$(X) join the -yes list only when X is "yes".
+VP10_CX_SRCS-yes += encoder/bitstream.c
+VP10_CX_SRCS-yes += encoder/bitwriter.h
+VP10_CX_SRCS-yes += encoder/context_tree.c
+VP10_CX_SRCS-yes += encoder/context_tree.h
+VP10_CX_SRCS-yes += encoder/variance_tree.c
+VP10_CX_SRCS-yes += encoder/variance_tree.h
+VP10_CX_SRCS-yes += encoder/cost.h
+VP10_CX_SRCS-yes += encoder/cost.c
+VP10_CX_SRCS-yes += encoder/dct.c
+VP10_CX_SRCS-yes += encoder/hybrid_fwd_txfm.c
+VP10_CX_SRCS-yes += encoder/hybrid_fwd_txfm.h
+VP10_CX_SRCS-yes += encoder/encodeframe.c
+VP10_CX_SRCS-yes += encoder/encodeframe.h
+VP10_CX_SRCS-yes += encoder/encodemb.c
+VP10_CX_SRCS-yes += encoder/encodemv.c
+VP10_CX_SRCS-yes += encoder/ethread.h
+VP10_CX_SRCS-yes += encoder/ethread.c
+VP10_CX_SRCS-yes += encoder/extend.c
+VP10_CX_SRCS-yes += encoder/firstpass.c
+VP10_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += ../third_party/fastfeat/fast.h
+VP10_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += ../third_party/fastfeat/nonmax.c
+VP10_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += ../third_party/fastfeat/fast_9.c
+VP10_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += ../third_party/fastfeat/fast.c
+VP10_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/corner_match.c
+VP10_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/corner_match.h
+VP10_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/corner_detect.c
+VP10_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/corner_detect.h
+VP10_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/global_motion.c
+VP10_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/global_motion.h
+VP10_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/ransac.c
+VP10_CX_SRCS-$(CONFIG_GLOBAL_MOTION) += encoder/ransac.h
+VP10_CX_SRCS-yes += encoder/block.h
+VP10_CX_SRCS-yes += encoder/bitstream.h
+VP10_CX_SRCS-yes += encoder/encodemb.h
+VP10_CX_SRCS-yes += encoder/encodemv.h
+VP10_CX_SRCS-yes += encoder/extend.h
+VP10_CX_SRCS-yes += encoder/firstpass.h
+VP10_CX_SRCS-yes += encoder/lookahead.c
+VP10_CX_SRCS-yes += encoder/lookahead.h
+VP10_CX_SRCS-yes += encoder/mcomp.h
+VP10_CX_SRCS-yes += encoder/encoder.h
+VP10_CX_SRCS-yes += encoder/quantize.h
+VP10_CX_SRCS-yes += encoder/ratectrl.h
+VP10_CX_SRCS-yes += encoder/rd.h
+VP10_CX_SRCS-yes += encoder/rdopt.h
+VP10_CX_SRCS-yes += encoder/tokenize.h
+VP10_CX_SRCS-yes += encoder/treewriter.h
+VP10_CX_SRCS-yes += encoder/mcomp.c
+VP10_CX_SRCS-yes += encoder/encoder.c
+VP10_CX_SRCS-yes += encoder/palette.h
+VP10_CX_SRCS-yes += encoder/palette.c
+VP10_CX_SRCS-yes += encoder/picklpf.c
+VP10_CX_SRCS-yes += encoder/picklpf.h
+VP10_CX_SRCS-$(CONFIG_LOOP_RESTORATION) += encoder/pickrst.c
+VP10_CX_SRCS-$(CONFIG_LOOP_RESTORATION) += encoder/pickrst.h
+VP10_CX_SRCS-yes += encoder/quantize.c
+VP10_CX_SRCS-yes += encoder/ratectrl.c
+VP10_CX_SRCS-yes += encoder/rd.c
+VP10_CX_SRCS-yes += encoder/rdopt.c
+VP10_CX_SRCS-yes += encoder/segmentation.c
+VP10_CX_SRCS-yes += encoder/segmentation.h
+VP10_CX_SRCS-yes += encoder/speed_features.c
+VP10_CX_SRCS-yes += encoder/speed_features.h
+VP10_CX_SRCS-yes += encoder/subexp.c
+VP10_CX_SRCS-yes += encoder/subexp.h
+VP10_CX_SRCS-yes += encoder/resize.c
+VP10_CX_SRCS-yes += encoder/resize.h
+VP10_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/blockiness.c
+VP10_CX_SRCS-$(CONFIG_ANS) += encoder/buf_ans.h
+VP10_CX_SRCS-$(CONFIG_ANS) += encoder/buf_ans.c
+
+VP10_CX_SRCS-yes += encoder/tokenize.c
+VP10_CX_SRCS-yes += encoder/treewriter.c
+VP10_CX_SRCS-yes += encoder/aq_variance.c
+VP10_CX_SRCS-yes += encoder/aq_variance.h
+VP10_CX_SRCS-yes += encoder/aq_cyclicrefresh.c
+VP10_CX_SRCS-yes += encoder/aq_cyclicrefresh.h
+VP10_CX_SRCS-yes += encoder/aq_complexity.c
+VP10_CX_SRCS-yes += encoder/aq_complexity.h
+VP10_CX_SRCS-yes += encoder/temporal_filter.c
+VP10_CX_SRCS-yes += encoder/temporal_filter.h
+VP10_CX_SRCS-yes += encoder/mbgraph.c
+VP10_CX_SRCS-yes += encoder/mbgraph.h
+ifeq ($(CONFIG_DERING),yes)
+VP10_CX_SRCS-yes += encoder/pickdering.c
+endif
+VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
+VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/highbd_block_error_intrin_sse2.c
+endif
+
+VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
+VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/error_sse2.asm
+
+ifeq ($(ARCH_X86_64),yes)
+VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3_x86_64.asm
+endif
+
+VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c
+VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP10_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c
+VP10_CX_SRCS-$(HAVE_SSE4_1) += common/x86/highbd_inv_txfm_sse4.c
+VP10_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp10_highbd_quantize_sse4.c
+endif
+# The wedge utilities are added only when CONFIG_EXT_INTER is yes.
+ifeq ($(CONFIG_EXT_INTER),yes)
+VP10_CX_SRCS-yes += encoder/wedge_utils.c
+VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/wedge_utils_sse2.c
+endif
+
+VP10_CX_SRCS-$(HAVE_AVX2) += encoder/x86/error_intrin_avx2.c
+# The NEON DCT and error kernels are left out of high bit depth builds.
+ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP10_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/dct_neon.c
+VP10_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/error_neon.c
+endif
+VP10_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/quantize_neon.c
+
+VP10_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/error_msa.c
+VP10_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct4x4_msa.c
+VP10_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct8x8_msa.c
+VP10_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct16x16_msa.c
+VP10_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct_msa.h
+VP10_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/temporal_filter_msa.c
+# Remove every file named in VP10_CX_SRCS_REMOVE-yes from the final list.
+VP10_CX_SRCS-yes := $(filter-out $(VP10_CX_SRCS_REMOVE-yes),$(VP10_CX_SRCS-yes))
diff --git a/av1/vp10dx.mk b/av1/vp10dx.mk
new file mode 100644
index 0000000..ae68475
--- /dev/null
+++ b/av1/vp10dx.mk
@@ -0,0 +1,34 @@
+##
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+VP10_DX_EXPORTS += exports_dec
+# Inherit the common source lists and the lists of sources to remove.
+VP10_DX_SRCS-yes += $(VP10_COMMON_SRCS-yes)
+VP10_DX_SRCS-no += $(VP10_COMMON_SRCS-no)
+VP10_DX_SRCS_REMOVE-yes += $(VP10_COMMON_SRCS_REMOVE-yes)
+VP10_DX_SRCS_REMOVE-no += $(VP10_COMMON_SRCS_REMOVE-no)
+
+VP10_DX_SRCS-yes += vp10_dx_iface.c
+# Sources under decoder/, added unconditionally.
+VP10_DX_SRCS-yes += decoder/decodemv.c
+VP10_DX_SRCS-yes += decoder/decodeframe.c
+VP10_DX_SRCS-yes += decoder/decodeframe.h
+VP10_DX_SRCS-yes += decoder/detokenize.c
+VP10_DX_SRCS-yes += decoder/decodemv.h
+VP10_DX_SRCS-yes += decoder/detokenize.h
+VP10_DX_SRCS-yes += decoder/dthread.c
+VP10_DX_SRCS-yes += decoder/dthread.h
+VP10_DX_SRCS-yes += decoder/decoder.c
+VP10_DX_SRCS-yes += decoder/decoder.h
+VP10_DX_SRCS-yes += decoder/dsubexp.c
+VP10_DX_SRCS-yes += decoder/dsubexp.h
+VP10_DX_SRCS-yes += decoder/bitreader.h
+# Remove every file named in VP10_DX_SRCS_REMOVE-yes from the final list.
+VP10_DX_SRCS-yes := $(filter-out $(VP10_DX_SRCS_REMOVE-yes),$(VP10_DX_SRCS-yes))